Commit 3d14c5d2 authored by Yehuda Sadeh, committed by Sage Weil

ceph: factor out libceph from Ceph file system

This factors out protocol and low-level storage parts of ceph into a
separate libceph module living in net/ceph and include/linux/ceph.  This
is mostly a matter of moving files around.  However, a few key pieces
of the interface change as well:

 - ceph_client becomes ceph_fs_client and ceph_client, where the latter
   captures the mon and osd clients, and the fs_client gets the mds client
   and file system specific pieces.
 - Mount option parsing and debugfs setup are correspondingly broken into
   two pieces.
 - The mon client gets a generic handler callback for otherwise unknown
   messages (mds map, in this case).
 - The basic supported/required feature bits can be expanded (and are by
   ceph_fs_client).

No functional change, aside from some subtle error handling cases that got
cleaned up in the refactoring process.
Signed-off-by: Sage Weil <sage@newdream.net>
parent ae1533b6
......@@ -1527,6 +1527,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
S: Supported
F: Documentation/filesystems/ceph.txt
F: fs/ceph
F: net/ceph
F: include/linux/ceph
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
M: David Vrabel <david.vrabel@csr.com>
......
config CEPH_FS
tristate "Ceph distributed file system (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
select CEPH_LIB
select LIBCRC32C
select CRYPTO_AES
select CRYPTO
default n
help
Choose Y or M here to include support for mounting the
experimental Ceph distributed file system. Ceph is an extremely
......@@ -14,15 +16,3 @@ config CEPH_FS
If unsure, say N.
config CEPH_FS_PRETTYDEBUG
bool "Include file:line in ceph debug output"
depends on CEPH_FS
default n
help
If you say Y here, debug output will include a filename and
line to aid debugging. This increases kernel size and slows
execution slightly when debug call sites are enabled (e.g.,
via CONFIG_DYNAMIC_DEBUG).
If unsure, say N.
......@@ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o
ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
export.o caps.o snap.o xattr.o \
messenger.o msgpool.o buffer.o pagelist.o \
mds_client.o mdsmap.o \
mon_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
debugfs.o \
auth.o auth_none.o \
crypto.o armor.o \
auth_x.o \
ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o
mds_client.o mdsmap.o strings.o ceph_frag.o \
debugfs.o
else
#Otherwise we were called directly from the command
......
#
# The following files are shared by (and manually synchronized
# between) the Ceph userland and kernel client.
#
# userland kernel
src/include/ceph_fs.h fs/ceph/ceph_fs.h
src/include/ceph_fs.cc fs/ceph/ceph_fs.c
src/include/msgr.h fs/ceph/msgr.h
src/include/rados.h fs/ceph/rados.h
src/include/ceph_strings.cc fs/ceph/ceph_strings.c
src/include/ceph_frag.h fs/ceph/ceph_frag.h
src/include/ceph_frag.cc fs/ceph/ceph_frag.c
src/include/ceph_hash.h fs/ceph/ceph_hash.h
src/include/ceph_hash.cc fs/ceph/ceph_hash.c
src/crush/crush.c fs/ceph/crush/crush.c
src/crush/crush.h fs/ceph/crush/crush.h
src/crush/mapper.c fs/ceph/crush/mapper.c
src/crush/mapper.h fs/ceph/crush/mapper.h
src/crush/hash.h fs/ceph/crush/hash.h
src/crush/hash.c fs/ceph/crush/hash.c
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
......@@ -10,7 +10,8 @@
#include <linux/task_io_accounting_ops.h>
#include "super.h"
#include "osd_client.h"
#include "mds_client.h"
#include <linux/ceph/osd_client.h>
/*
* Ceph address space ops.
......@@ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page)
{
struct inode *inode = filp->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
int err = 0;
u64 len = PAGE_CACHE_SIZE;
......@@ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
int rc = 0;
struct page **pages;
loff_t offset;
......@@ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
{
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_client *client;
struct ceph_fs_client *fsc;
struct ceph_osd_client *osdc;
loff_t page_off = page->index << PAGE_CACHE_SHIFT;
int len = PAGE_CACHE_SIZE;
......@@ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
}
inode = page->mapping->host;
ci = ceph_inode(inode);
client = ceph_inode_to_client(inode);
osdc = &client->osdc;
fsc = ceph_inode_to_client(inode);
osdc = &fsc->client->osdc;
/* verify this is a writeable snap context */
snapc = (void *)page->private;
......@@ -414,10 +417,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
inode, page, page->index, page_off, len, snapc);
writeback_stat = atomic_long_inc_return(&client->writeback_count);
writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
if (writeback_stat >
CONGESTION_ON_THRESH(client->mount_args->congestion_kb))
set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC);
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
set_page_writeback(page);
err = ceph_osdc_writepages(osdc, ceph_vino(inode),
......@@ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req,
struct address_space *mapping = inode->i_mapping;
__s32 rc = -EIO;
u64 bytes = 0;
struct ceph_client *client = ceph_inode_to_client(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
long writeback_stat;
unsigned issued = ceph_caps_issued(ci);
......@@ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req,
WARN_ON(!PageUptodate(page));
writeback_stat =
atomic_long_dec_return(&client->writeback_count);
atomic_long_dec_return(&fsc->writeback_count);
if (writeback_stat <
CONGESTION_OFF_THRESH(client->mount_args->congestion_kb))
clear_bdi_congested(&client->backing_dev_info,
CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
clear_bdi_congested(&fsc->backing_dev_info,
BLK_RW_ASYNC);
ceph_put_snap_context((void *)page->private);
......@@ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req,
* mempool. we avoid the mempool if we can because req->r_num_pages
* may be less than the maximum write size.
*/
static void alloc_page_vec(struct ceph_client *client,
static void alloc_page_vec(struct ceph_fs_client *fsc,
struct ceph_osd_request *req)
{
req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages,
GFP_NOFS);
if (!req->r_pages) {
req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS);
req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS);
req->r_pages_from_pool = 1;
WARN_ON(!req->r_pages);
}
......@@ -590,7 +593,7 @@ static int ceph_writepages_start(struct address_space *mapping,
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = mapping->backing_dev_info;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_client *client;
struct ceph_fs_client *fsc;
pgoff_t index, start, end;
int range_whole = 0;
int should_loop = 1;
......@@ -617,13 +620,13 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
client = ceph_inode_to_client(inode);
if (client->mount_state == CEPH_MOUNT_SHUTDOWN) {
fsc = ceph_inode_to_client(inode);
if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
pr_warning("writepage_start %p on forced umount\n", inode);
return -EIO; /* we're in a forced umount, don't write! */
}
if (client->mount_args->wsize && client->mount_args->wsize < wsize)
wsize = client->mount_args->wsize;
if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
wsize = fsc->mount_options->wsize;
if (wsize < PAGE_CACHE_SIZE)
wsize = PAGE_CACHE_SIZE;
max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
......@@ -769,7 +772,7 @@ static int ceph_writepages_start(struct address_space *mapping,
offset = (unsigned long long)page->index
<< PAGE_CACHE_SHIFT;
len = wsize;
req = ceph_osdc_new_request(&client->osdc,
req = ceph_osdc_new_request(&fsc->client->osdc,
&ci->i_layout,
ceph_vino(inode),
offset, &len,
......@@ -782,7 +785,7 @@ static int ceph_writepages_start(struct address_space *mapping,
&inode->i_mtime, true, 1);
max_pages = req->r_num_pages;
alloc_page_vec(client, req);
alloc_page_vec(fsc, req);
req->r_callback = writepages_finish;
req->r_inode = inode;
}
......@@ -794,10 +797,10 @@ static int ceph_writepages_start(struct address_space *mapping,
inode, page, page->index);
writeback_stat =
atomic_long_inc_return(&client->writeback_count);
atomic_long_inc_return(&fsc->writeback_count);
if (writeback_stat > CONGESTION_ON_THRESH(
client->mount_args->congestion_kb)) {
set_bdi_congested(&client->backing_dev_info,
fsc->mount_options->congestion_kb)) {
set_bdi_congested(&fsc->backing_dev_info,
BLK_RW_ASYNC);
}
......@@ -846,7 +849,7 @@ static int ceph_writepages_start(struct address_space *mapping,
op->payload_len = cpu_to_le32(len);
req->r_request->hdr.data_len = cpu_to_le32(len);
ceph_osdc_start_request(&client->osdc, req, true);
ceph_osdc_start_request(&fsc->client->osdc, req, true);
req = NULL;
/* continue? */
......@@ -915,7 +918,7 @@ static int ceph_update_writeable_page(struct file *file,
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
loff_t page_off = pos & PAGE_CACHE_MASK;
int pos_in_page = pos & ~PAGE_CACHE_MASK;
int end_in_page = pos_in_page + len;
......@@ -1053,8 +1056,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_client *client = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
unsigned from = pos & (PAGE_CACHE_SIZE - 1);
int check_cap = 0;
......@@ -1123,7 +1126,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct inode *inode = vma->vm_file->f_dentry->d_inode;
struct page *page = vmf->page;
struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
loff_t off = page->index << PAGE_CACHE_SHIFT;
loff_t size, len;
int ret;
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/fs.h>
#include <linux/kernel.h>
......@@ -9,8 +9,9 @@
#include <linux/writeback.h>
#include "super.h"
#include "decode.h"
#include "messenger.h"
#include "mds_client.h"
#include <linux/ceph/decode.h>
#include <linux/ceph/messenger.h>
/*
* Capability management
......@@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
spin_unlock(&mdsc->caps_list_lock);
}
void ceph_reservation_status(struct ceph_client *client,
void ceph_reservation_status(struct ceph_fs_client *fsc,
int *total, int *avail, int *used, int *reserved,
int *min)
{
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_mds_client *mdsc = fsc->mdsc;
if (total)
*total = mdsc->caps_total_count;
......@@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
struct ceph_inode_info *ci)
{
struct ceph_mount_args *ma = mdsc->client->mount_args;
struct ceph_mount_options *ma = mdsc->fsc->mount_options;
ci->i_hold_caps_min = round_jiffies(jiffies +
ma->caps_wanted_delay_min * HZ);
......@@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode,
unsigned seq, unsigned mseq, u64 realmino, int flags,
struct ceph_cap_reservation *caps_reservation)
{
struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *new_cap = NULL;
struct ceph_cap *cap;
......@@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap)
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc =
&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
int removed = 0;
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
......@@ -1210,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
int mds;
struct ceph_cap_snap *capsnap;
u32 mseq;
struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
session->s_mutex */
u64 next_follows = 0; /* keep track of how far we've gotten through the
......@@ -1336,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
{
struct ceph_mds_client *mdsc =
&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct inode *inode = &ci->vfs_inode;
int was = ci->i_dirty_caps;
int dirty = 0;
......@@ -1378,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
static int __mark_caps_flushing(struct inode *inode,
struct ceph_mds_session *session)
{
struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int flushing;
......@@ -1462,8 +1463,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session)
{
struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = &ci->vfs_inode;
struct ceph_cap *cap;
int file_wanted, used;
......@@ -1706,7 +1707,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
unsigned *flush_tid)
{
struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int unlock_session = session ? 0 : 1;
int flushing = 0;
......@@ -1872,7 +1873,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
caps_are_flushed(inode, flush_tid));
} else {
struct ceph_mds_client *mdsc =
&ceph_sb_to_client(inode->i_sb)->mdsc;
ceph_sb_to_client(inode->i_sb)->mdsc;
spin_lock(&inode->i_lock);
if (__ceph_caps_dirty(ci))
......@@ -2465,7 +2466,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
__releases(inode->i_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
unsigned seq = le32_to_cpu(m->seq);
int dirty = le32_to_cpu(m->dirty);
int cleaned = 0;
......@@ -2713,7 +2714,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_msg *msg)
{
struct ceph_mds_client *mdsc = session->s_mdsc;
struct super_block *sb = mdsc->client->sb;
struct super_block *sb = mdsc->fsc->sb;
struct inode *inode;
struct ceph_cap *cap;
struct ceph_mds_caps *h;
......
/*
* Ceph 'frag' type
*/
#include "types.h"
#include <linux/module.h>
#include <linux/ceph/types.h>
int ceph_frag_compare(__u32 a, __u32 b)
{
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/device.h>
#include <linux/slab.h>
......@@ -7,143 +7,48 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "super.h"
#include "mds_client.h"
#include "mon_client.h"
#include "auth.h"
#include <linux/ceph/libceph.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
#ifdef CONFIG_DEBUG_FS
/*
* Implement /sys/kernel/debug/ceph fun
*
* /sys/kernel/debug/ceph/client* - an instance of the ceph client
* .../osdmap - current osdmap
* .../mdsmap - current mdsmap
* .../monmap - current monmap
* .../osdc - active osd requests
* .../mdsc - active mds requests
* .../monc - mon client state
* .../dentry_lru - dump contents of dentry lru
* .../caps - expose cap (reservation) stats
* .../bdi - symlink to ../../bdi/something
*/
static struct dentry *ceph_debugfs_dir;
static int monmap_show(struct seq_file *s, void *p)
{
int i;
struct ceph_client *client = s->private;
if (client->monc.monmap == NULL)
return 0;
seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
for (i = 0; i < client->monc.monmap->num_mon; i++) {
struct ceph_entity_inst *inst =
&client->monc.monmap->mon_inst[i];
seq_printf(s, "\t%s%lld\t%s\n",
ENTITY_NAME(inst->name),
pr_addr(&inst->addr.in_addr));
}
return 0;
}
#include "super.h"
#include "mds_client.h"
static int mdsmap_show(struct seq_file *s, void *p)
{
int i;
struct ceph_client *client = s->private;
struct ceph_fs_client *fsc = s->private;
if (client->mdsc.mdsmap == NULL)
if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
return 0;
seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch);
seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root);
seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
seq_printf(s, "session_timeout %d\n",
client->mdsc.mdsmap->m_session_timeout);
fsc->mdsc->mdsmap->m_session_timeout);
seq_printf(s, "session_autoclose %d\n",
client->mdsc.mdsmap->m_session_autoclose);
for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) {
fsc->mdsc->mdsmap->m_session_autoclose);
for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
struct ceph_entity_addr *addr =
&client->mdsc.mdsmap->m_info[i].addr;
int state = client->mdsc.mdsmap->m_info[i].state;
&fsc->mdsc->mdsmap->m_info[i].addr;
int state = fsc->mdsc->mdsmap->m_info[i].state;
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr),
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
ceph_pr_addr(&addr->in_addr),
ceph_mds_state_name(state));
}
return 0;
}
static int osdmap_show(struct seq_file *s, void *p)
{
int i;
struct ceph_client *client = s->private;
struct rb_node *n;
if (client->osdc.osdmap == NULL)
return 0;
seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
seq_printf(s, "flags%s%s\n",
(client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
" NEARFULL" : "",
(client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
" FULL" : "");
for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
struct ceph_pg_pool_info *pool =
rb_entry(n, struct ceph_pg_pool_info, node);
seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
pool->id, pool->v.pg_num, pool->pg_num_mask,
pool->v.lpg_num, pool->lpg_num_mask);
}
for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
struct ceph_entity_addr *addr =
&client->osdc.osdmap->osd_addr[i];
int state = client->osdc.osdmap->osd_state[i];
char sb[64];
seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
i, pr_addr(&addr->in_addr),
((client->osdc.osdmap->osd_weight[i]*100) >> 16),
ceph_osdmap_state_str(sb, sizeof(sb), state));
}
return 0;
}
static int monc_show(struct seq_file *s, void *p)
{
struct ceph_client *client = s->private;
struct ceph_mon_generic_request *req;
struct ceph_mon_client *monc = &client->monc;
struct rb_node *rp;
mutex_lock(&monc->mutex);
if (monc->have_mdsmap)
seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
if (monc->have_osdmap)
seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
if (monc->want_next_osdmap)
seq_printf(s, "want next osdmap\n");
for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
__u16 op;
req = rb_entry(rp, struct ceph_mon_generic_request, node);
op = le16_to_cpu(req->request->hdr.type);
if (op == CEPH_MSG_STATFS)
seq_printf(s, "%lld statfs\n", req->tid);
else
seq_printf(s, "%lld unknown\n", req->tid);
}
mutex_unlock(&monc->mutex);
return 0;
}
/*
* mdsc debugfs
*/
static int mdsc_show(struct seq_file *s, void *p)
{
struct ceph_client *client = s->private;
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct rb_node *rp;
int pathlen;
......@@ -214,61 +119,12 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
static int osdc_show(struct seq_file *s, void *pp)
{
struct ceph_client *client = s->private;
struct ceph_osd_client *osdc = &client->osdc;
struct rb_node *p;
mutex_lock(&osdc->request_mutex);
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
struct ceph_osd_request *req;
struct ceph_osd_request_head *head;
struct ceph_osd_op *op;
int num_ops;
int opcode, olen;
int i;
req = rb_entry(p, struct ceph_osd_request, r_node);
seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1,
le32_to_cpu(req->r_pgid.pool),
le16_to_cpu(req->r_pgid.ps));
head = req->r_request->front.iov_base;
op = (void *)(head + 1);
num_ops = le16_to_cpu(head->num_ops);
olen = le32_to_cpu(head->object_len);
seq_printf(s, "%.*s", olen,
(const char *)(head->ops + num_ops));
if (req->r_reassert_version.epoch)
seq_printf(s, "\t%u'%llu",
(unsigned)le32_to_cpu(req->r_reassert_version.epoch),
le64_to_cpu(req->r_reassert_version.version));
else
seq_printf(s, "\t");
for (i = 0; i < num_ops; i++) {
opcode = le16_to_cpu(op->op);
seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
op++;
}
seq_printf(s, "\n");
}
mutex_unlock(&osdc->request_mutex);
return 0;
}
static int caps_show(struct seq_file *s, void *p)
{
struct ceph_client *client = s->private;
struct ceph_fs_client *fsc = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
seq_printf(s, "total\t\t%d\n"
"avail\t\t%d\n"
"used\t\t%d\n"
......@@ -280,8 +136,8 @@ static int caps_show(struct seq_file *s, void *p)
static int dentry_lru_show(struct seq_file *s, void *ptr)
{
struct ceph_client *client = s->private;
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_dentry_info *di;
spin_lock(&mdsc->dentry_lru_lock);
......@@ -295,199 +151,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
return 0;
}
#define DEFINE_SHOW_FUNC(name) \
static int name##_open(struct inode *inode, struct file *file) \
{ \
struct seq_file *sf; \
int ret; \
\
ret = single_open(file, name, NULL); \
sf = file->private_data; \
sf->private = inode->i_private; \
return ret; \
} \
\
static const struct file_operations name##_fops = { \
.open = name##_open, \
.read = seq_read, \
.llseek = seq_lseek, \
.release = single_release, \
};
DEFINE_SHOW_FUNC(monmap_show)
DEFINE_SHOW_FUNC(mdsmap_show)
DEFINE_SHOW_FUNC(osdmap_show)
DEFINE_SHOW_FUNC(monc_show)
DEFINE_SHOW_FUNC(mdsc_show)
DEFINE_SHOW_FUNC(osdc_show)
DEFINE_SHOW_FUNC(dentry_lru_show)
DEFINE_SHOW_FUNC(caps_show)
CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
CEPH_DEFINE_SHOW_FUNC(mdsc_show)
CEPH_DEFINE_SHOW_FUNC(caps_show)
CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
/*
* debugfs
*/
static int congestion_kb_set(void *data, u64 val)
{
struct ceph_client *client = (struct ceph_client *)data;
if (client)
client->mount_args->congestion_kb = (int)val;
struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
fsc->mount_options->congestion_kb = (int)val;
return 0;
}
static int congestion_kb_get(void *data, u64 *val)
{
struct ceph_client *client = (struct ceph_client *)data;
if (client)
*val = (u64)client->mount_args->congestion_kb;
struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
*val = (u64)fsc->mount_options->congestion_kb;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
congestion_kb_set, "%llu\n");
int __init ceph_debugfs_init(void)
{
ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
if (!ceph_debugfs_dir)
return -ENOMEM;
return 0;
}
void ceph_debugfs_cleanup(void)
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
debugfs_remove(ceph_debugfs_dir);
dout("ceph_fs_debugfs_cleanup\n");
debugfs_remove(fsc->debugfs_bdi);
debugfs_remove(fsc->debugfs_congestion_kb);
debugfs_remove(fsc->debugfs_mdsmap);
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_mdsc);
debugfs_remove(fsc->debugfs_dentry_lru);
}
int ceph_debugfs_client_init(struct ceph_client *client)
int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
int ret = 0;
char name[80];
snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
client->monc.auth->global_id);
client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
if (!client->debugfs_dir)
goto out;
char name[100];
int err = -ENOMEM;
client->monc.debugfs_file = debugfs_create_file("monc",
0600,
client->debugfs_dir,
client,
&monc_show_fops);
if (!client->monc.debugfs_file)
goto out;
client->mdsc.debugfs_file = debugfs_create_file("mdsc",
0600,
client->debugfs_dir,
client,
&mdsc_show_fops);
if (!client->mdsc.debugfs_file)
dout("ceph_fs_debugfs_init\n");
fsc->debugfs_congestion_kb =
debugfs_create_file("writeback_congestion_kb",
0600,
fsc->client->debugfs_dir,
fsc,
&congestion_kb_fops);
if (!fsc->debugfs_congestion_kb)
goto out;
client->osdc.debugfs_file = debugfs_create_file("osdc",
0600,
client->debugfs_dir,
client,
&osdc_show_fops);
if (!client->osdc.debugfs_file)
goto out;
dout("a\n");
client->debugfs_monmap = debugfs_create_file("monmap",
0600,
client->debugfs_dir,
client,
&monmap_show_fops);
if (!client->debugfs_monmap)
snprintf(name, sizeof(name), "../../bdi/%s",
dev_name(fsc->backing_dev_info.dev));
fsc->debugfs_bdi =
debugfs_create_symlink("bdi",
fsc->client->debugfs_dir,
name);
if (!fsc->debugfs_bdi)
goto out;
client->debugfs_mdsmap = debugfs_create_file("mdsmap",
dout("b\n");
fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
0600,
client->debugfs_dir,
client,
fsc->client->debugfs_dir,
fsc,
&mdsmap_show_fops);
if (!client->debugfs_mdsmap)
if (!fsc->debugfs_mdsmap)
goto out;
client->debugfs_osdmap = debugfs_create_file("osdmap",
0600,
client->debugfs_dir,
client,
&osdmap_show_fops);
if (!client->debugfs_osdmap)
dout("ca\n");
fsc->debugfs_mdsc = debugfs_create_file("mdsc",
0600,
fsc->client->debugfs_dir,
fsc,
&mdsc_show_fops);
if (!fsc->debugfs_mdsc)
goto out;
client->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
0600,
client->debugfs_dir,
client,
&dentry_lru_show_fops);
if (!client->debugfs_dentry_lru)
goto out;
client->debugfs_caps = debugfs_create_file("caps",
dout("da\n");
fsc->debugfs_caps = debugfs_create_file("caps",
0400,
client->debugfs_dir,
client,
fsc->client->debugfs_dir,
fsc,
&caps_show_fops);
if (!client->debugfs_caps)
if (!fsc->debugfs_caps)
goto out;
client->debugfs_congestion_kb =
debugfs_create_file("writeback_congestion_kb",
0600,
client->debugfs_dir,
client,
&congestion_kb_fops);
if (!client->debugfs_congestion_kb)
dout("ea\n");
fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
0600,
fsc->client->debugfs_dir,
fsc,
&dentry_lru_show_fops);
if (!fsc->debugfs_dentry_lru)
goto out;
sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev));
client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir,
name);
return 0;
out:
ceph_debugfs_client_cleanup(client);
return ret;
ceph_fs_debugfs_cleanup(fsc);
return err;
}
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
debugfs_remove(client->debugfs_bdi);
debugfs_remove(client->debugfs_caps);
debugfs_remove(client->debugfs_dentry_lru);
debugfs_remove(client->debugfs_osdmap);
debugfs_remove(client->debugfs_mdsmap);
debugfs_remove(client->debugfs_monmap);
debugfs_remove(client->osdc.debugfs_file);
debugfs_remove(client->mdsc.debugfs_file);
debugfs_remove(client->monc.debugfs_file);
debugfs_remove(client->debugfs_congestion_kb);
debugfs_remove(client->debugfs_dir);
}
#else /* CONFIG_DEBUG_FS */
int __init ceph_debugfs_init(void)
{
return 0;
}
void ceph_debugfs_cleanup(void)
{
}
int ceph_debugfs_client_init(struct ceph_client *client)
int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
return 0;
}
void ceph_debugfs_client_cleanup(struct ceph_client *client)
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
}
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/spinlock.h>
#include <linux/fs_struct.h>
......@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include "super.h"
#include "mds_client.h"
/*
* Directory operations: readdir, lookup, create, link, unlink,
......@@ -227,15 +228,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct ceph_file_info *fi = filp->private_data;
struct inode *inode = filp->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_client *client = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
unsigned frag = fpos_frag(filp->f_pos);
int off = fpos_off(filp->f_pos);
int err;
u32 ftype;
struct ceph_mds_reply_info_parsed *rinfo;
const int max_entries = client->mount_args->max_readdir;
const int max_bytes = client->mount_args->max_readdir_bytes;
const int max_entries = fsc->mount_options->max_readdir;
const int max_bytes = fsc->mount_options->max_readdir_bytes;
dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
if (fi->at_end)
......@@ -267,7 +268,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
/* can we use the dcache? */
spin_lock(&inode->i_lock);
if ((filp->f_pos == 2 || fi->dentry) &&
!ceph_test_opt(client, NOASYNCREADDIR) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
......@@ -487,14 +488,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err)
{
struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *parent = dentry->d_parent->d_inode;
/* .snap dir? */
if (err == -ENOENT &&
ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */
strcmp(dentry->d_name.name,
client->mount_args->snapdir_name) == 0) {
fsc->mount_options->snapdir_name) == 0) {
struct inode *inode = ceph_get_snapdir(parent);
dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
dentry, dentry->d_name.len, dentry->d_name.name, inode);
......@@ -539,8 +540,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int op;
int err;
......@@ -572,7 +573,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
spin_lock(&dir->i_lock);
dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
if (strncmp(dentry->d_name.name,
client->mount_args->snapdir_name,
fsc->mount_options->snapdir_name,
dentry->d_name.len) &&
!is_root_ceph_dentry(dir, dentry) &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
......@@ -629,8 +630,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
static int ceph_mknod(struct inode *dir, struct dentry *dentry,
int mode, dev_t rdev)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int err;
......@@ -685,8 +686,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
static int ceph_symlink(struct inode *dir, struct dentry *dentry,
const char *dest)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int err;
......@@ -716,8 +717,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int err = -EROFS;
int op;
......@@ -758,8 +759,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
static int ceph_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int err;
......@@ -813,8 +814,8 @@ static int drop_caps_for_unlink(struct inode *inode)
*/
static int ceph_unlink(struct inode *dir, struct dentry *dentry)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = dentry->d_inode;
struct ceph_mds_request *req;
int err = -EROFS;
......@@ -854,8 +855,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int err;
......@@ -1076,7 +1077,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
struct ceph_inode_info *ci = ceph_inode(inode);
int left;
if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
return -EISDIR;
if (!cf->dir_info) {
......@@ -1177,7 +1178,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
dn->d_name.len, dn->d_name.name);
if (di) {
mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_add_tail(&di->lru, &mdsc->dentry_lru);
mdsc->num_dentry++;
......@@ -1193,7 +1194,7 @@ void ceph_dentry_lru_touch(struct dentry *dn)
dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
dn->d_name.len, dn->d_name.name, di->offset);
if (di) {
mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_move_tail(&di->lru, &mdsc->dentry_lru);
spin_unlock(&mdsc->dentry_lru_lock);
......@@ -1208,7 +1209,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
dn->d_name.len, dn->d_name.name);
if (di) {
mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
spin_lock(&mdsc->dentry_lru_lock);
list_del_init(&di->lru);
mdsc->num_dentry--;
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/exportfs.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
#include "super.h"
#include "mds_client.h"
/*
* NFS export support
......@@ -120,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
static struct dentry *__cfh_to_dentry(struct super_block *sb,
struct ceph_nfs_confh *cfh)
{
struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
struct dentry *dentry;
struct ceph_vino vino;
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/file.h>
......@@ -38,8 +39,8 @@
static struct ceph_mds_request *
prepare_open_request(struct super_block *sb, int flags, int create_mode)
{
struct ceph_client *client = ceph_sb_to_client(sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int want_auth = USE_ANY_MDS;
int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
......@@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
int ceph_open(struct inode *inode, struct file *file)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_file_info *cf = file->private_data;
struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
......@@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
struct nameidata *nd, int mode,
int locked_dir)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct file *file = nd->intent.open.file;
struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
struct ceph_mds_request *req;
......@@ -269,163 +270,6 @@ int ceph_release(struct inode *inode, struct file *file)
return 0;
}
/*
 * Build a vector of pinned user pages.
 *
 * Allocates an array of @num_pages page pointers and asks get_user_pages()
 * to pin the user pages starting at address @data into it.  @off and @len
 * are part of the signature but are not consulted here.
 *
 * Returns the page array on success.  On failure returns an ERR_PTR():
 *   -ENOMEM  the pointer array could not be allocated
 *   -EFAULT  fewer than @num_pages pages could be pinned
 *   other    the negative value returned by get_user_pages()
 * On every failure path the pages that were pinned are released again and
 * the array is freed, so the caller owns nothing.
 */
static struct page **get_direct_page_vector(const char __user *data,
					    int num_pages,
					    loff_t off, size_t len)
{
	struct page **pages;
	int got;
	int rc;

	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	down_read(&current->mm->mmap_sem);
	rc = get_user_pages(current, current->mm, (unsigned long)data,
			    num_pages, 0, 0, pages, NULL);
	up_read(&current->mm->mmap_sem);

	got = rc < 0 ? 0 : rc;
	if (got == num_pages)
		return pages;

	/*
	 * Either an outright error or a short pin.  A short pin leaves
	 * pages[got..num_pages-1] unset, and handing that array back would
	 * make the later put_page() pass operate on garbage pointers, so
	 * treat it as a fault and undo the partial work.
	 */
	if (rc >= 0)
		rc = -EFAULT;
	while (got > 0)
		put_page(pages[--got]);
	kfree(pages);
	return ERR_PTR(rc);
}
/*
 * Drop one reference on each of the first @num_pages entries of @pages,
 * then free the pointer array itself.
 */
static void put_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		put_page(pages[idx]);
		idx++;
	}

	kfree(pages);
}
/*
 * Free the first @num_pages pages of @pages as order-0 allocations, then
 * free the pointer array itself.
 */
void ceph_release_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		__free_pages(pages[idx], 0);
		idx++;
	}

	kfree(pages);
}
/*
 * Allocate a vector of @num_pages newly allocated pages.
 *
 * Both the pointer array and the pages are allocated with @flags.
 * Returns the array, or ERR_PTR(-ENOMEM) if any allocation fails; in that
 * case the pages obtained so far and the array are released first.
 */
static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
{
	struct page **vec;
	int filled = 0;

	vec = kmalloc(sizeof(*vec) * num_pages, flags);
	if (!vec)
		return ERR_PTR(-ENOMEM);

	while (filled < num_pages) {
		vec[filled] = __page_cache_alloc(flags);
		if (vec[filled] == NULL) {
			/* only the first 'filled' slots hold real pages */
			ceph_release_page_vector(vec, filled);
			return ERR_PTR(-ENOMEM);
		}
		filled++;
	}

	return vec;
}
/*
 * Copy @len bytes from the user buffer @data into the page vector @pages.
 *
 * The first byte lands at the in-page offset of @off within pages[0];
 * copying moves on to the next page each time a page boundary is reached.
 * A partially successful copy_from_user() is resumed from where it
 * stopped.
 *
 * Returns @len on success, or -EFAULT if a copy attempt transfers nothing.
 */
static int copy_user_to_page_vector(struct page **pages,
				    const char __user *data,
				    loff_t off, size_t len)
{
	int pg = 0;
	int pg_off = off & ~PAGE_CACHE_MASK;
	int remaining = len;

	while (remaining > 0) {
		int chunk = min_t(int, PAGE_CACHE_SIZE-pg_off, remaining);
		int missed = copy_from_user(page_address(pages[pg]) + pg_off,
					    data, chunk);
		int done = chunk - missed;

		/* nothing at all was copied: give up */
		if (missed == chunk)
			return -EFAULT;

		data += done;
		remaining -= done;
		pg_off += done;

		if (pg_off != PAGE_CACHE_SIZE)
			continue;

		/* current page is full, continue at the start of the next */
		pg_off = 0;
		pg++;
	}

	return len;
}
/*
 * Copy @len bytes from the page vector @pages to the user pointer @data.
 *
 * The first byte is taken from the in-page offset of @off within
 * pages[0].  The source only moves on to the next page once the current
 * page has been consumed up to its end, so a partially successful
 * copy_to_user() resumes at the correct byte of the same page instead of
 * skipping the remainder of that page.
 *
 * Returns @len on success, or -EFAULT if a copy attempt transfers nothing.
 */
static int copy_page_vector_to_user(struct page **pages, char __user *data,
				    loff_t off, size_t len)
{
	int i = 0;
	int po = off & ~PAGE_CACHE_MASK;
	int left = len;
	int l, bad;

	while (left > 0) {
		l = min_t(int, left, PAGE_CACHE_SIZE-po);
		bad = copy_to_user(data, page_address(pages[i]) + po, l);
		if (bad == l)
			return -EFAULT;
		data += l - bad;
		left -= l - bad;
		/*
		 * Track the in-page position unconditionally and advance to
		 * the next page only on a page boundary; a short copy must
		 * retry from where it stopped within the same page.
		 */
		po += l - bad;
		if (po == PAGE_CACHE_SIZE) {
			po = 0;
			i++;
		}
	}
	return len;
}
/*
 * Zero an extent within a page vector.  Offset is relative to the
 * start of the first page.
 *
 * @off:   byte offset of the extent, counted from the start of pages[0]
 * @len:   number of bytes to zero; nothing is done when it is <= 0
 * @pages: page vector; it must contain every page the extent touches
 *
 * The extent is handled as an optional leading partial page, a run of
 * whole pages, and an optional trailing partial page.
 */
static void zero_page_vector_range(int off, int len, struct page **pages)
{
	int i = off >> PAGE_CACHE_SHIFT;

	/*
	 * Reject empty/negative extents up front.  NOTE(review):
	 * PAGE_CACHE_SIZE is presumably an unsigned type (it is cast to int
	 * below), in which case a negative len would compare as a huge value
	 * in the whole-page loop and run past the end of the vector.
	 */
	if (len <= 0)
		return;

	off &= ~PAGE_CACHE_MASK;

	dout("zero_page_vector_range %d~%d\n", off, len);

	/* leading partial page? */
	if (off) {
		int end = min((int)PAGE_CACHE_SIZE, off + len);
		dout("zeroing %d %p head from %d\n", i, pages[i],
		     (int)off);
		zero_user_segment(pages[i], off, end);
		len -= (end - off);
		i++;
	}
	/* whole pages; compare as int so the test stays signed */
	while (len >= (int)PAGE_CACHE_SIZE) {
		dout("zeroing %d %p len=%d\n", i, pages[i], len);
		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
		len -= PAGE_CACHE_SIZE;
		i++;
	}
	/* trailing partial page? */
	if (len) {
		dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
		zero_user_segment(pages[i], 0, len);
	}
}
/*
* Read a range of bytes striped over one or more objects. Iterate over
* objects we stripe over. (That's not atomic, but good enough for now.)
......@@ -438,7 +282,7 @@ static int striped_read(struct inode *inode,
struct page **pages, int num_pages,
int *checkeof)
{
struct ceph_client *client = ceph_inode_to_client(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len;
int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
......@@ -459,7 +303,7 @@ static int striped_read(struct inode *inode,
more:
this_len = left;
ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode),
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len,
ci->i_truncate_seq,
ci->i_truncate_size,
......@@ -477,8 +321,8 @@ static int striped_read(struct inode *inode,
if (read < pos - off) {
dout(" zero gap %llu to %llu\n", off + read, pos);
zero_page_vector_range(page_off + read,
pos - off - read, pages);
ceph_zero_page_vector_range(page_off + read,
pos - off - read, pages);
}
pos += ret;
read = pos - off;
......@@ -495,8 +339,8 @@ static int striped_read(struct inode *inode,
/* was original extent fully inside i_size? */
if (pos + left <= inode->i_size) {
dout("zero tail\n");
zero_page_vector_range(page_off + read, len - read,
pages);
ceph_zero_page_vector_range(page_off + read, len - read,
pages);
read = len;
goto out;
}
......@@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
if (file->f_flags & O_DIRECT) {
pages = get_direct_page_vector(data, num_pages, off, len);
pages = ceph_get_direct_page_vector(data, num_pages, off, len);
/*
* flush any page cache pages in this range. this
......@@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
ret = striped_read(inode, off, len, pages, num_pages, checkeof);
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
ret = copy_page_vector_to_user(pages, data, off, ret);
ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
if (ret >= 0)
*poff = off + ret;
done:
if (file->f_flags & O_DIRECT)
put_page_vector(pages, num_pages);
ceph_put_page_vector(pages, num_pages);
else
ceph_release_page_vector(pages, num_pages);
dout("sync_read result %d\n", ret);
......@@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_client *client = ceph_inode_to_client(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req;
struct page **pages;
int num_pages;
......@@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
*/
more:
len = left;
req = ceph_osdc_new_request(&client->osdc, &ci->i_layout,
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), pos, &len,
CEPH_OSD_OP_WRITE, flags,
ci->i_snap_realm->cached_context,
......@@ -655,7 +499,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
num_pages = calc_pages_for(pos, len);
if (file->f_flags & O_DIRECT) {
pages = get_direct_page_vector(data, num_pages, pos, len);
pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out;
......@@ -673,7 +517,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
ret = PTR_ERR(pages);
goto out;
}
ret = copy_user_to_page_vector(pages, data, pos, len);
ret = ceph_copy_user_to_page_vector(pages, data, pos, len);
if (ret < 0) {
ceph_release_page_vector(pages, num_pages);
goto out;
......@@ -689,7 +533,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
req->r_num_pages = num_pages;
req->r_inode = inode;
ret = ceph_osdc_start_request(&client->osdc, req, false);
ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!ret) {
if (req->r_safe_callback) {
/*
......@@ -701,11 +545,11 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
spin_unlock(&ci->i_unsafe_lock);
ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
}
ret = ceph_osdc_wait_request(&client->osdc, req);
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
}
if (file->f_flags & O_DIRECT)
put_page_vector(pages, num_pages);
ceph_put_page_vector(pages, num_pages);
else if (file->f_flags & O_SYNC)
ceph_release_page_vector(pages, num_pages);
......@@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
loff_t endoff = pos + iov->iov_len;
int want, got = 0;
int ret, err;
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/fs.h>
......@@ -13,7 +13,8 @@
#include <linux/pagevec.h>
#include "super.h"
#include "decode.h"
#include "mds_client.h"
#include <linux/ceph/decode.h>
/*
* Ceph inode operations
......@@ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode)
*/
if (ci->i_snap_realm) {
struct ceph_mds_client *mdsc =
&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct ceph_snap_realm *realm = ci->i_snap_realm;
dout(" dropping residual ref to snap realm %p\n", realm);
......@@ -685,7 +686,7 @@ static int fill_inode(struct inode *inode,
}
/* it may be better to set st_size in getattr instead? */
if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
inode->i_size = ci->i_rbytes;
break;
default:
......@@ -901,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct inode *in = NULL;
struct ceph_mds_reply_inode *ininfo;
struct ceph_vino vino;
struct ceph_client *client = ceph_sb_to_client(sb);
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int i = 0;
int err = 0;
......@@ -965,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
*/
if (rinfo->head->is_dentry && !req->r_aborted &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
client->mount_args->snapdir_name,
fsc->mount_options->snapdir_name,
req->r_dentry->d_name.len))) {
/*
* lookup link rename : null -> possibly existing inode
......@@ -1533,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *parent_inode = dentry->d_parent->d_inode;
const unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc;
struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
int issued;
int release = 0, dirtied = 0;
int mask = 0;
......@@ -1728,8 +1729,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
*/
int ceph_do_getattr(struct inode *inode, int mask)
{
struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int err;
......
#include <linux/in.h>
#include "ioctl.h"
#include "super.h"
#include "ceph_debug.h"
#include "mds_client.h"
#include <linux/ceph/ceph_debug.h>
#include "ioctl.h"
/*
......@@ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
{
struct inode *inode = file->f_dentry->d_inode;
struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
struct ceph_ioctl_layout l;
int err, i;
......@@ -98,7 +100,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
struct ceph_ioctl_dataloc dl;
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
u64 len = 1, olen;
u64 tmp;
struct ceph_object_layout ol;
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/file.h>
#include <linux/namei.h>
#include "super.h"
#include "mds_client.h"
#include "pagelist.h"
#include <linux/ceph/pagelist.h>
/**
* Implement fcntl and flock locking functions.
......@@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_mds_client *mdsc =
&ceph_sb_to_client(inode->i_sb)->mdsc;
ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
int err;
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "mds_client.h"
#include "mon_client.h"
#include "super.h"
#include "messenger.h"
#include "decode.h"
#include "auth.h"
#include "pagelist.h"
#include "mds_client.h"
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/pagelist.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
/*
* A cluster of MDS (metadata server) daemons is responsible for
......@@ -286,8 +289,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
if (atomic_dec_and_test(&s->s_ref)) {
if (s->s_authorizer)
s->s_mdsc->client->monc.auth->ops->destroy_authorizer(
s->s_mdsc->client->monc.auth, s->s_authorizer);
s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
s->s_mdsc->fsc->client->monc.auth,
s->s_authorizer);
kfree(s);
}
}
......@@ -344,7 +348,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_seq = 0;
mutex_init(&s->s_mutex);
ceph_con_init(mdsc->client->msgr, &s->s_con);
ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
s->s_con.private = s;
s->s_con.ops = &mds_con_ops;
s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
......@@ -599,7 +603,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else if (req->r_dentry) {
struct inode *dir = req->r_dentry->d_parent->d_inode;
if (dir->i_sb != mdsc->client->sb) {
if (dir->i_sb != mdsc->fsc->sb) {
/* not this fs! */
inode = req->r_dentry->d_inode;
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
......@@ -884,7 +888,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
__ceph_remove_cap(cap);
if (!__ceph_is_any_real_caps(ci)) {
struct ceph_mds_client *mdsc =
&ceph_sb_to_client(inode->i_sb)->mdsc;
ceph_sb_to_client(inode->i_sb)->mdsc;
spin_lock(&mdsc->cap_dirty_lock);
if (!list_empty(&ci->i_dirty_item)) {
......@@ -1146,7 +1150,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_msg *msg, *partial = NULL;
struct ceph_mds_cap_release *head;
int err = -ENOMEM;
int extra = mdsc->client->mount_args->cap_release_safety;
int extra = mdsc->fsc->mount_options->cap_release_safety;
int num;
dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
......@@ -2085,7 +2089,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* insert trace into our cache */
mutex_lock(&req->r_fill_mutex);
err = ceph_fill_trace(mdsc->client->sb, req, req->r_session);
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
if (result == 0 && rinfo->dir_nr)
ceph_readdir_prepopulate(req, req->r_session);
......@@ -2613,7 +2617,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg)
{
struct super_block *sb = mdsc->client->sb;
struct super_block *sb = mdsc->fsc->sb;
struct inode *inode;
struct ceph_inode_info *ci;
struct dentry *parent, *dentry;
......@@ -2891,10 +2895,16 @@ static void delayed_work(struct work_struct *work)
schedule_delayed(mdsc);
}
int ceph_mdsc_init(struct ceph_fs_client *fsc)
int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
{
mdsc->client = client;
struct ceph_mds_client *mdsc;
mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
if (!mdsc)
return -ENOMEM;
mdsc->fsc = fsc;
fsc->mdsc = mdsc;
mutex_init(&mdsc->mutex);
mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
if (mdsc->mdsmap == NULL)
......@@ -2927,7 +2937,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
INIT_LIST_HEAD(&mdsc->dentry_lru);
ceph_caps_init(mdsc);
ceph_adjust_min_caps(mdsc, client->min_caps);
ceph_adjust_min_caps(mdsc, fsc->min_caps);
return 0;
}
......@@ -2939,7 +2949,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
static void wait_requests(struct ceph_mds_client *mdsc)
{
struct ceph_mds_request *req;
struct ceph_client *client = mdsc->client;
struct ceph_fs_client *fsc = mdsc->fsc;
mutex_lock(&mdsc->mutex);
if (__get_oldest_req(mdsc)) {
......@@ -2947,7 +2957,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
dout("wait_requests waiting for requests\n");
wait_for_completion_timeout(&mdsc->safe_umount_waiters,
client->mount_args->mount_timeout * HZ);
fsc->client->options->mount_timeout * HZ);
/* tear down remaining requests */
mutex_lock(&mdsc->mutex);
......@@ -3030,7 +3040,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
......@@ -3053,7 +3063,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc)
{
int i, n = 0;
if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
return true;
mutex_lock(&mdsc->mutex);
......@@ -3071,8 +3081,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
struct ceph_mds_session *session;
int i;
struct ceph_client *client = mdsc->client;
unsigned long timeout = client->mount_args->mount_timeout * HZ;
struct ceph_fs_client *fsc = mdsc->fsc;
unsigned long timeout = fsc->client->options->mount_timeout * HZ;
dout("close_sessions\n");
......@@ -3119,7 +3129,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
dout("stopped\n");
}
void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
{
dout("stop\n");
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
......@@ -3129,6 +3139,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
ceph_caps_finalize(mdsc);
}
/*
 * Tear down the mds client attached to @fsc: stop it, clear the
 * fsc->mdsc pointer, and free the structure.
 */
void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
{
	struct ceph_mds_client *doomed;

	doomed = fsc->mdsc;

	/* stop first, while fsc->mdsc still points at the client */
	ceph_mdsc_stop(doomed);

	fsc->mdsc = NULL;
	kfree(doomed);
}
/*
* handle mds map update.
......@@ -3145,14 +3164,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
ceph_decode_copy(&p, &fsid, sizeof(fsid));
if (ceph_check_fsid(mdsc->client, &fsid) < 0)
if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
return;
epoch = ceph_decode_32(&p);
maplen = ceph_decode_32(&p);
dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
/* do we need it? */
ceph_monc_got_mdsmap(&mdsc->client->monc, epoch);
ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
mutex_lock(&mdsc->mutex);
if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
dout("handle_map epoch %u <= our %u\n",
......@@ -3176,7 +3195,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
} else {
mdsc->mdsmap = newmap; /* first mds map */
}
mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
__wake_requests(mdsc, &mdsc->waiting_for_map);
......@@ -3277,7 +3296,7 @@ static int get_authorizer(struct ceph_connection *con,
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->client->monc.auth;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
int ret = 0;
if (force_new && s->s_authorizer) {
......@@ -3311,7 +3330,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->client->monc.auth;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
}
......@@ -3320,12 +3339,12 @@ static int invalidate_authorizer(struct ceph_connection *con)
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->client->monc.auth;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
if (ac->ops->invalidate_authorizer)
ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
return ceph_monc_validate_auth(&mdsc->client->monc);
return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
}
static const struct ceph_connection_operations mds_con_ops = {
......@@ -3338,7 +3357,4 @@ static const struct ceph_connection_operations mds_con_ops = {
.peer_reset = peer_reset,
};
/* eof */
......@@ -8,9 +8,9 @@
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include "types.h"
#include "messenger.h"
#include "mdsmap.h"
#include <linux/ceph/types.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/mdsmap.h>
/*
* Some lock dependencies:
......@@ -26,7 +26,7 @@
*
*/
struct ceph_client;
struct ceph_fs_client;
struct ceph_cap;
/*
......@@ -230,7 +230,7 @@ struct ceph_mds_request {
* mds client state
*/
struct ceph_mds_client {
struct ceph_client *client;
struct ceph_fs_client *fsc;
struct mutex mutex; /* all nested structures */
struct ceph_mdsmap *mdsmap;
......@@ -289,11 +289,6 @@ struct ceph_mds_client {
int caps_avail_count; /* unused, unreserved */
int caps_min_count; /* keep at least this many
(unreserved) */
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_file;
#endif
spinlock_t dentry_lru_lock;
struct list_head dentry_lru;
int num_dentry;
......@@ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s);
extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
struct ceph_msg *msg, int mds);
extern int ceph_mdsc_init(struct ceph_mds_client *mdsc,
struct ceph_client *client);
extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/bug.h>
#include <linux/err.h>
......@@ -6,9 +6,9 @@
#include <linux/slab.h>
#include <linux/types.h>
#include "mdsmap.h"
#include "messenger.h"
#include "decode.h"
#include <linux/ceph/mdsmap.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>
#include "super.h"
......@@ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
}
dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr),
i+1, n, global_id, mds, inc,
ceph_pr_addr(&addr.in_addr),
ceph_mds_state_name(state));
if (mds >= 0 && mds < m->m_max_mds && state > 0) {
m->m_info[mds].global_id = global_id;
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include "super.h"
#include "decode.h"
#include "mds_client.h"
#include <linux/ceph/decode.h>
/*
* Snapshots in ceph are driven in large part by cooperation from the
......@@ -526,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
BUG_ON(capsnap->writing);
capsnap->size = inode->i_size;
......@@ -747,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg)
{
struct super_block *sb = mdsc->client->sb;
struct super_block *sb = mdsc->fsc->sb;
int mds = session->s_mds;
u64 split;
int op;
......
/*
* Ceph string constants
* Ceph fs string constants
*/
#include "types.h"
#include <linux/module.h>
#include <linux/ceph/types.h>
/*
 * Map a CEPH_ENTITY_TYPE_* value to a short printable name.
 * Returns "unknown" for any value that is not listed.
 */
const char *ceph_entity_type_name(int type)
{
	const char *name = "unknown";

	switch (type) {
	case CEPH_ENTITY_TYPE_MDS:
		name = "mds";
		break;
	case CEPH_ENTITY_TYPE_OSD:
		name = "osd";
		break;
	case CEPH_ENTITY_TYPE_MON:
		name = "mon";
		break;
	case CEPH_ENTITY_TYPE_CLIENT:
		name = "client";
		break;
	case CEPH_ENTITY_TYPE_AUTH:
		name = "auth";
		break;
	}

	return name;
}
/*
 * Map a CEPH_OSD_OP_* value to a short printable name.
 * Returns "???" for any value that is not listed.
 */
const char *ceph_osd_op_name(int op)
{
	const char *name = "???";

	switch (op) {
	case CEPH_OSD_OP_READ:		 name = "read"; break;
	case CEPH_OSD_OP_STAT:		 name = "stat"; break;

	case CEPH_OSD_OP_MASKTRUNC:	 name = "masktrunc"; break;

	case CEPH_OSD_OP_WRITE:		 name = "write"; break;
	case CEPH_OSD_OP_DELETE:	 name = "delete"; break;
	case CEPH_OSD_OP_TRUNCATE:	 name = "truncate"; break;
	case CEPH_OSD_OP_ZERO:		 name = "zero"; break;
	case CEPH_OSD_OP_WRITEFULL:	 name = "writefull"; break;
	case CEPH_OSD_OP_ROLLBACK:	 name = "rollback"; break;

	case CEPH_OSD_OP_APPEND:	 name = "append"; break;
	case CEPH_OSD_OP_STARTSYNC:	 name = "startsync"; break;
	case CEPH_OSD_OP_SETTRUNC:	 name = "settrunc"; break;
	case CEPH_OSD_OP_TRIMTRUNC:	 name = "trimtrunc"; break;

	case CEPH_OSD_OP_TMAPUP:	 name = "tmapup"; break;
	case CEPH_OSD_OP_TMAPGET:	 name = "tmapget"; break;
	case CEPH_OSD_OP_TMAPPUT:	 name = "tmapput"; break;

	case CEPH_OSD_OP_GETXATTR:	 name = "getxattr"; break;
	case CEPH_OSD_OP_GETXATTRS:	 name = "getxattrs"; break;
	case CEPH_OSD_OP_SETXATTR:	 name = "setxattr"; break;
	case CEPH_OSD_OP_SETXATTRS:	 name = "setxattrs"; break;
	case CEPH_OSD_OP_RESETXATTRS:	 name = "resetxattrs"; break;
	case CEPH_OSD_OP_RMXATTR:	 name = "rmxattr"; break;
	case CEPH_OSD_OP_CMPXATTR:	 name = "cmpxattr"; break;

	case CEPH_OSD_OP_PULL:		 name = "pull"; break;
	case CEPH_OSD_OP_PUSH:		 name = "push"; break;
	case CEPH_OSD_OP_BALANCEREADS:	 name = "balance-reads"; break;
	case CEPH_OSD_OP_UNBALANCEREADS: name = "unbalance-reads"; break;
	case CEPH_OSD_OP_SCRUB:		 name = "scrub"; break;

	case CEPH_OSD_OP_WRLOCK:	 name = "wrlock"; break;
	case CEPH_OSD_OP_WRUNLOCK:	 name = "wrunlock"; break;
	case CEPH_OSD_OP_RDLOCK:	 name = "rdlock"; break;
	case CEPH_OSD_OP_RDUNLOCK:	 name = "rdunlock"; break;
	case CEPH_OSD_OP_UPLOCK:	 name = "uplock"; break;
	case CEPH_OSD_OP_DNLOCK:	 name = "dnlock"; break;

	case CEPH_OSD_OP_CALL:		 name = "call"; break;

	case CEPH_OSD_OP_PGLS:		 name = "pgls"; break;
	}

	return name;
}
const char *ceph_mds_state_name(int s)
{
......@@ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o)
}
return "???";
}
/*
 * Translate a pool operation code (POOL_OP_*) into a short static
 * string for debug output.  Codes that are not listed yield "???".
 */
const char *ceph_pool_op_name(int op)
{
	static const struct {
		int code;
		const char *label;
	} pool_op_names[] = {
		{ POOL_OP_CREATE,			"create" },
		{ POOL_OP_DELETE,			"delete" },
		{ POOL_OP_AUID_CHANGE,			"auid change" },
		{ POOL_OP_CREATE_SNAP,			"create snap" },
		{ POOL_OP_DELETE_SNAP,			"delete snap" },
		{ POOL_OP_CREATE_UNMANAGED_SNAP,	"create unmanaged snap" },
		{ POOL_OP_DELETE_UNMANAGED_SNAP,	"delete unmanaged snap" },
		{ 0, NULL }	/* sentinel: a NULL label ends the table */
	};
	int i;

	for (i = 0; pool_op_names[i].label; i++) {
		if (pool_op_names[i].code == op)
			return pool_op_names[i].label;
	}
	return "???";
}
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/backing-dev.h>
#include <linux/ctype.h>
......@@ -15,10 +15,13 @@
#include <linux/statfs.h>
#include <linux/string.h>
#include "decode.h"
#include "super.h"
#include "mon_client.h"
#include "auth.h"
#include "mds_client.h"
#include <linux/ceph/decode.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
/*
* Ceph superblock operations
......@@ -26,36 +29,22 @@
* Handle the basics of mounting, unmounting.
*/
/*
 * Return a pointer to the last path component within the first @len
 * bytes of @s (/foo/bar/baz -> baz).  The result points just past the
 * final '/' in that range, or at @s itself when the range holds no '/'.
 */
const char *ceph_file_part(const char *s, int len)
{
	const char *p;

	/* walk backwards from the end until the preceding byte is a '/' */
	for (p = s + len; p != s; p--) {
		if (p[-1] == '/')
			break;
	}
	return p;
}
/*
* super ops
*/
static void ceph_put_super(struct super_block *s)
{
struct ceph_client *client = ceph_sb_to_client(s);
struct ceph_fs_client *fsc = ceph_sb_to_client(s);
dout("put_super\n");
ceph_mdsc_close_sessions(&client->mdsc);
ceph_mdsc_close_sessions(fsc->mdsc);
/*
* ensure we release the bdi before put_anon_super releases
* the device name.
*/
if (s->s_bdi == &client->backing_dev_info) {
bdi_unregister(&client->backing_dev_info);
if (s->s_bdi == &fsc->backing_dev_info) {
bdi_unregister(&fsc->backing_dev_info);
s->s_bdi = NULL;
}
......@@ -64,14 +53,14 @@ static void ceph_put_super(struct super_block *s)
static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct ceph_client *client = ceph_inode_to_client(dentry->d_inode);
struct ceph_monmap *monmap = client->monc.monmap;
struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
struct ceph_monmap *monmap = fsc->client->monc.monmap;
struct ceph_statfs st;
u64 fsid;
int err;
dout("statfs\n");
err = ceph_monc_do_statfs(&client->monc, &st);
err = ceph_monc_do_statfs(&fsc->client->monc, &st);
if (err < 0)
return err;
......@@ -104,238 +93,28 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
static int ceph_sync_fs(struct super_block *sb, int wait)
{
struct ceph_client *client = ceph_sb_to_client(sb);
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
if (!wait) {
dout("sync_fs (non-blocking)\n");
ceph_flush_dirty_caps(&client->mdsc);
ceph_flush_dirty_caps(fsc->mdsc);
dout("sync_fs (non-blocking) done\n");
return 0;
}
dout("sync_fs (blocking)\n");
ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc);
ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc);
ceph_osdc_sync(&fsc->client->osdc);
ceph_mdsc_sync(fsc->mdsc);
dout("sync_fs (blocking) done\n");
return 0;
}
/*
 * Default write congestion threshold, in KB.
 *
 * Copied from NFS: the value scales with the square root of the number
 * of RAM pages so that larger machines may have larger/more transfers
 * in flight.
 *
 *   64MB:    8192k
 *  128MB:   11585k
 *  256MB:   16384k
 *  512MB:   23170k
 *    1GB:   32768k
 *    2GB:   46340k
 *    4GB:   65536k
 *    8GB:   92681k
 *   16GB:  131072k
 *
 * The default is capped at 256M.
 */
static int default_congestion_kb(void)
{
	const int max_kb = 256*1024;
	int kb;

	kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	return kb > max_kb ? max_kb : kb;
}
/**
 * ceph_show_options - Show mount options in /proc/mounts
 * @m: seq_file to write to
 * @mnt: mount descriptor
 *
 * Writes a comma-prefixed entry for every option recorded in the
 * client's mount args that differs from its default (wsize is written
 * whenever it is non-zero).  The secret itself is never printed.
 *
 * Option names are spelled the way the mount option table spells them,
 * so the output can be passed back in as mount options.  In particular
 * the keepalive timeout is shown as "osdkeepalive=", which is the token
 * the parser accepts.
 *
 * Always returns 0.
 */
static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
{
	struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb);
	struct ceph_mount_args *args = client->mount_args;

	/* flags */
	if (args->flags & CEPH_OPT_FSID)
		seq_printf(m, ",fsid=%pU", &args->fsid);
	if (args->flags & CEPH_OPT_NOSHARE)
		seq_puts(m, ",noshare");
	if (args->flags & CEPH_OPT_DIRSTAT)
		seq_puts(m, ",dirstat");
	/* rbytes is tested inverted: only its absence is reported */
	if ((args->flags & CEPH_OPT_RBYTES) == 0)
		seq_puts(m, ",norbytes");
	if (args->flags & CEPH_OPT_NOCRC)
		seq_puts(m, ",nocrc");
	if (args->flags & CEPH_OPT_NOASYNCREADDIR)
		seq_puts(m, ",noasyncreaddir");

	/* timeouts */
	if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
		seq_printf(m, ",mount_timeout=%d", args->mount_timeout);
	if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
		seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl);
	if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
		seq_printf(m, ",osdtimeout=%d", args->osd_timeout);
	/* must match the "osdkeepalive=%d" token in the option table */
	if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
		seq_printf(m, ",osdkeepalive=%d",
			   args->osd_keepalive_timeout);

	/* sizes and limits */
	if (args->wsize)
		seq_printf(m, ",wsize=%d", args->wsize);
	if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
		seq_printf(m, ",rsize=%d", args->rsize);
	if (args->congestion_kb != default_congestion_kb())
		seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb);
	if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
		seq_printf(m, ",caps_wanted_delay_min=%d",
			   args->caps_wanted_delay_min);
	if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
		seq_printf(m, ",caps_wanted_delay_max=%d",
			   args->caps_wanted_delay_max);
	if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
		seq_printf(m, ",cap_release_safety=%d",
			   args->cap_release_safety);
	if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT)
		seq_printf(m, ",readdir_max_entries=%d", args->max_readdir);
	if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
		seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes);

	/*
	 * strings: snapdir_name comes from an unchecked kstrdup()/kstrndup()
	 * in the option parser, so it may be NULL; do not hand that to
	 * strcmp().
	 */
	if (args->snapdir_name &&
	    strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
		seq_printf(m, ",snapdirname=%s", args->snapdir_name);
	if (args->name)
		seq_printf(m, ",name=%s", args->name);
	if (args->secret)
		seq_puts(m, ",secret=<hidden>");
	return 0;
}
/*
* caches
*/
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
/*
 * Constructor passed to kmem_cache_create() for the ceph_inode_info
 * cache: runs inode_init_once() on the embedded VFS inode of @foo.
 */
static void ceph_inode_init_once(void *foo)
{
	inode_init_once(&((struct ceph_inode_info *)foo)->vfs_inode);
}
/*
 * Create the four slab caches (inode, cap, dentry, file).
 *
 * Returns 0 on success.  If any cache cannot be created, the caches
 * created before it are destroyed again and -ENOMEM is returned.
 */
static int __init init_caches(void)
{
	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				      sizeof(struct ceph_inode_info),
				      __alignof__(struct ceph_inode_info),
				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
				      ceph_inode_init_once);
	if (!ceph_inode_cachep)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_cap_cachep)
		goto undo_inode;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_dentry_cachep)
		goto undo_cap;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_file_cachep)
		goto undo_dentry;

	return 0;

	/* unwind in reverse order of creation */
undo_dentry:
	kmem_cache_destroy(ceph_dentry_cachep);
undo_cap:
	kmem_cache_destroy(ceph_cap_cachep);
undo_inode:
	kmem_cache_destroy(ceph_inode_cachep);
	return -ENOMEM;
}
/*
 * Destroy the four slab caches set up by init_caches(), in the order
 * inode, cap, dentry, file.
 */
static void destroy_caches(void)
{
	struct kmem_cache *caches[] = {
		ceph_inode_cachep,
		ceph_cap_cachep,
		ceph_dentry_cachep,
		ceph_file_cachep,
	};
	unsigned int i;

	for (i = 0; i < sizeof(caches) / sizeof(caches[0]); i++)
		kmem_cache_destroy(caches[i]);
}
/*
 * ceph_umount_begin - initiate forced umount.  Tear down the mount,
 * skipping steps that may hang while waiting for server(s).
 *
 * All this does is flag the client as CEPH_MOUNT_SHUTDOWN; a
 * superblock with no client attached is left alone.
 */
static void ceph_umount_begin(struct super_block *sb)
{
	struct ceph_client *client = ceph_sb_to_client(sb);

	dout("ceph_umount_begin - starting forced umount\n");
	if (client)
		client->mount_state = CEPH_MOUNT_SHUTDOWN;
}
/* Superblock operation table for ceph superblocks. */
static const struct super_operations ceph_super_ops = {
	/* inode handling */
	.alloc_inode	= ceph_alloc_inode,
	.destroy_inode	= ceph_destroy_inode,
	.write_inode	= ceph_write_inode,

	/* reporting */
	.statfs		= ceph_statfs,
	.show_options	= ceph_show_options,

	/* sync and teardown */
	.sync_fs	= ceph_sync_fs,
	.umount_begin	= ceph_umount_begin,
	.put_super	= ceph_put_super,
};
/*
 * Translate a message type (CEPH_MSG_*) into a short static string
 * for debug output.  Types that are not listed yield "unknown".
 */
const char *ceph_msg_type_name(int type)
{
	const char *name = "unknown";

	switch (type) {
	case CEPH_MSG_SHUTDOWN:
		name = "shutdown";
		break;
	case CEPH_MSG_PING:
		name = "ping";
		break;
	case CEPH_MSG_AUTH:
		name = "auth";
		break;
	case CEPH_MSG_AUTH_REPLY:
		name = "auth_reply";
		break;
	case CEPH_MSG_MON_MAP:
		name = "mon_map";
		break;
	case CEPH_MSG_MON_GET_MAP:
		name = "mon_get_map";
		break;
	case CEPH_MSG_MON_SUBSCRIBE:
		name = "mon_subscribe";
		break;
	case CEPH_MSG_MON_SUBSCRIBE_ACK:
		name = "mon_subscribe_ack";
		break;
	case CEPH_MSG_STATFS:
		name = "statfs";
		break;
	case CEPH_MSG_STATFS_REPLY:
		name = "statfs_reply";
		break;
	case CEPH_MSG_MDS_MAP:
		name = "mds_map";
		break;
	case CEPH_MSG_CLIENT_SESSION:
		name = "client_session";
		break;
	case CEPH_MSG_CLIENT_RECONNECT:
		name = "client_reconnect";
		break;
	case CEPH_MSG_CLIENT_REQUEST:
		name = "client_request";
		break;
	case CEPH_MSG_CLIENT_REQUEST_FORWARD:
		name = "client_request_forward";
		break;
	case CEPH_MSG_CLIENT_REPLY:
		name = "client_reply";
		break;
	case CEPH_MSG_CLIENT_CAPS:
		name = "client_caps";
		break;
	case CEPH_MSG_CLIENT_CAPRELEASE:
		name = "client_cap_release";
		break;
	case CEPH_MSG_CLIENT_SNAP:
		name = "client_snap";
		break;
	case CEPH_MSG_CLIENT_LEASE:
		name = "client_lease";
		break;
	case CEPH_MSG_OSD_MAP:
		name = "osd_map";
		break;
	case CEPH_MSG_OSD_OP:
		name = "osd_op";
		break;
	case CEPH_MSG_OSD_OPREPLY:
		name = "osd_opreply";
		break;
	default:
		break;
	}
	return name;
}
/*
* mount options
*/
enum {
Opt_wsize,
Opt_rsize,
Opt_osdtimeout,
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
Opt_caps_wanted_delay_min,
Opt_caps_wanted_delay_max,
Opt_cap_release_safety,
......@@ -344,29 +123,19 @@ enum {
Opt_congestion_kb,
Opt_last_int,
/* int args above */
Opt_fsid,
Opt_snapdirname,
Opt_name,
Opt_secret,
Opt_last_string,
/* string args above */
Opt_ip,
Opt_noshare,
Opt_dirstat,
Opt_nodirstat,
Opt_rbytes,
Opt_norbytes,
Opt_nocrc,
Opt_noasyncreaddir,
};
static match_table_t arg_tokens = {
static match_table_t fsopt_tokens = {
{Opt_wsize, "wsize=%d"},
{Opt_rsize, "rsize=%d"},
{Opt_osdtimeout, "osdtimeout=%d"},
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
{Opt_mount_timeout, "mount_timeout=%d"},
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
{Opt_cap_release_safety, "cap_release_safety=%d"},
......@@ -374,403 +143,459 @@ static match_table_t arg_tokens = {
{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
{Opt_congestion_kb, "write_congestion_kb=%d"},
/* int args above */
{Opt_fsid, "fsid=%s"},
{Opt_snapdirname, "snapdirname=%s"},
{Opt_name, "name=%s"},
{Opt_secret, "secret=%s"},
/* string args above */
{Opt_ip, "ip=%s"},
{Opt_noshare, "noshare"},
{Opt_dirstat, "dirstat"},
{Opt_nodirstat, "nodirstat"},
{Opt_rbytes, "rbytes"},
{Opt_norbytes, "norbytes"},
{Opt_nocrc, "nocrc"},
{Opt_noasyncreaddir, "noasyncreaddir"},
{-1, NULL}
};
static int parse_fsid(const char *str, struct ceph_fsid *fsid)
static int parse_fsopt_token(char *c, void *private)
{
int i = 0;
char tmp[3];
int err = -EINVAL;
int d;
dout("parse_fsid '%s'\n", str);
tmp[2] = 0;
while (*str && i < 16) {
if (ispunct(*str)) {
str++;
continue;
struct ceph_mount_options *fsopt = private;
substring_t argstr[MAX_OPT_ARGS];
int token, intval, ret;
token = match_token((char *)c, fsopt_tokens, argstr);
if (token < 0)
return -EINVAL;
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
pr_err("bad mount option arg (not int) "
"at '%s'\n", c);
return ret;
}
if (!isxdigit(str[0]) || !isxdigit(str[1]))
break;
tmp[0] = str[0];
tmp[1] = str[1];
if (sscanf(tmp, "%x", &d) < 1)
break;
fsid->fsid[i] = d & 0xff;
i++;
str += 2;
dout("got int token %d val %d\n", token, intval);
} else if (token > Opt_last_int && token < Opt_last_string) {
dout("got string token %d val %s\n", token,
argstr[0].from);
} else {
dout("got token %d\n", token);
}
if (i == 16)
err = 0;
dout("parse_fsid ret %d got fsid %pU", err, fsid);
return err;
switch (token) {
case Opt_snapdirname:
kfree(fsopt->snapdir_name);
fsopt->snapdir_name = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
if (!fsopt->snapdir_name)
return -ENOMEM;
break;
/* misc */
case Opt_wsize:
fsopt->wsize = intval;
break;
case Opt_rsize:
fsopt->rsize = intval;
break;
case Opt_caps_wanted_delay_min:
fsopt->caps_wanted_delay_min = intval;
break;
case Opt_caps_wanted_delay_max:
fsopt->caps_wanted_delay_max = intval;
break;
case Opt_readdir_max_entries:
fsopt->max_readdir = intval;
break;
case Opt_readdir_max_bytes:
fsopt->max_readdir_bytes = intval;
break;
case Opt_congestion_kb:
fsopt->congestion_kb = intval;
break;
case Opt_dirstat:
fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
break;
case Opt_nodirstat:
fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
break;
case Opt_rbytes:
fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
break;
case Opt_norbytes:
fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
break;
case Opt_noasyncreaddir:
fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
break;
default:
BUG_ON(token);
}
return 0;
}
static struct ceph_mount_args *parse_mount_args(int flags, char *options,
const char *dev_name,
const char **path)
static void destroy_mount_options(struct ceph_mount_options *args)
{
struct ceph_mount_args *args;
const char *c;
int err = -ENOMEM;
substring_t argstr[MAX_OPT_ARGS];
dout("destroy_mount_options %p\n", args);
kfree(args->snapdir_name);
kfree(args);
}
args = kzalloc(sizeof(*args), GFP_KERNEL);
if (!args)
return ERR_PTR(-ENOMEM);
args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr),
GFP_KERNEL);
if (!args->mon_addr)
goto out;
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Two NULLs compare equal (0).  When only @s2 is NULL the result is -1,
 * and when only @s1 is NULL the result is 1.  Otherwise the result is
 * whatever strcmp(@s1, @s2) returns.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;	/* only s2 is NULL */
	return s2 ? 1 : 0;	/* only s1 is NULL, or both are */
}
dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name);
/* start with defaults */
args->sb_flags = flags;
args->flags = CEPH_OPT_DEFAULT;
args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
args->max_readdir = CEPH_MAX_READDIR_DEFAULT;
args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
args->congestion_kb = default_congestion_kb();
/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
err = -EINVAL;
if (!dev_name)
goto out;
*path = strstr(dev_name, ":/");
if (*path == NULL) {
pr_err("device name is missing path (no :/ in %s)\n",
dev_name);
goto out;
}
static int compare_mount_options(struct ceph_mount_options *new_fsopt,
struct ceph_options *new_opt,
struct ceph_fs_client *fsc)
{
struct ceph_mount_options *fsopt1 = new_fsopt;
struct ceph_mount_options *fsopt2 = fsc->mount_options;
int ofs = offsetof(struct ceph_mount_options, snapdir_name);
int ret;
/* get mon ip(s) */
err = ceph_parse_ips(dev_name, *path, args->mon_addr,
CEPH_MAX_MON, &args->num_mon);
if (err < 0)
goto out;
ret = memcmp(fsopt1, fsopt2, ofs);
if (ret)
return ret;
ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
if (ret)
return ret;
return ceph_compare_options(new_opt, fsc->client);
}
static int parse_mount_options(struct ceph_mount_options **pfsopt,
struct ceph_options **popt,
int flags, char *options,
const char *dev_name,
const char **path)
{
struct ceph_mount_options *fsopt;
const char *dev_name_end;
int err = -ENOMEM;
fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
if (!fsopt)
return -ENOMEM;
dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
fsopt->sb_flags = flags;
fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
fsopt->congestion_kb = default_congestion_kb();
/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
err = -EINVAL;
if (!dev_name)
goto out;
*path = strstr(dev_name, ":/");
if (*path == NULL) {
pr_err("device name is missing path (no :/ in %s)\n",
dev_name);
goto out;
}
dev_name_end = *path;
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
/* path on server */
*path += 2;
dout("server path '%s'\n", *path);
/* parse mount options */
while ((c = strsep(&options, ",")) != NULL) {
int token, intval, ret;
if (!*c)
continue;
err = -EINVAL;
token = match_token((char *)c, arg_tokens, argstr);
if (token < 0) {
pr_err("bad mount option at '%s'\n", c);
goto out;
}
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
pr_err("bad mount option arg (not int) "
"at '%s'\n", c);
continue;
}
dout("got int token %d val %d\n", token, intval);
} else if (token > Opt_last_int && token < Opt_last_string) {
dout("got string token %d val %s\n", token,
argstr[0].from);
} else {
dout("got token %d\n", token);
}
switch (token) {
case Opt_ip:
err = ceph_parse_ips(argstr[0].from,
argstr[0].to,
&args->my_addr,
1, NULL);
if (err < 0)
goto out;
args->flags |= CEPH_OPT_MYIP;
break;
case Opt_fsid:
err = parse_fsid(argstr[0].from, &args->fsid);
if (err == 0)
args->flags |= CEPH_OPT_FSID;
break;
case Opt_snapdirname:
kfree(args->snapdir_name);
args->snapdir_name = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
break;
case Opt_name:
args->name = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
break;
case Opt_secret:
args->secret = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
break;
/* misc */
case Opt_wsize:
args->wsize = intval;
break;
case Opt_rsize:
args->rsize = intval;
break;
case Opt_osdtimeout:
args->osd_timeout = intval;
break;
case Opt_osdkeepalivetimeout:
args->osd_keepalive_timeout = intval;
break;
case Opt_osd_idle_ttl:
args->osd_idle_ttl = intval;
break;
case Opt_mount_timeout:
args->mount_timeout = intval;
break;
case Opt_caps_wanted_delay_min:
args->caps_wanted_delay_min = intval;
break;
case Opt_caps_wanted_delay_max:
args->caps_wanted_delay_max = intval;
break;
case Opt_readdir_max_entries:
args->max_readdir = intval;
break;
case Opt_readdir_max_bytes:
args->max_readdir_bytes = intval;
break;
case Opt_congestion_kb:
args->congestion_kb = intval;
break;
case Opt_noshare:
args->flags |= CEPH_OPT_NOSHARE;
break;
case Opt_dirstat:
args->flags |= CEPH_OPT_DIRSTAT;
break;
case Opt_nodirstat:
args->flags &= ~CEPH_OPT_DIRSTAT;
break;
case Opt_rbytes:
args->flags |= CEPH_OPT_RBYTES;
break;
case Opt_norbytes:
args->flags &= ~CEPH_OPT_RBYTES;
break;
case Opt_nocrc:
args->flags |= CEPH_OPT_NOCRC;
break;
case Opt_noasyncreaddir:
args->flags |= CEPH_OPT_NOASYNCREADDIR;
break;
default:
BUG_ON(token);
}
}
return args;
err = ceph_parse_options(popt, options, dev_name, dev_name_end,
parse_fsopt_token, (void *)fsopt);
if (err)
goto out;
/* success */
*pfsopt = fsopt;
return 0;
out:
kfree(args->mon_addr);
kfree(args);
return ERR_PTR(err);
destroy_mount_options(fsopt);
return err;
}
static void destroy_mount_args(struct ceph_mount_args *args)
/**
* ceph_show_options - Show mount options in /proc/mounts
* @m: seq_file to write to
* @mnt: mount descriptor
*/
static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
{
dout("destroy_mount_args %p\n", args);
kfree(args->snapdir_name);
args->snapdir_name = NULL;
kfree(args->name);
args->name = NULL;
kfree(args->secret);
args->secret = NULL;
kfree(args);
struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
struct ceph_mount_options *fsopt = fsc->mount_options;
struct ceph_options *opt = fsc->client->options;
if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, ",fsid=%pU", &opt->fsid);
if (opt->flags & CEPH_OPT_NOSHARE)
seq_puts(m, ",noshare");
if (opt->flags & CEPH_OPT_NOCRC)
seq_puts(m, ",nocrc");
if (opt->name)
seq_printf(m, ",name=%s", opt->name);
if (opt->secret)
seq_puts(m, ",secret=<hidden>");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, ",osdkeepalivetimeout=%d",
opt->osd_keepalive_timeout);
if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
seq_puts(m, ",dirstat");
if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
seq_puts(m, ",norbytes");
if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
seq_puts(m, ",noasyncreaddir");
if (fsopt->wsize)
seq_printf(m, ",wsize=%d", fsopt->wsize);
if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
seq_printf(m, ",rsize=%d", fsopt->rsize);
if (fsopt->congestion_kb != default_congestion_kb())
seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
seq_printf(m, ",caps_wanted_delay_min=%d",
fsopt->caps_wanted_delay_min);
if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
seq_printf(m, ",caps_wanted_delay_max=%d",
fsopt->caps_wanted_delay_max);
if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
seq_printf(m, ",cap_release_safety=%d",
fsopt->cap_release_safety);
if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
return 0;
}
/*
* create a fresh client instance
* handle any mon messages the standard library doesn't understand.
* return error if we don't either.
*/
static struct ceph_client *ceph_create_client(struct ceph_mount_args *args)
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
{
struct ceph_client *client;
struct ceph_fs_client *fsc = client->private;
int type = le16_to_cpu(msg->hdr.type);
switch (type) {
case CEPH_MSG_MDS_MAP:
ceph_mdsc_handle_map(fsc->mdsc, msg);
return 0;
default:
return -1;
}
}
/*
* create a new fs client
*/
struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
struct ceph_options *opt)
{
struct ceph_fs_client *fsc;
int err = -ENOMEM;
client = kzalloc(sizeof(*client), GFP_KERNEL);
if (client == NULL)
fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
if (!fsc)
return ERR_PTR(-ENOMEM);
mutex_init(&client->mount_mutex);
init_waitqueue_head(&client->auth_wq);
fsc->client = ceph_create_client(opt, fsc);
if (IS_ERR(fsc->client)) {
err = PTR_ERR(fsc->client);
goto fail;
}
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
fsc->client->supported_features |= CEPH_FEATURE_FLOCK;
fsc->client->monc.want_mdsmap = 1;
client->sb = NULL;
client->mount_state = CEPH_MOUNT_MOUNTING;
client->mount_args = args;
fsc->mount_options = fsopt;
client->msgr = NULL;
fsc->sb = NULL;
fsc->mount_state = CEPH_MOUNT_MOUNTING;
client->auth_err = 0;
atomic_long_set(&client->writeback_count, 0);
atomic_long_set(&fsc->writeback_count, 0);
err = bdi_init(&client->backing_dev_info);
err = bdi_init(&fsc->backing_dev_info);
if (err < 0)
goto fail;
goto fail_client;
err = -ENOMEM;
client->wb_wq = create_workqueue("ceph-writeback");
if (client->wb_wq == NULL)
fsc->wb_wq = create_workqueue("ceph-writeback");
if (fsc->wb_wq == NULL)
goto fail_bdi;
client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
if (client->pg_inv_wq == NULL)
fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
if (fsc->pg_inv_wq == NULL)
goto fail_wb_wq;
client->trunc_wq = create_singlethread_workqueue("ceph-trunc");
if (client->trunc_wq == NULL)
fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc");
if (fsc->trunc_wq == NULL)
goto fail_pg_inv_wq;
/* set up mempools */
err = -ENOMEM;
client->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
client->mount_args->wsize >> PAGE_CACHE_SHIFT);
if (!client->wb_pagevec_pool)
fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
if (!fsc->wb_pagevec_pool)
goto fail_trunc_wq;
/* caps */
client->min_caps = args->max_readdir;
fsc->min_caps = fsopt->max_readdir;
return fsc;
/* subsystems */
err = ceph_monc_init(&client->monc, client);
if (err < 0)
goto fail_mempool;
err = ceph_osdc_init(&client->osdc, client);
if (err < 0)
goto fail_monc;
err = ceph_mdsc_init(&client->mdsc, client);
if (err < 0)
goto fail_osdc;
return client;
fail_osdc:
ceph_osdc_stop(&client->osdc);
fail_monc:
ceph_monc_stop(&client->monc);
fail_mempool:
mempool_destroy(client->wb_pagevec_pool);
fail_trunc_wq:
destroy_workqueue(client->trunc_wq);
destroy_workqueue(fsc->trunc_wq);
fail_pg_inv_wq:
destroy_workqueue(client->pg_inv_wq);
destroy_workqueue(fsc->pg_inv_wq);
fail_wb_wq:
destroy_workqueue(client->wb_wq);
destroy_workqueue(fsc->wb_wq);
fail_bdi:
bdi_destroy(&client->backing_dev_info);
bdi_destroy(&fsc->backing_dev_info);
fail_client:
ceph_destroy_client(fsc->client);
fail:
kfree(client);
kfree(fsc);
return ERR_PTR(err);
}
static void ceph_destroy_client(struct ceph_client *client)
void destroy_fs_client(struct ceph_fs_client *fsc)
{
dout("destroy_client %p\n", client);
dout("destroy_fs_client %p\n", fsc);
/* unmount */
ceph_mdsc_stop(&client->mdsc);
ceph_osdc_stop(&client->osdc);
destroy_workqueue(fsc->wb_wq);
destroy_workqueue(fsc->pg_inv_wq);
destroy_workqueue(fsc->trunc_wq);
/*
* make sure mds and osd connections close out before destroying
* the auth module, which is needed to free those connections'
* ceph_authorizers.
*/
ceph_msgr_flush();
ceph_monc_stop(&client->monc);
bdi_destroy(&fsc->backing_dev_info);
ceph_debugfs_client_cleanup(client);
destroy_workqueue(client->wb_wq);
destroy_workqueue(client->pg_inv_wq);
destroy_workqueue(client->trunc_wq);
mempool_destroy(fsc->wb_pagevec_pool);
bdi_destroy(&client->backing_dev_info);
destroy_mount_options(fsc->mount_options);
if (client->msgr)
ceph_messenger_destroy(client->msgr);
mempool_destroy(client->wb_pagevec_pool);
ceph_fs_debugfs_cleanup(fsc);
destroy_mount_args(client->mount_args);
ceph_destroy_client(fsc->client);
kfree(client);
dout("destroy_client %p done\n", client);
kfree(fsc);
dout("destroy_fs_client %p done\n", fsc);
}
/*
* Initially learn our fsid, or verify an fsid matches.
* caches
*/
int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
static void ceph_inode_init_once(void *foo)
{
if (client->have_fsid) {
if (ceph_fsid_compare(&client->fsid, fsid)) {
pr_err("bad fsid, had %pU got %pU",
&client->fsid, fsid);
return -1;
}
} else {
pr_info("client%lld fsid %pU\n", client->monc.auth->global_id,
fsid);
memcpy(&client->fsid, fsid, sizeof(*fsid));
ceph_debugfs_client_init(client);
client->have_fsid = true;
}
struct ceph_inode_info *ci = foo;
inode_init_once(&ci->vfs_inode);
}
/*
 * Create the four slab caches (inode, cap, dentry, file).
 *
 * Returns 0 on success.  If any cache cannot be created, the caches
 * created before it are destroyed again and -ENOMEM is returned.
 */
static int __init init_caches(void)
{
	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				      sizeof(struct ceph_inode_info),
				      __alignof__(struct ceph_inode_info),
				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
				      ceph_inode_init_once);
	if (!ceph_inode_cachep)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_cap_cachep)
		goto undo_inode;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_dentry_cachep)
		goto undo_cap;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_file_cachep)
		goto undo_dentry;

	return 0;

	/* unwind in reverse order of creation */
undo_dentry:
	kmem_cache_destroy(ceph_dentry_cachep);
undo_cap:
	kmem_cache_destroy(ceph_cap_cachep);
undo_inode:
	kmem_cache_destroy(ceph_inode_cachep);
	return -ENOMEM;
}
/*
 * Destroy the four slab caches set up by init_caches(), in the order
 * inode, cap, dentry, file.
 */
static void destroy_caches(void)
{
	struct kmem_cache *caches[] = {
		ceph_inode_cachep,
		ceph_cap_cachep,
		ceph_dentry_cachep,
		ceph_file_cachep,
	};
	unsigned int i;

	for (i = 0; i < sizeof(caches) / sizeof(caches[0]); i++)
		kmem_cache_destroy(caches[i]);
}
/*
* true if we have the mon map (and have thus joined the cluster)
* ceph_umount_begin - initiate forced umount. Tear down the
* mount, skipping steps that may hang while waiting for server(s).
*/
static int have_mon_and_osd_map(struct ceph_client *client)
static void ceph_umount_begin(struct super_block *sb)
{
return client->monc.monmap && client->monc.monmap->epoch &&
client->osdc.osdmap && client->osdc.osdmap->epoch;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
dout("ceph_umount_begin - starting forced umount\n");
if (!fsc)
return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
return;
}
/* Superblock operation table for ceph superblocks. */
static const struct super_operations ceph_super_ops = {
	/* inode handling */
	.alloc_inode	= ceph_alloc_inode,
	.destroy_inode	= ceph_destroy_inode,
	.write_inode	= ceph_write_inode,

	/* reporting */
	.statfs		= ceph_statfs,
	.show_options	= ceph_show_options,

	/* sync and teardown */
	.sync_fs	= ceph_sync_fs,
	.umount_begin	= ceph_umount_begin,
	.put_super	= ceph_put_super,
};
/*
* Bootstrap mount by opening the root directory. Note the mount
* @started time from caller, and time out if this takes too long.
*/
static struct dentry *open_root_dentry(struct ceph_client *client,
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
const char *path,
unsigned long started)
{
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req = NULL;
int err;
struct dentry *root;
......@@ -784,14 +609,14 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
req->r_started = started;
req->r_timeout = client->mount_args->mount_timeout * HZ;
req->r_timeout = fsc->client->options->mount_timeout * HZ;
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err == 0) {
dout("open_root_inode success\n");
if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
client->sb->s_root == NULL)
fsc->sb->s_root == NULL)
root = d_alloc_root(req->r_target_inode);
else
root = d_obtain_alias(req->r_target_inode);
......@@ -804,105 +629,86 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
return root;
}
/*
* mount: join the ceph cluster, and open root directory.
*/
static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
const char *path)
{
struct ceph_entity_addr *myaddr = NULL;
int err;
unsigned long timeout = client->mount_args->mount_timeout * HZ;
unsigned long started = jiffies; /* note the start time */
struct dentry *root;
int first = 0; /* first vfsmount for this super_block */
dout("mount start\n");
mutex_lock(&client->mount_mutex);
/* initialize the messenger */
if (client->msgr == NULL) {
if (ceph_test_opt(client, MYIP))
myaddr = &client->mount_args->my_addr;
client->msgr = ceph_messenger_create(myaddr);
if (IS_ERR(client->msgr)) {
err = PTR_ERR(client->msgr);
client->msgr = NULL;
goto out;
}
client->msgr->nocrc = ceph_test_opt(client, NOCRC);
}
mutex_lock(&fsc->client->mount_mutex);
/* open session, and wait for mon, mds, and osd maps */
err = ceph_monc_open_session(&client->monc);
err = __ceph_open_session(fsc->client, started);
if (err < 0)
goto out;
while (!have_mon_and_osd_map(client)) {
err = -EIO;
if (timeout && time_after_eq(jiffies, started + timeout))
goto out;
/* wait */
dout("mount waiting for mon_map\n");
err = wait_event_interruptible_timeout(client->auth_wq,
have_mon_and_osd_map(client) || (client->auth_err < 0),
timeout);
if (err == -EINTR || err == -ERESTARTSYS)
goto out;
if (client->auth_err < 0) {
err = client->auth_err;
goto out;
}
}
dout("mount opening root\n");
root = open_root_dentry(client, "", started);
root = open_root_dentry(fsc, "", started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto out;
}
if (client->sb->s_root)
if (fsc->sb->s_root) {
dput(root);
else
client->sb->s_root = root;
} else {
fsc->sb->s_root = root;
first = 1;
err = ceph_fs_debugfs_init(fsc);
if (err < 0)
goto fail;
}
if (path[0] == 0) {
dget(root);
} else {
dout("mount opening base mountpoint\n");
root = open_root_dentry(client, path, started);
root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
dput(client->sb->s_root);
client->sb->s_root = NULL;
goto out;
goto fail;
}
}
mnt->mnt_root = root;
mnt->mnt_sb = client->sb;
mnt->mnt_sb = fsc->sb;
client->mount_state = CEPH_MOUNT_MOUNTED;
fsc->mount_state = CEPH_MOUNT_MOUNTED;
dout("mount success\n");
err = 0;
out:
mutex_unlock(&client->mount_mutex);
mutex_unlock(&fsc->client->mount_mutex);
return err;
fail:
if (first) {
dput(fsc->sb->s_root);
fsc->sb->s_root = NULL;
}
goto out;
}
static int ceph_set_super(struct super_block *s, void *data)
{
struct ceph_client *client = data;
struct ceph_fs_client *fsc = data;
int ret;
dout("set_super %p data %p\n", s, data);
s->s_flags = client->mount_args->sb_flags;
s->s_flags = fsc->mount_options->sb_flags;
s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
s->s_fs_info = client;
client->sb = s;
s->s_fs_info = fsc;
fsc->sb = s;
s->s_op = &ceph_super_ops;
s->s_export_op = &ceph_export_ops;
......@@ -917,7 +723,7 @@ static int ceph_set_super(struct super_block *s, void *data)
fail:
s->s_fs_info = NULL;
client->sb = NULL;
fsc->sb = NULL;
return ret;
}
......@@ -926,30 +732,23 @@ static int ceph_set_super(struct super_block *s, void *data)
*/
static int ceph_compare_super(struct super_block *sb, void *data)
{
struct ceph_client *new = data;
struct ceph_mount_args *args = new->mount_args;
struct ceph_client *other = ceph_sb_to_client(sb);
int i;
struct ceph_fs_client *new = data;
struct ceph_mount_options *fsopt = new->mount_options;
struct ceph_options *opt = new->client->options;
struct ceph_fs_client *other = ceph_sb_to_client(sb);
dout("ceph_compare_super %p\n", sb);
if (args->flags & CEPH_OPT_FSID) {
if (ceph_fsid_compare(&args->fsid, &other->fsid)) {
dout("fsid doesn't match\n");
return 0;
}
} else {
/* do we share (a) monitor? */
for (i = 0; i < new->monc.monmap->num_mon; i++)
if (ceph_monmap_contains(other->monc.monmap,
&new->monc.monmap->mon_inst[i].addr))
break;
if (i == new->monc.monmap->num_mon) {
dout("mon ip not part of monmap\n");
return 0;
}
dout("mon ip matches existing sb %p\n", sb);
if (compare_mount_options(fsopt, opt, other)) {
dout("monitor(s)/mount options don't match\n");
return 0;
}
if (args->sb_flags != other->mount_args->sb_flags) {
if ((opt->flags & CEPH_OPT_FSID) &&
ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
dout("fsid doesn't match\n");
return 0;
}
if (fsopt->sb_flags != other->mount_options->sb_flags) {
dout("flags differ\n");
return 0;
}
......@@ -961,19 +760,20 @@ static int ceph_compare_super(struct super_block *sb, void *data)
*/
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
static int ceph_register_bdi(struct super_block *sb,
struct ceph_fs_client *fsc)
{
int err;
/* set ra_pages based on rsize mount option? */
if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
client->backing_dev_info.ra_pages =
(client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
fsc->backing_dev_info.ra_pages =
(fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>> PAGE_SHIFT;
err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d",
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
atomic_long_inc_return(&bdi_seq));
if (!err)
sb->s_bdi = &client->backing_dev_info;
sb->s_bdi = &fsc->backing_dev_info;
return err;
}
......@@ -982,46 +782,52 @@ static int ceph_get_sb(struct file_system_type *fs_type,
struct vfsmount *mnt)
{
struct super_block *sb;
struct ceph_client *client;
struct ceph_fs_client *fsc;
int err;
int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
const char *path = NULL;
struct ceph_mount_args *args;
struct ceph_mount_options *fsopt = NULL;
struct ceph_options *opt = NULL;
dout("ceph_get_sb\n");
args = parse_mount_args(flags, data, dev_name, &path);
if (IS_ERR(args)) {
err = PTR_ERR(args);
err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
if (err < 0)
goto out_final;
}
/* create client (which we may/may not use) */
client = ceph_create_client(args);
if (IS_ERR(client)) {
err = PTR_ERR(client);
fsc = create_fs_client(fsopt, opt);
if (IS_ERR(fsc)) {
err = PTR_ERR(fsc);
kfree(fsopt);
kfree(opt);
goto out_final;
}
if (client->mount_args->flags & CEPH_OPT_NOSHARE)
err = ceph_mdsc_init(fsc);
if (err < 0)
goto out;
if (ceph_test_opt(fsc->client, NOSHARE))
compare_super = NULL;
sb = sget(fs_type, compare_super, ceph_set_super, client);
sb = sget(fs_type, compare_super, ceph_set_super, fsc);
if (IS_ERR(sb)) {
err = PTR_ERR(sb);
goto out;
}
if (ceph_sb_to_client(sb) != client) {
ceph_destroy_client(client);
client = ceph_sb_to_client(sb);
dout("get_sb got existing client %p\n", client);
if (ceph_sb_to_client(sb) != fsc) {
ceph_mdsc_destroy(fsc);
destroy_fs_client(fsc);
fsc = ceph_sb_to_client(sb);
dout("get_sb got existing client %p\n", fsc);
} else {
dout("get_sb using new client %p\n", client);
err = ceph_register_bdi(sb, client);
dout("get_sb using new client %p\n", fsc);
err = ceph_register_bdi(sb, fsc);
if (err < 0)
goto out_splat;
}
err = ceph_mount(client, mnt, path);
err = ceph_mount(fsc, mnt, path);
if (err < 0)
goto out_splat;
dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
......@@ -1029,12 +835,13 @@ static int ceph_get_sb(struct file_system_type *fs_type,
return 0;
out_splat:
ceph_mdsc_close_sessions(&client->mdsc);
ceph_mdsc_close_sessions(fsc->mdsc);
deactivate_locked_super(sb);
goto out_final;
out:
ceph_destroy_client(client);
ceph_mdsc_destroy(fsc);
destroy_fs_client(fsc);
out_final:
dout("ceph_get_sb fail %d\n", err);
return err;
......@@ -1042,11 +849,12 @@ static int ceph_get_sb(struct file_system_type *fs_type,
static void ceph_kill_sb(struct super_block *s)
{
struct ceph_client *client = ceph_sb_to_client(s);
struct ceph_fs_client *fsc = ceph_sb_to_client(s);
dout("kill_sb %p\n", s);
ceph_mdsc_pre_umount(&client->mdsc);
ceph_mdsc_pre_umount(fsc->mdsc);
kill_anon_super(s); /* will call put_super after sb is r/o */
ceph_destroy_client(client);
ceph_mdsc_destroy(fsc);
destroy_fs_client(fsc);
}
static struct file_system_type ceph_fs_type = {
......@@ -1062,36 +870,20 @@ static struct file_system_type ceph_fs_type = {
static int __init init_ceph(void)
{
int ret = 0;
ret = ceph_debugfs_init();
if (ret < 0)
goto out;
ret = ceph_msgr_init();
if (ret < 0)
goto out_debugfs;
ret = init_caches();
int ret = init_caches();
if (ret)
goto out_msgr;
goto out;
ret = register_filesystem(&ceph_fs_type);
if (ret)
goto out_icache;
pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
out_icache:
destroy_caches();
out_msgr:
ceph_msgr_exit();
out_debugfs:
ceph_debugfs_cleanup();
out:
return ret;
}
......@@ -1101,8 +893,6 @@ static void __exit exit_ceph(void)
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
destroy_caches();
ceph_msgr_exit();
ceph_debugfs_cleanup();
}
module_init(init_ceph);
......
#ifndef _FS_CEPH_SUPER_H
#define _FS_CEPH_SUPER_H
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <asm/unaligned.h>
#include <linux/backing-dev.h>
......@@ -14,13 +14,7 @@
#include <linux/writeback.h>
#include <linux/slab.h>
#include "types.h"
#include "messenger.h"
#include "msgpool.h"
#include "mon_client.h"
#include "mds_client.h"
#include "osd_client.h"
#include "ceph_fs.h"
#include <linux/ceph/libceph.h>
/* f_type in struct statfs */
#define CEPH_SUPER_MAGIC 0x00c36400
......@@ -30,42 +24,25 @@
#define CEPH_BLOCK_SHIFT 20 /* 1 MB */
#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
/*
* Supported features
*/
#define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK
#define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR
#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
/*
* mount options
*/
#define CEPH_OPT_FSID (1<<0)
#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
#define CEPH_OPT_DIRSTAT (1<<4) /* funky `cat dirname` for stats */
#define CEPH_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
#define CEPH_OPT_NOCRC (1<<6) /* no data crc on writes */
#define CEPH_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
#define CEPH_OPT_DEFAULT (CEPH_OPT_RBYTES)
#define ceph_set_mount_opt(fsc, opt) \
(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
#define ceph_test_mount_opt(fsc, opt) \
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
#define ceph_set_opt(client, opt) \
(client)->mount_args->flags |= CEPH_OPT_##opt;
#define ceph_test_opt(client, opt) \
(!!((client)->mount_args->flags & CEPH_OPT_##opt))
#define CEPH_MAX_READDIR_DEFAULT 1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
struct ceph_mount_args {
int sb_flags;
struct ceph_mount_options {
int flags;
struct ceph_fsid fsid;
struct ceph_entity_addr my_addr;
int num_mon;
struct ceph_entity_addr *mon_addr;
int mount_timeout;
int osd_idle_ttl;
int osd_timeout;
int osd_keepalive_timeout;
int sb_flags;
int wsize;
int rsize; /* max readahead */
int congestion_kb; /* max writeback in flight */
......@@ -73,82 +50,25 @@ struct ceph_mount_args {
int cap_release_safety;
int max_readdir; /* max readdir result (entires) */
int max_readdir_bytes; /* max readdir result (bytes) */
char *snapdir_name; /* default ".snap" */
char *name;
char *secret;
};
/*
* defaults
*/
#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */
#define CEPH_OSD_KEEPALIVE_DEFAULT 5
#define CEPH_OSD_IDLE_TTL_DEFAULT 60
#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */
#define CEPH_MAX_READDIR_DEFAULT 1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
#define CEPH_AUTH_NAME_DEFAULT "guest"
/*
 * Delay telling the MDS we no longer want caps, in case we reopen
 * the file. Delay a minimum amount of time, even if we send a cap
 * message for some other reason. Otherwise, take the opportunity to
 * update the mds to avoid sending another message later.
 */
#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
/* mount state */
enum {
CEPH_MOUNT_MOUNTING,
CEPH_MOUNT_MOUNTED,
CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN,
};
/*
 * subtract jiffies
 *
 * Returns a - b.  BUG_ON() fires if time_after(b, a) holds, so callers
 * never see an interval that went backwards.
 */
static inline unsigned long time_sub(unsigned long a, unsigned long b)
{
	BUG_ON(time_after(b, a));
	/*
	 * Subtract in unsigned arithmetic.  The value is the same modulo
	 * 2^BITS_PER_LONG, but unlike (long)a - (long)b it cannot run into
	 * signed overflow when a and b sit on opposite sides of the sign
	 * boundary.
	 */
	return a - b;
}
/*
* per-filesystem client state
*
* possibly shared by multiple mount points, if they are
* mounting the same ceph filesystem/cluster.
*/
struct ceph_client {
struct ceph_fsid fsid;
bool have_fsid;
/*
* everything above this point can be memcmp'd; everything below
* is handled in compare_mount_options()
*/
struct mutex mount_mutex; /* serialize mount attempts */
struct ceph_mount_args *mount_args;
char *snapdir_name; /* default ".snap" */
};
struct ceph_fs_client {
struct super_block *sb;
unsigned long mount_state;
wait_queue_head_t auth_wq;
int auth_err;
struct ceph_mount_options *mount_options;
struct ceph_client *client;
unsigned long mount_state;
int min_caps; /* min caps i added */
struct ceph_messenger *msgr; /* messenger instance */
struct ceph_mon_client monc;
struct ceph_mds_client mdsc;
struct ceph_osd_client osdc;
struct ceph_mds_client *mdsc;
/* writeback */
mempool_t *wb_pagevec_pool;
......@@ -160,14 +80,14 @@ struct ceph_client {
struct backing_dev_info backing_dev_info;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_monmap;
struct dentry *debugfs_mdsmap, *debugfs_osdmap;
struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps;
struct dentry *debugfs_dentry_lru, *debugfs_caps;
struct dentry *debugfs_congestion_kb;
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
#endif
};
/*
* File i/o capability. This tracks shared state with the metadata
* server that allows us to cache or writeback attributes or to read
......@@ -275,6 +195,20 @@ struct ceph_inode_xattr {
int should_free_val;
};
/*
 * Ceph dentry state
 *
 * NOTE(review): the lease_* members look like per-dentry MDS lease
 * bookkeeping and lru/dentry like linkage for a dentry LRU -- the code
 * that maintains them is not in this header, so confirm before relying
 * on that reading.
 */
struct ceph_dentry_info {
struct ceph_mds_session *lease_session;
u32 lease_gen, lease_shared_gen;
u32 lease_seq;
/* unsigned long, so presumably jiffies values -- TODO confirm */
unsigned long lease_renew_after, lease_renew_from;
struct list_head lru;
struct dentry *dentry;
u64 time;
u64 offset;
};
struct ceph_inode_xattrs_info {
/*
* (still encoded) xattr blob. we avoid the overhead of parsing
......@@ -296,11 +230,6 @@ struct ceph_inode_xattrs_info {
/*
* Ceph inode.
*/
#define CEPH_I_COMPLETE 1 /* we have complete directory cached */
#define CEPH_I_NODELAY 4 /* do not delay cap release */
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
struct ceph_inode_info {
struct ceph_vino i_vino; /* ceph ino + snap */
......@@ -391,6 +320,63 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
return container_of(inode, struct ceph_inode_info, vfs_inode);
}
static inline struct ceph_vino ceph_vino(struct inode *inode)
{
return ceph_inode(inode)->i_vino;
}
/*
* ino_t is <64 bits on many architectures, blech.
*
* don't include snap in ino hash, at least for now.
*/
static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
{
ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */
#if BITS_PER_LONG == 32
ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
if (!ino)
ino = 1;
#endif
return ino;
}
/* for printf-style formatting */
#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
static inline u64 ceph_ino(struct inode *inode)
{
return ceph_inode(inode)->i_vino.ino;
}
static inline u64 ceph_snap(struct inode *inode)
{
return ceph_inode(inode)->i_vino.snap;
}
static inline int ceph_ino_compare(struct inode *inode, void *data)
{
struct ceph_vino *pvino = (struct ceph_vino *)data;
struct ceph_inode_info *ci = ceph_inode(inode);
return ci->i_vino.ino == pvino->ino &&
ci->i_vino.snap == pvino->snap;
}
/*
 * Look up the inode for @vino on @sb with ilookup5(), hashing with
 * ceph_vino_to_ino() and matching with ceph_ino_compare().
 */
static inline struct inode *ceph_find_inode(struct super_block *sb,
					    struct ceph_vino vino)
{
	return ilookup5(sb, ceph_vino_to_ino(vino), ceph_ino_compare, &vino);
}
/*
* Ceph inode.
*/
#define CEPH_I_COMPLETE 1 /* we have complete directory cached */
#define CEPH_I_NODELAY 4 /* do not delay cap release */
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
static inline void ceph_i_clear(struct inode *inode, unsigned mask)
{
struct ceph_inode_info *ci = ceph_inode(inode);
......@@ -432,20 +418,6 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
struct ceph_inode_frag *pfrag,
int *found);
/*
 * Ceph dentry state
 *
 * NOTE(review): the lease_* members look like per-dentry MDS lease
 * bookkeeping and lru/dentry like linkage for a dentry LRU -- the code
 * that maintains them is not in this header, so confirm before relying
 * on that reading.
 */
struct ceph_dentry_info {
struct ceph_mds_session *lease_session;
u32 lease_gen, lease_shared_gen;
u32 lease_seq;
/* unsigned long, so presumably jiffies values -- TODO confirm */
unsigned long lease_renew_after, lease_renew_from;
struct list_head lru;
struct dentry *dentry;
u64 time;
u64 offset;
};
static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
{
return (struct ceph_dentry_info *)dentry->d_fsdata;
......@@ -456,22 +428,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
return ((loff_t)frag << 32) | (loff_t)off;
}
/*
* ino_t is <64 bits on many architectures, blech.
*
* don't include snap in ino hash, at least for now.
*/
static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
{
ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */
#if BITS_PER_LONG == 32
ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
if (!ino)
ino = 1;
#endif
return ino;
}
static inline int ceph_set_ino_cb(struct inode *inode, void *data)
{
ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
......@@ -479,39 +435,6 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data)
return 0;
}
static inline struct ceph_vino ceph_vino(struct inode *inode)
{
return ceph_inode(inode)->i_vino;
}
/* for printf-style formatting */
#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
static inline u64 ceph_ino(struct inode *inode)
{
return ceph_inode(inode)->i_vino.ino;
}
static inline u64 ceph_snap(struct inode *inode)
{
return ceph_inode(inode)->i_vino.snap;
}
static inline int ceph_ino_compare(struct inode *inode, void *data)
{
struct ceph_vino *pvino = (struct ceph_vino *)data;
struct ceph_inode_info *ci = ceph_inode(inode);
return ci->i_vino.ino == pvino->ino &&
ci->i_vino.snap == pvino->snap;
}
/*
 * Look up the inode for @vino on @sb with ilookup5(), hashing with
 * ceph_vino_to_ino() and matching with ceph_ino_compare().
 */
static inline struct inode *ceph_find_inode(struct super_block *sb,
					    struct ceph_vino vino)
{
	return ilookup5(sb, ceph_vino_to_ino(vino), ceph_ino_compare, &vino);
}
/*
* caps helpers
*/
......@@ -576,18 +499,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx, int need);
extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx);
extern void ceph_reservation_status(struct ceph_client *client,
extern void ceph_reservation_status(struct ceph_fs_client *client,
int *total, int *avail, int *used,
int *reserved, int *min);
static inline struct ceph_client *ceph_inode_to_client(struct inode *inode)
static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
{
return (struct ceph_client *)inode->i_sb->s_fs_info;
return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
}
static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb)
static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
{
return (struct ceph_client *)sb->s_fs_info;
return (struct ceph_fs_client *)sb->s_fs_info;
}
......@@ -616,51 +539,6 @@ struct ceph_file_info {
/*
* snapshots
*/
/*
 * A "snap context" is the set of existing snapshots when we
 * write data. It is used by the OSD to guide its COW behavior.
 *
 * The ceph_snap_context is refcounted, and attached to each dirty
 * page, indicating which context the dirty data belonged to when it
 * was dirtied.
 */
struct ceph_snap_context {
/* reference count; see ceph_get_snap_context()/ceph_put_snap_context() */
atomic_t nref;
u64 seq;
/* presumably the number of entries in snaps[] -- TODO confirm */
int num_snaps;
/* flexible array member; must stay last */
u64 snaps[];
};
static inline struct ceph_snap_context *
ceph_get_snap_context(struct ceph_snap_context *sc)
{
/*
printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
atomic_read(&sc->nref)+1);
*/
if (sc)
atomic_inc(&sc->nref);
return sc;
}
/*
 * Drop a reference on @sc; whoever drops the last one kfree()s it.
 * A NULL @sc is a no-op.
 */
static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
{
	if (sc && atomic_dec_and_test(&sc->nref))
		kfree(sc);
}
/*
* A "snap realm" describes a subset of the file hierarchy sharing
* the same set of snapshots that apply to it. The realms themselves
......@@ -699,16 +577,33 @@ struct ceph_snap_realm {
spinlock_t inodes_with_caps_lock;
};
/*
* calculate the number of pages a given length and offset map onto,
* if we align the data.
*/
static inline int calc_pages_for(u64 off, u64 len)
static inline int default_congestion_kb(void)
{
return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
(off >> PAGE_CACHE_SHIFT);
int congestion_kb;
/*
* Copied from NFS
*
* congestion size, scale with available memory.
*
* 64MB: 8192k
* 128MB: 11585k
* 256MB: 16384k
* 512MB: 23170k
* 1GB: 32768k
* 2GB: 46340k
* 4GB: 65536k
* 8GB: 92681k
* 16GB: 131072k
*
* This allows larger machines to have larger/more transfers.
* Limit the default to 256M
*/
congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
if (congestion_kb > 256*1024)
congestion_kb = 256*1024;
return congestion_kb;
}
......@@ -741,16 +636,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
ci_item)->writing;
}
/* super.c */
extern struct kmem_cache *ceph_inode_cachep;
extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
/* inode.c */
extern const struct inode_operations ceph_file_iops;
......@@ -857,12 +742,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
/* file.c */
extern const struct file_operations ceph_file_fops;
extern const struct address_space_operations ceph_aops;
extern int ceph_copy_to_page_vector(struct page **pages,
const char *data,
loff_t off, size_t len);
extern int ceph_copy_from_page_vector(struct page **pages,
char *data,
loff_t off, size_t len);
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
extern int ceph_open(struct inode *inode, struct file *file);
extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
struct nameidata *nd, int mode,
int locked_dir);
extern int ceph_release(struct inode *inode, struct file *filp);
extern void ceph_release_page_vector(struct page **pages, int num_pages);
/* dir.c */
extern const struct file_operations ceph_dir_fops;
......@@ -892,12 +783,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* export.c */
extern const struct export_operations ceph_export_ops;
/* debugfs.c */
extern int ceph_debugfs_init(void);
extern void ceph_debugfs_cleanup(void);
extern int ceph_debugfs_client_init(struct ceph_client *client);
extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
/* locks.c */
extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
......@@ -914,4 +799,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
return NULL;
}
/* debugfs.c */
extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
#endif /* _FS_CEPH_SUPER_H */
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include "super.h"
#include "decode.h"
#include "mds_client.h"
#include <linux/ceph/decode.h>
#include <linux/xattr.h>
#include <linux/slab.h>
......@@ -620,12 +623,12 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
const char *value, size_t size, int flags)
{
struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct inode *parent_inode = dentry->d_parent->d_inode;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_mds_client *mdsc = fsc->mdsc;
int err;
int i, nr_pages;
struct page **pages = NULL;
......@@ -777,8 +780,8 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
static int ceph_send_removexattr(struct dentry *dentry, const char *name)
{
struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = dentry->d_inode;
struct inode *parent_inode = dentry->d_parent->d_inode;
struct ceph_mds_request *req;
......
#ifndef _FS_CEPH_AUTH_H
#define _FS_CEPH_AUTH_H
#include "types.h"
#include "buffer.h"
#include <linux/ceph/types.h>
#include <linux/ceph/buffer.h>
/*
* Abstract interface for communicating with the authenticate module.
......
......@@ -3,7 +3,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#ifdef CONFIG_CEPH_FS_PRETTYDEBUG
#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
/*
* wrap pr_debug to include a filename:lineno prefix on each line.
......@@ -14,7 +14,8 @@
# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
extern const char *ceph_file_part(const char *s, int len);
# define dout(fmt, ...) \
pr_debug(" %12.12s:%-4d : " fmt, \
pr_debug("%.*s %12.12s:%-4d : " fmt, \
8 - (int)sizeof(KBUILD_MODNAME), " ", \
ceph_file_part(__FILE__, sizeof(__FILE__)), \
__LINE__, ##__VA_ARGS__)
# else
......
#ifndef _FS_CEPH_DEBUGFS_H
#define _FS_CEPH_DEBUGFS_H
#include "ceph_debug.h"
#include "types.h"
/*
 * CEPH_DEFINE_SHOW_FUNC(name) - seq_file boilerplate for a debugfs file.
 *
 * Given a show function @name, defines name##_open() and a matching
 * name##_fops table (seq_read / seq_lseek / single_release).  The open
 * handler makes inode->i_private available to the show function as
 * seq_file->private.
 *
 * The private pointer is handed to single_open() directly instead of being
 * stored through file->private_data afterwards: single_open() only sets up
 * file->private_data on success, so poking at it unconditionally would
 * dereference an invalid pointer when the open fails.
 */
#define CEPH_DEFINE_SHOW_FUNC(name)					\
static int name##_open(struct inode *inode, struct file *file)		\
{									\
	return single_open(file, name, inode->i_private);		\
}									\
									\
static const struct file_operations name##_fops = {			\
	.open		= name##_open,					\
	.read		= seq_read,					\
	.llseek		= seq_lseek,					\
	.release	= single_release,				\
};
/* debugfs.c */
extern int ceph_debugfs_init(void);
extern void ceph_debugfs_cleanup(void);
extern int ceph_debugfs_client_init(struct ceph_client *client);
extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
#endif
#ifndef _FS_CEPH_LIBCEPH_H
#define _FS_CEPH_LIBCEPH_H
#include "ceph_debug.h"
#include <asm/unaligned.h>
#include <linux/backing-dev.h>
#include <linux/completion.h>
#include <linux/exportfs.h>
#include <linux/fs.h>
#include <linux/mempool.h>
#include <linux/pagemap.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/slab.h>
#include "types.h"
#include "messenger.h"
#include "msgpool.h"
#include "mon_client.h"
#include "osd_client.h"
#include "ceph_fs.h"
/*
* Supported features
*/
#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR
/*
 * mount options
 *
 * Bit flags stored in ceph_options.flags.
 */
#define CEPH_OPT_FSID      (1<<0)
#define CEPH_OPT_NOSHARE   (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP      (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC     (1<<3) /* no data crc on writes */

/* no trailing ';' here, so the constant is usable inside expressions */
#define CEPH_OPT_DEFAULT   (0)

/*
 * Set / test a flag on (client)->options; @opt is the flag name without
 * its CEPH_OPT_ prefix.
 *
 * NOTE(review): ceph_set_opt() expands to a complete statement including
 * its own ';'.  Left as-is so existing call sites are unaffected, but it
 * is unsafe as the unbraced body of an if/else.
 */
#define ceph_set_opt(client, opt) \
	(client)->options->flags |= CEPH_OPT_##opt;
#define ceph_test_opt(client, opt) \
	(!!((client)->options->flags & CEPH_OPT_##opt))
/*
 * Options for a ceph_client.  Instances are produced by
 * ceph_parse_options() and released with ceph_destroy_options().
 *
 * Member order matters: see the comment in the middle of the struct.
 */
struct ceph_options {
/* CEPH_OPT_* bits */
int flags;
struct ceph_fsid fsid;
struct ceph_entity_addr my_addr;
int mount_timeout;
int osd_idle_ttl;
int osd_timeout;
int osd_keepalive_timeout;
/*
* any type that can't be simply compared or doesn't need
* to be compared should go beyond this point;
* ceph_compare_options() should be updated accordingly
*/
struct ceph_entity_addr *mon_addr; /* should be the first
pointer type of args */
int num_mon;
char *name;
char *secret;
};
/*
* defaults
*/
#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */
#define CEPH_OSD_KEEPALIVE_DEFAULT 5
#define CEPH_OSD_IDLE_TTL_DEFAULT 60
#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
#define CEPH_AUTH_NAME_DEFAULT "guest"
/*
 * Delay telling the MDS we no longer want caps, in case we reopen
 * the file. Delay a minimum amount of time, even if we send a cap
 * message for some other reason. Otherwise, take the opportunity to
 * update the mds to avoid sending another message later.
 */
#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
/* mount state */
enum {
CEPH_MOUNT_MOUNTING,
CEPH_MOUNT_MOUNTED,
CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN,
};
/*
 * subtract jiffies
 *
 * Returns a - b.  BUG_ON() fires if time_after(b, a) holds, so callers
 * never see an interval that went backwards.
 */
static inline unsigned long time_sub(unsigned long a, unsigned long b)
{
	BUG_ON(time_after(b, a));
	/*
	 * Subtract in unsigned arithmetic.  The value is the same modulo
	 * 2^BITS_PER_LONG, but unlike (long)a - (long)b it cannot run into
	 * signed overflow when a and b sit on opposite sides of the sign
	 * boundary.
	 */
	return a - b;
}
struct ceph_mds_client;
/*
 * per client state
 *
 * possibly shared by multiple mount points, if they are
 * mounting the same ceph filesystem/cluster.
 */
struct ceph_client {
struct ceph_fsid fsid;
bool have_fsid;
/* presumably the opaque pointer given to ceph_create_client() -- confirm */
void *private;
struct ceph_options *options;
struct mutex mount_mutex; /* serialize mount attempts */
wait_queue_head_t auth_wq;
int auth_err;
/*
* hook for the user of this client; NOTE(review): expected to receive
* monitor messages the mon client does not handle itself (e.g. the
* mds map) -- confirm against the mon client dispatch code
*/
int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
u32 supported_features;
u32 required_features;
struct ceph_messenger *msgr; /* messenger instance */
struct ceph_mon_client monc;
struct ceph_osd_client osdc;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_dir;
struct dentry *debugfs_monmap;
struct dentry *debugfs_osdmap;
#endif
};
/*
* snapshots
*/
/*
 * A "snap context" is the set of existing snapshots when we
 * write data. It is used by the OSD to guide its COW behavior.
 *
 * The ceph_snap_context is refcounted, and attached to each dirty
 * page, indicating which context the dirty data belonged to when it
 * was dirtied.
 */
struct ceph_snap_context {
/* reference count; see ceph_get_snap_context()/ceph_put_snap_context() */
atomic_t nref;
u64 seq;
/* presumably the number of entries in snaps[] -- TODO confirm */
int num_snaps;
/* flexible array member; must stay last */
u64 snaps[];
};
static inline struct ceph_snap_context *
ceph_get_snap_context(struct ceph_snap_context *sc)
{
/*
printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
atomic_read(&sc->nref)+1);
*/
if (sc)
atomic_inc(&sc->nref);
return sc;
}
/*
 * Drop a reference on @sc; whoever drops the last one kfree()s it.
 * A NULL @sc is a no-op.
 */
static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
{
	if (sc && atomic_dec_and_test(&sc->nref))
		kfree(sc);
}
/*
 * Number of pages that the byte range starting at @off and running for
 * @len bytes maps onto, if we align the data.
 */
static inline int calc_pages_for(u64 off, u64 len)
{
	u64 first_page = off >> PAGE_CACHE_SHIFT;
	u64 end_page = (off + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	return end_page - first_page;
}
/* ceph_common.c */
extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
extern struct kmem_cache *ceph_inode_cachep;
extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
extern int ceph_parse_options(struct ceph_options **popt, char *options,
const char *dev_name, const char *dev_name_end,
int (*parse_extra_token)(char *c, void *private),
void *private);
extern void ceph_destroy_options(struct ceph_options *opt);
extern int ceph_compare_options(struct ceph_options *new_opt,
struct ceph_client *client);
extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
void *private);
extern u64 ceph_client_id(struct ceph_client *client);
extern void ceph_destroy_client(struct ceph_client *client);
extern int __ceph_open_session(struct ceph_client *client,
unsigned long started);
extern int ceph_open_session(struct ceph_client *client);
/* pagevec.c */
/* page vector helpers; each was previously declared only once except
 * ceph_release_page_vector(), whose duplicate prototype is dropped here */
extern void ceph_release_page_vector(struct page **pages, int num_pages);

extern struct page **ceph_get_direct_page_vector(const char __user *data,
						 int num_pages,
						 loff_t off, size_t len);
extern void ceph_put_page_vector(struct page **pages, int num_pages);
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
extern int ceph_copy_user_to_page_vector(struct page **pages,
					 const char __user *data,
					 loff_t off, size_t len);
extern int ceph_copy_to_page_vector(struct page **pages,
				    const char *data,
				    loff_t off, size_t len);
extern int ceph_copy_from_page_vector(struct page **pages,
				      char *data,
				      loff_t off, size_t len);
extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
					 loff_t off, size_t len);
extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
#endif /* _FS_CEPH_LIBCEPH_H */
......@@ -65,6 +65,9 @@ struct ceph_messenger {
*/
u32 global_seq;
spinlock_t global_seq_lock;
u32 supported_features;
u32 required_features;
};
/*
......@@ -209,7 +212,7 @@ struct ceph_connection {
};
extern const char *pr_addr(const struct sockaddr_storage *ss);
extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
int max_count, int *count);
......@@ -220,7 +223,8 @@ extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void);
extern struct ceph_messenger *ceph_messenger_create(
struct ceph_entity_addr *myaddr);
struct ceph_entity_addr *myaddr,
u32 features, u32 required);
extern void ceph_messenger_destroy(struct ceph_messenger *);
extern void ceph_con_init(struct ceph_messenger *msgr,
......
......@@ -79,6 +79,7 @@ struct ceph_mon_client {
u64 last_tid;
/* mds/osd map */
int want_mdsmap;
int want_next_osdmap; /* 1 = want, 2 = want+asked */
u32 have_osdmap, have_mdsmap;
......
......@@ -69,6 +69,7 @@ struct ceph_osd_request {
struct list_head r_unsafe_item;
struct inode *r_inode; /* for use by callbacks */
void *r_priv; /* ditto */
char r_oid[40]; /* object name */
int r_oid_len;
......
......@@ -4,7 +4,7 @@
#include <linux/rbtree.h>
#include "types.h"
#include "ceph_fs.h"
#include "crush/crush.h"
#include <linux/crush/crush.h>
/*
* The osd map describes the current membership of the osd cluster and
......
......@@ -293,6 +293,7 @@ source "net/wimax/Kconfig"
source "net/rfkill/Kconfig"
source "net/9p/Kconfig"
source "net/caif/Kconfig"
source "net/ceph/Kconfig"
endif # if NET
......@@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL) += sysctl_net.o
endif
obj-$(CONFIG_WIMAX) += wimax/
obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/
obj-$(CONFIG_CEPH_LIB) += ceph/
config CEPH_LIB
tristate "Ceph core library (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
select LIBCRC32C
select CRYPTO_AES
select CRYPTO
default n
help
Choose Y or M here to include cephlib, which provides the
common functionality to both the Ceph filesystem and
to the rados block device (rbd).
More information at http://ceph.newdream.net/.
If unsure, say N.
config CEPH_LIB_PRETTYDEBUG
bool "Include file:line in ceph debug output"
depends on CEPH_LIB
default n
help
If you say Y here, debug output will include a filename and
line to aid debugging. This increases kernel size and slows
execution slightly when debug call sites are enabled (e.g.,
via CONFIG_DYNAMIC_DEBUG).
If unsure, say N.
#
# Makefile for the Ceph core library (libceph).
#
ifneq ($(KERNELRELEASE),)
obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
debugfs.o \
auth.o auth_none.o \
crypto.o armor.o \
auth_x.o \
ceph_fs.o ceph_strings.o ceph_hash.o \
pagevec.o
else
#Otherwise we were called directly from the command
# line; invoke the kernel build system.
KERNELDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)
default: all
all:
$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
modules_install:
$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
clean:
$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
endif
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include "types.h"
#include <linux/ceph/types.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
#include "auth_none.h"
#include "auth_x.h"
#include "decode.h"
#include "super.h"
#include "messenger.h"
/*
* get protocol handler
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
#include "auth_none.h"
#include "auth.h"
#include "decode.h"
static void reset(struct ceph_auth_client *ac)
{
......
......@@ -2,8 +2,7 @@
#define _FS_CEPH_AUTH_NONE_H
#include <linux/slab.h>
#include "auth.h"
#include <linux/ceph/auth.h>
/*
* null security mode.
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
#include "crypto.h"
#include "auth_x.h"
#include "auth_x_protocol.h"
#include "crypto.h"
#include "auth.h"
#include "decode.h"
#define TEMP_TICKET_BUF_LEN 256
......
......@@ -3,8 +3,9 @@
#include <linux/rbtree.h>
#include <linux/ceph/auth.h>
#include "crypto.h"
#include "auth.h"
#include "auth_x_protocol.h"
/*
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/slab.h>
#include "buffer.h"
#include "decode.h"
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
......@@ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
dout("buffer_new %p\n", b);
return b;
}
EXPORT_SYMBOL(ceph_buffer_new);
void ceph_buffer_release(struct kref *kref)
{
......@@ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref)
}
kfree(b);
}
EXPORT_SYMBOL(ceph_buffer_release);
int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
{
......
#include <linux/ceph/ceph_debug.h>
#include <linux/backing-dev.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/inet.h>
#include <linux/in6.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/string.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/debugfs.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
/*
 * Return a pointer to the last path component of the first @len bytes
 * of @s, i.e. the text following the final '/' (/foo/bar/baz -> baz).
 * If there is no '/', @s itself is returned.
 */
const char *ceph_file_part(const char *s, int len)
{
	const char *p;

	/* walk backwards from the end until the previous byte is a '/' */
	for (p = s + len; p != s; p--) {
		if (p[-1] == '/')
			break;
	}
	return p;
}
EXPORT_SYMBOL(ceph_file_part);
/*
 * Translate a CEPH_MSG_* message type code into a short printable name.
 * Codes that are not listed yield "unknown".
 */
const char *ceph_msg_type_name(int type)
{
	static const struct {
		int type;
		const char *name;
	} msg_names[] = {
		{ CEPH_MSG_SHUTDOWN,			"shutdown" },
		{ CEPH_MSG_PING,			"ping" },
		{ CEPH_MSG_AUTH,			"auth" },
		{ CEPH_MSG_AUTH_REPLY,			"auth_reply" },
		{ CEPH_MSG_MON_MAP,			"mon_map" },
		{ CEPH_MSG_MON_GET_MAP,			"mon_get_map" },
		{ CEPH_MSG_MON_SUBSCRIBE,		"mon_subscribe" },
		{ CEPH_MSG_MON_SUBSCRIBE_ACK,		"mon_subscribe_ack" },
		{ CEPH_MSG_STATFS,			"statfs" },
		{ CEPH_MSG_STATFS_REPLY,		"statfs_reply" },
		{ CEPH_MSG_MDS_MAP,			"mds_map" },
		{ CEPH_MSG_CLIENT_SESSION,		"client_session" },
		{ CEPH_MSG_CLIENT_RECONNECT,		"client_reconnect" },
		{ CEPH_MSG_CLIENT_REQUEST,		"client_request" },
		{ CEPH_MSG_CLIENT_REQUEST_FORWARD,	"client_request_forward" },
		{ CEPH_MSG_CLIENT_REPLY,		"client_reply" },
		{ CEPH_MSG_CLIENT_CAPS,			"client_caps" },
		{ CEPH_MSG_CLIENT_CAPRELEASE,		"client_cap_release" },
		{ CEPH_MSG_CLIENT_SNAP,			"client_snap" },
		{ CEPH_MSG_CLIENT_LEASE,		"client_lease" },
		{ CEPH_MSG_OSD_MAP,			"osd_map" },
		{ CEPH_MSG_OSD_OP,			"osd_op" },
		{ CEPH_MSG_OSD_OPREPLY,			"osd_opreply" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(msg_names) / sizeof(msg_names[0]); i++) {
		if (msg_names[i].type == type)
			return msg_names[i].name;
	}
	return "unknown";
}
EXPORT_SYMBOL(ceph_msg_type_name);
/*
 * Initially learn our fsid, or verify an fsid matches.
 *
 * The first fsid passed in is copied into @client, the per-client
 * debugfs entries are created, and have_fsid is set.  Every later call
 * only compares @fsid against the recorded one.
 *
 * Returns 0 if the fsid was recorded or matches, -1 on a mismatch.
 */
int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
{
	if (client->have_fsid) {
		if (ceph_fsid_compare(&client->fsid, fsid)) {
			/* terminate the line so the next log message is not appended to it */
			pr_err("bad fsid, had %pU got %pU\n",
			       &client->fsid, fsid);
			return -1;
		}
	} else {
		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
		memcpy(&client->fsid, fsid, sizeof(*fsid));
		/* return value is ignored: a debugfs failure does not fail the check */
		ceph_debugfs_client_init(client);
		client->have_fsid = true;
	}
	return 0;
}
EXPORT_SYMBOL(ceph_check_fsid);
/*
 * strcmp() that tolerates NULL arguments: two NULLs compare equal,
 * a non-NULL first argument against NULL yields -1, and NULL against
 * a non-NULL second argument yields 1.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 == NULL)
		return s2 == NULL ? 0 : 1;
	if (s2 == NULL)
		return -1;
	return strcmp(s1, s2);
}
/*
 * Compare freshly parsed options against those of an existing client.
 *
 * Everything laid out before mon_addr in struct ceph_options is compared
 * bytewise, then name and secret are compared as (possibly NULL) strings.
 * If all of those agree, the options match as soon as any monitor address
 * in @new_opt is present in the client's current monmap.
 *
 * Returns 0 on a match and non-zero otherwise.
 */
int ceph_compare_options(struct ceph_options *new_opt,
			 struct ceph_client *client)
{
	struct ceph_options *cur = client->options;
	int fixed_len = offsetof(struct ceph_options, mon_addr);
	int diff;
	int m;

	diff = memcmp(new_opt, cur, fixed_len);
	if (!diff)
		diff = strcmp_null(new_opt->name, cur->name);
	if (!diff)
		diff = strcmp_null(new_opt->secret, cur->secret);
	if (diff)
		return diff;

	/* any matching mon ip implies a match */
	for (m = 0; m < new_opt->num_mon; m++) {
		if (ceph_monmap_contains(client->monc.monmap,
					 &new_opt->mon_addr[m]))
			return 0;
	}
	return -1;
}
EXPORT_SYMBOL(ceph_compare_options);
/*
 * Parse a textual fsid into @fsid.
 *
 * The string is consumed as 16 bytes, each written as two hex digits.
 * Punctuation characters found where a byte would start are skipped.
 * Parsing stops at the first other non-hex input or after 16 bytes.
 *
 * Returns 0 once 16 bytes have been stored, -EINVAL otherwise (in which
 * case @fsid may have been partially written).
 */
static int parse_fsid(const char *str, struct ceph_fsid *fsid)
{
	int i = 0;
	char tmp[3];
	int err = -EINVAL;
	unsigned int d;		/* %x stores through an unsigned int pointer */

	dout("parse_fsid '%s'\n", str);
	tmp[2] = 0;
	while (*str && i < 16) {
		if (ispunct(*str)) {
			str++;
			continue;
		}
		if (!isxdigit(str[0]) || !isxdigit(str[1]))
			break;
		tmp[0] = str[0];
		tmp[1] = str[1];
		if (sscanf(tmp, "%x", &d) < 1)
			break;
		fsid->fsid[i] = d & 0xff;
		i++;
		str += 2;
	}

	if (i == 16)
		err = 0;
	/* newline added so this debug line is terminated like the others */
	dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
	return err;
}
/*
 * ceph options
 *
 * Token ids for the options handled by ceph_parse_options().  The order
 * is significant: the parser classifies a token by comparing it against
 * the Opt_last_int and Opt_last_string markers, so options taking an
 * integer must stay above Opt_last_int and options taking a string must
 * stay between the two markers.  Flag-style options follow.
 */
enum {
Opt_osdtimeout,
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
Opt_last_int,
/* int args above */
Opt_fsid,
Opt_name,
Opt_secret,
Opt_ip,
Opt_last_string,
/* string args above */
Opt_noshare,
Opt_nocrc,
};
/*
 * Patterns handed to match_token(); each maps an option spelling to one
 * of the token ids above.  The {-1, NULL} entry terminates the table.
 */
static match_table_t opt_tokens = {
{Opt_osdtimeout, "osdtimeout=%d"},
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
{Opt_mount_timeout, "mount_timeout=%d"},
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
/* int args above */
{Opt_fsid, "fsid=%s"},
{Opt_name, "name=%s"},
{Opt_secret, "secret=%s"},
{Opt_ip, "ip=%s"},
/* string args above */
{Opt_noshare, "noshare"},
{Opt_nocrc, "nocrc"},
{-1, NULL}
};
/*
 * Free an options structure together with everything it owns: the name
 * and secret strings and the monitor address array.
 */
void ceph_destroy_options(struct ceph_options *opt)
{
	dout("destroy_options %p\n", opt);
	kfree(opt->name);
	kfree(opt->secret);
	/* allocated separately by ceph_parse_options(); was previously leaked */
	kfree(opt->mon_addr);
	kfree(opt);
}
EXPORT_SYMBOL(ceph_destroy_options);
/*
 * Build a ceph_options structure from a device name and option string.
 *
 * @popt:              on success, receives the newly allocated options
 * @options:           comma separated option string; modified in place
 *                     by strsep()
 * @dev_name:          start of the monitor list, ip1[:port1][,ip2[:port2]...]
 * @dev_name_end:      end of the monitor list
 * @parse_extra_token: called for every option libceph does not recognise;
 *                     a negative return rejects the option.  May be NULL,
 *                     in which case unknown options are rejected.
 * @private:           passed through to @parse_extra_token
 *
 * Returns 0 on success or a negative errno; on failure nothing is stored
 * in @popt and the partially built options are destroyed.
 */
int ceph_parse_options(struct ceph_options **popt, char *options,
		       const char *dev_name, const char *dev_name_end,
		       int (*parse_extra_token)(char *c, void *private),
		       void *private)
{
	struct ceph_options *opt;
	const char *c;
	int err = -ENOMEM;
	substring_t argstr[MAX_OPT_ARGS];

	opt = kzalloc(sizeof(*opt), GFP_KERNEL);
	if (!opt)
		return err;
	opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
				GFP_KERNEL);
	if (!opt->mon_addr)
		goto out;

	dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
	     dev_name);

	/* start with defaults */
	opt->flags = CEPH_OPT_DEFAULT;
	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */

	/* get mon ip(s) */
	/* ip1[:port1][,ip2[:port2]...] */
	err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
			     CEPH_MAX_MON, &opt->num_mon);
	if (err < 0)
		goto out;

	/* parse mount options */
	while ((c = strsep(&options, ",")) != NULL) {
		int token, intval, ret;

		if (!*c)
			continue;
		err = -EINVAL;
		token = match_token((char *)c, opt_tokens, argstr);
		if (token < 0) {
			/* extra? (err stays -EINVAL when there is no handler) */
			if (parse_extra_token)
				err = parse_extra_token((char *)c, private);
			if (err < 0) {
				pr_err("bad option at '%s'\n", c);
				goto out;
			}
			continue;
		}
		if (token < Opt_last_int) {
			ret = match_int(&argstr[0], &intval);
			if (ret < 0) {
				/* a malformed integer is reported and skipped */
				pr_err("bad mount option arg (not int) "
				       "at '%s'\n", c);
				continue;
			}
			dout("got int token %d val %d\n", token, intval);
		} else if (token > Opt_last_int && token < Opt_last_string) {
			dout("got string token %d val %s\n", token,
			     argstr[0].from);
		} else {
			dout("got token %d\n", token);
		}
		switch (token) {
		case Opt_ip:
			err = ceph_parse_ips(argstr[0].from,
					     argstr[0].to,
					     &opt->my_addr,
					     1, NULL);
			if (err < 0)
				goto out;
			opt->flags |= CEPH_OPT_MYIP;
			break;

		case Opt_fsid:
			/* an unparsable fsid is ignored rather than fatal */
			err = parse_fsid(argstr[0].from, &opt->fsid);
			if (err == 0)
				opt->flags |= CEPH_OPT_FSID;
			break;
		case Opt_name:
			/* a repeated name= replaces (and frees) the earlier one */
			kfree(opt->name);
			opt->name = kstrndup(argstr[0].from,
					     argstr[0].to-argstr[0].from,
					     GFP_KERNEL);
			if (!opt->name) {
				err = -ENOMEM;
				goto out;
			}
			break;
		case Opt_secret:
			/* a repeated secret= replaces (and frees) the earlier one */
			kfree(opt->secret);
			opt->secret = kstrndup(argstr[0].from,
					       argstr[0].to-argstr[0].from,
					       GFP_KERNEL);
			if (!opt->secret) {
				err = -ENOMEM;
				goto out;
			}
			break;

			/* misc */
		case Opt_osdtimeout:
			opt->osd_timeout = intval;
			break;
		case Opt_osdkeepalivetimeout:
			opt->osd_keepalive_timeout = intval;
			break;
		case Opt_osd_idle_ttl:
			opt->osd_idle_ttl = intval;
			break;
		case Opt_mount_timeout:
			opt->mount_timeout = intval;
			break;

		case Opt_noshare:
			opt->flags |= CEPH_OPT_NOSHARE;
			break;

		case Opt_nocrc:
			opt->flags |= CEPH_OPT_NOCRC;
			break;

		default:
			BUG_ON(token);
		}
	}

	/* success */
	*popt = opt;
	return 0;

out:
	ceph_destroy_options(opt);
	return err;
}
EXPORT_SYMBOL(ceph_parse_options);
u64 ceph_client_id(struct ceph_client *client)
{
return client->monc.auth->global_id;
}
EXPORT_SYMBOL(ceph_client_id);
/*
 * Allocate and initialise a new client instance.
 *
 * @opt is stored in the client, as is the caller's @private pointer.
 * The feature masks start at the library defaults and the messenger is
 * left unset.  The mon and osd clients are initialised here.
 *
 * Returns the client, or an ERR_PTR() on failure.  On failure @opt is
 * not freed by this function.
 */
struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
{
	struct ceph_client *cl;
	int ret;

	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	cl->private = private;
	cl->options = opt;

	mutex_init(&cl->mount_mutex);
	init_waitqueue_head(&cl->auth_wq);
	cl->auth_err = 0;

	cl->extra_mon_dispatch = NULL;
	cl->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT;
	cl->required_features = CEPH_FEATURE_REQUIRED_DEFAULT;

	cl->msgr = NULL;

	/* subsystems */
	ret = ceph_monc_init(&cl->monc, cl);
	if (ret < 0)
		goto free_client;

	ret = ceph_osdc_init(&cl->osdc, cl);
	if (ret < 0) {
		/* undo the mon client before dropping the allocation */
		ceph_monc_stop(&cl->monc);
		goto free_client;
	}
	return cl;

free_client:
	kfree(cl);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ceph_create_client);
/*
 * Tear down a client: stop the osd client, flush the messenger
 * workqueue, stop the mon client, remove the client's debugfs entries,
 * destroy the messenger if one was created, and finally free the
 * options stored in the client and the client itself.
 */
void ceph_destroy_client(struct ceph_client *client)
{
dout("destroy_client %p\n", client);
/* unmount */
ceph_osdc_stop(&client->osdc);
/*
* make sure mds and osd connections close out before destroying
* the auth module, which is needed to free those connections'
* ceph_authorizers.
*/
ceph_msgr_flush();
ceph_monc_stop(&client->monc);
ceph_debugfs_client_cleanup(client);
/* msgr is only set once a session has been opened */
if (client->msgr)
ceph_messenger_destroy(client->msgr);
/* the options handed to ceph_create_client() are freed here */
ceph_destroy_options(client->options);
kfree(client);
/* only the pointer value is printed; the client is already freed */
dout("destroy_client %p done\n", client);
}
EXPORT_SYMBOL(ceph_destroy_client);
/*
* true if we have the mon map (and have thus joined the cluster)
*/
static int have_mon_and_osd_map(struct ceph_client *client)
{
return client->monc.monmap && client->monc.monmap->epoch &&
client->osdc.osdmap && client->osdc.osdmap->epoch;
}
/*
 * Join the ceph cluster: create the messenger if this client does not
 * have one yet, open the monitor session, and wait until both a mon map
 * and an osd map have been received.
 *
 * @started is the jiffies value at which the caller began; together with
 * the mount_timeout option (seconds) it bounds the wait.  A mount_timeout
 * of zero disables the deadline check.
 *
 * Returns 0 on success, the messenger or mon-session error, -EIO once
 * the deadline has passed, the error from an interrupted wait, or the
 * negative auth_err recorded on the client.
 */
int __ceph_open_session(struct ceph_client *client, unsigned long started)
{
	struct ceph_entity_addr *myaddr = NULL;
	int err;
	unsigned long timeout = client->options->mount_timeout * HZ;

	/* initialize the messenger */
	if (client->msgr == NULL) {
		struct ceph_messenger *msgr;

		if (ceph_test_opt(client, MYIP))
			myaddr = &client->options->my_addr;
		msgr = ceph_messenger_create(myaddr,
					     client->supported_features,
					     client->required_features);
		/*
		 * Take the error from the returned pointer itself; reading
		 * it back from client->msgr after clearing that field would
		 * yield 0 and report the failure as success.
		 */
		if (IS_ERR(msgr))
			return PTR_ERR(msgr);
		client->msgr = msgr;
		client->msgr->nocrc = ceph_test_opt(client, NOCRC);
	}

	/* open session, and wait for mon and osd maps */
	err = ceph_monc_open_session(&client->monc);
	if (err < 0)
		return err;

	while (!have_mon_and_osd_map(client)) {
		err = -EIO;
		if (timeout && time_after_eq(jiffies, started + timeout))
			return err;

		/* wait */
		dout("mount waiting for mon_map\n");
		err = wait_event_interruptible_timeout(client->auth_wq,
			have_mon_and_osd_map(client) || (client->auth_err < 0),
			timeout);
		if (err == -EINTR || err == -ERESTARTSYS)
			return err;
		if (client->auth_err < 0)
			return client->auth_err;
	}

	return 0;
}
EXPORT_SYMBOL(__ceph_open_session);
/*
 * Locked wrapper around __ceph_open_session(): records the start time
 * and runs the session setup while holding the client's mount_mutex.
 */
int ceph_open_session(struct ceph_client *client)
{
	const unsigned long t0 = jiffies;	/* note the start time */
	int err;

	dout("open_session start\n");

	mutex_lock(&client->mount_mutex);
	err = __ceph_open_session(client, t0);
	mutex_unlock(&client->mount_mutex);

	return err;
}
EXPORT_SYMBOL(ceph_open_session);
/*
 * Module init: set up debugfs, then the messenger.  If the messenger
 * fails to initialise, the debugfs setup is undone before returning.
 */
static int __init init_ceph_lib(void)
{
	int err;

	err = ceph_debugfs_init();
	if (err < 0)
		return err;

	err = ceph_msgr_init();
	if (err < 0) {
		ceph_debugfs_cleanup();
		return err;
	}

	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
	return 0;
}
/*
 * Module exit: undo init_ceph_lib() in reverse order, shutting down the
 * messenger before removing the debugfs directory.
 */
static void __exit exit_ceph_lib(void)
{
dout("exit_ceph_lib\n");
ceph_msgr_exit();
ceph_debugfs_cleanup();
}
module_init(init_ceph_lib);
module_exit(exit_ceph_lib);
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");
/*
* Some non-inline ceph helpers
*/
#include "types.h"
#include <linux/module.h>
#include <linux/ceph/types.h>
/*
* return true if @layout appears to be valid
......@@ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags)
return mode;
}
EXPORT_SYMBOL(ceph_flags_to_mode);
int ceph_caps_for_mode(int mode)
{
......@@ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode)
return caps;
}
EXPORT_SYMBOL(ceph_caps_for_mode);
#include "types.h"
#include <linux/ceph/types.h>
/*
* Robert Jenkin's hash function.
......
/*
* Ceph string constants
*/
#include <linux/module.h>
#include <linux/ceph/types.h>
/*
 * Short printable name for a CEPH_ENTITY_TYPE_* value; "unknown" for
 * anything not listed.
 */
const char *ceph_entity_type_name(int type)
{
	if (type == CEPH_ENTITY_TYPE_MDS)
		return "mds";
	if (type == CEPH_ENTITY_TYPE_OSD)
		return "osd";
	if (type == CEPH_ENTITY_TYPE_MON)
		return "mon";
	if (type == CEPH_ENTITY_TYPE_CLIENT)
		return "client";
	if (type == CEPH_ENTITY_TYPE_AUTH)
		return "auth";
	return "unknown";
}
/*
 * Printable name for a CEPH_OSD_OP_* opcode; "???" for opcodes that are
 * not listed.
 */
const char *ceph_osd_op_name(int op)
{
	static const struct {
		int op;
		const char *name;
	} op_names[] = {
		{ CEPH_OSD_OP_READ,		"read" },
		{ CEPH_OSD_OP_STAT,		"stat" },

		{ CEPH_OSD_OP_MASKTRUNC,	"masktrunc" },

		{ CEPH_OSD_OP_WRITE,		"write" },
		{ CEPH_OSD_OP_DELETE,		"delete" },
		{ CEPH_OSD_OP_TRUNCATE,		"truncate" },
		{ CEPH_OSD_OP_ZERO,		"zero" },
		{ CEPH_OSD_OP_WRITEFULL,	"writefull" },
		{ CEPH_OSD_OP_ROLLBACK,		"rollback" },

		{ CEPH_OSD_OP_APPEND,		"append" },
		{ CEPH_OSD_OP_STARTSYNC,	"startsync" },
		{ CEPH_OSD_OP_SETTRUNC,		"settrunc" },
		{ CEPH_OSD_OP_TRIMTRUNC,	"trimtrunc" },

		{ CEPH_OSD_OP_TMAPUP,		"tmapup" },
		{ CEPH_OSD_OP_TMAPGET,		"tmapget" },
		{ CEPH_OSD_OP_TMAPPUT,		"tmapput" },

		{ CEPH_OSD_OP_GETXATTR,		"getxattr" },
		{ CEPH_OSD_OP_GETXATTRS,	"getxattrs" },
		{ CEPH_OSD_OP_SETXATTR,		"setxattr" },
		{ CEPH_OSD_OP_SETXATTRS,	"setxattrs" },
		{ CEPH_OSD_OP_RESETXATTRS,	"resetxattrs" },
		{ CEPH_OSD_OP_RMXATTR,		"rmxattr" },
		{ CEPH_OSD_OP_CMPXATTR,		"cmpxattr" },

		{ CEPH_OSD_OP_PULL,		"pull" },
		{ CEPH_OSD_OP_PUSH,		"push" },
		{ CEPH_OSD_OP_BALANCEREADS,	"balance-reads" },
		{ CEPH_OSD_OP_UNBALANCEREADS,	"unbalance-reads" },
		{ CEPH_OSD_OP_SCRUB,		"scrub" },

		{ CEPH_OSD_OP_WRLOCK,		"wrlock" },
		{ CEPH_OSD_OP_WRUNLOCK,		"wrunlock" },
		{ CEPH_OSD_OP_RDLOCK,		"rdlock" },
		{ CEPH_OSD_OP_RDUNLOCK,		"rdunlock" },
		{ CEPH_OSD_OP_UPLOCK,		"uplock" },
		{ CEPH_OSD_OP_DNLOCK,		"dnlock" },

		{ CEPH_OSD_OP_CALL,		"call" },

		{ CEPH_OSD_OP_PGLS,		"pgls" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(op_names) / sizeof(op_names[0]); i++) {
		if (op_names[i].op == op)
			return op_names[i].name;
	}
	return "???";
}
/*
 * Printable name for a POOL_OP_* value; "???" for values that are not
 * listed.
 */
const char *ceph_pool_op_name(int op)
{
	static const struct {
		int op;
		const char *name;
	} pool_op_names[] = {
		{ POOL_OP_CREATE,			"create" },
		{ POOL_OP_DELETE,			"delete" },
		{ POOL_OP_AUID_CHANGE,			"auid change" },
		{ POOL_OP_CREATE_SNAP,			"create snap" },
		{ POOL_OP_DELETE_SNAP,			"delete snap" },
		{ POOL_OP_CREATE_UNMANAGED_SNAP,	"create unmanaged snap" },
		{ POOL_OP_DELETE_UNMANAGED_SNAP,	"delete unmanaged snap" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(pool_op_names) / sizeof(pool_op_names[0]); i++) {
		if (pool_op_names[i].op == op)
			return pool_op_names[i].name;
	}
	return "???";
}
......@@ -8,7 +8,7 @@
# define BUG_ON(x) assert(!(x))
#endif
#include "crush.h"
#include <linux/crush/crush.h>
const char *crush_bucket_alg_name(int alg)
{
......
#include <linux/types.h>
#include "hash.h"
#include <linux/crush/hash.h>
/*
* Robert Jenkins' function for mixing 32-bit values
......
......@@ -18,8 +18,8 @@
# define kfree(x) free(x)
#endif
#include "crush.h"
#include "hash.h"
#include <linux/crush/crush.h>
#include <linux/crush/hash.h>
/*
* Implement the core CRUSH mapping algorithm.
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <crypto/hash.h>
#include <linux/ceph/decode.h>
#include "crypto.h"
#include "decode.h"
int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
{
......
#ifndef _FS_CEPH_CRYPTO_H
#define _FS_CEPH_CRYPTO_H
#include "types.h"
#include "buffer.h"
#include <linux/ceph/types.h>
#include <linux/ceph/buffer.h>
/*
* cryptographic secret
......
#include <linux/ceph/ceph_debug.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
#ifdef CONFIG_DEBUG_FS
/*
* Implement /sys/kernel/debug/ceph fun
*
* /sys/kernel/debug/ceph/client* - an instance of the ceph client
* .../osdmap - current osdmap
* .../monmap - current monmap
* .../osdc - active osd requests
* .../monc - mon client state
* .../dentry_lru - dump contents of dentry lru
* .../caps - expose cap (reservation) stats
* .../bdi - symlink to ../../bdi/something
*/
static struct dentry *ceph_debugfs_dir;
static int monmap_show(struct seq_file *s, void *p)
{
int i;
struct ceph_client *client = s->private;
if (client->monc.monmap == NULL)
return 0;
seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
for (i = 0; i < client->monc.monmap->num_mon; i++) {
struct ceph_entity_inst *inst =
&client->monc.monmap->mon_inst[i];
seq_printf(s, "\t%s%lld\t%s\n",
ENTITY_NAME(inst->name),
ceph_pr_addr(&inst->addr.in_addr));
}
return 0;
}
static int osdmap_show(struct seq_file *s, void *p)
{
int i;
struct ceph_client *client = s->private;
struct rb_node *n;
if (client->osdc.osdmap == NULL)
return 0;
seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
seq_printf(s, "flags%s%s\n",
(client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
" NEARFULL" : "",
(client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
" FULL" : "");
for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
struct ceph_pg_pool_info *pool =
rb_entry(n, struct ceph_pg_pool_info, node);
seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
pool->id, pool->v.pg_num, pool->pg_num_mask,
pool->v.lpg_num, pool->lpg_num_mask);
}
for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
struct ceph_entity_addr *addr =
&client->osdc.osdmap->osd_addr[i];
int state = client->osdc.osdmap->osd_state[i];
char sb[64];
seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
i, ceph_pr_addr(&addr->in_addr),
((client->osdc.osdmap->osd_weight[i]*100) >> 16),
ceph_osdmap_state_str(sb, sizeof(sb), state));
}
return 0;
}
/*
 * debugfs "monc": under the mon client mutex, report which maps we
 * have, whether the next osdmap is wanted, and every outstanding generic
 * request (labelled "statfs" or "unknown" by message type).
 */
static int monc_show(struct seq_file *s, void *p)
{
	struct ceph_client *client = s->private;
	struct ceph_mon_client *monc = &client->monc;
	struct rb_node *rb;

	mutex_lock(&monc->mutex);

	if (monc->have_mdsmap)
		seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
	if (monc->have_osdmap)
		seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
	if (monc->want_next_osdmap)
		seq_printf(s, "want next osdmap\n");

	rb = rb_first(&monc->generic_request_tree);
	while (rb) {
		struct ceph_mon_generic_request *req =
			rb_entry(rb, struct ceph_mon_generic_request, node);
		__u16 type = le16_to_cpu(req->request->hdr.type);

		if (type != CEPH_MSG_STATFS)
			seq_printf(s, "%lld unknown\n", req->tid);
		else
			seq_printf(s, "%lld statfs\n", req->tid);
		rb = rb_next(rb);
	}

	mutex_unlock(&monc->mutex);
	return 0;
}
/*
 * debugfs "osdc": under the osd client request mutex, print one line
 * per in-flight osd request: tid, target osd (-1 if none), pgid, object
 * name, reassert version (if any) and the name of each op.
 */
static int osdc_show(struct seq_file *s, void *pp)
{
	struct ceph_client *client = s->private;
	struct ceph_osd_client *osdc = &client->osdc;
	struct rb_node *rb;

	mutex_lock(&osdc->request_mutex);
	rb = rb_first(&osdc->requests);
	while (rb) {
		struct ceph_osd_request *req =
			rb_entry(rb, struct ceph_osd_request, r_node);
		struct ceph_osd_request_head *head;
		struct ceph_osd_op *op;
		int num_ops, olen;
		int k;

		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
			   req->r_osd ? req->r_osd->o_osd : -1,
			   le32_to_cpu(req->r_pgid.pool),
			   le16_to_cpu(req->r_pgid.ps));

		/* the ops start right after the request head in the front buffer */
		head = req->r_request->front.iov_base;
		op = (void *)(head + 1);

		num_ops = le16_to_cpu(head->num_ops);
		olen = le32_to_cpu(head->object_len);
		/* the object name is read from just past the last op */
		seq_printf(s, "%.*s", olen,
			   (const char *)(head->ops + num_ops));

		if (req->r_reassert_version.epoch)
			seq_printf(s, "\t%u'%llu",
			   (unsigned)le32_to_cpu(req->r_reassert_version.epoch),
			   le64_to_cpu(req->r_reassert_version.version));
		else
			seq_printf(s, "\t");

		for (k = 0; k < num_ops; k++, op++)
			seq_printf(s, "\t%s",
				   ceph_osd_op_name(le16_to_cpu(op->op)));

		seq_printf(s, "\n");
		rb = rb_next(rb);
	}
	mutex_unlock(&osdc->request_mutex);
	return 0;
}
CEPH_DEFINE_SHOW_FUNC(monmap_show)
CEPH_DEFINE_SHOW_FUNC(osdmap_show)
CEPH_DEFINE_SHOW_FUNC(monc_show)
CEPH_DEFINE_SHOW_FUNC(osdc_show)
/*
 * Create the top-level "ceph" debugfs directory.
 * Returns 0 on success, -ENOMEM if the directory could not be created.
 */
int ceph_debugfs_init(void)
{
	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
	return ceph_debugfs_dir ? 0 : -ENOMEM;
}
/* Remove the top-level debugfs directory recorded in ceph_debugfs_dir. */
void ceph_debugfs_cleanup(void)
{
debugfs_remove(ceph_debugfs_dir);
}
/*
 * Create the per-client debugfs directory, named "<fsid>.client<id>",
 * and its "monc", "osdc", "monmap" and "osdmap" files.
 *
 * Returns 0 on success.  If any entry cannot be created, whatever was
 * created so far is removed again and -ENOMEM is returned.
 */
int ceph_debugfs_client_init(struct ceph_client *client)
{
	char name[80];
	struct dentry *dir;

	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
		 client->monc.auth->global_id);

	dir = debugfs_create_dir(name, ceph_debugfs_dir);
	client->debugfs_dir = dir;
	if (!dir)
		goto fail;

	client->monc.debugfs_file =
		debugfs_create_file("monc", 0600, dir, client,
				    &monc_show_fops);
	if (!client->monc.debugfs_file)
		goto fail;

	client->osdc.debugfs_file =
		debugfs_create_file("osdc", 0600, dir, client,
				    &osdc_show_fops);
	if (!client->osdc.debugfs_file)
		goto fail;

	client->debugfs_monmap =
		debugfs_create_file("monmap", 0600, dir, client,
				    &monmap_show_fops);
	if (!client->debugfs_monmap)
		goto fail;

	client->debugfs_osdmap =
		debugfs_create_file("osdmap", 0600, dir, client,
				    &osdmap_show_fops);
	if (!client->debugfs_osdmap)
		goto fail;

	return 0;

fail:
	ceph_debugfs_client_cleanup(client);
	return -ENOMEM;
}
/*
 * Remove a client's debugfs entries: the four files first, then the
 * directory that contained them.
 */
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
debugfs_remove(client->debugfs_osdmap);
debugfs_remove(client->debugfs_monmap);
debugfs_remove(client->osdc.debugfs_file);
debugfs_remove(client->monc.debugfs_file);
/* the directory goes last, after its files */
debugfs_remove(client->debugfs_dir);
}
#else /* CONFIG_DEBUG_FS */
/*
 * No-op replacements used when debugfs is not configured.  Their
 * signatures must match the CONFIG_DEBUG_FS versions so that callers
 * build either way.
 */
int ceph_debugfs_init(void)
{
	return 0;
}

void ceph_debugfs_cleanup(void)
{
}

/*
 * Takes only the client, like the CONFIG_DEBUG_FS definition; the stub
 * previously declared a second parameter that no caller passes.
 */
int ceph_debugfs_client_init(struct ceph_client *client)
{
	return 0;
}

void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
}
#endif /* CONFIG_DEBUG_FS */
EXPORT_SYMBOL(ceph_debugfs_init);
EXPORT_SYMBOL(ceph_debugfs_cleanup);
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/crc32c.h>
#include <linux/ctype.h>
......@@ -13,10 +13,10 @@
#include <linux/blkdev.h>
#include <net/tcp.h>
#include "super.h"
#include "messenger.h"
#include "decode.h"
#include "pagelist.h"
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/pagelist.h>
/*
* Ceph uses the messenger to exchange ceph_msg messages with other
......@@ -50,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
static DEFINE_SPINLOCK(addr_str_lock);
static int last_addr_str;
const char *pr_addr(const struct sockaddr_storage *ss)
const char *ceph_pr_addr(const struct sockaddr_storage *ss)
{
int i;
char *s;
......@@ -81,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss)
return s;
}
EXPORT_SYMBOL(ceph_pr_addr);
static void encode_my_addr(struct ceph_messenger *msgr)
{
......@@ -93,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr)
*/
struct workqueue_struct *ceph_msgr_wq;
int __init ceph_msgr_init(void)
int ceph_msgr_init(void)
{
ceph_msgr_wq = create_workqueue("ceph-msgr");
if (IS_ERR(ceph_msgr_wq)) {
......@@ -104,16 +105,19 @@ int __init ceph_msgr_init(void)
}
return 0;
}
EXPORT_SYMBOL(ceph_msgr_init);
void ceph_msgr_exit(void)
{
destroy_workqueue(ceph_msgr_wq);
}
EXPORT_SYMBOL(ceph_msgr_exit);
void ceph_msgr_flush(void)
{
flush_workqueue(ceph_msgr_wq);
}
EXPORT_SYMBOL(ceph_msgr_flush);
/*
......@@ -223,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
set_sock_callbacks(sock, con);
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
pr_addr(&con->peer_addr.in_addr),
ceph_pr_addr(&con->peer_addr.in_addr),
sock->sk->sk_state);
ret = 0;
}
if (ret < 0) {
pr_err("connect %s error %d\n",
pr_addr(&con->peer_addr.in_addr), ret);
ceph_pr_addr(&con->peer_addr.in_addr), ret);
sock_release(sock);
con->sock = NULL;
con->error_msg = "connect error";
......@@ -336,7 +340,8 @@ static void reset_connection(struct ceph_connection *con)
*/
void ceph_con_close(struct ceph_connection *con)
{
dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr));
dout("con_close %p peer %s\n", con,
ceph_pr_addr(&con->peer_addr.in_addr));
set_bit(CLOSED, &con->state); /* in case there's queued work */
clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
......@@ -349,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con)
mutex_unlock(&con->mutex);
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_close);
/*
* Reopen a closed connection, with a new peer address.
*/
void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
{
dout("con_open %p %s\n", con, pr_addr(&addr->in_addr));
dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
set_bit(OPENING, &con->state);
clear_bit(CLOSED, &con->state);
memcpy(&con->peer_addr, addr, sizeof(*addr));
con->delay = 0; /* reset backoff memory */
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_open);
/*
* return true if this connection ever successfully opened
......@@ -408,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
INIT_LIST_HEAD(&con->out_sent);
INIT_DELAYED_WORK(&con->work, con_work);
}
EXPORT_SYMBOL(ceph_con_init);
/*
......@@ -652,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED);
con->out_connect.features = cpu_to_le64(msgr->supported_features);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
......@@ -1013,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con)
{
if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
pr_err("connect to %s got bad banner\n",
pr_addr(&con->peer_addr.in_addr));
ceph_pr_addr(&con->peer_addr.in_addr));
con->error_msg = "protocol error, bad banner";
return -1;
}
......@@ -1116,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end,
addr_set_port(ss, port);
dout("parse_ips got %s\n", pr_addr(ss));
dout("parse_ips got %s\n", ceph_pr_addr(ss));
if (p == end)
break;
......@@ -1136,6 +1144,7 @@ int ceph_parse_ips(const char *c, const char *end,
pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
return -EINVAL;
}
EXPORT_SYMBOL(ceph_parse_ips);
static int process_banner(struct ceph_connection *con)
{
......@@ -1157,9 +1166,9 @@ static int process_banner(struct ceph_connection *con)
!(addr_is_blank(&con->actual_peer_addr.in_addr) &&
con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
pr_warning("wrong peer, want %s/%d, got %s/%d\n",
pr_addr(&con->peer_addr.in_addr),
ceph_pr_addr(&con->peer_addr.in_addr),
(int)le32_to_cpu(con->peer_addr.nonce),
pr_addr(&con->actual_peer_addr.in_addr),
ceph_pr_addr(&con->actual_peer_addr.in_addr),
(int)le32_to_cpu(con->actual_peer_addr.nonce));
con->error_msg = "wrong peer at address";
return -1;
......@@ -1177,7 +1186,7 @@ static int process_banner(struct ceph_connection *con)
addr_set_port(&con->msgr->inst.addr.in_addr, port);
encode_my_addr(con->msgr);
dout("process_banner learned my addr is %s\n",
pr_addr(&con->msgr->inst.addr.in_addr));
ceph_pr_addr(&con->msgr->inst.addr.in_addr));
}
set_bit(NEGOTIATING, &con->state);
......@@ -1198,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con)
static int process_connect(struct ceph_connection *con)
{
u64 sup_feat = CEPH_FEATURE_SUPPORTED;
u64 req_feat = CEPH_FEATURE_REQUIRED;
u64 sup_feat = con->msgr->supported_features;
u64 req_feat = con->msgr->required_features;
u64 server_feat = le64_to_cpu(con->in_reply.features);
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
......@@ -1209,7 +1218,7 @@ static int process_connect(struct ceph_connection *con)
pr_err("%s%lld %s feature set mismatch,"
" my %llx < server's %llx, missing %llx\n",
ENTITY_NAME(con->peer_name),
pr_addr(&con->peer_addr.in_addr),
ceph_pr_addr(&con->peer_addr.in_addr),
sup_feat, server_feat, server_feat & ~sup_feat);
con->error_msg = "missing required protocol features";
fail_protocol(con);
......@@ -1219,7 +1228,7 @@ static int process_connect(struct ceph_connection *con)
pr_err("%s%lld %s protocol version mismatch,"
" my %d != server's %d\n",
ENTITY_NAME(con->peer_name),
pr_addr(&con->peer_addr.in_addr),
ceph_pr_addr(&con->peer_addr.in_addr),
le32_to_cpu(con->out_connect.protocol_version),
le32_to_cpu(con->in_reply.protocol_version));
con->error_msg = "protocol version mismatch";
......@@ -1253,7 +1262,7 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_connect.connect_seq));
pr_err("%s%lld %s connection reset\n",
ENTITY_NAME(con->peer_name),
pr_addr(&con->peer_addr.in_addr));
ceph_pr_addr(&con->peer_addr.in_addr));
reset_connection(con);
prepare_write_connect(con->msgr, con, 0);
prepare_read_connect(con);
......@@ -1298,7 +1307,7 @@ static int process_connect(struct ceph_connection *con)
pr_err("%s%lld %s protocol feature mismatch,"
" my required %llx > server's %llx, need %llx\n",
ENTITY_NAME(con->peer_name),
pr_addr(&con->peer_addr.in_addr),
ceph_pr_addr(&con->peer_addr.in_addr),
req_feat, server_feat, req_feat & ~server_feat);
con->error_msg = "missing required protocol features";
fail_protocol(con);
......@@ -1525,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con)
if ((s64)seq - (s64)con->in_seq < 1) {
pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
ENTITY_NAME(con->peer_name),
pr_addr(&con->peer_addr.in_addr),
ceph_pr_addr(&con->peer_addr.in_addr),
seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
......@@ -2023,9 +2032,9 @@ static void con_work(struct work_struct *work)
static void ceph_fault(struct ceph_connection *con)
{
pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
pr_addr(&con->peer_addr.in_addr), con->error_msg);
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n",
con, con->state, pr_addr(&con->peer_addr.in_addr));
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
if (test_bit(LOSSYTX, &con->state)) {
dout("fault on LOSSYTX channel\n");
......@@ -2085,7 +2094,9 @@ static void ceph_fault(struct ceph_connection *con)
/*
* create a new messenger instance
*/
struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
u32 supported_features,
u32 required_features)
{
struct ceph_messenger *msgr;
......@@ -2093,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
if (msgr == NULL)
return ERR_PTR(-ENOMEM);
msgr->supported_features = supported_features;
msgr->required_features = required_features;
spin_lock_init(&msgr->global_seq_lock);
/* the zero page is needed if a request is "canceled" while the message
......@@ -2115,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
dout("messenger_create %p\n", msgr);
return msgr;
}
EXPORT_SYMBOL(ceph_messenger_create);
void ceph_messenger_destroy(struct ceph_messenger *msgr)
{
......@@ -2124,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
kfree(msgr);
dout("destroyed messenger %p\n", msgr);
}
EXPORT_SYMBOL(ceph_messenger_destroy);
/*
* Queue up an outgoing message on the given connection.
......@@ -2160,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_send);
/*
* Revoke a message that was previously queued for send
......@@ -2225,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con)
test_and_set_bit(WRITE_PENDING, &con->state) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_keepalive);
/*
......@@ -2299,6 +2317,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
pr_err("msg_new can't create type %d front %d\n", type, front_len);
return NULL;
}
EXPORT_SYMBOL(ceph_msg_new);
/*
* Allocate "middle" portion of a message, if it is needed and wasn't
......@@ -2410,6 +2429,7 @@ void ceph_msg_last_put(struct kref *kref)
else
ceph_msg_kfree(m);
}
EXPORT_SYMBOL(ceph_msg_last_put);
void ceph_msg_dump(struct ceph_msg *msg)
{
......@@ -2430,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg)
DUMP_PREFIX_OFFSET, 16, 1,
&msg->footer, sizeof(msg->footer), true);
}
EXPORT_SYMBOL(ceph_msg_dump);
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/sched.h>
#include "mon_client.h"
#include "super.h"
#include "auth.h"
#include "decode.h"
#include <linux/ceph/mon_client.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
/*
* Interact with Ceph monitor cluster. Handle requests for new map
......@@ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
m->num_mon);
for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i,
pr_addr(&m->mon_inst[i].addr.in_addr));
ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
return m;
bad:
......@@ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc)
struct ceph_msg *msg = monc->m_subscribe;
struct ceph_mon_subscribe_item *i;
void *p, *end;
int num;
p = msg->front.iov_base;
end = p + msg->front_max;
dout("__send_subscribe to 'mdsmap' %u+\n",
(unsigned)monc->have_mdsmap);
num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
ceph_encode_32(&p, num);
if (monc->want_next_osdmap) {
dout("__send_subscribe to 'osdmap' %u\n",
(unsigned)monc->have_osdmap);
ceph_encode_32(&p, 3);
ceph_encode_string(&p, end, "osdmap", 6);
i = p;
i->have = cpu_to_le64(monc->have_osdmap);
i->onetime = 1;
p += sizeof(*i);
monc->want_next_osdmap = 2; /* requested */
} else {
ceph_encode_32(&p, 2);
}
ceph_encode_string(&p, end, "mdsmap", 6);
i = p;
i->have = cpu_to_le64(monc->have_mdsmap);
i->onetime = 0;
p += sizeof(*i);
if (monc->want_mdsmap) {
dout("__send_subscribe to 'mdsmap' %u+\n",
(unsigned)monc->have_mdsmap);
ceph_encode_string(&p, end, "mdsmap", 6);
i = p;
i->have = cpu_to_le64(monc->have_mdsmap);
i->onetime = 0;
p += sizeof(*i);
}
ceph_encode_string(&p, end, "monmap", 6);
i = p;
i->have = 0;
......@@ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
mutex_lock(&monc->mutex);
if (monc->hunting) {
pr_info("mon%d %s session established\n",
monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr));
monc->cur_mon,
ceph_pr_addr(&monc->con->peer_addr.in_addr));
monc->hunting = false;
}
dout("handle_subscribe_ack after %d seconds\n", seconds);
......@@ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
mutex_unlock(&monc->mutex);
return 0;
}
EXPORT_SYMBOL(ceph_monc_got_mdsmap);
int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
{
......@@ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
mutex_unlock(&monc->mutex);
return 0;
}
EXPORT_SYMBOL(ceph_monc_open_session);
/*
* The monitor responds with mount ack indicate mount success. The
......@@ -540,6 +548,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
kref_put(&req->kref, release_generic_request);
return err;
}
EXPORT_SYMBOL(ceph_monc_do_statfs);
/*
* pool ops
......@@ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc,
pool, 0, (char *)snapid, sizeof(*snapid));
}
EXPORT_SYMBOL(ceph_monc_create_snapid);
int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
u32 pool, u64 snapid)
......@@ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work)
*/
static int build_initial_monmap(struct ceph_mon_client *monc)
{
struct ceph_mount_args *args = monc->client->mount_args;
struct ceph_entity_addr *mon_addr = args->mon_addr;
int num_mon = args->num_mon;
struct ceph_options *opt = monc->client->options;
struct ceph_entity_addr *mon_addr = opt->mon_addr;
int num_mon = opt->num_mon;
int i;
/* build initial monmap */
......@@ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
}
monc->monmap->num_mon = num_mon;
monc->have_fsid = false;
/* release addr memory */
kfree(args->mon_addr);
args->mon_addr = NULL;
args->num_mon = 0;
return 0;
}
......@@ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
monc->con = NULL;
/* authentication */
monc->auth = ceph_auth_init(cl->mount_args->name,
cl->mount_args->secret);
monc->auth = ceph_auth_init(cl->options->name,
cl->options->secret);
if (IS_ERR(monc->auth))
return PTR_ERR(monc->auth);
monc->auth->want_keys =
......@@ -808,6 +813,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
out:
return err;
}
EXPORT_SYMBOL(ceph_monc_init);
void ceph_monc_stop(struct ceph_mon_client *monc)
{
......@@ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
kfree(monc->monmap);
}
EXPORT_SYMBOL(ceph_monc_stop);
static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
......@@ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc)
mutex_unlock(&monc->mutex);
return ret;
}
EXPORT_SYMBOL(ceph_monc_validate_auth);
/*
* handle incoming message
......@@ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_monc_handle_map(monc, msg);
break;
case CEPH_MSG_MDS_MAP:
ceph_mdsc_handle_map(&monc->client->mdsc, msg);
break;
case CEPH_MSG_OSD_MAP:
ceph_osdc_handle_map(&monc->client->osdc, msg);
break;
default:
/* can the chained handler handle it? */
if (monc->client->extra_mon_dispatch &&
monc->client->extra_mon_dispatch(monc->client, msg) == 0)
break;
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
......@@ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con)
if (monc->con && !monc->hunting)
pr_info("mon%d %s session lost, "
"hunting for new mon\n", monc->cur_mon,
pr_addr(&monc->con->peer_addr.in_addr));
ceph_pr_addr(&monc->con->peer_addr.in_addr));
__close_session(monc);
if (!monc->hunting) {
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include "msgpool.h"
#include <linux/ceph/msgpool.h>
static void *alloc_fn(gfp_t gfp_mask, void *arg)
{
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/highmem.h>
#include <linux/mm.h>
......@@ -10,12 +11,12 @@
#include <linux/bio.h>
#endif
#include "super.h"
#include "osd_client.h"
#include "messenger.h"
#include "decode.h"
#include "auth.h"
#include "pagelist.h"
#include <linux/ceph/libceph.h>
#include <linux/ceph/osd_client.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>
#define OSD_OP_FRONT_LEN 4096
#define OSD_OPREPLY_FRONT_LEN 512
......@@ -70,11 +71,14 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
op->extent.length = objlen;
}
req->r_num_pages = calc_pages_for(off, *plen);
if (op->op == CEPH_OSD_OP_WRITE)
op->payload_len = *plen;
dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
*bno, objoff, objlen, req->r_num_pages);
}
EXPORT_SYMBOL(ceph_calc_raw_layout);
/*
* Implement client access to distributed object storage cluster.
......@@ -154,19 +158,7 @@ void ceph_osdc_release_request(struct kref *kref)
else
kfree(req);
}
static int op_needs_trail(int op)
{
switch (op) {
case CEPH_OSD_OP_GETXATTR:
case CEPH_OSD_OP_SETXATTR:
case CEPH_OSD_OP_CMPXATTR:
case CEPH_OSD_OP_CALL:
return 1;
default:
return 0;
}
}
EXPORT_SYMBOL(ceph_osdc_release_request);
static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail)
{
......@@ -268,6 +260,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
return req;
}
EXPORT_SYMBOL(ceph_osdc_alloc_request);
static void osd_req_encode_op(struct ceph_osd_request *req,
struct ceph_osd_op *dst,
......@@ -403,6 +396,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
msg->hdr.front_len = cpu_to_le32(msg_size);
return;
}
EXPORT_SYMBOL(ceph_osdc_build_request);
/*
* build new request AND message, calculate layout, and adjust file
......@@ -460,6 +454,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
return req;
}
EXPORT_SYMBOL(ceph_osdc_new_request);
/*
* We keep osd requests in an rbtree, sorted by ->r_tid.
......@@ -614,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
dout("__move_osd_to_lru %p\n", osd);
BUG_ON(!list_empty(&osd->o_osd_lru));
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ;
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
}
static void __remove_osd_from_lru(struct ceph_osd *osd)
......@@ -708,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
{
schedule_delayed_work(&osdc->timeout_work,
osdc->client->mount_args->osd_keepalive_timeout * HZ);
osdc->client->options->osd_keepalive_timeout * HZ);
}
static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
......@@ -909,9 +904,9 @@ static void handle_timeout(struct work_struct *work)
container_of(work, struct ceph_osd_client, timeout_work.work);
struct ceph_osd_request *req, *last_req = NULL;
struct ceph_osd *osd;
unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
unsigned long timeout = osdc->client->options->osd_timeout * HZ;
unsigned long keepalive =
osdc->client->mount_args->osd_keepalive_timeout * HZ;
osdc->client->options->osd_keepalive_timeout * HZ;
unsigned long last_stamp = 0;
struct rb_node *p;
struct list_head slow_osds;
......@@ -998,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work)
container_of(work, struct ceph_osd_client,
osds_timeout_work.work);
unsigned long delay =
osdc->client->mount_args->osd_idle_ttl * HZ >> 2;
osdc->client->options->osd_idle_ttl * HZ >> 2;
dout("osds timeout\n");
down_read(&osdc->map_sem);
......@@ -1360,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
up_read(&osdc->map_sem);
return rc;
}
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
* wait for a request to complete
......@@ -1382,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
return req->r_result;
}
EXPORT_SYMBOL(ceph_osdc_wait_request);
/*
* sync - wait for all in-flight requests to flush. avoid starvation.
......@@ -1415,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
mutex_unlock(&osdc->request_mutex);
dout("sync done (thru tid %llu)\n", last_tid);
}
EXPORT_SYMBOL(ceph_osdc_sync);
/*
* init, shutdown
......@@ -1440,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
schedule_delayed_work(&osdc->osds_timeout_work,
round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ));
round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
err = -ENOMEM;
osdc->req_mempool = mempool_create_kmalloc_pool(10,
......@@ -1466,6 +1464,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
out:
return err;
}
EXPORT_SYMBOL(ceph_osdc_init);
void ceph_osdc_stop(struct ceph_osd_client *osdc)
{
......@@ -1480,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
ceph_msgpool_destroy(&osdc->msgpool_op);
ceph_msgpool_destroy(&osdc->msgpool_op_reply);
}
EXPORT_SYMBOL(ceph_osdc_stop);
/*
* Read some contiguous pages. If we cross a stripe boundary, shorten
......@@ -1517,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
dout("readpages result %d\n", rc);
return rc;
}
EXPORT_SYMBOL(ceph_osdc_readpages);
/*
* do a synchronous write on N pages
......@@ -1559,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
dout("writepages result %d\n", rc);
return rc;
}
EXPORT_SYMBOL(ceph_osdc_writepages);
/*
* handle incoming message
......
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/div64.h>
#include "super.h"
#include "osdmap.h"
#include "crush/hash.h"
#include "crush/mapper.h"
#include "decode.h"
#include <linux/ceph/libceph.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/decode.h>
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>
char *ceph_osdmap_state_str(char *str, int len, int state)
{
......@@ -429,6 +430,7 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
}
return -ENOENT;
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
......@@ -979,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
}
EXPORT_SYMBOL(ceph_calc_file_object_mapping);
/*
* calculate an object layout (i.e. pgid) from an oid,
......@@ -1024,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
ol->ol_stripe_unit = fl->fl_object_stripe_unit;
return 0;
}
EXPORT_SYMBOL(ceph_calc_object_layout);
/*
* Calculate raw osd vector for the given pgid. Return pointer to osd
......@@ -1121,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
return osds[i];
return -1;
}
EXPORT_SYMBOL(ceph_calc_pg_primary);
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include "pagelist.h"
#include <linux/ceph/pagelist.h>
static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
{
......@@ -25,6 +25,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl)
}
return 0;
}
EXPORT_SYMBOL(ceph_pagelist_release);
static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
{
......@@ -61,3 +62,4 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
pl->room -= len;
return 0;
}
EXPORT_SYMBOL(ceph_pagelist_append);
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/ceph/libceph.h>
/*
 * Build a vector of pinned user pages.
 *
 * Allocates an array of num_pages page pointers (GFP_NOFS) and asks
 * get_user_pages() to pin the user pages starting at @data, holding the
 * current task's mmap_sem for read around the call.  @off and @len are
 * not consulted here.
 *
 * Returns the page array on success.  On failure returns an ERR_PTR:
 * -ENOMEM if the array cannot be allocated, the get_user_pages() error
 * if pinning fails outright, or -EFAULT if fewer than num_pages pages
 * could be pinned.  No page references are left held on failure.
 */
struct page **ceph_get_direct_page_vector(const char __user *data,
					  int num_pages,
					  loff_t off, size_t len)
{
	struct page **pages;
	int rc;
	int i;

	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	down_read(&current->mm->mmap_sem);
	rc = get_user_pages(current, current->mm, (unsigned long)data,
			    num_pages, 0, 0, pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (rc < 0)
		goto fail;
	if (rc < num_pages) {
		/*
		 * Short pin: only the first rc entries are valid.  Handing
		 * the array back would make callers touch (and later
		 * put_page()) uninitialized slots, so drop what we got and
		 * report the fault instead.
		 */
		for (i = 0; i < rc; i++)
			put_page(pages[i]);
		rc = -EFAULT;
		goto fail;
	}
	return pages;

fail:
	kfree(pages);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL(ceph_get_direct_page_vector);
/*
 * Drop one reference (put_page) on each of the first num_pages entries
 * of @pages, in ascending order, then free the array itself.
 */
void ceph_put_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		put_page(pages[idx]);
		idx++;
	}

	kfree(pages);
}
EXPORT_SYMBOL(ceph_put_page_vector);
/*
 * Free each of the first num_pages entries of @pages as an order-0
 * allocation (__free_pages), in ascending order, then free the array
 * itself.
 */
void ceph_release_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		__free_pages(pages[idx], 0);
		idx++;
	}

	kfree(pages);
}
EXPORT_SYMBOL(ceph_release_page_vector);
/*
 * Allocate a vector of newly allocated pages.
 *
 * Both the pointer array and every page are allocated with @flags.
 * Returns the array on success.  If the array or any page cannot be
 * allocated, everything obtained so far is released and
 * ERR_PTR(-ENOMEM) is returned.
 */
struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
{
	struct page **vec;
	int filled;

	vec = kmalloc(sizeof(*vec) * num_pages, flags);
	if (vec == NULL)
		return ERR_PTR(-ENOMEM);

	for (filled = 0; filled < num_pages; filled++) {
		vec[filled] = __page_cache_alloc(flags);
		if (!vec[filled])
			goto out_release;
	}
	return vec;

out_release:
	/* frees the 'filled' pages allocated so far and the array */
	ceph_release_page_vector(vec, filled);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ceph_alloc_page_vector);
/*
 * Copy @len bytes from the user buffer @data into a page vector.
 *
 * Only the within-page part of @off is used; copying begins at that
 * offset inside pages[0] and continues through consecutive pages.  A
 * chunk that is only partly copied is resumed from where it stopped.
 *
 * Returns @len on success, or -EFAULT if a copy_from_user() call makes
 * no progress at all.
 */
int ceph_copy_user_to_page_vector(struct page **pages,
				  const char __user *data,
				  loff_t off, size_t len)
{
	int idx = 0;
	int pg_off = off & ~PAGE_CACHE_MASK;
	int remaining = len;

	while (remaining > 0) {
		int chunk = min_t(int, PAGE_CACHE_SIZE-pg_off, remaining);
		int failed = copy_from_user(page_address(pages[idx]) + pg_off,
					    data, chunk);
		int done;

		if (failed == chunk)
			return -EFAULT;

		/* advance by however many bytes actually made it across */
		done = chunk - failed;
		data += done;
		remaining -= done;
		pg_off += done;
		if (pg_off == PAGE_CACHE_SIZE) {
			pg_off = 0;
			idx++;
		}
	}
	return len;
}
EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
/*
 * Copy @len bytes from the kernel buffer @data into a page vector.
 *
 * Only the within-page part of @off is used; copying begins at that
 * offset inside pages[0] and continues through consecutive pages.
 * Returns @len.
 */
int ceph_copy_to_page_vector(struct page **pages,
			     const char *data,
			     loff_t off, size_t len)
{
	struct page **cur = pages;
	size_t pg_off = off & ~PAGE_CACHE_MASK;
	size_t remaining = len;

	while (remaining > 0) {
		size_t chunk = min_t(size_t, PAGE_CACHE_SIZE-pg_off, remaining);

		memcpy(page_address(*cur) + pg_off, data, chunk);
		data += chunk;
		remaining -= chunk;
		pg_off += chunk;
		if (pg_off == PAGE_CACHE_SIZE) {
			/* filled this page; continue at the start of the next */
			pg_off = 0;
			cur++;
		}
	}
	return len;
}
EXPORT_SYMBOL(ceph_copy_to_page_vector);
/*
 * Copy @len bytes out of a page vector into the kernel buffer @data.
 *
 * Only the within-page part of @off is used; reading begins at that
 * offset inside pages[0] and continues through consecutive pages.
 * Returns @len.
 */
int ceph_copy_from_page_vector(struct page **pages,
			       char *data,
			       loff_t off, size_t len)
{
	struct page **cur = pages;
	size_t pg_off = off & ~PAGE_CACHE_MASK;
	size_t remaining = len;

	while (remaining > 0) {
		size_t chunk = min_t(size_t, PAGE_CACHE_SIZE-pg_off, remaining);

		memcpy(data, page_address(*cur) + pg_off, chunk);
		data += chunk;
		remaining -= chunk;
		pg_off += chunk;
		if (pg_off == PAGE_CACHE_SIZE) {
			/* consumed this page; continue at the start of the next */
			pg_off = 0;
			cur++;
		}
	}
	return len;
}
EXPORT_SYMBOL(ceph_copy_from_page_vector);
/*
 * Copy @len bytes from a page vector into the user buffer @data.
 *
 * Only the within-page part of @off is used; reading begins at that
 * offset inside pages[0] and continues through consecutive pages.  A
 * chunk that is only partly copied is resumed from where it stopped,
 * in the same page.
 *
 * Returns @len on success, or -EFAULT if a copy_to_user() call makes
 * no progress at all.
 */
int ceph_copy_page_vector_to_user(struct page **pages,
				  char __user *data,
				  loff_t off, size_t len)
{
	int i = 0;
	int po = off & ~PAGE_CACHE_MASK;
	int left = len;
	int l, bad;

	while (left > 0) {
		l = min_t(int, left, PAGE_CACHE_SIZE-po);
		bad = copy_to_user(data, page_address(pages[i]) + po, l);
		if (bad == l)
			return -EFAULT;
		data += l - bad;
		left -= l - bad;
		/*
		 * Track the in-page offset unconditionally and move to the
		 * next page only once this one is exhausted; otherwise a
		 * partial copy would skip the uncopied tail of the page.
		 */
		po += l - bad;
		if (po == PAGE_CACHE_SIZE) {
			po = 0;
			i++;
		}
	}
	return len;
}
EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
/*
 * Zero @len bytes of a page vector starting at byte offset @off, where
 * @off is measured from the start of the first page in @pages.
 *
 * The range is handled in up to three pieces: a partial leading page,
 * a run of whole pages, and a partial trailing page.
 */
void ceph_zero_page_vector_range(int off, int len, struct page **pages)
{
	int idx = off >> PAGE_CACHE_SHIFT;

	off &= ~PAGE_CACHE_MASK;

	dout("zero_page_vector_page %u~%u\n", off, len);

	/* partial first page: zero from off up to the page end or range end */
	if (off != 0) {
		int stop = min((int)PAGE_CACHE_SIZE, off + len);

		dout("zeroing %d %p head from %d\n", idx, pages[idx],
		     (int)off);
		zero_user_segment(pages[idx], off, stop);
		len -= (stop - off);
		idx++;
	}

	/* whole pages */
	for (; len >= PAGE_CACHE_SIZE; len -= PAGE_CACHE_SIZE, idx++) {
		dout("zeroing %d %p len=%d\n", idx, pages[idx], len);
		zero_user_segment(pages[idx], 0, PAGE_CACHE_SIZE);
	}

	/* partial last page */
	if (len != 0) {
		dout("zeroing %d %p tail to %d\n", idx, pages[idx], (int)len);
		zero_user_segment(pages[idx], 0, len);
	}
}
EXPORT_SYMBOL(ceph_zero_page_vector_range);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment