Commit d67ae825 authored by Tom Haynes's avatar Tom Haynes

pnfs/flexfiles: Add the FlexFile Layout Driver

The flexfile layout is a new layout that extends the
file layout. It is currently being drafted as a specification at
https://datatracker.ietf.org/doc/draft-ietf-nfsv4-layout-types/

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Tom Haynes <loghyr@primarydata.com>
Signed-off-by: Tao Peng <bergwolf@primarydata.com>
parent 5fadeb47
......@@ -128,6 +128,11 @@ config PNFS_OBJLAYOUT
depends on NFS_V4_1 && SCSI_OSD_ULD
default NFS_V4
config PNFS_FLEXFILE_LAYOUT
tristate
depends on NFS_V4_1 && NFS_V3
default m
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
string "NFSv4.1 Implementation ID Domain"
depends on NFS_V4_1
......
......@@ -33,3 +33,4 @@ nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
#
# Makefile for the pNFS Flexfile Layout Driver kernel module
#
obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += nfs_layout_flexfiles.o
nfs_layout_flexfiles-y := flexfilelayout.o flexfilelayoutdev.o
This diff is collapsed.
/*
* NFSv4 flexfile layout driver data structures.
*
* Copyright (c) 2014, Primary Data, Inc. All rights reserved.
*
* Tao Peng <bergwolf@primarydata.com>
*/
#ifndef FS_NFS_NFS4FLEXFILELAYOUT_H
#define FS_NFS_NFS4FLEXFILELAYOUT_H
#include "../pnfs.h"
/* XXX: Let's filter out insanely large mirror count for now to avoid oom
* due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
/*
 * One element of the ds_versions array owned by struct nfs4_ff_layout_ds.
 * nfs4_ff_layout_ds_version() reports the 'version' field of element 0.
 */
struct nfs4_ff_ds_version {
/* NFS version number; presumably the major version -- confirm in decoder */
u32 version;
/* presumably the NFSv4 minor version -- confirm in decoder */
u32 minor_version;
/* presumably the read transfer size for this version -- confirm */
u32 rsize;
/* presumably the write transfer size for this version -- confirm */
u32 wsize;
/* NOTE(review): coupling flag; semantics are not visible in this header */
bool tightly_coupled;
};
/*
 * Flexfile view of a device id.
 *
 * chained in global deviceid hlist (through id_node).
 * FF_LAYOUT_MIRROR_DS() converts a pointer to id_node back into a pointer
 * to this structure.
 */
struct nfs4_ff_layout_ds {
/* generic device id node embedded in this structure */
struct nfs4_deviceid_node id_node;
/* presumably the number of entries in ds_versions -- confirm in decoder */
u32 ds_versions_cnt;
/* array of per-version settings; element 0 is read by
 * nfs4_ff_layout_ds_version() */
struct nfs4_ff_ds_version *ds_versions;
/* data server this device id refers to */
struct nfs4_pnfs_ds *ds;
};
/*
 * Record of one data server error.
 *
 * offset, length, status and opnum mirror the parameters taken by
 * ff_layout_track_ds_error(), declared later in this header.
 */
struct nfs4_ff_layout_ds_err {
struct list_head list; /* linked in nfs4_flexfile_layout error_list */
/* byte range the failed operation covered */
u64 offset;
u64 length;
/* error status recorded for the operation */
int status;
/* operation that failed */
enum nfs_opnum4 opnum;
/* NOTE(review): presumably copied from the mirror the error was seen on */
nfs4_stateid stateid;
/* NOTE(review): presumably identifies the failing device -- confirm */
struct nfs4_deviceid deviceid;
};
/*
 * One mirror of a flexfile layout segment.
 *
 * Pointers to these are stored in nfs4_ff_layout_segment.mirror_array and
 * are looked up by index through FF_LAYOUT_COMP().
 */
struct nfs4_ff_layout_mirror {
u32 ds_count;
u32 efficiency;
/* device backing this mirror; may be NULL (FF_LAYOUT_DEVID_NODE() checks) */
struct nfs4_ff_layout_ds *mirror_ds;
/* presumably the number of entries in fh_versions -- confirm in decoder */
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
/* owner/group as strings, alongside their numeric forms in uid/gid */
struct nfs4_string user_name;
struct nfs4_string group_name;
u32 uid;
u32 gid;
/* NOTE(review): presumably the credential used towards the data server;
 * see ff_layout_get_ds_cred() */
struct rpc_cred *cred;
/* NOTE(review): which fields this lock protects is not visible here */
spinlock_t lock;
};
/*
 * Flexfile layout segment: the generic pNFS segment plus the mirror table.
 * FF_LAYOUT_LSEG() converts a pointer to generic_hdr back into a pointer to
 * this structure.
 */
struct nfs4_ff_layout_segment {
/* generic segment embedded in this structure */
struct pnfs_layout_segment generic_hdr;
u64 stripe_unit;
/* upper bound used by the accessors when indexing mirror_array */
u32 mirror_array_cnt;
/* table of mirror pointers; individual entries may be NULL
 * (FF_LAYOUT_DEVID_NODE() checks for that) */
struct nfs4_ff_layout_mirror **mirror_array;
};
/*
 * Flexfile layout header: the generic pNFS layout header plus driver state.
 * FF_LAYOUT_FROM_HDR() converts a pointer to generic_hdr back into a pointer
 * to this structure.
 */
struct nfs4_flexfile_layout {
/* generic layout header embedded in this structure */
struct pnfs_layout_hdr generic_hdr;
struct pnfs_ds_commit_info commit_info;
struct list_head error_list; /* nfs4_ff_layout_ds_err */
};
/*
 * Given the generic layout header embedded as generic_hdr, return the
 * flexfile layout that contains it.
 */
static inline struct nfs4_flexfile_layout *
FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
{
	struct nfs4_flexfile_layout *flo;

	flo = container_of(lo, struct nfs4_flexfile_layout, generic_hdr);
	return flo;
}
/*
 * Given the generic layout segment embedded as generic_hdr, return the
 * flexfile layout segment that contains it.
 */
static inline struct nfs4_ff_layout_segment *
FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg)
{
	struct nfs4_ff_layout_segment *fls;

	fls = container_of(lseg, struct nfs4_ff_layout_segment, generic_hdr);
	return fls;
}
/*
 * Return the device id node of mirror @idx in @lseg.
 *
 * Returns NULL when @idx is outside the mirror array, when the array slot
 * is empty, or when the mirror has no mirror_ds attached.
 */
static inline struct nfs4_deviceid_node *
FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct nfs4_ff_layout_mirror *mirror;

	/* Bounds check first: the slot may only be read for a valid index. */
	if (idx >= fls->mirror_array_cnt)
		return NULL;

	mirror = fls->mirror_array[idx];
	if (mirror == NULL)
		return NULL;
	if (mirror->mirror_ds == NULL)
		return NULL;

	return &mirror->mirror_ds->id_node;
}
/*
 * Given the device id node embedded as id_node, return the flexfile
 * device structure that contains it.
 */
static inline struct nfs4_ff_layout_ds *
FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node)
{
	struct nfs4_ff_layout_ds *mirror_ds;

	mirror_ds = container_of(node, struct nfs4_ff_layout_ds, id_node);
	return mirror_ds;
}
/*
 * Return mirror @idx of @lseg, or NULL when @idx is not below
 * mirror_array_cnt. The returned slot itself is not checked for NULL.
 */
static inline struct nfs4_ff_layout_mirror *
FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);

	if (idx < fls->mirror_array_cnt)
		return fls->mirror_array[idx];

	return NULL;
}
static inline u32
FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg)
{
return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt;
}
/*
 * Flexfile-named wrapper: forwards @node to
 * nfs4_test_deviceid_unavailable() and returns its result unchanged.
 */
static inline bool
ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
	bool unavailable = nfs4_test_deviceid_unavailable(node);

	return unavailable;
}
/*
 * Return the 'version' field of the first ds_versions entry of the device
 * behind mirror @ds_idx of @lseg.
 *
 * NOTE(review): nothing here is checked. FF_LAYOUT_COMP() returns NULL for
 * an out-of-range index, and mirror_ds can be NULL (FF_LAYOUT_DEVID_NODE()
 * tests for it), so callers must pass a valid index for a mirror whose
 * device has been set up -- confirm at every call site.
 */
static inline int
nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
	struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds;

	return mirror_ds->ds_versions[0].version;
}
/*
 * Declarations only: none of the functions below is defined in this header.
 * NOTE(review): the per-function remarks are inferred from the signatures;
 * confirm against the definitions before relying on them.
 */

/* Device id handling: allocate a flexfile device from @pdev, and the
 * put/free counterparts for a struct nfs4_ff_layout_ds. */
struct nfs4_ff_layout_ds *
nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
gfp_t gfp_flags);
void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds);
void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds);
/* Data server error tracking; the parameters correspond to the fields of
 * struct nfs4_ff_layout_ds_err. */
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
struct nfs4_ff_layout_mirror *mirror, u64 offset,
u64 length, int status, enum nfs_opnum4 opnum,
gfp_t gfp_flags);
/* Presumably XDR-encodes recorded errors that fall within @range and
 * reports how many through @count -- confirm. */
int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo,
struct xdr_stream *xdr, int *count,
const struct pnfs_layout_range *range);
/* Per-mirror lookups: each takes the segment plus a mirror/ds index. */
struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
bool fail_return);
struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
u32 ds_idx,
struct nfs_client *ds_clp,
struct inode *inode);
struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
u32 ds_idx, struct rpc_cred *mdscred);
/* Presumably true when at least one data server of @lseg is usable. */
bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
#endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */
This diff is collapsed.
......@@ -152,7 +152,7 @@ void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *f
nfs_fattr_free_group_name(fattr);
}
static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
{
unsigned long val;
char buf[16];
......@@ -166,6 +166,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re
*res = val;
return 1;
}
EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric);
static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
{
......
......@@ -7796,9 +7796,7 @@ static void nfs4_layoutreturn_release(void *calldata)
spin_lock(&lo->plh_inode->i_lock);
if (lrp->res.lrs_present)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
pnfs_clear_layoutreturn_waitbit(lo);
clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
lo->plh_block_lgets--;
......
......@@ -910,7 +910,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
pnfs_layout_io_set_failed(lo, range->iomode);
}
return NULL;
}
} else
pnfs_layout_clear_fail_bit(lo,
pnfs_iomode_to_fail_bit(range->iomode));
return lseg;
}
......@@ -930,6 +932,13 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
}
}
/*
 * Clear NFS_LAYOUT_RETURN in lo->plh_flags, then call wake_up_bit() for
 * that bit.
 */
void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
{
	unsigned long *flags = &lo->plh_flags;

	clear_bit_unlock(NFS_LAYOUT_RETURN, flags);
	/* Keep the barrier between the clear and the wakeup, in this order. */
	smp_mb__after_atomic();
	wake_up_bit(flags, NFS_LAYOUT_RETURN);
}
static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
enum pnfs_iomode iomode, bool sync)
......@@ -943,6 +952,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
status = -ENOMEM;
spin_lock(&ino->i_lock);
lo->plh_block_lgets--;
pnfs_clear_layoutreturn_waitbit(lo);
rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
spin_unlock(&ino->i_lock);
pnfs_put_layout_hdr(lo);
......@@ -1418,6 +1428,15 @@ static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
TASK_UNINTERRUPTIBLE);
}
/*
 * Clear NFS_LAYOUT_FIRST_LAYOUTGET in lo->plh_flags, then call
 * wake_up_bit() for that bit.
 */
static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
{
	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags);
	/* Keep the barrier between the clear and the wakeup, in this order. */
	smp_mb__after_atomic();
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET);
}
/*
* Layout segment is retrieved from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller.
......@@ -1499,6 +1518,8 @@ pnfs_update_layout(struct inode *ino,
spin_unlock(&ino->i_lock);
dprintk("%s wait for layoutreturn\n", __func__);
if (pnfs_prepare_to_retry_layoutget(lo)) {
if (first)
pnfs_clear_first_layoutget(lo);
pnfs_put_layout_hdr(lo);
dprintk("%s retrying\n", __func__);
goto lookup_again;
......@@ -1533,13 +1554,8 @@ pnfs_update_layout(struct inode *ino,
pnfs_clear_retry_layoutget(lo);
atomic_dec(&lo->plh_outstanding);
out_put_layout_hdr:
if (first) {
unsigned long *bitlock = &lo->plh_flags;
clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
smp_mb__after_atomic();
wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
}
if (first)
pnfs_clear_first_layoutget(lo);
pnfs_put_layout_hdr(lo);
out:
dprintk("%s: inode %s/%llu pNFS layout segment %s for "
......
......@@ -278,6 +278,7 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
u64 count,
enum pnfs_iomode iomode,
gfp_t gfp_flags);
void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
......
......@@ -516,6 +516,7 @@ enum pnfs_layouttype {
LAYOUT_NFSV4_1_FILES = 1,
LAYOUT_OSD2_OBJECTS = 2,
LAYOUT_BLOCK_VOLUME = 3,
LAYOUT_FLEX_FILES = 4,
};
/* used for both layout return and recall */
......
......@@ -73,5 +73,7 @@ int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t
int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
extern unsigned int nfs_idmap_cache_timeout;
#endif /* NFS_IDMAP_H */
......@@ -89,6 +89,8 @@ void rpc_free_iostats(struct rpc_iostats *);
/*
 * No-op inline versions of the iostats helpers.
 * NOTE(review): presumably the branch used when the real implementations
 * are compiled out; the guarding preprocessor condition is outside this view.
 */
static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; }
static inline void rpc_count_iostats(const struct rpc_task *task,
				     struct rpc_iostats *stats) {}
/*
 * The parameters are named here: this is a function definition, and a
 * definition with unnamed parameters is not valid C before C23.
 */
static inline void rpc_count_iostats_metrics(const struct rpc_task *task,
					     struct rpc_iostats *stats) {}
static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {}
static inline void rpc_free_iostats(struct rpc_iostats *stats) {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment