Commit a71e3604 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Trond made a change to the server's tcp logic that allows a fast
     client to better take advantage of high bandwidth networks, but may
     increase the risk that a single client could starve other clients;
     a new sunrpc.svc_rpc_per_connection_limit parameter should help
     mitigate this in the (hopefully unlikely) event this becomes a
     problem in practice.

   - Tom Haynes added a minimal flex-layout pnfs server, which is of no
     use in production for now--don't build it unless you're doing
     client testing or further server development"

* tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux: (32 commits)
  nfsd: remove some dead code in nfsd_create_locked()
  nfsd: drop unnecessary MAY_EXEC check from create
  nfsd: clean up bad-type check in nfsd_create_locked
  nfsd: remove unnecessary positive-dentry check
  nfsd: reorganize nfsd_create
  nfsd: check d_can_lookup in fh_verify of directories
  nfsd: remove redundant zero-length check from create
  nfsd: Make creates return EEXIST instead of EACCES
  SUNRPC: Detect immediate closure of accepted sockets
  SUNRPC: accept() may return sockets that are still in SYN_RECV
  nfsd: allow nfsd to advertise multiple layout types
  nfsd: Close race between nfsd4_release_lockowner and nfsd4_lock
  nfsd/blocklayout: Make sure calculate signature/designator length aligned
  xfs: abstract block export operations from nfsd layouts
  SUNRPC: Remove unused callback xpo_adjust_wspace()
  SUNRPC: Change TCP socket space reservation
  SUNRPC: Add a server side per-connection limit
  SUNRPC: Micro optimisation for svc_data_ready
  SUNRPC: Call the default socket callbacks instead of open coding
  SUNRPC: lock the socket while detaching it
  ...
parents d58b0d98 2b118859
...@@ -3877,6 +3877,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -3877,6 +3877,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
using these two parameters to set the minimum and using these two parameters to set the minimum and
maximum port values. maximum port values.
sunrpc.svc_rpc_per_connection_limit=
[NFS,SUNRPC]
Limit the number of requests that the server will
process in parallel from a single connection.
The default value is 0 (no limit).
sunrpc.pool_mode= sunrpc.pool_mode=
[NFS] [NFS]
Control how the NFS server code allocates CPUs to Control how the NFS server code allocates CPUs to
......
...@@ -70,6 +70,12 @@ config FS_POSIX_ACL ...@@ -70,6 +70,12 @@ config FS_POSIX_ACL
config EXPORTFS config EXPORTFS
tristate tristate
config EXPORTFS_BLOCK_OPS
bool "Enable filesystem export operations for block IO"
help
This option enables the export operations for a filesystem to support
external block IO.
config FILE_LOCKING config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT bool "Enable POSIX file locking API" if EXPERT
default y default y
......
...@@ -90,6 +90,7 @@ config NFSD_BLOCKLAYOUT ...@@ -90,6 +90,7 @@ config NFSD_BLOCKLAYOUT
bool "NFSv4.1 server support for pNFS block layouts" bool "NFSv4.1 server support for pNFS block layouts"
depends on NFSD_V4 && BLOCK depends on NFSD_V4 && BLOCK
select NFSD_PNFS select NFSD_PNFS
select EXPORTFS_BLOCK_OPS
help help
This option enables support for the exporting pNFS block layouts This option enables support for the exporting pNFS block layouts
in the kernel's NFS server. The pNFS block layout enables NFS in the kernel's NFS server. The pNFS block layout enables NFS
...@@ -102,6 +103,7 @@ config NFSD_SCSILAYOUT ...@@ -102,6 +103,7 @@ config NFSD_SCSILAYOUT
bool "NFSv4.1 server support for pNFS SCSI layouts" bool "NFSv4.1 server support for pNFS SCSI layouts"
depends on NFSD_V4 && BLOCK depends on NFSD_V4 && BLOCK
select NFSD_PNFS select NFSD_PNFS
select EXPORTFS_BLOCK_OPS
help help
This option enables support for the exporting pNFS SCSI layouts This option enables support for the exporting pNFS SCSI layouts
in the kernel's NFS server. The pNFS SCSI layout enables NFS in the kernel's NFS server. The pNFS SCSI layout enables NFS
...@@ -111,6 +113,23 @@ config NFSD_SCSILAYOUT ...@@ -111,6 +113,23 @@ config NFSD_SCSILAYOUT
If unsure, say N. If unsure, say N.
config NFSD_FLEXFILELAYOUT
bool "NFSv4.1 server support for pNFS Flex File layouts"
depends on NFSD_V4
select NFSD_PNFS
help
This option enables support for the exporting pNFS Flex File
layouts in the kernel's NFS server. The pNFS Flex File layout
enables NFS clients to directly perform I/O to NFSv3 devices
accessible to both the server and the clients. See
draft-ietf-nfsv4-flex-files for more details.
Warning, this server implements the bare minimum functionality
to be a flex file server - it is for testing the client,
not for use in production.
If unsure, say N.
config NFSD_V4_SECURITY_LABEL config NFSD_V4_SECURITY_LABEL
bool "Provide Security Label support for NFSv4 server" bool "Provide Security Label support for NFSv4 server"
depends on NFSD_V4 && SECURITY depends on NFSD_V4 && SECURITY
......
...@@ -20,3 +20,4 @@ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ ...@@ -20,3 +20,4 @@ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o
...@@ -163,6 +163,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb, ...@@ -163,6 +163,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb,
static __be32 static __be32
nfsd4_block_proc_getdeviceinfo(struct super_block *sb, nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
struct svc_rqst *rqstp,
struct nfs4_client *clp, struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp) struct nfsd4_getdeviceinfo *gdp)
{ {
...@@ -355,6 +356,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, ...@@ -355,6 +356,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
static __be32 static __be32
nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
struct svc_rqst *rqstp,
struct nfs4_client *clp, struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp) struct nfsd4_getdeviceinfo *gdp)
{ {
......
...@@ -44,7 +44,7 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) ...@@ -44,7 +44,7 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
switch (b->type) { switch (b->type) {
case PNFS_BLOCK_VOLUME_SIMPLE: case PNFS_BLOCK_VOLUME_SIMPLE:
len = 4 + 4 + 8 + 4 + b->simple.sig_len; len = 4 + 4 + 8 + 4 + (XDR_QUADLEN(b->simple.sig_len) << 2);
p = xdr_reserve_space(xdr, len); p = xdr_reserve_space(xdr, len);
if (!p) if (!p)
return -ETOOSMALL; return -ETOOSMALL;
...@@ -55,7 +55,7 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) ...@@ -55,7 +55,7 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
break; break;
case PNFS_BLOCK_VOLUME_SCSI: case PNFS_BLOCK_VOLUME_SCSI:
len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8; len = 4 + 4 + 4 + 4 + (XDR_QUADLEN(b->scsi.designator_len) << 2) + 8;
p = xdr_reserve_space(xdr, len); p = xdr_reserve_space(xdr, len);
if (!p) if (!p)
return -ETOOSMALL; return -ETOOSMALL;
......
...@@ -706,7 +706,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) ...@@ -706,7 +706,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_fslocs.locations = NULL; new->ex_fslocs.locations = NULL;
new->ex_fslocs.locations_count = 0; new->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = 0; new->ex_fslocs.migrated = 0;
new->ex_layout_type = 0; new->ex_layout_types = 0;
new->ex_uuid = NULL; new->ex_uuid = NULL;
new->cd = item->cd; new->cd = item->cd;
} }
...@@ -731,7 +731,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) ...@@ -731,7 +731,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
item->ex_fslocs.locations_count = 0; item->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = item->ex_fslocs.migrated; new->ex_fslocs.migrated = item->ex_fslocs.migrated;
item->ex_fslocs.migrated = 0; item->ex_fslocs.migrated = 0;
new->ex_layout_type = item->ex_layout_type; new->ex_layout_types = item->ex_layout_types;
new->ex_nflavors = item->ex_nflavors; new->ex_nflavors = item->ex_nflavors;
for (i = 0; i < MAX_SECINFO_LIST; i++) { for (i = 0; i < MAX_SECINFO_LIST; i++) {
new->ex_flavors[i] = item->ex_flavors[i]; new->ex_flavors[i] = item->ex_flavors[i];
...@@ -954,6 +954,16 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) ...@@ -954,6 +954,16 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX) rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)
return 0; return 0;
} }
/* If the compound op contains a spo_must_allowed op,
* it will be sent with integrity/protection which
* will have to be expressly allowed on mounts that
* don't support it
*/
if (nfsd4_spo_must_allow(rqstp))
return 0;
return nfserr_wrongsec; return nfserr_wrongsec;
} }
......
...@@ -57,7 +57,7 @@ struct svc_export { ...@@ -57,7 +57,7 @@ struct svc_export {
struct nfsd4_fs_locations ex_fslocs; struct nfsd4_fs_locations ex_fslocs;
uint32_t ex_nflavors; uint32_t ex_nflavors;
struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
enum pnfs_layouttype ex_layout_type; u32 ex_layout_types;
struct nfsd4_deviceid_map *ex_devid_map; struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd; struct cache_detail *cd;
}; };
......
/*
* Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
*
* The following implements a super-simple flex-file server
* where the NFSv4.1 mds is also the ds. And the storage is
* the same. I.e., writing to the mds via a NFSv4.1 WRITE
* goes to the same location as the NFSv3 WRITE.
*/
#include <linux/slab.h>
#include <linux/nfsd/debug.h>
#include <linux/sunrpc/addr.h>
#include "flexfilelayoutxdr.h"
#include "pnfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
static __be32
nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
struct nfsd4_layoutget *args)
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
u32 device_generation = 0;
int error;
uid_t u;
struct pnfs_ff_layout *fl;
/*
* The super simple flex file server has 1 mirror, 1 data server,
* and 1 file handle. So instead of 4 allocs, do 1 for now.
* Zero it out for the stateid - don't want junk in there!
*/
error = -ENOMEM;
fl = kzalloc(sizeof(*fl), GFP_KERNEL);
if (!fl)
goto out_error;
args->lg_content = fl;
/*
* Avoid layout commit, try to force the I/O to the DS,
* and for fun, cause all IOMODE_RW layout segments to
* effectively be WRITE only.
*/
fl->flags = FF_FLAGS_NO_LAYOUTCOMMIT | FF_FLAGS_NO_IO_THRU_MDS |
FF_FLAGS_NO_READ_IO;
/* Do not allow a IOMODE_READ segment to have write pemissions */
if (seg->iomode == IOMODE_READ) {
u = from_kuid(&init_user_ns, inode->i_uid) + 1;
fl->uid = make_kuid(&init_user_ns, u);
} else
fl->uid = inode->i_uid;
fl->gid = inode->i_gid;
error = nfsd4_set_deviceid(&fl->deviceid, fhp, device_generation);
if (error)
goto out_error;
fl->fh.size = fhp->fh_handle.fh_size;
memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size);
/* Give whole file layout segments */
seg->offset = 0;
seg->length = NFS4_MAX_UINT64;
dprintk("GET: 0x%llx:0x%llx %d\n", seg->offset, seg->length,
seg->iomode);
return 0;
out_error:
seg->length = 0;
return nfserrno(error);
}
/*
 * Fill in the flex-file device address for a GETDEVICEINFO.
 *
 * The single "data server" advertised is the address the request itself
 * was sent to (rqstp->rq_daddr), as an NFSv3 server whose rsize and wsize
 * both equal svc_max_payload(rqstp).
 *
 * @sb and @clp are not used by this implementation.
 *
 * On success gdp->gd_device points at a newly allocated
 * struct pnfs_ff_device_addr and 0 is returned; if the allocation fails
 * the translated -ENOMEM is returned.
 */
static __be32
nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
		struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp)
{
	char addr[INET6_ADDRSTRLEN];
	struct pnfs_ff_device_addr *da;
	u16 port;

	da = kzalloc(sizeof(*da), GFP_KERNEL);
	if (!da)
		return nfserrno(-ENOMEM);
	gdp->gd_device = da;

	da->version = 3;
	da->minor_version = 0;
	da->rsize = svc_max_payload(rqstp);
	da->wsize = da->rsize;

	/*
	 * NOTE(review): the return value of rpc_ntop() is not checked, so
	 * this relies on rq_daddr always being a presentable address.
	 */
	rpc_ntop((struct sockaddr *)&rqstp->rq_daddr,
		 addr, INET6_ADDRSTRLEN);

	/* Anything that is not AF_INET is treated as an IPv6 address. */
	switch (rqstp->rq_daddr.ss_family) {
	case AF_INET: {
		struct sockaddr_in *sin =
			(struct sockaddr_in *)&rqstp->rq_daddr;

		port = ntohs(sin->sin_port);
		snprintf(da->netaddr.netid, FF_NETID_LEN + 1, "tcp");
		da->netaddr.netid_len = 3;
		break;
	}
	default: {
		struct sockaddr_in6 *sin6 =
			(struct sockaddr_in6 *)&rqstp->rq_daddr;

		port = ntohs(sin6->sin6_port);
		snprintf(da->netaddr.netid, FF_NETID_LEN + 1, "tcp6");
		da->netaddr.netid_len = 4;
		break;
	}
	}

	/* Address string: "<address>.<port high byte>.<port low byte>" */
	da->netaddr.addr_len =
		snprintf(da->netaddr.addr, FF_ADDR_LEN + 1,
			 "%s.%hhu.%hhu", addr, port >> 8, port & 0xff);

	da->tightly_coupled = false;

	return 0;
}
/*
 * Layout operations for the flex-file layout type: the GETDEVICEINFO and
 * LAYOUTGET processing hooks together with their matching XDR encoders.
 */
const struct nfsd4_layout_ops ff_layout_ops = {
	.notify_types		= NOTIFY_DEVICEID4_DELETE |
				  NOTIFY_DEVICEID4_CHANGE,

	/* GETDEVICEINFO */
	.proc_getdeviceinfo	= nfsd4_ff_proc_getdeviceinfo,
	.encode_getdeviceinfo	= nfsd4_ff_encode_getdeviceinfo,

	/* LAYOUTGET */
	.proc_layoutget		= nfsd4_ff_proc_layoutget,
	.encode_layoutget	= nfsd4_ff_encode_layoutget,
};
/*
* Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
*/
#include <linux/sunrpc/svc.h>
#include <linux/nfs4.h>
#include "nfsd.h"
#include "flexfilelayoutxdr.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
/*
 * A uid or gid rendered as a decimal string for the wire.
 * buf is filled with sprintf("%u") by nfsd4_ff_encode_layoutget();
 * 11 bytes hold a 10-digit decimal number plus the terminating NUL.
 * len is the sprintf() return value, i.e. the string length without NUL.
 */
struct ff_idmap {
char buf[11];
int len;
};
/*
 * Encode the flex-file layout body for a LAYOUTGET reply.
 *
 * @xdr: stream to encode into.
 * @lgp: LAYOUTGET state; lgp->lg_content must point at the
 *       struct pnfs_ff_layout to encode.
 *
 * The body is written as one length-prefixed blob describing a single
 * mirror with a single data server and a single file handle.
 *
 * Returns 0 on success or nfserr_toosmall if the stream has no room.
 *
 * The advertised body length must equal the number of bytes actually
 * written.  A variable-length XDR opaque occupies a 4-byte length word
 * plus its data rounded up to a multiple of 4, so every string and the
 * file handle are accounted for that way, and each fixed field written
 * below has a matching term in the sums.
 */
__be32
nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
		struct nfsd4_layoutget *lgp)
{
	struct pnfs_ff_layout *fl = lgp->lg_content;
	int len, mirror_len, ds_len, fh_len;
	__be32 *p;

	/*
	 * Unlike nfsd4_encode_user, we know these will
	 * always be stringified.
	 */
	struct ff_idmap uid;
	struct ff_idmap gid;

	uid.len = sprintf(uid.buf, "%u", from_kuid(&init_user_ns, fl->uid));
	gid.len = sprintf(gid.buf, "%u", from_kgid(&init_user_ns, fl->gid));

	/* File handle: length word + data padded to a 4-byte boundary */
	fh_len = 4 + ((fl->fh.size + 3) & ~3U);

	/* One data server entry, term by term in encoding order */
	ds_len = sizeof(struct nfsd4_deviceid) +	/* device id */
		 4 +					/* efficiency */
		 4 + sizeof(stateid_opaque_t) +		/* stateid: generation + opaque */
		 4 +					/* file handle count */
		 fh_len +
		 4 + ((uid.len + 3) & ~3) +		/* user name string */
		 4 + ((gid.len + 3) & ~3);		/* group name string */

	/* Data server count followed by the single data server */
	mirror_len = 4 + ds_len;

	/* The layout segment: stripe unit, mirror count, mirror, flags, hint */
	len = 8 + 4 + mirror_len + 4 + 4;

	p = xdr_reserve_space(xdr, sizeof(__be32) + len);
	if (!p)
		return nfserr_toosmall;

	*p++ = cpu_to_be32(len);
	p = xdr_encode_hyper(p, 0);		/* stripe unit of 1 */

	*p++ = cpu_to_be32(1);			/* single mirror */
	*p++ = cpu_to_be32(1);			/* single data server */

	p = xdr_encode_opaque_fixed(p, &fl->deviceid,
			sizeof(struct nfsd4_deviceid));

	*p++ = cpu_to_be32(1);			/* efficiency */

	*p++ = cpu_to_be32(fl->stateid.si_generation);
	p = xdr_encode_opaque_fixed(p, &fl->stateid.si_opaque,
				    sizeof(stateid_opaque_t));

	*p++ = cpu_to_be32(1);			/* single file handle */
	p = xdr_encode_opaque(p, fl->fh.data, fl->fh.size);

	p = xdr_encode_opaque(p, uid.buf, uid.len);
	p = xdr_encode_opaque(p, gid.buf, gid.len);

	*p++ = cpu_to_be32(fl->flags);
	*p++ = cpu_to_be32(0);			/* No stats collect hint */

	return 0;
}
/*
 * Encode the flex-file device address for a GETDEVICEINFO reply.
 *
 * @xdr: stream to encode into.
 * @gdp: GETDEVICEINFO state; gdp->gd_device must point at the
 *       struct pnfs_ff_device_addr to encode.
 *
 * The body is written as one length-prefixed blob holding one network
 * address (netid + address string) and one version entry.
 *
 * Returns 0 on success or nfserr_resource if the stream has no room.
 *
 * The advertised body length must equal the number of bytes actually
 * written.  Each string is an XDR opaque: a 4-byte length word plus the
 * data rounded up to a multiple of 4.
 */
__be32
nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
		struct nfsd4_getdeviceinfo *gdp)
{
	struct pnfs_ff_device_addr *da = gdp->gd_device;
	int len;
	int ver_len;
	int addr_len;
	__be32 *p;

	/* netid and address strings: length word + padded data each */
	addr_len = 4 + ((da->netaddr.netid_len + 3) & ~3U) +
		   4 + ((da->netaddr.addr_len + 3) & ~3U);

	/* version, minor version, rsize, wsize, tightly coupled */
	ver_len = 20;

	/* netaddr count + netaddr, version count + version entry */
	len = 4 + addr_len + 4 + ver_len;

	p = xdr_reserve_space(xdr, len + sizeof(__be32));
	if (!p)
		return nfserr_resource;

	/*
	 * Fill in the overall length and the number of network addresses
	 * at the beginning of the device address body.
	 */
	*p++ = cpu_to_be32(len);
	*p++ = cpu_to_be32(1);			/* 1 netaddr */

	p = xdr_encode_opaque(p, da->netaddr.netid, da->netaddr.netid_len);
	p = xdr_encode_opaque(p, da->netaddr.addr, da->netaddr.addr_len);

	*p++ = cpu_to_be32(1);			/* 1 versions */

	*p++ = cpu_to_be32(da->version);
	*p++ = cpu_to_be32(da->minor_version);
	*p++ = cpu_to_be32(da->rsize);
	*p++ = cpu_to_be32(da->wsize);
	*p++ = cpu_to_be32(da->tightly_coupled);

	return 0;
}
/*
* Copyright (c) 2016 Tom Haynes <loghyr@primarydata.com>
*/
#ifndef _NFSD_FLEXFILELAYOUTXDR_H
#define _NFSD_FLEXFILELAYOUTXDR_H 1
#include <linux/inet.h>
#include "xdr4.h"
#define FF_FLAGS_NO_LAYOUTCOMMIT 1
#define FF_FLAGS_NO_IO_THRU_MDS 2
#define FF_FLAGS_NO_READ_IO 4
struct xdr_stream;
#define FF_NETID_LEN (4)
#define FF_ADDR_LEN (INET6_ADDRSTRLEN + 8)
/*
 * Network address of a flex-file data server.
 * Both strings are NUL-terminated (hence the "+ 1") and carry their
 * length separately so they can be encoded as counted XDR opaques.
 */
struct pnfs_ff_netaddr {
/* network id string; nfsd4_ff_proc_getdeviceinfo() stores "tcp" or "tcp6" */
char netid[FF_NETID_LEN + 1];
/* address string in the form "<address>.<port high byte>.<port low byte>" */
char addr[FF_ADDR_LEN + 1];
/* length of netid, excluding the NUL */
u32 netid_len;
/* length of addr as returned by snprintf(), excluding the NUL */
u32 addr_len;
};
/*
 * In-memory form of the device address returned by GETDEVICEINFO:
 * one network address plus one version entry.  All fields after
 * netaddr are encoded as 32-bit words by nfsd4_ff_encode_getdeviceinfo().
 */
struct pnfs_ff_device_addr {
struct pnfs_ff_netaddr netaddr;
/* NFS version / minor version spoken by the data server */
u32 version;
u32 minor_version;
/* read and write sizes advertised for the data server */
u32 rsize;
u32 wsize;
/* coupling flag advertised to the client */
bool tightly_coupled;
};
/*
 * In-memory form of the layout returned by LAYOUTGET for the
 * single-mirror, single-data-server flex-file server.
 */
struct pnfs_ff_layout {
/* bitwise OR of the FF_FLAGS_* values */
u32 flags;
/* NOTE(review): not read by nfsd4_ff_encode_layoutget(), which always encodes 0 */
u32 stats_collect_hint;
/* user and group advertised to the client, sent as decimal strings */
kuid_t uid;
kgid_t gid;
struct nfsd4_deviceid deviceid;
/* stateid sent to the client; left zeroed by nfsd4_ff_proc_layoutget() */
stateid_t stateid;
/* file handle to use against the data server */
struct nfs_fh fh;
};
__be32 nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
struct nfsd4_layoutget *lgp);
#endif /* _NFSD_FLEXFILELAYOUTXDR_H */
...@@ -27,6 +27,9 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops; ...@@ -27,6 +27,9 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
static const struct lock_manager_operations nfsd4_layouts_lm_ops; static const struct lock_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
[LAYOUT_FLEX_FILES] = &ff_layout_ops,
#endif
#ifdef CONFIG_NFSD_BLOCKLAYOUT #ifdef CONFIG_NFSD_BLOCKLAYOUT
[LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops,
#endif #endif
...@@ -122,28 +125,35 @@ nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp, ...@@ -122,28 +125,35 @@ nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
void nfsd4_setup_layout_type(struct svc_export *exp) void nfsd4_setup_layout_type(struct svc_export *exp)
{ {
#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
struct super_block *sb = exp->ex_path.mnt->mnt_sb; struct super_block *sb = exp->ex_path.mnt->mnt_sb;
#endif
if (!(exp->ex_flags & NFSEXP_PNFS)) if (!(exp->ex_flags & NFSEXP_PNFS))
return; return;
/* /*
* Check if the file system supports exporting a block-like layout. * If flex file is configured, use it by default. Otherwise
* check if the file system supports exporting a block-like layout.
* If the block device supports reservations prefer the SCSI layout, * If the block device supports reservations prefer the SCSI layout,
* otherwise advertise the block layout. * otherwise advertise the block layout.
*/ */
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES;
#endif
#ifdef CONFIG_NFSD_BLOCKLAYOUT #ifdef CONFIG_NFSD_BLOCKLAYOUT
/* overwrite flex file layout selection if needed */
if (sb->s_export_op->get_uuid && if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks && sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks) sb->s_export_op->commit_blocks)
exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME;
#endif #endif
#ifdef CONFIG_NFSD_SCSILAYOUT #ifdef CONFIG_NFSD_SCSILAYOUT
/* overwrite block layout selection if needed */ /* overwrite block layout selection if needed */
if (sb->s_export_op->map_blocks && if (sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks && sb->s_export_op->commit_blocks &&
sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
exp->ex_layout_type = LAYOUT_SCSI; exp->ex_layout_types |= 1 << LAYOUT_SCSI;
#endif #endif
} }
......
...@@ -605,8 +605,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -605,8 +605,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fh_init(&resfh, NFS4_FHSIZE); fh_init(&resfh, NFS4_FHSIZE);
status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_NOP);
NFSD_MAY_CREATE);
if (status) if (status)
return status; return status;
...@@ -1219,12 +1218,12 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -1219,12 +1218,12 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static const struct nfsd4_layout_ops * static const struct nfsd4_layout_ops *
nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type) nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
{ {
if (!exp->ex_layout_type) { if (!exp->ex_layout_types) {
dprintk("%s: export does not support pNFS\n", __func__); dprintk("%s: export does not support pNFS\n", __func__);
return NULL; return NULL;
} }
if (exp->ex_layout_type != layout_type) { if (!(exp->ex_layout_types & (1 << layout_type))) {
dprintk("%s: layout type %d not supported\n", dprintk("%s: layout type %d not supported\n",
__func__, layout_type); __func__, layout_type);
return NULL; return NULL;
...@@ -1270,7 +1269,7 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, ...@@ -1270,7 +1269,7 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
nfserr = nfs_ok; nfserr = nfs_ok;
if (gdp->gd_maxcount != 0) { if (gdp->gd_maxcount != 0) {
nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
cstate->session->se_client, gdp); rqstp, cstate->session->se_client, gdp);
} }
gdp->gd_notify_types &= ops->notify_types; gdp->gd_notify_types &= ops->notify_types;
...@@ -2335,6 +2334,45 @@ static struct nfsd4_operation nfsd4_ops[] = { ...@@ -2335,6 +2334,45 @@ static struct nfsd4_operation nfsd4_ops[] = {
}, },
}; };
/**
 * nfsd4_spo_must_allow - Determine if the compound op contains an
 * operation that is allowed to be sent with machine credentials
 *
 * @rqstp: a pointer to the struct svc_rqst
 *
 * Checks to see if the compound contains a spo_must_allow op
 * and confirms that it was sent with the proper machine creds.
 *
 * Always false for minor version 0.  Otherwise the operations from
 * index resp->opcnt up to argp->opcnt are scanned; the first one whose
 * opnum is set in the client's cl_spo_must_allow map, on a client that
 * has cl_mach_cred set and whose machine creds match the request,
 * yields true.  The verdict is stored in cstate->spo_must_allowed, and
 * a stored positive verdict short-circuits later calls.
 *
 * Return: true if such an operation was found, false otherwise.
 */
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
{
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
	struct nfsd4_compound_state *cstate = &resp->cstate;
	struct nfs4_op_map *allow;
	struct nfsd4_op *this;
	u32 opiter;

	if (!cstate->minorversion)
		return false;

	if (cstate->spo_must_allowed)
		return true;

	/*
	 * Only derive the map from cstate->clp once it is actually
	 * needed, i.e. after the early returns above.
	 */
	allow = &cstate->clp->cl_spo_must_allow;

	opiter = resp->opcnt;
	while (opiter < argp->opcnt) {
		this = &argp->ops[opiter++];
		if (test_bit(this->opnum, allow->u.longs) &&
			cstate->clp->cl_mach_cred &&
			nfsd4_mach_creds_match(cstate->clp, rqstp)) {
			cstate->spo_must_allowed = true;
			return true;
		}
	}
	cstate->spo_must_allowed = false;
	return false;
}
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
{ {
struct nfsd4_operation *opdesc; struct nfsd4_operation *opdesc;
......
...@@ -1200,27 +1200,6 @@ free_ol_stateid_reaplist(struct list_head *reaplist) ...@@ -1200,27 +1200,6 @@ free_ol_stateid_reaplist(struct list_head *reaplist)
} }
} }
/*
 * Tear down a lock owner: unhash it, unhash every lock stateid it still
 * owns, free those stateids, and drop one stateowner reference.
 *
 * The unhashing is done under the owning client's cl_lock; the stateids
 * are only collected on a private reap list while the lock is held and
 * are freed after it has been dropped.
 */
static void release_lockowner(struct nfs4_lockowner *lo)
{
struct nfs4_client *clp = lo->lo_owner.so_client;
struct nfs4_ol_stateid *stp;
struct list_head reaplist;
INIT_LIST_HEAD(&reaplist);
spin_lock(&clp->cl_lock);
unhash_lockowner_locked(lo);
/* Drain the owner's stateid list, moving each entry to the reap list. */
while (!list_empty(&lo->lo_owner.so_stateids)) {
stp = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid, st_perstateowner);
/* Warn if unhash_lock_stateid() reports failure for this stateid. */
WARN_ON(!unhash_lock_stateid(stp));
put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
/* Free the collected stateids only after cl_lock has been released. */
free_ol_stateid_reaplist(&reaplist);
nfs4_put_stateowner(&lo->lo_owner);
}
static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
struct list_head *reaplist) struct list_head *reaplist)
{ {
...@@ -1972,7 +1951,7 @@ static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) ...@@ -1972,7 +1951,7 @@ static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
service == RPC_GSS_SVC_PRIVACY; service == RPC_GSS_SVC_PRIVACY;
} }
static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
{ {
struct svc_cred *cr = &rqstp->rq_cred; struct svc_cred *cr = &rqstp->rq_cred;
...@@ -2388,6 +2367,22 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, ...@@ -2388,6 +2367,22 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
switch (exid->spa_how) { switch (exid->spa_how) {
case SP4_MACH_CRED: case SP4_MACH_CRED:
exid->spo_must_enforce[0] = 0;
exid->spo_must_enforce[1] = (
1 << (OP_BIND_CONN_TO_SESSION - 32) |
1 << (OP_EXCHANGE_ID - 32) |
1 << (OP_CREATE_SESSION - 32) |
1 << (OP_DESTROY_SESSION - 32) |
1 << (OP_DESTROY_CLIENTID - 32));
exid->spo_must_allow[0] &= (1 << (OP_CLOSE) |
1 << (OP_OPEN_DOWNGRADE) |
1 << (OP_LOCKU) |
1 << (OP_DELEGRETURN));
exid->spo_must_allow[1] &= (
1 << (OP_TEST_STATEID - 32) |
1 << (OP_FREE_STATEID - 32));
if (!svc_rqst_integrity_protected(rqstp)) { if (!svc_rqst_integrity_protected(rqstp)) {
status = nfserr_inval; status = nfserr_inval;
goto out_nolock; goto out_nolock;
...@@ -2424,7 +2419,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, ...@@ -2424,7 +2419,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
status = nfserr_inval; status = nfserr_inval;
goto out; goto out;
} }
if (!mach_creds_match(conf, rqstp)) { if (!nfsd4_mach_creds_match(conf, rqstp)) {
status = nfserr_wrong_cred; status = nfserr_wrong_cred;
goto out; goto out;
} }
...@@ -2473,6 +2468,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, ...@@ -2473,6 +2468,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
goto out; goto out;
} }
new->cl_minorversion = cstate->minorversion; new->cl_minorversion = cstate->minorversion;
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
gen_clid(new, nn); gen_clid(new, nn);
add_to_unconfirmed(new); add_to_unconfirmed(new);
...@@ -2676,7 +2673,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, ...@@ -2676,7 +2673,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
if (conf) { if (conf) {
status = nfserr_wrong_cred; status = nfserr_wrong_cred;
if (!mach_creds_match(conf, rqstp)) if (!nfsd4_mach_creds_match(conf, rqstp))
goto out_free_conn; goto out_free_conn;
cs_slot = &conf->cl_cs_slot; cs_slot = &conf->cl_cs_slot;
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
...@@ -2692,7 +2689,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, ...@@ -2692,7 +2689,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
goto out_free_conn; goto out_free_conn;
} }
status = nfserr_wrong_cred; status = nfserr_wrong_cred;
if (!mach_creds_match(unconf, rqstp)) if (!nfsd4_mach_creds_match(unconf, rqstp))
goto out_free_conn; goto out_free_conn;
cs_slot = &unconf->cl_cs_slot; cs_slot = &unconf->cl_cs_slot;
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
...@@ -2801,7 +2798,7 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, ...@@ -2801,7 +2798,7 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
if (!session) if (!session)
goto out_no_session; goto out_no_session;
status = nfserr_wrong_cred; status = nfserr_wrong_cred;
if (!mach_creds_match(session->se_client, rqstp)) if (!nfsd4_mach_creds_match(session->se_client, rqstp))
goto out; goto out;
status = nfsd4_map_bcts_dir(&bcts->dir); status = nfsd4_map_bcts_dir(&bcts->dir);
if (status) if (status)
...@@ -2848,7 +2845,7 @@ nfsd4_destroy_session(struct svc_rqst *r, ...@@ -2848,7 +2845,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
if (!ses) if (!ses)
goto out_client_lock; goto out_client_lock;
status = nfserr_wrong_cred; status = nfserr_wrong_cred;
if (!mach_creds_match(ses->se_client, r)) if (!nfsd4_mach_creds_match(ses->se_client, r))
goto out_put_session; goto out_put_session;
status = mark_session_dead_locked(ses, 1 + ref_held_by_me); status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
if (status) if (status)
...@@ -3087,7 +3084,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta ...@@ -3087,7 +3084,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
status = nfserr_stale_clientid; status = nfserr_stale_clientid;
goto out; goto out;
} }
if (!mach_creds_match(clp, rqstp)) { if (!nfsd4_mach_creds_match(clp, rqstp)) {
clp = NULL; clp = NULL;
status = nfserr_wrong_cred; status = nfserr_wrong_cred;
goto out; goto out;
...@@ -5945,6 +5942,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, ...@@ -5945,6 +5942,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
__be32 status; __be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfs4_client *clp; struct nfs4_client *clp;
LIST_HEAD (reaplist);
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id); clid->cl_boot, clid->cl_id);
...@@ -5975,9 +5973,23 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, ...@@ -5975,9 +5973,23 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
nfs4_get_stateowner(sop); nfs4_get_stateowner(sop);
break; break;
} }
if (!lo) {
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
if (lo) return status;
release_lockowner(lo); }
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
stp = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid,
st_perstateowner);
WARN_ON(!unhash_lock_stateid(stp));
put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
nfs4_put_stateowner(&lo->lo_owner);
return status; return status;
} }
......
...@@ -1299,16 +1299,14 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, ...@@ -1299,16 +1299,14 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
break; break;
case SP4_MACH_CRED: case SP4_MACH_CRED:
/* spo_must_enforce */ /* spo_must_enforce */
READ_BUF(4); status = nfsd4_decode_bitmap(argp,
dummy = be32_to_cpup(p++); exid->spo_must_enforce);
READ_BUF(dummy * 4); if (status)
p += dummy; goto out;
/* spo_must_allow */ /* spo_must_allow */
READ_BUF(4); status = nfsd4_decode_bitmap(argp, exid->spo_must_allow);
dummy = be32_to_cpup(p++); if (status)
READ_BUF(dummy * 4); goto out;
p += dummy;
break; break;
case SP4_SSV: case SP4_SSV:
/* ssp_ops */ /* ssp_ops */
...@@ -2164,22 +2162,20 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, ...@@ -2164,22 +2162,20 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
} }
static inline __be32 static inline __be32
nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type) nfsd4_encode_layout_types(struct xdr_stream *xdr, u32 layout_types)
{ {
__be32 *p; __be32 *p;
unsigned long i = hweight_long(layout_types);
if (layout_type) { p = xdr_reserve_space(xdr, 4 + 4 * i);
p = xdr_reserve_space(xdr, 8);
if (!p)
return nfserr_resource;
*p++ = cpu_to_be32(1);
*p++ = cpu_to_be32(layout_type);
} else {
p = xdr_reserve_space(xdr, 4);
if (!p) if (!p)
return nfserr_resource; return nfserr_resource;
*p++ = cpu_to_be32(0);
} *p++ = cpu_to_be32(i);
for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i)
if (layout_types & (1 << i))
*p++ = cpu_to_be32(i);
return 0; return 0;
} }
...@@ -2754,13 +2750,13 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, ...@@ -2754,13 +2750,13 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
} }
#ifdef CONFIG_NFSD_PNFS #ifdef CONFIG_NFSD_PNFS
if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) { if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
if (status) if (status)
goto out; goto out;
} }
if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) { if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) {
status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
if (status) if (status)
goto out; goto out;
} }
...@@ -3867,14 +3863,6 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w ...@@ -3867,14 +3863,6 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
return nfserr; return nfserr;
} }
static const u32 nfs4_minimal_spo_must_enforce[2] = {
[1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) |
1 << (OP_EXCHANGE_ID - 32) |
1 << (OP_CREATE_SESSION - 32) |
1 << (OP_DESTROY_SESSION - 32) |
1 << (OP_DESTROY_CLIENTID - 32)
};
static __be32 static __be32
nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_exchange_id *exid) struct nfsd4_exchange_id *exid)
...@@ -3885,6 +3873,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, ...@@ -3885,6 +3873,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
char *server_scope; char *server_scope;
int major_id_sz; int major_id_sz;
int server_scope_sz; int server_scope_sz;
int status = 0;
uint64_t minor_id = 0; uint64_t minor_id = 0;
if (nfserr) if (nfserr)
...@@ -3913,18 +3902,20 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, ...@@ -3913,18 +3902,20 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
case SP4_NONE: case SP4_NONE:
break; break;
case SP4_MACH_CRED: case SP4_MACH_CRED:
/* spo_must_enforce, spo_must_allow */
p = xdr_reserve_space(xdr, 16);
if (!p)
return nfserr_resource;
/* spo_must_enforce bitmap: */ /* spo_must_enforce bitmap: */
*p++ = cpu_to_be32(2); status = nfsd4_encode_bitmap(xdr,
*p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]); exid->spo_must_enforce[0],
*p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]); exid->spo_must_enforce[1],
/* empty spo_must_allow bitmap: */ exid->spo_must_enforce[2]);
*p++ = cpu_to_be32(0); if (status)
goto out;
/* spo_must_allow bitmap: */
status = nfsd4_encode_bitmap(xdr,
exid->spo_must_allow[0],
exid->spo_must_allow[1],
exid->spo_must_allow[2]);
if (status)
goto out;
break; break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
...@@ -3951,6 +3942,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, ...@@ -3951,6 +3942,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
/* Implementation id */ /* Implementation id */
*p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */
return 0; return 0;
out:
return status;
} }
static __be32 static __be32
......
...@@ -124,6 +124,7 @@ void nfs4_state_shutdown_net(struct net *net); ...@@ -124,6 +124,7 @@ void nfs4_state_shutdown_net(struct net *net);
void nfs4_reset_lease(time_t leasetime); void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir); int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void); char * nfs4_recoverydir(void);
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
#else #else
static inline int nfsd4_init_slabs(void) { return 0; } static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { } static inline void nfsd4_free_slabs(void) { }
...@@ -134,6 +135,10 @@ static inline void nfs4_state_shutdown_net(struct net *net) { } ...@@ -134,6 +135,10 @@ static inline void nfs4_state_shutdown_net(struct net *net) { }
static inline void nfs4_reset_lease(time_t leasetime) { } static inline void nfs4_reset_lease(time_t leasetime) { }
static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
static inline char * nfs4_recoverydir(void) {return NULL; } static inline char * nfs4_recoverydir(void) {return NULL; }
/*
 * Fallback for the #else configuration, in which the real
 * nfsd4_spo_must_allow() declared in the #if branch is unavailable:
 * unconditionally returns false, ignoring rqstp.
 */
static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
{
return false;
}
#endif #endif
/* /*
......
...@@ -59,14 +59,20 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) ...@@ -59,14 +59,20 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
* the write call). * the write call).
*/ */
static inline __be32 static inline __be32
nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested) nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
umode_t requested)
{ {
mode &= S_IFMT; umode_t mode = d_inode(dentry)->i_mode & S_IFMT;
if (requested == 0) /* the caller doesn't care */ if (requested == 0) /* the caller doesn't care */
return nfs_ok; return nfs_ok;
if (mode == requested) if (mode == requested) {
if (mode == S_IFDIR && !d_can_lookup(dentry)) {
WARN_ON_ONCE(1);
return nfserr_notdir;
}
return nfs_ok; return nfs_ok;
}
/* /*
* v4 has an error more specific than err_notdir which we should * v4 has an error more specific than err_notdir which we should
* return in preference to err_notdir: * return in preference to err_notdir:
...@@ -298,7 +304,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) ...@@ -298,7 +304,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
* that it expects something not of the given type. * that it expects something not of the given type.
* *
* @access is formed from the NFSD_MAY_* constants defined in * @access is formed from the NFSD_MAY_* constants defined in
* include/linux/nfsd/nfsd.h. * fs/nfsd/vfs.h.
*/ */
__be32 __be32
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
...@@ -340,7 +346,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) ...@@ -340,7 +346,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
if (error) if (error)
goto out; goto out;
error = nfsd_mode_check(rqstp, d_inode(dentry)->i_mode, type); error = nfsd_mode_check(rqstp, dentry, type);
if (error) if (error)
goto out; goto out;
......
...@@ -251,9 +251,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, ...@@ -251,9 +251,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
/* Check for NFSD_MAY_WRITE in nfsd_create if necessary */ /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
nfserr = nfserr_acces;
if (!argp->len)
goto done;
nfserr = nfserr_exist; nfserr = nfserr_exist;
if (isdotent(argp->name, argp->len)) if (isdotent(argp->name, argp->len))
goto done; goto done;
...@@ -362,8 +359,8 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, ...@@ -362,8 +359,8 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = 0; nfserr = 0;
if (!inode) { if (!inode) {
/* File doesn't exist. Create it and set attrs */ /* File doesn't exist. Create it and set attrs */
nfserr = nfsd_create(rqstp, dirfhp, argp->name, argp->len, nfserr = nfsd_create_locked(rqstp, dirfhp, argp->name,
attr, type, rdev, newfhp); argp->len, attr, type, rdev, newfhp);
} else if (type == S_IFREG) { } else if (type == S_IFREG) {
dprintk("nfsd: existing %s, valid=%x, size=%ld\n", dprintk("nfsd: existing %s, valid=%x, size=%ld\n",
argp->name, attr->ia_valid, (long) attr->ia_size); argp->name, attr->ia_valid, (long) attr->ia_size);
......
...@@ -21,6 +21,7 @@ struct nfsd4_layout_ops { ...@@ -21,6 +21,7 @@ struct nfsd4_layout_ops {
u32 notify_types; u32 notify_types;
__be32 (*proc_getdeviceinfo)(struct super_block *sb, __be32 (*proc_getdeviceinfo)(struct super_block *sb,
struct svc_rqst *rqstp,
struct nfs4_client *clp, struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdevp); struct nfsd4_getdeviceinfo *gdevp);
__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
...@@ -44,6 +45,9 @@ extern const struct nfsd4_layout_ops bl_layout_ops; ...@@ -44,6 +45,9 @@ extern const struct nfsd4_layout_ops bl_layout_ops;
#ifdef CONFIG_NFSD_SCSILAYOUT #ifdef CONFIG_NFSD_SCSILAYOUT
extern const struct nfsd4_layout_ops scsi_layout_ops; extern const struct nfsd4_layout_ops scsi_layout_ops;
#endif #endif
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
extern const struct nfsd4_layout_ops ff_layout_ops;
#endif
__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid, struct nfsd4_compound_state *cstate, stateid_t *stateid,
......
...@@ -345,6 +345,7 @@ struct nfs4_client { ...@@ -345,6 +345,7 @@ struct nfs4_client {
u32 cl_exchange_flags; u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */ /* number of rpc's in progress over an associated session: */
atomic_t cl_refcount; atomic_t cl_refcount;
struct nfs4_op_map cl_spo_must_allow;
/* for nfs41 callbacks */ /* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */ /* We currently support a single back channel with a single slot */
......
...@@ -1135,96 +1135,37 @@ nfsd_check_ignore_resizing(struct iattr *iap) ...@@ -1135,96 +1135,37 @@ nfsd_check_ignore_resizing(struct iattr *iap)
iap->ia_valid &= ~ATTR_SIZE; iap->ia_valid &= ~ATTR_SIZE;
} }
/* /* The parent directory should already be locked: */
* Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented.
* If the response fh has been verified, the parent directory should
* already be locked. Note that the parent directory is left locked.
*
* N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
*/
__be32 __be32
nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen, struct iattr *iap, char *fname, int flen, struct iattr *iap,
int type, dev_t rdev, struct svc_fh *resfhp) int type, dev_t rdev, struct svc_fh *resfhp)
{ {
struct dentry *dentry, *dchild = NULL; struct dentry *dentry, *dchild;
struct inode *dirp; struct inode *dirp;
__be32 err; __be32 err;
__be32 err2; __be32 err2;
int host_err; int host_err;
err = nfserr_perm;
if (!flen)
goto out;
err = nfserr_exist;
if (isdotent(fname, flen))
goto out;
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
dentry = fhp->fh_dentry; dentry = fhp->fh_dentry;
dirp = d_inode(dentry); dirp = d_inode(dentry);
err = nfserr_notdir;
if (!dirp->i_op->lookup)
goto out;
/*
* Check whether the response file handle has been verified yet.
* If it has, the parent directory should already be locked.
*/
if (!resfhp->fh_dentry) {
host_err = fh_want_write(fhp);
if (host_err)
goto out_nfserr;
/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
fh_lock_nested(fhp, I_MUTEX_PARENT);
dchild = lookup_one_len(fname, dentry, flen);
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild))
goto out_nfserr;
err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
if (err)
goto out;
} else {
/* called from nfsd_proc_create */
dchild = dget(resfhp->fh_dentry); dchild = dget(resfhp->fh_dentry);
if (!fhp->fh_locked) { if (!fhp->fh_locked) {
/* not actually possible */ WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
printk(KERN_ERR
"nfsd_create: parent %pd2 not locked!\n",
dentry); dentry);
err = nfserr_io; err = nfserr_io;
goto out; goto out;
} }
}
/* err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
* Make sure the child dentry is still negative ... if (err)
*/
err = nfserr_exist;
if (d_really_is_positive(dchild)) {
dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
dentry, dchild);
goto out; goto out;
}
if (!(iap->ia_valid & ATTR_MODE)) if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0; iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
err = nfserr_inval;
if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
type);
goto out;
}
/*
* Get the dir op function pointer.
*/
err = 0; err = 0;
host_err = 0; host_err = 0;
switch (type) { switch (type) {
...@@ -1242,6 +1183,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1242,6 +1183,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
case S_IFSOCK: case S_IFSOCK:
host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
break; break;
default:
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
type);
host_err = -EINVAL;
} }
if (host_err < 0) if (host_err < 0)
goto out_nfserr; goto out_nfserr;
...@@ -1251,7 +1196,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1251,7 +1196,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
/* /*
* nfsd_create_setattr already committed the child. Transactional * nfsd_create_setattr already committed the child. Transactional
* filesystems had a chance to commit changes for both parent and * filesystems had a chance to commit changes for both parent and
* child * simultaneously making the following commit_metadata a * child simultaneously making the following commit_metadata a
* noop. * noop.
*/ */
err2 = nfserrno(commit_metadata(fhp)); err2 = nfserrno(commit_metadata(fhp));
...@@ -1263,7 +1208,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1263,7 +1208,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!err) if (!err)
err = fh_update(resfhp); err = fh_update(resfhp);
out: out:
if (dchild && !IS_ERR(dchild))
dput(dchild); dput(dchild);
return err; return err;
...@@ -1272,6 +1216,50 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1272,6 +1216,50 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out; goto out;
} }
/*
 * Create a filesystem object (regular, directory, special).
 *
 * Verifies that fhp is a directory, takes write access and the parent
 * lock, looks up fname in it, composes resfhp for the child and then
 * hands off to nfsd_create_locked() to do the actual creation.
 *
 * @rqstp:  request being served
 * @fhp:    file handle of the parent directory
 * @fname:  name of the object to create (flen bytes)
 * @flen:   length of fname
 * @iap:    attributes for the new object
 * @type:   S_IF* type of the object to create
 * @rdev:   device number, passed through for special files
 * @resfhp: file handle to be filled in for the new object
 *
 * Returns nfserr_exist for "." and "..", the fh_verify()/fh_compose()
 * error or the translated host error on failure, otherwise the result
 * of nfsd_create_locked().
 *
 * Note that the parent directory is left locked, including on the error
 * returns taken after fh_lock_nested().
 *
 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
 */
__be32
nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		char *fname, int flen, struct iattr *iap,
		int type, dev_t rdev, struct svc_fh *resfhp)
{
	struct dentry	*dentry, *dchild;
	__be32		err;
	int		host_err;

	if (isdotent(fname, flen))
		return nfserr_exist;

	/* Permission is checked later, in nfsd_create_locked(). */
	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_NOP);
	if (err)
		return err;

	dentry = fhp->fh_dentry;

	host_err = fh_want_write(fhp);
	if (host_err)
		return nfserrno(host_err);

	fh_lock_nested(fhp, I_MUTEX_PARENT);
	dchild = lookup_one_len(fname, dentry, flen);
	if (IS_ERR(dchild))
		return nfserrno(PTR_ERR(dchild));

	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
	/*
	 * Drop the lookup reference unconditionally: fh_compose() holds its
	 * own reference in resfhp->fh_dentry, and nfsd_create_locked() takes
	 * and releases a separate one.  Dropping it only on the error path
	 * leaked one dentry reference per successful create.
	 */
	dput(dchild);
	if (err)
		return err;

	return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
					rdev, resfhp);
}
#ifdef CONFIG_NFSD_V3 #ifdef CONFIG_NFSD_V3
/* /*
...@@ -1304,12 +1292,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1304,12 +1292,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry; dentry = fhp->fh_dentry;
dirp = d_inode(dentry); dirp = d_inode(dentry);
/* Get all the sanity checks out of the way before
* we lock the parent. */
err = nfserr_notdir;
if (!dirp->i_op->lookup)
goto out;
host_err = fh_want_write(fhp); host_err = fh_want_write(fhp);
if (host_err) if (host_err)
goto out_nfserr; goto out_nfserr;
......
...@@ -59,6 +59,9 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, ...@@ -59,6 +59,9 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
__be32 nfsd4_clone_file_range(struct file *, u64, struct file *, __be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
u64, u64); u64, u64);
#endif /* CONFIG_NFSD_V4 */ #endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res);
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs, char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res); int type, dev_t rdev, struct svc_fh *res);
......
...@@ -59,6 +59,7 @@ struct nfsd4_compound_state { ...@@ -59,6 +59,7 @@ struct nfsd4_compound_state {
struct nfsd4_session *session; struct nfsd4_session *session;
struct nfsd4_slot *slot; struct nfsd4_slot *slot;
int data_offset; int data_offset;
bool spo_must_allowed;
size_t iovlen; size_t iovlen;
u32 minorversion; u32 minorversion;
__be32 status; __be32 status;
...@@ -403,6 +404,8 @@ struct nfsd4_exchange_id { ...@@ -403,6 +404,8 @@ struct nfsd4_exchange_id {
clientid_t clientid; clientid_t clientid;
u32 seqid; u32 seqid;
int spa_how; int spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
}; };
struct nfsd4_sequence { struct nfsd4_sequence {
...@@ -654,6 +657,8 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) ...@@ -654,6 +657,8 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
} }
bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
struct nfsd4_compoundargs *); struct nfsd4_compoundargs *);
......
...@@ -121,5 +121,4 @@ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o ...@@ -121,5 +121,4 @@ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
xfs-$(CONFIG_NFSD_SCSILAYOUT) += xfs_pnfs.o
...@@ -246,7 +246,7 @@ const struct export_operations xfs_export_operations = { ...@@ -246,7 +246,7 @@ const struct export_operations xfs_export_operations = {
.fh_to_parent = xfs_fs_fh_to_parent, .fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent, .get_parent = xfs_fs_get_parent,
.commit_metadata = xfs_fs_nfs_commit_metadata, .commit_metadata = xfs_fs_nfs_commit_metadata,
#ifdef CONFIG_NFSD_BLOCKLAYOUT #ifdef CONFIG_EXPORTFS_BLOCK_OPS
.get_uuid = xfs_fs_get_uuid, .get_uuid = xfs_fs_get_uuid,
.map_blocks = xfs_fs_map_blocks, .map_blocks = xfs_fs_map_blocks,
.commit_blocks = xfs_fs_commit_blocks, .commit_blocks = xfs_fs_commit_blocks,
......
#ifndef _XFS_PNFS_H #ifndef _XFS_PNFS_H
#define _XFS_PNFS_H 1 #define _XFS_PNFS_H 1
#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT) #ifdef CONFIG_EXPORTFS_BLOCK_OPS
int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
struct iomap *iomap, bool write, u32 *device_generation); struct iomap *iomap, bool write, u32 *device_generation);
...@@ -15,5 +15,5 @@ xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex) ...@@ -15,5 +15,5 @@ xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
{ {
return 0; return 0;
} }
#endif /* CONFIG_NFSD_PNFS */ #endif /* CONFIG_EXPORTFS_BLOCK_OPS */
#endif /* _XFS_PNFS_H */ #endif /* _XFS_PNFS_H */
...@@ -643,4 +643,15 @@ enum pnfs_update_layout_reason { ...@@ -643,4 +643,15 @@ enum pnfs_update_layout_reason {
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
}; };
/* Number of unsigned longs needed to hold LAST_NFS4_OP bits, rounded up. */
#define NFS4_OP_MAP_NUM_LONGS \
DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long))
/* The same amount of storage expressed as a count of u32 words. */
#define NFS4_OP_MAP_NUM_WORDS \
(NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32))
/*
 * Map sized from LAST_NFS4_OP; presumably one bit per NFSv4 operation
 * number (confirm against users).  The union exposes the same storage
 * either as unsigned longs or as u32 words.
 * NOTE(review): on a 64-bit big-endian machine words[0] overlays the
 * upper half of longs[0]; code mixing both views should account for that.
 */
struct nfs4_op_map {
union {
unsigned long longs[NFS4_OP_MAP_NUM_LONGS];
u32 words[NFS4_OP_MAP_NUM_WORDS];
} u;
};
#endif #endif
...@@ -1185,17 +1185,6 @@ struct pnfs_ds_commit_info { ...@@ -1185,17 +1185,6 @@ struct pnfs_ds_commit_info {
struct pnfs_commit_bucket *buckets; struct pnfs_commit_bucket *buckets;
}; };
#define NFS4_OP_MAP_NUM_LONGS \
DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long))
#define NFS4_OP_MAP_NUM_WORDS \
(NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32))
struct nfs4_op_map {
union {
unsigned long longs[NFS4_OP_MAP_NUM_LONGS];
u32 words[NFS4_OP_MAP_NUM_WORDS];
} u;
};
struct nfs41_state_protection { struct nfs41_state_protection {
u32 how; u32 how;
struct nfs4_op_map enforce; struct nfs4_op_map enforce;
......
...@@ -78,8 +78,6 @@ struct cache_detail { ...@@ -78,8 +78,6 @@ struct cache_detail {
struct hlist_head * hash_table; struct hlist_head * hash_table;
rwlock_t hash_lock; rwlock_t hash_lock;
atomic_t inuse; /* active user-space update or lookup */
char *name; char *name;
void (*cache_put)(struct kref *); void (*cache_put)(struct kref *);
......
...@@ -268,6 +268,7 @@ struct svc_rqst { ...@@ -268,6 +268,7 @@ struct svc_rqst {
* cache pages */ * cache pages */
#define RQ_VICTIM (5) /* about to be shut down */ #define RQ_VICTIM (5) /* about to be shut down */
#define RQ_BUSY (6) /* request is busy */ #define RQ_BUSY (6) /* request is busy */
#define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */ unsigned long rq_flags; /* flags field */
void * rq_argp; /* decoded arguments */ void * rq_argp; /* decoded arguments */
......
...@@ -25,7 +25,6 @@ struct svc_xprt_ops { ...@@ -25,7 +25,6 @@ struct svc_xprt_ops {
void (*xpo_detach)(struct svc_xprt *); void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *); int (*xpo_secure_port)(struct svc_rqst *);
void (*xpo_adjust_wspace)(struct svc_xprt *);
}; };
struct svc_xprt_class { struct svc_xprt_class {
...@@ -69,6 +68,7 @@ struct svc_xprt { ...@@ -69,6 +68,7 @@ struct svc_xprt {
struct svc_serv *xpt_server; /* service for transport */ struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */ atomic_t xpt_reserved; /* space on outq that is rsvd */
atomic_t xpt_nr_rqsts; /* Number of requests */
struct mutex xpt_mutex; /* to serialize sending data */ struct mutex xpt_mutex; /* to serialize sending data */
spinlock_t xpt_lock; /* protects sk_deferred spinlock_t xpt_lock; /* protects sk_deferred
* and xpt_auth_cache */ * and xpt_auth_cache */
......
...@@ -473,6 +473,39 @@ TRACE_EVENT(svc_recv, ...@@ -473,6 +473,39 @@ TRACE_EVENT(svc_recv,
show_rqstp_flags(__entry->flags)) show_rqstp_flags(__entry->flags))
); );
/*
 * Event class for tracepoints that take a single svc_rqst.  Each record
 * stores the request's rq_xid, its rq_flags, and a copy of rq_addr
 * (rq_addrlen bytes, held in a dynamic array), and is printed as
 * "addr=... rq_xid=0x... flags=...".
 */
DECLARE_EVENT_CLASS(svc_rqst_event,
TP_PROTO(struct svc_rqst *rqst),
TP_ARGS(rqst),
TP_STRUCT__entry(
__field(__be32, xid)
__field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),
TP_fast_assign(
__entry->xid = rqst->rq_xid;
__entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr),
&rqst->rq_addr, rqst->rq_addrlen);
),
TP_printk("addr=%pIScp rq_xid=0x%x flags=%s",
(struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid),
show_rqstp_flags(__entry->flags))
);
/* svc_defer and svc_drop share the svc_rqst_event record layout. */
DEFINE_EVENT(svc_rqst_event, svc_defer,
TP_PROTO(struct svc_rqst *rqst),
TP_ARGS(rqst));
DEFINE_EVENT(svc_rqst_event, svc_drop,
TP_PROTO(struct svc_rqst *rqst),
TP_ARGS(rqst));
DECLARE_EVENT_CLASS(svc_rqst_status, DECLARE_EVENT_CLASS(svc_rqst_status,
TP_PROTO(struct svc_rqst *rqst, int status), TP_PROTO(struct svc_rqst *rqst, int status),
...@@ -529,45 +562,67 @@ TRACE_EVENT(svc_xprt_do_enqueue, ...@@ -529,45 +562,67 @@ TRACE_EVENT(svc_xprt_do_enqueue,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct svc_xprt *, xprt) __field(struct svc_xprt *, xprt)
__field_struct(struct sockaddr_storage, ss)
__field(int, pid) __field(int, pid)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ?
xprt->xpt_remotelen : 0)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xprt = xprt; __entry->xprt = xprt;
xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->pid = rqst? rqst->rq_task->pid : 0; __entry->pid = rqst? rqst->rq_task->pid : 0;
__entry->flags = xprt ? xprt->xpt_flags : 0; if (xprt) {
memcpy(__get_dynamic_array(addr),
&xprt->xpt_remote,
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags;
} else
__entry->flags = 0;
), ),
TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->ss, __get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->pid, show_svc_xprt_flags(__entry->flags)) __entry->pid, show_svc_xprt_flags(__entry->flags))
); );
TRACE_EVENT(svc_xprt_dequeue, DECLARE_EVENT_CLASS(svc_xprt_event,
TP_PROTO(struct svc_xprt *xprt), TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt), TP_ARGS(xprt),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct svc_xprt *, xprt) __field(struct svc_xprt *, xprt)
__field_struct(struct sockaddr_storage, ss)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ?
xprt->xpt_remotelen : 0)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xprt = xprt, __entry->xprt = xprt;
xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); if (xprt) {
__entry->flags = xprt ? xprt->xpt_flags : 0; memcpy(__get_dynamic_array(addr),
&xprt->xpt_remote,
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags;
} else
__entry->flags = 0;
), ),
TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt, TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->ss, __get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
show_svc_xprt_flags(__entry->flags)) show_svc_xprt_flags(__entry->flags))
); );
DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt));
DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt));
TRACE_EVENT(svc_wake_up, TRACE_EVENT(svc_wake_up,
TP_PROTO(int pid), TP_PROTO(int pid),
...@@ -592,21 +647,56 @@ TRACE_EVENT(svc_handle_xprt, ...@@ -592,21 +647,56 @@ TRACE_EVENT(svc_handle_xprt,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct svc_xprt *, xprt) __field(struct svc_xprt *, xprt)
__field(int, len) __field(int, len)
__field_struct(struct sockaddr_storage, ss)
__field(unsigned long, flags) __field(unsigned long, flags)
__dynamic_array(unsigned char, addr, xprt != NULL ?
xprt->xpt_remotelen : 0)
), ),
TP_fast_assign( TP_fast_assign(
__entry->xprt = xprt; __entry->xprt = xprt;
xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->len = len; __entry->len = len;
__entry->flags = xprt ? xprt->xpt_flags : 0; if (xprt) {
memcpy(__get_dynamic_array(addr),
&xprt->xpt_remote,
xprt->xpt_remotelen);
__entry->flags = xprt->xpt_flags;
} else
__entry->flags = 0;
), ),
TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->ss, __get_dynamic_array_len(addr) != 0 ?
(struct sockaddr *)__get_dynamic_array(addr) : NULL,
__entry->len, show_svc_xprt_flags(__entry->flags)) __entry->len, show_svc_xprt_flags(__entry->flags))
); );
/*
 * Event class for tracepoints that take a single svc_deferred_req.  Each
 * record stores an XID read out of the saved request data and a copy of
 * dr->addr (dr->addrlen bytes), and is printed as "addr=... xid=0x...".
 */
DECLARE_EVENT_CLASS(svc_deferred_event,
TP_PROTO(struct svc_deferred_req *dr),
TP_ARGS(dr),
TP_STRUCT__entry(
__field(__be32, xid)
__dynamic_array(unsigned char, addr, dr->addrlen)
),
TP_fast_assign(
/*
 * The XID is fetched from dr->args at offset xprt_hlen >> 2;
 * presumably xprt_hlen is a byte count being converted to 32-bit
 * words to skip a transport header -- TODO confirm.
 */
__entry->xid = *(__be32 *)(dr->args + (dr->xprt_hlen>>2));
memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen);
),
TP_printk("addr=%pIScp xid=0x%x",
(struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid))
);
/* svc_drop_deferred and svc_revisit_deferred share the layout above. */
DEFINE_EVENT(svc_deferred_event, svc_drop_deferred,
TP_PROTO(struct svc_deferred_req *dr),
TP_ARGS(dr));
DEFINE_EVENT(svc_deferred_event, svc_revisit_deferred,
TP_PROTO(struct svc_deferred_req *dr),
TP_ARGS(dr));
#endif /* _TRACE_SUNRPC_H */ #endif /* _TRACE_SUNRPC_H */
#include <trace/define_trace.h> #include <trace/define_trace.h>
...@@ -1230,8 +1230,9 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, ...@@ -1230,8 +1230,9 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
if (status) if (status)
goto out; goto out;
dprintk("RPC: svcauth_gss: gss major status = %d\n", dprintk("RPC: svcauth_gss: gss major status = %d "
ud.major_status); "minor status = %d\n",
ud.major_status, ud.minor_status);
switch (ud.major_status) { switch (ud.major_status) {
case GSS_S_CONTINUE_NEEDED: case GSS_S_CONTINUE_NEEDED:
......
...@@ -362,7 +362,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) ...@@ -362,7 +362,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
cache_purge(cd); cache_purge(cd);
spin_lock(&cache_list_lock); spin_lock(&cache_list_lock);
write_lock(&cd->hash_lock); write_lock(&cd->hash_lock);
if (cd->entries || atomic_read(&cd->inuse)) { if (cd->entries) {
write_unlock(&cd->hash_lock); write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock); spin_unlock(&cache_list_lock);
goto out; goto out;
......
...@@ -21,6 +21,10 @@ ...@@ -21,6 +21,10 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT #define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
 * Upper bound on a transport's xpt_nr_rqsts counter, as checked by
 * svc_xprt_slots_in_range().  The zero default means "no limit".
 * Exposed as a module parameter with mode 0644.
 */
static unsigned int svc_rpc_per_connection_limit __read_mostly;
module_param(svc_rpc_per_connection_limit, uint, 0644);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp); static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req); static struct cache_deferred_req *svc_defer(struct cache_req *req);
...@@ -329,12 +333,45 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) ...@@ -329,12 +333,45 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
} }
EXPORT_SYMBOL_GPL(svc_print_addr); EXPORT_SYMBOL_GPL(svc_print_addr);
static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
{
unsigned int limit = svc_rpc_per_connection_limit;
int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);
return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
}
/*
 * Account @rqstp against @xprt's request count.
 *
 * A request already flagged RQ_DATA is not counted again and succeeds
 * immediately.  Otherwise the count is bumped and RQ_DATA set, provided
 * svc_xprt_slots_in_range() still permits another request.
 *
 * Returns true if the request holds a slot on return, false if the
 * transport is over its limit.
 */
static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	if (test_bit(RQ_DATA, &rqstp->rq_flags))
		return true;

	if (!svc_xprt_slots_in_range(xprt))
		return false;

	atomic_inc(&xprt->xpt_nr_rqsts);
	set_bit(RQ_DATA, &rqstp->rq_flags);
	return true;
}
/*
 * Undo svc_xprt_reserve_slot(): if @rqstp is flagged RQ_DATA, clear the
 * flag, drop the transport's request count and re-enqueue the transport.
 * Does nothing when the flag was not set.
 */
static void svc_xprt_release_slot(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;

	if (!test_and_clear_bit(RQ_DATA, &rqstp->rq_flags))
		return;

	atomic_dec(&xprt->xpt_nr_rqsts);
	svc_xprt_enqueue(xprt);
}
static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
{ {
if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE))) if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
return true; return true;
if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
return xprt->xpt_ops->xpo_has_wspace(xprt); if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
svc_xprt_slots_in_range(xprt))
return true;
trace_svc_xprt_no_write_space(xprt);
return false;
}
return false; return false;
} }
...@@ -480,8 +517,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space) ...@@ -480,8 +517,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space; rqstp->rq_reserved = space;
if (xprt->xpt_ops->xpo_adjust_wspace)
xprt->xpt_ops->xpo_adjust_wspace(xprt);
svc_xprt_enqueue(xprt); svc_xprt_enqueue(xprt);
} }
} }
...@@ -512,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp) ...@@ -512,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
rqstp->rq_res.head[0].iov_len = 0; rqstp->rq_res.head[0].iov_len = 0;
svc_reserve(rqstp, 0); svc_reserve(rqstp, 0);
svc_xprt_release_slot(rqstp);
rqstp->rq_xprt = NULL; rqstp->rq_xprt = NULL;
svc_xprt_put(xprt); svc_xprt_put(xprt);
} }
...@@ -781,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) ...@@ -781,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_add_new_temp_xprt(serv, newxpt); svc_add_new_temp_xprt(serv, newxpt);
else else
module_put(xprt->xpt_class->xcl_owner); module_put(xprt->xpt_class->xcl_owner);
} else { } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */ /* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt, rqstp, rqstp->rq_pool->sp_id, xprt,
...@@ -871,6 +906,7 @@ EXPORT_SYMBOL_GPL(svc_recv); ...@@ -871,6 +906,7 @@ EXPORT_SYMBOL_GPL(svc_recv);
*/ */
void svc_drop(struct svc_rqst *rqstp) void svc_drop(struct svc_rqst *rqstp)
{ {
trace_svc_drop(rqstp);
dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp); svc_xprt_release(rqstp);
} }
...@@ -1148,6 +1184,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) ...@@ -1148,6 +1184,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
spin_unlock(&xprt->xpt_lock); spin_unlock(&xprt->xpt_lock);
dprintk("revisit canceled\n"); dprintk("revisit canceled\n");
svc_xprt_put(xprt); svc_xprt_put(xprt);
trace_svc_drop_deferred(dr);
kfree(dr); kfree(dr);
return; return;
} }
...@@ -1205,6 +1242,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) ...@@ -1205,6 +1242,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
set_bit(RQ_DROPME, &rqstp->rq_flags); set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit; dr->handle.revisit = svc_revisit;
trace_svc_defer(rqstp);
return &dr->handle; return &dr->handle;
} }
...@@ -1245,6 +1283,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) ...@@ -1245,6 +1283,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
struct svc_deferred_req, struct svc_deferred_req,
handle.recent); handle.recent);
list_del_init(&dr->handle.recent); list_del_init(&dr->handle.recent);
trace_svc_revisit_deferred(dr);
} else } else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags); clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock); spin_unlock(&xprt->xpt_lock);
......
...@@ -60,7 +60,6 @@ ...@@ -60,7 +60,6 @@
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags); int flags);
static void svc_udp_data_ready(struct sock *);
static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *);
static void svc_sock_detach(struct svc_xprt *); static void svc_sock_detach(struct svc_xprt *);
...@@ -398,48 +397,21 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp) ...@@ -398,48 +397,21 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
return svc_port_is_privileged(svc_addr(rqstp)); return svc_port_is_privileged(svc_addr(rqstp));
} }
/*
 * Test whether anyone is sleeping on @wq; a NULL @wq counts as "nobody".
 *
 * No memory barrier is issued before the check, although one would
 * normally be expected here (see wq_has_sleeper()).  That appears to be
 * unnecessary for now, basically because the callers all seem to have
 * sufficient memory barriers between the time the relevant change is
 * made and the time they call these callbacks.
 *
 * The nfsd code does not explicitly wait on these waitqueues itself,
 * but it may end up waiting on them inside sendpage() or sendmsg()
 * calls, for example.  (Those may be the only places, since it uses
 * nonblocking reads.)
 *
 * Adding the memory barriers anyway may be worthwhile, but these are
 * hot paths, so we would first need to be convinced that there is no
 * significant penalty.
 */
static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
{
	return wq && waitqueue_active(wq);
}
/* /*
* INET callback when data has been received on the socket. * INET callback when data has been received on the socket.
*/ */
static void svc_udp_data_ready(struct sock *sk) static void svc_data_ready(struct sock *sk)
{ {
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) { if (svsk) {
dprintk("svc: socket %p(inet %p), busy=%d\n", dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk, svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svsk->sk_odata(sk);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt); svc_xprt_enqueue(&svsk->sk_xprt);
} }
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
} }
/* /*
...@@ -448,56 +420,22 @@ static void svc_udp_data_ready(struct sock *sk) ...@@ -448,56 +420,22 @@ static void svc_udp_data_ready(struct sock *sk)
static void svc_write_space(struct sock *sk) static void svc_write_space(struct sock *sk)
{ {
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) { if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n", dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt); svc_xprt_enqueue(&svsk->sk_xprt);
} }
if (sunrpc_waitqueue_active(wq)) {
dprintk("RPC svc_write_space: someone sleeping on %p\n",
svsk);
wake_up_interruptible(wq);
}
} }
static int svc_tcp_has_wspace(struct svc_xprt *xprt) static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{ {
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int required;
if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
return 1; return 1;
required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
if (sk_stream_wspace(svsk->sk_sk) >= required ||
(sk_stream_min_wspace(svsk->sk_sk) == 0 &&
atomic_read(&xprt->xpt_reserved) == 0))
return 1;
set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
return 0;
}
static void svc_tcp_write_space(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
struct socket *sock = sk->sk_socket;
if (!sk_stream_is_writeable(sk) || !sock)
return;
if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
clear_bit(SOCK_NOSPACE, &sock->flags);
svc_write_space(sk);
}
/*
 * Clear SOCK_NOSPACE on the socket behind @xprt once
 * svc_tcp_has_wspace() reports that there is room again.
 */
static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk;

	if (!svc_tcp_has_wspace(xprt))
		return;

	svsk = container_of(xprt, struct svc_sock, sk_xprt);
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
}
/* /*
...@@ -746,7 +684,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) ...@@ -746,7 +684,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv); &svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_data_ready = svc_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space; svsk->sk_sk->sk_write_space = svc_write_space;
/* initialise setting must have enough space to /* initialise setting must have enough space to
...@@ -786,11 +724,12 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) ...@@ -786,11 +724,12 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
static void svc_tcp_listen_data_ready(struct sock *sk) static void svc_tcp_listen_data_ready(struct sock *sk)
{ {
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq;
dprintk("svc: socket %p TCP (listen) state change %d\n", dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state); sk, sk->sk_state);
if (svsk)
svsk->sk_odata(sk);
/* /*
* This callback may be called twice when a new connection * This callback may be called twice when a new connection
* is established as a child socket inherits everything * is established as a child socket inherits everything
...@@ -808,10 +747,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) ...@@ -808,10 +747,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
} else } else
printk("svc: socket %p: no user data\n", sk); printk("svc: socket %p: no user data\n", sk);
} }
wq = sk_sleep(sk);
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
} }
/* /*
...@@ -820,7 +755,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) ...@@ -820,7 +755,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
static void svc_tcp_state_change(struct sock *sk) static void svc_tcp_state_change(struct sock *sk)
{ {
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
sk, sk->sk_state, sk->sk_user_data); sk, sk->sk_state, sk->sk_user_data);
...@@ -828,26 +762,12 @@ static void svc_tcp_state_change(struct sock *sk) ...@@ -828,26 +762,12 @@ static void svc_tcp_state_change(struct sock *sk)
if (!svsk) if (!svsk)
printk("svc: socket %p: no user data\n", sk); printk("svc: socket %p: no user data\n", sk);
else { else {
svsk->sk_ostate(sk);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt); svc_xprt_enqueue(&svsk->sk_xprt);
} }
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible_all(wq);
}
static void svc_tcp_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
dprintk("svc: socket %p TCP data ready (svsk %p)\n",
sk, sk->sk_user_data);
if (svsk) {
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
} }
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
} }
/* /*
...@@ -901,6 +821,11 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) ...@@ -901,6 +821,11 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
dprintk("%s: connect from %s\n", serv->sv_name, dprintk("%s: connect from %s\n", serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf))); __svc_print_addr(sin, buf, sizeof(buf)));
/* Reset the inherited callbacks before calling svc_setup_socket */
newsock->sk->sk_state_change = svsk->sk_ostate;
newsock->sk->sk_data_ready = svsk->sk_odata;
newsock->sk->sk_write_space = svsk->sk_owspace;
/* make sure that a write doesn't block forever when /* make sure that a write doesn't block forever when
* low on memory * low on memory
*/ */
...@@ -1317,7 +1242,6 @@ static struct svc_xprt_ops svc_tcp_ops = { ...@@ -1317,7 +1242,6 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace, .xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept, .xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port, .xpo_secure_port = svc_sock_secure_port,
.xpo_adjust_wspace = svc_tcp_adjust_wspace,
}; };
static struct svc_xprt_class svc_tcp_class = { static struct svc_xprt_class svc_tcp_class = {
...@@ -1357,8 +1281,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) ...@@ -1357,8 +1281,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
} else { } else {
dprintk("setting up TCP socket for reading\n"); dprintk("setting up TCP socket for reading\n");
sk->sk_state_change = svc_tcp_state_change; sk->sk_state_change = svc_tcp_state_change;
sk->sk_data_ready = svc_tcp_data_ready; sk->sk_data_ready = svc_data_ready;
sk->sk_write_space = svc_tcp_write_space; sk->sk_write_space = svc_write_space;
svsk->sk_reclen = 0; svsk->sk_reclen = 0;
svsk->sk_tcplen = 0; svsk->sk_tcplen = 0;
...@@ -1368,9 +1292,14 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) ...@@ -1368,9 +1292,14 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state != TCP_ESTABLISHED) switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
break;
default:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
} }
}
} }
void svc_sock_update_bufs(struct svc_serv *serv) void svc_sock_update_bufs(struct svc_serv *serv)
...@@ -1428,17 +1357,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, ...@@ -1428,17 +1357,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Initialize the socket */ /* Initialize the socket */
if (sock->type == SOCK_DGRAM) if (sock->type == SOCK_DGRAM)
svc_udp_init(svsk, serv); svc_udp_init(svsk, serv);
else { else
/* initialise setting must have enough space to
* receive and respond to one request.
*/
svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
4 * serv->sv_max_mesg);
svc_tcp_init(svsk, serv); svc_tcp_init(svsk, serv);
}
dprintk("svc: svc_setup_socket created %p (inet %p)\n", dprintk("svc: svc_setup_socket created %p (inet %p), "
svsk, svsk->sk_sk); "listen %d close %d\n",
svsk, svsk->sk_sk,
test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
return svsk; return svsk;
} }
...@@ -1606,18 +1532,16 @@ static void svc_sock_detach(struct svc_xprt *xprt) ...@@ -1606,18 +1532,16 @@ static void svc_sock_detach(struct svc_xprt *xprt)
{ {
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk; struct sock *sk = svsk->sk_sk;
wait_queue_head_t *wq;
dprintk("svc: svc_sock_detach(%p)\n", svsk); dprintk("svc: svc_sock_detach(%p)\n", svsk);
/* put back the old socket callbacks */ /* put back the old socket callbacks */
lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate; sk->sk_state_change = svsk->sk_ostate;
sk->sk_data_ready = svsk->sk_odata; sk->sk_data_ready = svsk->sk_odata;
sk->sk_write_space = svsk->sk_owspace; sk->sk_write_space = svsk->sk_owspace;
sk->sk_user_data = NULL;
wq = sk_sleep(sk); release_sock(sk);
if (sunrpc_waitqueue_active(wq))
wake_up_interruptible(wq);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment