Commit 1ddd8739 authored by Linus Torvalds

Merge tag 'nfsd-5.7-rc-1' of git://git.linux-nfs.org/projects/cel/cel-2.6

Pull nfsd fixes from Chuck Lever:
 "The first set of 5.7-rc fixes for NFS server issues.

  These were all unresolved at the time the 5.7 window opened, and
  needed some additional time to ensure they were correctly addressed.
  They are ready now.

  At the moment I know of one more urgent issue regarding the NFS
  server. A fix has been tested and is under review. I expect to send
  one more pull request, containing this fix (which now consists of 3
  patches).

  Fixes:

   - Address several use-after-free and memory leak bugs

   - Prevent a backchannel livelock"

* tag 'nfsd-5.7-rc-1' of git://git.linux-nfs.org/projects/cel/cel-2.6:
  svcrdma: Fix leak of svc_rdma_recv_ctxt objects
  svcrdma: Fix trace point use-after-free race
  SUNRPC: Fix backchannel RPC soft lockups
  SUNRPC/cache: Fix unsafe traverse caused double-free in cache_purge
  nfsd: memory corruption in nfsd4_lock()
parents 6f8cd037 23cf1ee1
......@@ -1312,6 +1312,7 @@ nfsd4_run_cb_work(struct work_struct *work)
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
......@@ -1340,7 +1341,8 @@ nfsd4_run_cb_work(struct work_struct *work)
}
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
}
......
......@@ -267,6 +267,8 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
if (!nbl) {
nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
if (nbl) {
INIT_LIST_HEAD(&nbl->nbl_list);
INIT_LIST_HEAD(&nbl->nbl_lru);
fh_copy_shallow(&nbl->nbl_fh, fh);
locks_init_lock(&nbl->nbl_lock);
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
......
......@@ -170,6 +170,7 @@ extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt);
extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
......
......@@ -1695,17 +1695,15 @@ DECLARE_EVENT_CLASS(svcrdma_sendcomp_event,
TRACE_EVENT(svcrdma_post_send,
TP_PROTO(
const struct ib_send_wr *wr,
int status
const struct ib_send_wr *wr
),
TP_ARGS(wr, status),
TP_ARGS(wr),
TP_STRUCT__entry(
__field(const void *, cqe)
__field(unsigned int, num_sge)
__field(u32, inv_rkey)
__field(int, status)
),
TP_fast_assign(
......@@ -1713,12 +1711,11 @@ TRACE_EVENT(svcrdma_post_send,
__entry->num_sge = wr->num_sge;
__entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ?
wr->ex.invalidate_rkey : 0;
__entry->status = status;
),
TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d",
TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x",
__entry->cqe, __entry->num_sge,
__entry->inv_rkey, __entry->status
__entry->inv_rkey
)
);
......@@ -1783,26 +1780,23 @@ TRACE_EVENT(svcrdma_wc_receive,
TRACE_EVENT(svcrdma_post_rw,
TP_PROTO(
const void *cqe,
int sqecount,
int status
int sqecount
),
TP_ARGS(cqe, sqecount, status),
TP_ARGS(cqe, sqecount),
TP_STRUCT__entry(
__field(const void *, cqe)
__field(int, sqecount)
__field(int, status)
),
TP_fast_assign(
__entry->cqe = cqe;
__entry->sqecount = sqecount;
__entry->status = status;
),
TP_printk("cqe=%p sqecount=%d status=%d",
__entry->cqe, __entry->sqecount, __entry->status
TP_printk("cqe=%p sqecount=%d",
__entry->cqe, __entry->sqecount
)
);
......@@ -1870,6 +1864,34 @@ DECLARE_EVENT_CLASS(svcrdma_sendqueue_event,
DEFINE_SQ_EVENT(full);
DEFINE_SQ_EVENT(retry);
/*
 * svcrdma_sq_post_err - trace event taking a transport and a status code.
 *
 * Records the value of rdma->sc_sq_avail (read with atomic_read() when the
 * event fires), the value of rdma->sc_sq_depth, the caller-supplied status,
 * and a copy of the string at rdma->sc_xprt.xpt_remotebuf.
 *
 * Output format: "addr=<remote> sc_sq_avail=<avail>/<depth> status=<status>"
 *
 * NOTE(review): the name suggests this is emitted when posting to the
 * Send Queue fails -- confirm against the call sites.
 */
TRACE_EVENT(svcrdma_sq_post_err,
TP_PROTO(
const struct svcxprt_rdma *rdma,
int status
),
TP_ARGS(rdma, status),
TP_STRUCT__entry(
__field(int, avail)
__field(int, depth)
__field(int, status)
__string(addr, rdma->sc_xprt.xpt_remotebuf)
),
TP_fast_assign(
__entry->avail = atomic_read(&rdma->sc_sq_avail);
__entry->depth = rdma->sc_sq_depth;
__entry->status = status;
__assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
),
TP_printk("addr=%s sc_sq_avail=%d/%d status=%d",
__get_str(addr), __entry->avail, __entry->depth,
__entry->status
)
);
#endif /* _TRACE_RPCRDMA_H */
#include <trace/define_trace.h>
......@@ -529,7 +529,6 @@ void cache_purge(struct cache_detail *detail)
{
struct cache_head *ch = NULL;
struct hlist_head *head = NULL;
struct hlist_node *tmp = NULL;
int i = 0;
spin_lock(&detail->hash_lock);
......@@ -541,7 +540,9 @@ void cache_purge(struct cache_detail *detail)
dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
for (i = 0; i < detail->hash_size; i++) {
head = &detail->hash_table[i];
hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
while (!hlist_empty(head)) {
ch = hlist_entry(head->first, struct cache_head,
cache_list);
sunrpc_begin_cache_remove_entry(ch, detail);
spin_unlock(&detail->hash_lock);
sunrpc_end_cache_remove_entry(ch, detail);
......
......@@ -908,9 +908,6 @@ int svc_send(struct svc_rqst *rqstp)
if (!xprt)
goto out;
/* release the receive skb before sending the reply */
xprt->xpt_ops->xpo_release_rqst(rqstp);
/* calculate over-all length */
xb = &rqstp->rq_res;
xb->len = xb->head[0].iov_len +
......@@ -1040,6 +1037,8 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
dprintk("svc: svc_delete_xprt(%p)\n", xprt);
xprt->xpt_ops->xpo_detach(xprt);
if (xprt->xpt_bc_xprt)
xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);
spin_lock_bh(&serv->sv_lock);
list_del_init(&xprt->xpt_list);
......
......@@ -527,6 +527,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
svc_release_udp_skb(rqstp);
svc_set_cmsg_data(rqstp, cmh);
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
......@@ -1076,6 +1078,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
svc_release_skb(rqstp);
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
......
......@@ -244,6 +244,8 @@ static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
/* Debug log of the close request, tagged with this function's name. */
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
/*
 * Hand the transport to xprt_disconnect_done() before touching cwnd.
 * NOTE(review): presumably this marks the backchannel transport as
 * disconnected so pending RPCs stop retrying -- confirm against the
 * helper's definition.
 */
xprt_disconnect_done(xprt);
/* Reset the transport's cwnd field to RPC_CWNDSHIFT. */
xprt->cwnd = RPC_CWNDSHIFT;
}
......
......@@ -223,6 +223,26 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
svc_rdma_recv_ctxt_destroy(rdma, ctxt);
}
/**
 * svc_rdma_release_rqst - Drop the receive context attached to an svc_rqst
 * @rqstp: svc_rqst whose transport-private context is being released
 *
 * Detaches rqstp->rq_xprt_ctxt and, when one was attached, returns it
 * via svc_rdma_recv_ctxt_put(). This guarantees the recv_ctxt is
 * released even when no Reply goes out -- for instance if the client
 * closes the connection or svc_process drops the RPC first.
 */
void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
	struct svcxprt_rdma *rdma =
		container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
	struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;

	/* Always clear the pointer so the context cannot be put twice. */
	rqstp->rq_xprt_ctxt = NULL;
	if (!rctxt)
		return;

	svc_rdma_recv_ctxt_put(rdma, rctxt);
}
static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
......@@ -820,6 +840,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
__be32 *p;
int ret;
rqstp->rq_xprt_ctxt = NULL;
spin_lock(&rdma_xprt->sc_rq_dto_lock);
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
if (ctxt) {
......
......@@ -323,8 +323,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
trace_svcrdma_post_rw(&cc->cc_cqe,
cc->cc_sqecount, ret);
if (ret)
break;
return 0;
......@@ -337,6 +335,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
trace_svcrdma_sq_retry(rdma);
} while (1);
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
/* If even one was posted, there will be a completion. */
......
......@@ -322,15 +322,17 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
}
svc_xprt_get(&rdma->sc_xprt);
trace_svcrdma_post_send(wr);
ret = ib_post_send(rdma->sc_qp, wr, NULL);
trace_svcrdma_post_send(wr, ret);
if (ret) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_put(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
}
break;
if (ret)
break;
return 0;
}
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_put(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
}
......@@ -924,12 +926,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
goto err1;
ret = 0;
out:
rqstp->rq_xprt_ctxt = NULL;
svc_rdma_recv_ctxt_put(rdma, rctxt);
return ret;
return 0;
err2:
if (ret != -E2BIG && ret != -EINVAL)
......@@ -938,16 +935,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
if (ret < 0)
goto err1;
ret = 0;
goto out;
return 0;
err1:
svc_rdma_send_ctxt_put(rdma, sctxt);
err0:
trace_svcrdma_send_failed(rqstp, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
ret = -ENOTCONN;
goto out;
return -ENOTCONN;
}
/**
......
......@@ -71,7 +71,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct sockaddr *sa, int salen,
int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
......@@ -552,10 +551,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
return NULL;
}
static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
}
/*
* When connected, an svc_xprt has at least two references:
*
......
......@@ -2584,6 +2584,7 @@ static int bc_send_request(struct rpc_rqst *req)
/*
 * bc_close - close handler taking an rpc_xprt.
 * @xprt: transport being closed
 *
 * Its only action is to call xprt_disconnect_done() on @xprt.
 * NOTE(review): presumably this lets RPCs waiting on the backchannel
 * see the disconnect instead of retrying -- confirm against the helper.
 */
static void bc_close(struct rpc_xprt *xprt)
{
xprt_disconnect_done(xprt);
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment