Commit 1ddd8739 authored by Linus Torvalds

Merge tag 'nfsd-5.7-rc-1' of git://git.linux-nfs.org/projects/cel/cel-2.6

Pull nfsd fixes from Chuck Lever:
 "The first set of 5.7-rc fixes for NFS server issues.

  These were all unresolved at the time the 5.7 window opened, and
  needed some additional time to ensure they were correctly addressed.
  They are ready now.

  At the moment I know of one more urgent issue regarding the NFS
  server. A fix has been tested and is under review. I expect to send
  one more pull request, containing this fix (which now consists of 3
  patches).

  Fixes:

   - Address several use-after-free and memory leak bugs

   - Prevent a backchannel livelock"

* tag 'nfsd-5.7-rc-1' of git://git.linux-nfs.org/projects/cel/cel-2.6:
  svcrdma: Fix leak of svc_rdma_recv_ctxt objects
  svcrdma: Fix trace point use-after-free race
  SUNRPC: Fix backchannel RPC soft lockups
  SUNRPC/cache: Fix unsafe traverse caused double-free in cache_purge
  nfsd: memory corruption in nfsd4_lock()
parents 6f8cd037 23cf1ee1
...@@ -1312,6 +1312,7 @@ nfsd4_run_cb_work(struct work_struct *work) ...@@ -1312,6 +1312,7 @@ nfsd4_run_cb_work(struct work_struct *work)
container_of(work, struct nfsd4_callback, cb_work); container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp; struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt; struct rpc_clnt *clnt;
int flags;
if (cb->cb_need_restart) { if (cb->cb_need_restart) {
cb->cb_need_restart = false; cb->cb_need_restart = false;
...@@ -1340,7 +1341,8 @@ nfsd4_run_cb_work(struct work_struct *work) ...@@ -1340,7 +1341,8 @@ nfsd4_run_cb_work(struct work_struct *work)
} }
cb->cb_msg.rpc_cred = clp->cl_cb_cred; cb->cb_msg.rpc_cred = clp->cl_cb_cred;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
} }
......
...@@ -267,6 +267,8 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, ...@@ -267,6 +267,8 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
if (!nbl) { if (!nbl) {
nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
if (nbl) { if (nbl) {
INIT_LIST_HEAD(&nbl->nbl_list);
INIT_LIST_HEAD(&nbl->nbl_lru);
fh_copy_shallow(&nbl->nbl_fh, fh); fh_copy_shallow(&nbl->nbl_fh, fh);
locks_init_lock(&nbl->nbl_lock); locks_init_lock(&nbl->nbl_lock);
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
......
...@@ -170,6 +170,7 @@ extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); ...@@ -170,6 +170,7 @@ extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt); struct svc_rdma_recv_ctxt *ctxt);
extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
extern int svc_rdma_recvfrom(struct svc_rqst *); extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */ /* svc_rdma_rw.c */
......
...@@ -1695,17 +1695,15 @@ DECLARE_EVENT_CLASS(svcrdma_sendcomp_event, ...@@ -1695,17 +1695,15 @@ DECLARE_EVENT_CLASS(svcrdma_sendcomp_event,
TRACE_EVENT(svcrdma_post_send, TRACE_EVENT(svcrdma_post_send,
TP_PROTO( TP_PROTO(
const struct ib_send_wr *wr, const struct ib_send_wr *wr
int status
), ),
TP_ARGS(wr, status), TP_ARGS(wr),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(const void *, cqe) __field(const void *, cqe)
__field(unsigned int, num_sge) __field(unsigned int, num_sge)
__field(u32, inv_rkey) __field(u32, inv_rkey)
__field(int, status)
), ),
TP_fast_assign( TP_fast_assign(
...@@ -1713,12 +1711,11 @@ TRACE_EVENT(svcrdma_post_send, ...@@ -1713,12 +1711,11 @@ TRACE_EVENT(svcrdma_post_send,
__entry->num_sge = wr->num_sge; __entry->num_sge = wr->num_sge;
__entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ? __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ?
wr->ex.invalidate_rkey : 0; wr->ex.invalidate_rkey : 0;
__entry->status = status;
), ),
TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d", TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x",
__entry->cqe, __entry->num_sge, __entry->cqe, __entry->num_sge,
__entry->inv_rkey, __entry->status __entry->inv_rkey
) )
); );
...@@ -1783,26 +1780,23 @@ TRACE_EVENT(svcrdma_wc_receive, ...@@ -1783,26 +1780,23 @@ TRACE_EVENT(svcrdma_wc_receive,
TRACE_EVENT(svcrdma_post_rw, TRACE_EVENT(svcrdma_post_rw,
TP_PROTO( TP_PROTO(
const void *cqe, const void *cqe,
int sqecount, int sqecount
int status
), ),
TP_ARGS(cqe, sqecount, status), TP_ARGS(cqe, sqecount),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(const void *, cqe) __field(const void *, cqe)
__field(int, sqecount) __field(int, sqecount)
__field(int, status)
), ),
TP_fast_assign( TP_fast_assign(
__entry->cqe = cqe; __entry->cqe = cqe;
__entry->sqecount = sqecount; __entry->sqecount = sqecount;
__entry->status = status;
), ),
TP_printk("cqe=%p sqecount=%d status=%d", TP_printk("cqe=%p sqecount=%d",
__entry->cqe, __entry->sqecount, __entry->status __entry->cqe, __entry->sqecount
) )
); );
...@@ -1870,6 +1864,34 @@ DECLARE_EVENT_CLASS(svcrdma_sendqueue_event, ...@@ -1870,6 +1864,34 @@ DECLARE_EVENT_CLASS(svcrdma_sendqueue_event,
DEFINE_SQ_EVENT(full); DEFINE_SQ_EVENT(full);
DEFINE_SQ_EVENT(retry); DEFINE_SQ_EVENT(retry);
TRACE_EVENT(svcrdma_sq_post_err,
TP_PROTO(
const struct svcxprt_rdma *rdma,
int status
),
TP_ARGS(rdma, status),
TP_STRUCT__entry(
__field(int, avail)
__field(int, depth)
__field(int, status)
__string(addr, rdma->sc_xprt.xpt_remotebuf)
),
TP_fast_assign(
__entry->avail = atomic_read(&rdma->sc_sq_avail);
__entry->depth = rdma->sc_sq_depth;
__entry->status = status;
__assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
),
TP_printk("addr=%s sc_sq_avail=%d/%d status=%d",
__get_str(addr), __entry->avail, __entry->depth,
__entry->status
)
);
#endif /* _TRACE_RPCRDMA_H */ #endif /* _TRACE_RPCRDMA_H */
#include <trace/define_trace.h> #include <trace/define_trace.h>
...@@ -529,7 +529,6 @@ void cache_purge(struct cache_detail *detail) ...@@ -529,7 +529,6 @@ void cache_purge(struct cache_detail *detail)
{ {
struct cache_head *ch = NULL; struct cache_head *ch = NULL;
struct hlist_head *head = NULL; struct hlist_head *head = NULL;
struct hlist_node *tmp = NULL;
int i = 0; int i = 0;
spin_lock(&detail->hash_lock); spin_lock(&detail->hash_lock);
...@@ -541,7 +540,9 @@ void cache_purge(struct cache_detail *detail) ...@@ -541,7 +540,9 @@ void cache_purge(struct cache_detail *detail)
dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name); dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
for (i = 0; i < detail->hash_size; i++) { for (i = 0; i < detail->hash_size; i++) {
head = &detail->hash_table[i]; head = &detail->hash_table[i];
hlist_for_each_entry_safe(ch, tmp, head, cache_list) { while (!hlist_empty(head)) {
ch = hlist_entry(head->first, struct cache_head,
cache_list);
sunrpc_begin_cache_remove_entry(ch, detail); sunrpc_begin_cache_remove_entry(ch, detail);
spin_unlock(&detail->hash_lock); spin_unlock(&detail->hash_lock);
sunrpc_end_cache_remove_entry(ch, detail); sunrpc_end_cache_remove_entry(ch, detail);
......
...@@ -908,9 +908,6 @@ int svc_send(struct svc_rqst *rqstp) ...@@ -908,9 +908,6 @@ int svc_send(struct svc_rqst *rqstp)
if (!xprt) if (!xprt)
goto out; goto out;
/* release the receive skb before sending the reply */
xprt->xpt_ops->xpo_release_rqst(rqstp);
/* calculate over-all length */ /* calculate over-all length */
xb = &rqstp->rq_res; xb = &rqstp->rq_res;
xb->len = xb->head[0].iov_len + xb->len = xb->head[0].iov_len +
...@@ -1040,6 +1037,8 @@ static void svc_delete_xprt(struct svc_xprt *xprt) ...@@ -1040,6 +1037,8 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
dprintk("svc: svc_delete_xprt(%p)\n", xprt); dprintk("svc: svc_delete_xprt(%p)\n", xprt);
xprt->xpt_ops->xpo_detach(xprt); xprt->xpt_ops->xpo_detach(xprt);
if (xprt->xpt_bc_xprt)
xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);
spin_lock_bh(&serv->sv_lock); spin_lock_bh(&serv->sv_lock);
list_del_init(&xprt->xpt_list); list_del_init(&xprt->xpt_list);
......
...@@ -527,6 +527,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) ...@@ -527,6 +527,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent); unsigned int uninitialized_var(sent);
int err; int err;
svc_release_udp_skb(rqstp);
svc_set_cmsg_data(rqstp, cmh); svc_set_cmsg_data(rqstp, cmh);
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
...@@ -1076,6 +1078,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) ...@@ -1076,6 +1078,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent); unsigned int uninitialized_var(sent);
int err; int err;
svc_release_skb(rqstp);
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr); xdr_free_bvec(xdr);
if (err < 0 || sent != (xdr->len + sizeof(marker))) if (err < 0 || sent != (xdr->len + sizeof(marker)))
......
...@@ -244,6 +244,8 @@ static void ...@@ -244,6 +244,8 @@ static void
xprt_rdma_bc_close(struct rpc_xprt *xprt) xprt_rdma_bc_close(struct rpc_xprt *xprt)
{ {
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
xprt_disconnect_done(xprt);
xprt->cwnd = RPC_CWNDSHIFT; xprt->cwnd = RPC_CWNDSHIFT;
} }
......
...@@ -223,6 +223,26 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, ...@@ -223,6 +223,26 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
svc_rdma_recv_ctxt_destroy(rdma, ctxt); svc_rdma_recv_ctxt_destroy(rdma, ctxt);
} }
/**
* svc_rdma_release_rqst - Release transport-specific per-rqst resources
* @rqstp: svc_rqst being released
*
* Ensure that the recv_ctxt is released whether or not a Reply
* was sent. For example, the client could close the connection,
* or svc_process could drop an RPC, before the Reply is sent.
*/
void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
rqstp->rq_xprt_ctxt = NULL;
if (ctxt)
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt) struct svc_rdma_recv_ctxt *ctxt)
{ {
...@@ -820,6 +840,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) ...@@ -820,6 +840,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
__be32 *p; __be32 *p;
int ret; int ret;
rqstp->rq_xprt_ctxt = NULL;
spin_lock(&rdma_xprt->sc_rq_dto_lock); spin_lock(&rdma_xprt->sc_rq_dto_lock);
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q); ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
if (ctxt) { if (ctxt) {
......
...@@ -323,8 +323,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) ...@@ -323,8 +323,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
if (atomic_sub_return(cc->cc_sqecount, if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) { &rdma->sc_sq_avail) > 0) {
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
trace_svcrdma_post_rw(&cc->cc_cqe,
cc->cc_sqecount, ret);
if (ret) if (ret)
break; break;
return 0; return 0;
...@@ -337,6 +335,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) ...@@ -337,6 +335,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
trace_svcrdma_sq_retry(rdma); trace_svcrdma_sq_retry(rdma);
} while (1); } while (1);
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_CLOSE, &xprt->xpt_flags);
/* If even one was posted, there will be a completion. */ /* If even one was posted, there will be a completion. */
......
...@@ -322,15 +322,17 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) ...@@ -322,15 +322,17 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
} }
svc_xprt_get(&rdma->sc_xprt); svc_xprt_get(&rdma->sc_xprt);
trace_svcrdma_post_send(wr);
ret = ib_post_send(rdma->sc_qp, wr, NULL); ret = ib_post_send(rdma->sc_qp, wr, NULL);
trace_svcrdma_post_send(wr, ret); if (ret)
if (ret) { break;
return 0;
}
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_put(&rdma->sc_xprt); svc_xprt_put(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait); wake_up(&rdma->sc_send_wait);
}
break;
}
return ret; return ret;
} }
...@@ -924,12 +926,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -924,12 +926,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0) if (ret < 0)
goto err1; goto err1;
ret = 0; return 0;
out:
rqstp->rq_xprt_ctxt = NULL;
svc_rdma_recv_ctxt_put(rdma, rctxt);
return ret;
err2: err2:
if (ret != -E2BIG && ret != -EINVAL) if (ret != -E2BIG && ret != -EINVAL)
...@@ -938,16 +935,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -938,16 +935,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp); ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
if (ret < 0) if (ret < 0)
goto err1; goto err1;
ret = 0; return 0;
goto out;
err1: err1:
svc_rdma_send_ctxt_put(rdma, sctxt); svc_rdma_send_ctxt_put(rdma, sctxt);
err0: err0:
trace_svcrdma_send_failed(rqstp, ret); trace_svcrdma_send_failed(rqstp, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_CLOSE, &xprt->xpt_flags);
ret = -ENOTCONN; return -ENOTCONN;
goto out;
} }
/** /**
......
...@@ -71,7 +71,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, ...@@ -71,7 +71,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct sockaddr *sa, int salen, struct sockaddr *sa, int salen,
int flags); int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt);
...@@ -552,10 +551,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ...@@ -552,10 +551,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
return NULL; return NULL;
} }
static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
}
/* /*
* When connected, an svc_xprt has at least two references: * When connected, an svc_xprt has at least two references:
* *
......
...@@ -2584,6 +2584,7 @@ static int bc_send_request(struct rpc_rqst *req) ...@@ -2584,6 +2584,7 @@ static int bc_send_request(struct rpc_rqst *req)
static void bc_close(struct rpc_xprt *xprt) static void bc_close(struct rpc_xprt *xprt)
{ {
xprt_disconnect_done(xprt);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment