Commit 806276b7 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Five fixes for this series:

   - a fix from me to ensure that blk-mq drivers that terminate IO in
     their ->queue_rq() handler by returning QUEUE_ERROR don't stall
     with a scheduler enabled.

   - four nbd fixes from Josef and Ratna, fixing various problems that
     are critical enough to go in for this cycle. They have been well
     tested"

* 'for-linus' of git://git.kernel.dk/linux-block:
  nbd: replace kill_bdev() with __invalidate_device()
  nbd: set queue timeout properly
  nbd: set rq->errors to actual error code
  nbd: handle ERESTARTSYS properly
  blk-mq: include errors in did_work calculation
parents 52b9c816 abbbdf12
...@@ -969,7 +969,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) ...@@ -969,7 +969,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
struct request *rq; struct request *rq;
LIST_HEAD(driver_list); LIST_HEAD(driver_list);
struct list_head *dptr; struct list_head *dptr;
int queued, ret = BLK_MQ_RQ_QUEUE_OK; int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
/* /*
* Start off with dptr being NULL, so we start the first request * Start off with dptr being NULL, so we start the first request
...@@ -980,7 +980,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) ...@@ -980,7 +980,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
/* /*
* Now process all the entries, sending them to the driver. * Now process all the entries, sending them to the driver.
*/ */
queued = 0; errors = queued = 0;
while (!list_empty(list)) { while (!list_empty(list)) {
struct blk_mq_queue_data bd; struct blk_mq_queue_data bd;
...@@ -1037,6 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) ...@@ -1037,6 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
default: default:
pr_err("blk-mq: bad return on queue: %d\n", ret); pr_err("blk-mq: bad return on queue: %d\n", ret);
case BLK_MQ_RQ_QUEUE_ERROR: case BLK_MQ_RQ_QUEUE_ERROR:
errors++;
rq->errors = -EIO; rq->errors = -EIO;
blk_mq_end_request(rq, rq->errors); blk_mq_end_request(rq, rq->errors);
break; break;
...@@ -1088,7 +1089,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) ...@@ -1088,7 +1089,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
blk_mq_run_hw_queue(hctx, true); blk_mq_run_hw_queue(hctx, true);
} }
return queued != 0; return (queued + errors) != 0;
} }
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
......
...@@ -47,6 +47,8 @@ static DEFINE_MUTEX(nbd_index_mutex); ...@@ -47,6 +47,8 @@ static DEFINE_MUTEX(nbd_index_mutex);
struct nbd_sock { struct nbd_sock {
struct socket *sock; struct socket *sock;
struct mutex tx_lock; struct mutex tx_lock;
struct request *pending;
int sent;
}; };
#define NBD_TIMEDOUT 0 #define NBD_TIMEDOUT 0
...@@ -124,6 +126,7 @@ static const char *nbdcmd_to_ascii(int cmd) ...@@ -124,6 +126,7 @@ static const char *nbdcmd_to_ascii(int cmd)
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{ {
if (bdev->bd_openers <= 1)
bd_set_size(bdev, 0); bd_set_size(bdev, 0);
set_capacity(nbd->disk, 0); set_capacity(nbd->disk, 0);
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
...@@ -190,7 +193,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, ...@@ -190,7 +193,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
req->errors++; req->errors = -EIO;
mutex_lock(&nbd->config_lock); mutex_lock(&nbd->config_lock);
sock_shutdown(nbd); sock_shutdown(nbd);
...@@ -202,7 +205,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, ...@@ -202,7 +205,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
* Send or receive packet. * Send or receive packet.
*/ */
static int sock_xmit(struct nbd_device *nbd, int index, int send, static int sock_xmit(struct nbd_device *nbd, int index, int send,
struct iov_iter *iter, int msg_flags) struct iov_iter *iter, int msg_flags, int *sent)
{ {
struct socket *sock = nbd->socks[index]->sock; struct socket *sock = nbd->socks[index]->sock;
int result; int result;
...@@ -237,6 +240,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, ...@@ -237,6 +240,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
result = -EPIPE; /* short read */ result = -EPIPE; /* short read */
break; break;
} }
if (sent)
*sent += result;
} while (msg_data_left(&msg)); } while (msg_data_left(&msg));
tsk_restore_flags(current, pflags, PF_MEMALLOC); tsk_restore_flags(current, pflags, PF_MEMALLOC);
...@@ -248,6 +253,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, ...@@ -248,6 +253,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{ {
struct request *req = blk_mq_rq_from_pdu(cmd); struct request *req = blk_mq_rq_from_pdu(cmd);
struct nbd_sock *nsock = nbd->socks[index];
int result; int result;
struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
...@@ -256,6 +262,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -256,6 +262,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
struct bio *bio; struct bio *bio;
u32 type; u32 type;
u32 tag = blk_mq_unique_tag(req); u32 tag = blk_mq_unique_tag(req);
int sent = nsock->sent, skip = 0;
iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
...@@ -283,6 +290,17 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -283,6 +290,17 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
return -EIO; return -EIO;
} }
/* We did a partial send previously, and we at least sent the whole
* request struct, so just go and send the rest of the pages in the
* request.
*/
if (sent) {
if (sent >= sizeof(request)) {
skip = sent - sizeof(request);
goto send_pages;
}
iov_iter_advance(&from, sent);
}
request.type = htonl(type); request.type = htonl(type);
if (type != NBD_CMD_FLUSH) { if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
...@@ -294,15 +312,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -294,15 +312,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
cmd, nbdcmd_to_ascii(type), cmd, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
result = sock_xmit(nbd, index, 1, &from, result = sock_xmit(nbd, index, 1, &from,
(type == NBD_CMD_WRITE) ? MSG_MORE : 0); (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
if (result <= 0) { if (result <= 0) {
if (result == -ERESTARTSYS) {
/* If we haven't sent anything we can just return BUSY,
* however if we have sent something we need to make
* sure we only allow this req to be sent until we are
* completely done.
*/
if (sent) {
nsock->pending = req;
nsock->sent = sent;
}
return BLK_MQ_RQ_QUEUE_BUSY;
}
dev_err_ratelimited(disk_to_dev(nbd->disk), dev_err_ratelimited(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result); "Send control failed (result %d)\n", result);
return -EIO; return -EIO;
} }
send_pages:
if (type != NBD_CMD_WRITE) if (type != NBD_CMD_WRITE)
return 0; goto out;
bio = req->bio; bio = req->bio;
while (bio) { while (bio) {
...@@ -318,8 +348,25 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -318,8 +348,25 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
cmd, bvec.bv_len); cmd, bvec.bv_len);
iov_iter_bvec(&from, ITER_BVEC | WRITE, iov_iter_bvec(&from, ITER_BVEC | WRITE,
&bvec, 1, bvec.bv_len); &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 1, &from, flags); if (skip) {
if (skip >= iov_iter_count(&from)) {
skip -= iov_iter_count(&from);
continue;
}
iov_iter_advance(&from, skip);
skip = 0;
}
result = sock_xmit(nbd, index, 1, &from, flags, &sent);
if (result <= 0) { if (result <= 0) {
if (result == -ERESTARTSYS) {
/* We've already sent the header, we
* have no choice but to set pending and
* return BUSY.
*/
nsock->pending = req;
nsock->sent = sent;
return BLK_MQ_RQ_QUEUE_BUSY;
}
dev_err(disk_to_dev(nbd->disk), dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n", "Send data failed (result %d)\n",
result); result);
...@@ -336,6 +383,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) ...@@ -336,6 +383,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
} }
bio = next; bio = next;
} }
out:
nsock->pending = NULL;
nsock->sent = 0;
return 0; return 0;
} }
...@@ -353,7 +403,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ...@@ -353,7 +403,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
reply.magic = 0; reply.magic = 0;
iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) { if (result <= 0) {
if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
!test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
...@@ -383,7 +433,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ...@@ -383,7 +433,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (ntohl(reply.error)) { if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error)); ntohl(reply.error));
req->errors++; req->errors = -EIO;
return cmd; return cmd;
} }
...@@ -395,11 +445,11 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ...@@ -395,11 +445,11 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
rq_for_each_segment(bvec, req, iter) { rq_for_each_segment(bvec, req, iter) {
iov_iter_bvec(&to, ITER_BVEC | READ, iov_iter_bvec(&to, ITER_BVEC | READ,
&bvec, 1, bvec.bv_len); &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) { if (result <= 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result); result);
req->errors++; req->errors = -EIO;
return cmd; return cmd;
} }
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
...@@ -469,7 +519,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) ...@@ -469,7 +519,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
if (!blk_mq_request_started(req)) if (!blk_mq_request_started(req))
return; return;
cmd = blk_mq_rq_to_pdu(req); cmd = blk_mq_rq_to_pdu(req);
req->errors++; req->errors = -EIO;
nbd_end_request(cmd); nbd_end_request(cmd);
} }
...@@ -482,22 +532,23 @@ static void nbd_clear_que(struct nbd_device *nbd) ...@@ -482,22 +532,23 @@ static void nbd_clear_que(struct nbd_device *nbd)
} }
static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{ {
struct request *req = blk_mq_rq_from_pdu(cmd); struct request *req = blk_mq_rq_from_pdu(cmd);
struct nbd_device *nbd = cmd->nbd; struct nbd_device *nbd = cmd->nbd;
struct nbd_sock *nsock; struct nbd_sock *nsock;
int ret;
if (index >= nbd->num_connections) { if (index >= nbd->num_connections) {
dev_err_ratelimited(disk_to_dev(nbd->disk), dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on invalid socket\n"); "Attempted send on invalid socket\n");
goto error_out; return -EINVAL;
} }
if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
dev_err_ratelimited(disk_to_dev(nbd->disk), dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n"); "Attempted send on closed socket\n");
goto error_out; return -EINVAL;
} }
req->errors = 0; req->errors = 0;
...@@ -508,29 +559,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) ...@@ -508,29 +559,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
mutex_unlock(&nsock->tx_lock); mutex_unlock(&nsock->tx_lock);
dev_err_ratelimited(disk_to_dev(nbd->disk), dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n"); "Attempted send on closed socket\n");
goto error_out; return -EINVAL;
} }
if (nbd_send_cmd(nbd, cmd, index) != 0) { /* Handle the case that we have a pending request that was partially
dev_err_ratelimited(disk_to_dev(nbd->disk), * transmitted that _has_ to be serviced first. We need to call requeue
"Request send failed\n"); * here so that it gets put _after_ the request that is already on the
req->errors++; * dispatch list.
nbd_end_request(cmd); */
if (unlikely(nsock->pending && nsock->pending != req)) {
blk_mq_requeue_request(req, true);
ret = 0;
goto out;
} }
ret = nbd_send_cmd(nbd, cmd, index);
out:
mutex_unlock(&nsock->tx_lock); mutex_unlock(&nsock->tx_lock);
return ret;
return;
error_out:
req->errors++;
nbd_end_request(cmd);
} }
static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd) const struct blk_mq_queue_data *bd)
{ {
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
int ret;
/* /*
* Since we look at the bio's to send the request over the network we * Since we look at the bio's to send the request over the network we
...@@ -543,10 +595,20 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -543,10 +595,20 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
*/ */
init_completion(&cmd->send_complete); init_completion(&cmd->send_complete);
blk_mq_start_request(bd->rq); blk_mq_start_request(bd->rq);
nbd_handle_cmd(cmd, hctx->queue_num);
/* We can be called directly from the user space process, which means we
* could possibly have signals pending so our sendmsg will fail. In
* this case we need to return that we are busy, otherwise error out as
* appropriate.
*/
ret = nbd_handle_cmd(cmd, hctx->queue_num);
if (ret < 0)
ret = BLK_MQ_RQ_QUEUE_ERROR;
if (!ret)
ret = BLK_MQ_RQ_QUEUE_OK;
complete(&cmd->send_complete); complete(&cmd->send_complete);
return BLK_MQ_RQ_QUEUE_OK; return ret;
} }
static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
...@@ -581,6 +643,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, ...@@ -581,6 +643,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
mutex_init(&nsock->tx_lock); mutex_init(&nsock->tx_lock);
nsock->sock = sock; nsock->sock = sock;
nsock->pending = NULL;
nsock->sent = 0;
socks[nbd->num_connections++] = nsock; socks[nbd->num_connections++] = nsock;
if (max_part) if (max_part)
...@@ -602,6 +666,8 @@ static void nbd_reset(struct nbd_device *nbd) ...@@ -602,6 +666,8 @@ static void nbd_reset(struct nbd_device *nbd)
static void nbd_bdev_reset(struct block_device *bdev) static void nbd_bdev_reset(struct block_device *bdev)
{ {
if (bdev->bd_openers > 1)
return;
set_device_ro(bdev, false); set_device_ro(bdev, false);
bdev->bd_inode->i_size = 0; bdev->bd_inode->i_size = 0;
if (max_part > 0) { if (max_part > 0) {
...@@ -634,7 +700,7 @@ static void send_disconnects(struct nbd_device *nbd) ...@@ -634,7 +700,7 @@ static void send_disconnects(struct nbd_device *nbd)
for (i = 0; i < nbd->num_connections; i++) { for (i = 0; i < nbd->num_connections; i++) {
iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
ret = sock_xmit(nbd, i, 1, &from, 0); ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
if (ret <= 0) if (ret <= 0)
dev_err(disk_to_dev(nbd->disk), dev_err(disk_to_dev(nbd->disk),
"Send disconnect failed %d\n", ret); "Send disconnect failed %d\n", ret);
...@@ -665,7 +731,8 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) ...@@ -665,7 +731,8 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
{ {
sock_shutdown(nbd); sock_shutdown(nbd);
nbd_clear_que(nbd); nbd_clear_que(nbd);
kill_bdev(bdev);
__invalidate_device(bdev, true);
nbd_bdev_reset(bdev); nbd_bdev_reset(bdev);
/* /*
* We want to give the run thread a chance to wait for everybody * We want to give the run thread a chance to wait for everybody
...@@ -781,7 +848,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, ...@@ -781,7 +848,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
nbd_size_set(nbd, bdev, nbd->blksize, arg); nbd_size_set(nbd, bdev, nbd->blksize, arg);
return 0; return 0;
case NBD_SET_TIMEOUT: case NBD_SET_TIMEOUT:
if (arg) {
nbd->tag_set.timeout = arg * HZ; nbd->tag_set.timeout = arg * HZ;
blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
}
return 0; return 0;
case NBD_SET_FLAGS: case NBD_SET_FLAGS:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment