Commit a379fbbc authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request:
      - fix nvmet-tcp header digest verification (Amit Engel)
      - fix a memory leak in nvmet-tcp when releasing a queue (Maurizio
        Lombardi)
      - fix nvme-tcp H2CData PDU send accounting again (Sagi Grimberg)
      - fix digest pointer calculation in nvme-tcp and nvmet-tcp (Varun
        Prakash)
      - fix possible nvme-tcp req->offset corruption (Varun Prakash)

 - Queue drain ordering fix (Ming)

 - Partition check regression for zoned devices (Shin'ichiro)

 - Zone queue restart fix (Naohiro)

* tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block:
  block: Fix partition check for host-aware zoned block devices
  nvmet-tcp: fix header digest verification
  nvmet-tcp: fix data digest pointer calculation
  nvme-tcp: fix data digest pointer calculation
  nvme-tcp: fix possible req->offset corruption
  block: schedule queue restart after BLK_STS_ZONE_RESOURCE
  block: drain queue after disk is removed from sysfs
  nvme-tcp: fix H2CData PDU send accounting (again)
  nvmet-tcp: fix a memory leak when releasing a queue
parents 17d50f89 f4aaf1fa
......@@ -1325,6 +1325,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
int errors, queued;
blk_status_t ret = BLK_STS_OK;
LIST_HEAD(zone_list);
bool needs_resource = false;
if (list_empty(list))
return false;
......@@ -1370,6 +1371,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
queued++;
break;
case BLK_STS_RESOURCE:
needs_resource = true;
fallthrough;
case BLK_STS_DEV_RESOURCE:
blk_mq_handle_dev_resource(rq, list);
goto out;
......@@ -1380,6 +1383,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
* accept.
*/
blk_mq_handle_zone_resource(rq, &zone_list);
needs_resource = true;
break;
default:
errors++;
......@@ -1406,7 +1410,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
/* For non-shared tags, the RESTART check will suffice */
bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
if (nr_budgets)
blk_mq_release_budgets(q, list);
......@@ -1447,14 +1450,16 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
* If driver returns BLK_STS_RESOURCE and SCHED_RESTART
* bit is set, run queue after a delay to avoid IO stalls
* that could otherwise occur if the queue is idle. We'll do
* similar if we couldn't get budget and SCHED_RESTART is set.
* similar if we couldn't get budget or couldn't lock a zone
* and SCHED_RESTART is set.
*/
needs_restart = blk_mq_sched_needs_restart(hctx);
if (prep == PREP_DISPATCH_NO_BUDGET)
needs_resource = true;
if (!needs_restart ||
(no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
blk_mq_run_hw_queue(hctx, true);
else if (needs_restart && (ret == BLK_STS_RESOURCE ||
no_budget_avail))
else if (needs_restart && needs_resource)
blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
blk_mq_update_dispatch_busy(hctx, true);
......
......@@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
static bool disk_has_partitions(struct gendisk *disk)
{
unsigned long idx;
struct block_device *part;
bool ret = false;
rcu_read_lock();
xa_for_each(&disk->part_tbl, idx, part) {
if (bdev_is_partition(part)) {
ret = true;
break;
}
}
rcu_read_unlock();
return ret;
}
/**
* blk_queue_set_zoned - configure a disk queue zoned model.
* @disk: the gendisk of the queue to configure
......@@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
* we do nothing special as far as the block layer is concerned.
*/
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
!xa_empty(&disk->part_tbl))
disk_has_partitions(disk))
model = BLK_ZONED_NONE;
break;
case BLK_ZONED_NONE:
......
......@@ -588,16 +588,6 @@ void del_gendisk(struct gendisk *disk)
* Prevent new I/O from crossing bio_queue_enter().
*/
blk_queue_start_drain(q);
blk_mq_freeze_queue_wait(q);
rq_qos_exit(q);
blk_sync_queue(q);
blk_flush_integrity();
/*
* Allow using passthrough request again after the queue is torn down.
*/
blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
__blk_mq_unfreeze_queue(q, true);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
......@@ -620,6 +610,18 @@ void del_gendisk(struct gendisk *disk)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk));
blk_mq_freeze_queue_wait(q);
rq_qos_exit(q);
blk_sync_queue(q);
blk_flush_integrity();
/*
* Allow using passthrough request again after the queue is torn down.
*/
blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
__blk_mq_unfreeze_queue(q, true);
}
EXPORT_SYMBOL(del_gendisk);
......
......@@ -926,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
int req_data_len = req->data_len;
while (true) {
struct page *page = nvme_tcp_req_cur_page(req);
size_t offset = nvme_tcp_req_cur_offset(req);
size_t len = nvme_tcp_req_cur_length(req);
bool last = nvme_tcp_pdu_last_send(req, len);
int req_data_sent = req->data_sent;
int ret, flags = MSG_DONTWAIT;
if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
......@@ -958,7 +960,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
* in the request where we don't want to modify it as we may
* compete with the RX path completing the request.
*/
if (req->data_sent + ret < req->data_len)
if (req_data_sent + ret < req_data_len)
nvme_tcp_advance_req(req, ret);
/* fully successful last send in current PDU */
......@@ -1048,10 +1050,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
size_t offset = req->offset;
int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
.iov_base = &req->ddgst + req->offset,
.iov_base = (u8 *)&req->ddgst + req->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
};
......@@ -1064,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
if (unlikely(ret <= 0))
return ret;
if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
nvme_tcp_done_send_req(queue);
return 1;
}
......
......@@ -702,7 +702,7 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
struct nvmet_tcp_queue *queue = cmd->queue;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
.iov_base = &cmd->exp_ddgst + cmd->offset,
.iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
};
int ret;
......@@ -1096,7 +1096,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
}
if (queue->hdr_digest &&
nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) {
nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
nvmet_tcp_fatal_error(queue); /* fatal */
return -EPROTO;
}
......@@ -1428,6 +1428,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_release_queue_work(struct work_struct *w)
{
struct page *page;
struct nvmet_tcp_queue *queue =
container_of(w, struct nvmet_tcp_queue, release_work);
......@@ -1447,6 +1448,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
nvmet_tcp_free_crypto(queue);
ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
page = virt_to_head_page(queue->pf_cache.va);
__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
kfree(queue);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment