Commit b3bd86a0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-6.5-2023-07-14' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - Don't require quirk to use duplicate namespace identifiers
        (Christoph, Sagi)
      - One more BOGUS_NID quirk (Pankaj)
      - IO timeout and error hanlding fixes for PCI (Keith)
      - Enhanced metadata format mask fix (Ankit)
      - Association race condition fix for fibre channel (Michael)
      - Correct debugfs error checks (Minjie)
      - Use PAGE_SECTORS_SHIFT where needed (Damien)
      - Reduce kernel logs for legacy nguid attribute (Keith)
      - Use correct dma direction when unmapping metadata (Ming)

 - Fix for a flush handling regression in this release (Christoph)

 - Fix for batched request time stamping (Chengming)

 - Fix for a regression in the mq-deadline position calculation (Bart)

 - Lockdep fix for blk-crypto (Eric)

 - Fix for a regression in the Amiga partition handling changes
   (Michael)

* tag 'block-6.5-2023-07-14' of git://git.kernel.dk/linux:
  block: queue data commands from the flush state machine at the head
  blk-mq: fix start_time_ns and alloc_time_ns for pre-allocated rq
  nvme-pci: fix DMA direction of unmapping integrity data
  nvme: don't reject probe due to duplicate IDs for single-ported PCIe devices
  block/mq-deadline: Fix a bug in deadline_from_pos()
  nvme: ensure disabling pairs with unquiesce
  nvme-fc: fix race between error recovery and creating association
  nvme-fc: return non-zero status code when fails to create association
  nvme: fix parameter check in nvme_fault_inject_init()
  nvme: warn only once for legacy uuid attribute
  block: remove dead struc request->completion_data field
  nvme: fix the NVME_ID_NS_NVM_STS_MASK definition
  nvmet: use PAGE_SECTORS_SHIFT
  nvme: add BOGUS_NID quirk for Samsung SM953
  blk-crypto: use dynamic lock class for blk_crypto_profile::lock
  block/partition: fix signedness issue for Amiga partitions
parents ec17f164 9f87fc4d
......@@ -79,7 +79,14 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile,
unsigned int slot_hashtable_size;
memset(profile, 0, sizeof(*profile));
init_rwsem(&profile->lock);
/*
* profile->lock of an underlying device can nest inside profile->lock
* of a device-mapper device, so use a dynamic lock class to avoid
* false-positive lockdep reports.
*/
lockdep_register_key(&profile->lockdep_key);
__init_rwsem(&profile->lock, "&profile->lock", &profile->lockdep_key);
if (num_slots == 0)
return 0;
......@@ -89,7 +96,7 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile,
profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]),
GFP_KERNEL);
if (!profile->slots)
return -ENOMEM;
goto err_destroy;
profile->num_slots = num_slots;
......@@ -435,6 +442,7 @@ void blk_crypto_profile_destroy(struct blk_crypto_profile *profile)
{
if (!profile)
return;
lockdep_unregister_key(&profile->lockdep_key);
kvfree(profile->slot_hashtable);
kvfree_sensitive(profile->slots,
sizeof(profile->slots[0]) * profile->num_slots);
......
......@@ -189,7 +189,7 @@ static void blk_flush_complete_seq(struct request *rq,
case REQ_FSEQ_DATA:
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
spin_lock(&q->requeue_lock);
list_add_tail(&rq->queuelist, &q->flush_list);
list_add(&rq->queuelist, &q->requeue_list);
spin_unlock(&q->requeue_lock);
blk_mq_kick_requeue_list(q);
break;
......
......@@ -328,8 +328,24 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
}
EXPORT_SYMBOL(blk_rq_init);
/* Set start and alloc time when the allocated request is actually used */
static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
{
if (blk_mq_need_time_stamp(rq))
rq->start_time_ns = ktime_get_ns();
else
rq->start_time_ns = 0;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
if (blk_queue_rq_alloc_time(rq->q))
rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns;
else
rq->alloc_time_ns = 0;
#endif
}
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
struct blk_mq_tags *tags, unsigned int tag, u64 alloc_time_ns)
struct blk_mq_tags *tags, unsigned int tag)
{
struct blk_mq_ctx *ctx = data->ctx;
struct blk_mq_hw_ctx *hctx = data->hctx;
......@@ -356,14 +372,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
}
rq->timeout = 0;
if (blk_mq_need_time_stamp(rq))
rq->start_time_ns = ktime_get_ns();
else
rq->start_time_ns = 0;
rq->part = NULL;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
rq->alloc_time_ns = alloc_time_ns;
#endif
rq->io_start_time_ns = 0;
rq->stats_sectors = 0;
rq->nr_phys_segments = 0;
......@@ -393,8 +402,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
}
static inline struct request *
__blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
u64 alloc_time_ns)
__blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
{
unsigned int tag, tag_offset;
struct blk_mq_tags *tags;
......@@ -413,7 +421,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
tag = tag_offset + i;
prefetch(tags->static_rqs[tag]);
tag_mask &= ~(1UL << i);
rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns);
rq = blk_mq_rq_ctx_init(data, tags, tag);
rq_list_add(data->cached_rq, rq);
nr++;
}
......@@ -474,9 +482,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
* Try batched alloc if we want more than 1 tag.
*/
if (data->nr_tags > 1) {
rq = __blk_mq_alloc_requests_batch(data, alloc_time_ns);
if (rq)
rq = __blk_mq_alloc_requests_batch(data);
if (rq) {
blk_mq_rq_time_init(rq, alloc_time_ns);
return rq;
}
data->nr_tags = 1;
}
......@@ -499,8 +509,9 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
goto retry;
}
return blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag,
alloc_time_ns);
rq = blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag);
blk_mq_rq_time_init(rq, alloc_time_ns);
return rq;
}
static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
......@@ -555,6 +566,7 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
return NULL;
plug->cached_rq = rq_list_next(rq);
blk_mq_rq_time_init(rq, 0);
}
rq->cmd_flags = opf;
......@@ -656,8 +668,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
tag = blk_mq_get_tag(&data);
if (tag == BLK_MQ_NO_TAG)
goto out_queue_exit;
rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag,
alloc_time_ns);
rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag);
blk_mq_rq_time_init(rq, alloc_time_ns);
rq->__data_len = 0;
rq->__sector = (sector_t) -1;
rq->bio = rq->biotail = NULL;
......@@ -2896,6 +2908,7 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
plug->cached_rq = rq_list_next(rq);
rq_qos_throttle(q, *bio);
blk_mq_rq_time_init(rq, 0);
rq->cmd_flags = (*bio)->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
return rq;
......
......@@ -176,7 +176,7 @@ static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
* zoned writes, start searching from the start of a zone.
*/
if (blk_rq_is_seq_zoned_write(rq))
pos -= round_down(pos, rq->q->limits.chunk_sectors);
pos = round_down(pos, rq->q->limits.chunk_sectors);
while (node) {
rq = rb_entry_rq(node);
......
......@@ -90,7 +90,7 @@ int amiga_partition(struct parsed_partitions *state)
}
blk = be32_to_cpu(rdb->rdb_PartitionList);
put_dev_sector(sect);
for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
for (part = 1; (s32) blk>0 && part<=16; part++, put_dev_sector(sect)) {
/* Read in terms partition table understands */
if (check_mul_overflow(blk, (sector_t) blksize, &blk)) {
pr_err("Dev %s: overflow calculating partition block %llu! Skipping partitions %u and beyond\n",
......
......@@ -3431,12 +3431,42 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
if (ret) {
dev_err(ctrl->device,
"globally duplicate IDs for nsid %d\n", info->nsid);
/*
* We've found two different namespaces on two different
* subsystems that report the same ID. This is pretty nasty
* for anything that actually requires unique device
* identification. In the kernel we need this for multipathing,
* and in user space the /dev/disk/by-id/ links rely on it.
*
* If the device also claims to be multi-path capable back off
* here now and refuse the probe the second device as this is a
* recipe for data corruption. If not this is probably a
* cheap consumer device if on the PCIe bus, so let the user
* proceed and use the shiny toy, but warn that with changing
* probing order (which due to our async probing could just be
* device taking longer to startup) the other device could show
* up at any time.
*/
nvme_print_device_info(ctrl);
if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */
((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) &&
info->is_shared)) {
dev_err(ctrl->device,
"ignoring nsid %d because of duplicate IDs\n",
info->nsid);
return ret;
}
dev_err(ctrl->device,
"clearing duplicate IDs for nsid %d\n", info->nsid);
dev_err(ctrl->device,
"use of /dev/disk/by-id/ may cause data corruption\n");
memset(&info->ids.nguid, 0, sizeof(info->ids.nguid));
memset(&info->ids.uuid, 0, sizeof(info->ids.uuid));
memset(&info->ids.eui64, 0, sizeof(info->ids.eui64));
ctrl->quirks |= NVME_QUIRK_BOGUS_NID;
}
mutex_lock(&ctrl->subsys->lock);
head = nvme_find_ns_head(ctrl, info->nsid);
if (!head) {
......
......@@ -27,7 +27,7 @@ void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
/* create debugfs directory and attribute */
parent = debugfs_create_dir(dev_name, NULL);
if (!parent) {
if (IS_ERR(parent)) {
pr_warn("%s: failed to create debugfs directory\n", dev_name);
return;
}
......
......@@ -2548,14 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
* the controller. Abort any ios on the association and let the
* create_association error path resolve things.
*/
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
__nvme_fc_abort_outstanding_ios(ctrl, true);
enum nvme_ctrl_state state;
unsigned long flags;
spin_lock_irqsave(&ctrl->lock, flags);
state = ctrl->ctrl.state;
if (state == NVME_CTRL_CONNECTING) {
set_bit(ASSOC_FAILED, &ctrl->flags);
spin_unlock_irqrestore(&ctrl->lock, flags);
__nvme_fc_abort_outstanding_ios(ctrl, true);
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport error during (re)connect\n",
ctrl->cnum);
return;
}
spin_unlock_irqrestore(&ctrl->lock, flags);
/* Otherwise, only proceed if in LIVE state - e.g. on first error */
if (ctrl->ctrl.state != NVME_CTRL_LIVE)
if (state != NVME_CTRL_LIVE)
return;
dev_warn(ctrl->ctrl.device,
......@@ -3110,7 +3120,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
*/
ret = nvme_enable_ctrl(&ctrl->ctrl);
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
ret = -EIO;
if (ret)
goto out_disconnect_admin_queue;
ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
......@@ -3120,7 +3132,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
nvme_unquiesce_admin_queue(&ctrl->ctrl);
ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
ret = -EIO;
if (ret)
goto out_disconnect_admin_queue;
/* sanity checks */
......@@ -3165,10 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
else
ret = nvme_fc_recreate_io_queues(ctrl);
}
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
goto out_term_aen_ops;
spin_lock_irqsave(&ctrl->lock, flags);
if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
ret = -EIO;
if (ret) {
spin_unlock_irqrestore(&ctrl->lock, flags);
goto out_term_aen_ops;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
spin_unlock_irqrestore(&ctrl->lock, flags);
ctrl->ctrl.nr_reconnects = 0;
......@@ -3180,6 +3200,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
out_term_aen_ops:
nvme_fc_term_aen_ops(ctrl);
out_disconnect_admin_queue:
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n",
ctrl->cnum, ctrl->association_id, ret);
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
spin_lock_irqsave(&ctrl->lock, flags);
......
......@@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_page(dev->dev, iod->meta_dma,
rq_integrity_vec(req)->bv_len, rq_data_dir(req));
rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
}
if (blk_rq_nr_phys_segments(req))
......@@ -1298,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
*/
if (nvme_should_reset(dev, csts)) {
nvme_warn_reset(dev, csts);
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);
return BLK_EH_DONE;
goto disable;
}
/*
......@@ -1351,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);
return BLK_EH_DONE;
goto disable;
}
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
......@@ -1391,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* as the device then is in a faulty state.
*/
return BLK_EH_RESET_TIMER;
disable:
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
return BLK_EH_DONE;
nvme_dev_disable(dev, false);
if (nvme_try_sched_reset(&dev->ctrl))
nvme_unquiesce_io_queues(&dev->ctrl);
return BLK_EH_DONE;
}
static void nvme_free_queue(struct nvme_queue *nvmeq)
......@@ -3278,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
nvme_dev_disable(dev, true);
return PCI_ERS_RESULT_DISCONNECT;
}
nvme_dev_disable(dev, false);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
......@@ -3294,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
nvme_reset_ctrl(&dev->ctrl);
if (!nvme_try_sched_reset(&dev->ctrl))
nvme_unquiesce_io_queues(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
......@@ -3396,6 +3405,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */
......
......@@ -92,7 +92,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
* we have no UUID set
*/
if (uuid_is_null(&ids->uuid)) {
dev_warn_ratelimited(dev,
dev_warn_once(dev,
"No UUID available providing old NGUID\n");
return sysfs_emit(buf, "%pU\n", ids->nguid);
}
......
......@@ -373,7 +373,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
goto out_cleanup_tagset;
ctrl->ctrl.max_hw_sectors =
(NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
(NVME_LOOP_MAX_SEGMENTS - 1) << PAGE_SECTORS_SHIFT;
nvme_unquiesce_admin_queue(&ctrl->ctrl);
......
......@@ -102,14 +102,14 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
* which depends on the host's memory fragementation. To solve this,
* ensure mdts is limited to the pages equal to the number of segments.
*/
max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
max_hw_sectors = min_not_zero(pctrl->max_segments << PAGE_SECTORS_SHIFT,
pctrl->max_hw_sectors);
/*
* nvmet_passthru_map_sg is limitted to using a single bio so limit
* the mdts based on BIO_MAX_VECS as well
*/
max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9),
max_hw_sectors = min_not_zero(BIO_MAX_VECS << PAGE_SECTORS_SHIFT,
max_hw_sectors);
page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
......
......@@ -111,6 +111,7 @@ struct blk_crypto_profile {
* keyslots while ensuring that they can't be changed concurrently.
*/
struct rw_semaphore lock;
struct lock_class_key lockdep_key;
/* List of idle slots, with least recently used slot at front */
wait_queue_head_t idle_slots_wait_queue;
......
......@@ -158,13 +158,13 @@ struct request {
/*
* The rb_node is only used inside the io scheduler, requests
* are pruned when moved to the dispatch queue. So let the
* completion_data share space with the rb_node.
* are pruned when moved to the dispatch queue. special_vec must
* only be used if RQF_SPECIAL_PAYLOAD is set, and those cannot be
* insert into an IO scheduler.
*/
union {
struct rb_node rb_node; /* sort/lookup */
struct bio_vec special_vec;
void *completion_data;
};
/*
......
......@@ -473,7 +473,7 @@ struct nvme_id_ns_nvm {
};
enum {
NVME_ID_NS_NVM_STS_MASK = 0x3f,
NVME_ID_NS_NVM_STS_MASK = 0x7f,
NVME_ID_NS_NVM_GUARD_SHIFT = 7,
NVME_ID_NS_NVM_GUARD_MASK = 0x3,
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment