Commit 0d18c12b authored by Linus Torvalds

Merge tag 'block-5.14-2021-07-16' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe fixes via Christoph:
      - fix various races in nvme-pci when shutting down just after
        probing (Casey Chen)
      - fix a net_device leak in nvme-tcp (Prabhakar Kushwaha)

 - Fix regression in xen-blkfront by cleaning up the removal state
   machine (Christoph)

 - Fix tag_set and queue cleanup ordering regression in nbd (Wang)

 - Fix tag_set and queue cleanup ordering regression in pd (Guoqing)

* tag 'block-5.14-2021-07-16' of git://git.kernel.dk/linux-block:
  xen-blkfront: sanitize the removal state machine
  nbd: fix order of cleaning up the queue and freeing the tagset
  pd: fix order of cleaning up the queue and freeing the tagset
  nvme-pci: do not call nvme_dev_remove_admin from nvme_remove
  nvme-pci: fix multiple races in nvme_setup_io_queues
  nvme-tcp: use __dev_get_by_name instead dev_get_by_name for OPT_HOST_IFACE
parents 13fdaf04 05d69d95
...@@ -239,8 +239,8 @@ static void nbd_dev_remove(struct nbd_device *nbd) ...@@ -239,8 +239,8 @@ static void nbd_dev_remove(struct nbd_device *nbd)
if (disk) { if (disk) {
del_gendisk(disk); del_gendisk(disk);
blk_mq_free_tag_set(&nbd->tag_set);
blk_cleanup_disk(disk); blk_cleanup_disk(disk);
blk_mq_free_tag_set(&nbd->tag_set);
} }
/* /*
......
...@@ -1014,8 +1014,8 @@ static void __exit pd_exit(void) ...@@ -1014,8 +1014,8 @@ static void __exit pd_exit(void)
if (p) { if (p) {
disk->gd = NULL; disk->gd = NULL;
del_gendisk(p); del_gendisk(p);
blk_mq_free_tag_set(&disk->tag_set);
blk_cleanup_disk(p); blk_cleanup_disk(p);
blk_mq_free_tag_set(&disk->tag_set);
pi_release(disk->pi); pi_release(disk->pi);
} }
} }
......
...@@ -502,34 +502,21 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) ...@@ -502,34 +502,21 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
static int blkif_ioctl(struct block_device *bdev, fmode_t mode, static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
unsigned command, unsigned long argument) unsigned command, unsigned long argument)
{ {
struct blkfront_info *info = bdev->bd_disk->private_data;
int i; int i;
dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
command, (long)argument);
switch (command) { switch (command) {
case CDROMMULTISESSION: case CDROMMULTISESSION:
dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
for (i = 0; i < sizeof(struct cdrom_multisession); i++) for (i = 0; i < sizeof(struct cdrom_multisession); i++)
if (put_user(0, (char __user *)(argument + i))) if (put_user(0, (char __user *)(argument + i)))
return -EFAULT; return -EFAULT;
return 0; return 0;
case CDROM_GET_CAPABILITY:
case CDROM_GET_CAPABILITY: { if (bdev->bd_disk->flags & GENHD_FL_CD)
struct gendisk *gd = info->gd;
if (gd->flags & GENHD_FL_CD)
return 0; return 0;
return -EINVAL; return -EINVAL;
}
default: default:
/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", return -EINVAL;
command);*/
return -EINVAL; /* same return as native Linux */
} }
return 0;
} }
static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
...@@ -1177,36 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, ...@@ -1177,36 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
return err; return err;
} }
static void xlvbd_release_gendisk(struct blkfront_info *info)
{
unsigned int minor, nr_minors, i;
struct blkfront_ring_info *rinfo;
if (info->rq == NULL)
return;
/* No more blkif_request(). */
blk_mq_stop_hw_queues(info->rq);
for_each_rinfo(info, rinfo, i) {
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&rinfo->callback);
/* Flush gnttab callback work. Must be done with no locks held. */
flush_work(&rinfo->work);
}
del_gendisk(info->gd);
minor = info->gd->first_minor;
nr_minors = info->gd->minors;
xlbd_release_minors(minor, nr_minors);
blk_cleanup_disk(info->gd);
info->gd = NULL;
blk_mq_free_tag_set(&info->tag_set);
}
/* Already hold rinfo->ring_lock. */ /* Already hold rinfo->ring_lock. */
static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo) static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
{ {
...@@ -1756,12 +1713,6 @@ static int write_per_ring_nodes(struct xenbus_transaction xbt, ...@@ -1756,12 +1713,6 @@ static int write_per_ring_nodes(struct xenbus_transaction xbt,
return err; return err;
} }
/*
 * Unlink @info from the list it sits on (through info->info_list) and free
 * it.  @info must not be used after this returns.
 *
 * NOTE(review): every call site visible in this file runs with
 * blkfront_mutex held - presumably that lock protects the list; confirm.
 */
static void free_info(struct blkfront_info *info)
{
list_del(&info->info_list);
kfree(info);
}
/* Common code used when first setting up, and when resuming. */ /* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev, static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info) struct blkfront_info *info)
...@@ -1880,13 +1831,6 @@ static int talk_to_blkback(struct xenbus_device *dev, ...@@ -1880,13 +1831,6 @@ static int talk_to_blkback(struct xenbus_device *dev,
xenbus_dev_fatal(dev, err, "%s", message); xenbus_dev_fatal(dev, err, "%s", message);
destroy_blkring: destroy_blkring:
blkif_free(info, 0); blkif_free(info, 0);
mutex_lock(&blkfront_mutex);
free_info(info);
mutex_unlock(&blkfront_mutex);
dev_set_drvdata(&dev->dev, NULL);
return err; return err;
} }
...@@ -2126,38 +2070,26 @@ static int blkfront_resume(struct xenbus_device *dev) ...@@ -2126,38 +2070,26 @@ static int blkfront_resume(struct xenbus_device *dev)
static void blkfront_closing(struct blkfront_info *info) static void blkfront_closing(struct blkfront_info *info)
{ {
struct xenbus_device *xbdev = info->xbdev; struct xenbus_device *xbdev = info->xbdev;
struct block_device *bdev = NULL; struct blkfront_ring_info *rinfo;
unsigned int i;
mutex_lock(&info->mutex);
if (xbdev->state == XenbusStateClosing) { if (xbdev->state == XenbusStateClosing)
mutex_unlock(&info->mutex);
return; return;
}
if (info->gd) /* No more blkif_request(). */
bdev = bdgrab(info->gd->part0); blk_mq_stop_hw_queues(info->rq);
blk_set_queue_dying(info->rq);
set_capacity(info->gd, 0);
mutex_unlock(&info->mutex); for_each_rinfo(info, rinfo, i) {
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&rinfo->callback);
if (!bdev) { /* Flush gnttab callback work. Must be done with no locks held. */
xenbus_frontend_closed(xbdev); flush_work(&rinfo->work);
return;
} }
mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers) {
xenbus_dev_error(xbdev, -EBUSY,
"Device in use; refusing to close");
xenbus_switch_state(xbdev, XenbusStateClosing);
} else {
xlvbd_release_gendisk(info);
xenbus_frontend_closed(xbdev); xenbus_frontend_closed(xbdev);
}
mutex_unlock(&bdev->bd_disk->open_mutex);
bdput(bdev);
} }
static void blkfront_setup_discard(struct blkfront_info *info) static void blkfront_setup_discard(struct blkfront_info *info)
...@@ -2472,7 +2404,6 @@ static void blkback_changed(struct xenbus_device *dev, ...@@ -2472,7 +2404,6 @@ static void blkback_changed(struct xenbus_device *dev,
break; break;
fallthrough; fallthrough;
case XenbusStateClosing: case XenbusStateClosing:
if (info)
blkfront_closing(info); blkfront_closing(info);
break; break;
} }
...@@ -2481,56 +2412,21 @@ static void blkback_changed(struct xenbus_device *dev, ...@@ -2481,56 +2412,21 @@ static void blkback_changed(struct xenbus_device *dev,
static int blkfront_remove(struct xenbus_device *xbdev) static int blkfront_remove(struct xenbus_device *xbdev)
{ {
struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
struct block_device *bdev = NULL;
struct gendisk *disk;
dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
if (!info) del_gendisk(info->gd);
return 0;
blkif_free(info, 0);
mutex_lock(&info->mutex);
disk = info->gd;
if (disk)
bdev = bdgrab(disk->part0);
info->xbdev = NULL;
mutex_unlock(&info->mutex);
if (!bdev) {
mutex_lock(&blkfront_mutex);
free_info(info);
mutex_unlock(&blkfront_mutex);
return 0;
}
/*
* The xbdev was removed before we reached the Closed
* state. See if it's safe to remove the disk. If the bdev
* isn't closed yet, we let release take care of it.
*/
mutex_lock(&disk->open_mutex);
info = disk->private_data;
dev_warn(disk_to_dev(disk),
"%s was hot-unplugged, %d stale handles\n",
xbdev->nodename, bdev->bd_openers);
if (info && !bdev->bd_openers) {
xlvbd_release_gendisk(info);
disk->private_data = NULL;
mutex_lock(&blkfront_mutex); mutex_lock(&blkfront_mutex);
free_info(info); list_del(&info->info_list);
mutex_unlock(&blkfront_mutex); mutex_unlock(&blkfront_mutex);
}
mutex_unlock(&disk->open_mutex); blkif_free(info, 0);
bdput(bdev); xlbd_release_minors(info->gd->first_minor, info->gd->minors);
blk_cleanup_disk(info->gd);
blk_mq_free_tag_set(&info->tag_set);
kfree(info);
return 0; return 0;
} }
...@@ -2541,77 +2437,9 @@ static int blkfront_is_ready(struct xenbus_device *dev) ...@@ -2541,77 +2437,9 @@ static int blkfront_is_ready(struct xenbus_device *dev)
return info->is_ready && info->xbdev; return info->is_ready && info->xbdev;
} }
/*
 * Block-device open hook.
 *
 * Returns 0 when the disk still has a blkfront_info attached and that info
 * still owns a gendisk; returns -ERESTARTSYS otherwise.  Both checks are
 * made under blkfront_mutex, the second one additionally under info->mutex.
 */
static int blkif_open(struct block_device *bdev, fmode_t mode)
{
	struct blkfront_info *info;
	int ret = -ERESTARTSYS;	/* assume the xbdev is gone or closed */

	mutex_lock(&blkfront_mutex);

	info = bdev->bd_disk->private_data;
	if (info) {
		mutex_lock(&info->mutex);
		if (info->gd)
			ret = 0;
		mutex_unlock(&info->mutex);
	}

	mutex_unlock(&blkfront_mutex);
	return ret;
}
/*
 * Block-device release hook.
 *
 * Runs entirely under blkfront_mutex and only acts once
 * disk->part0->bd_openers has dropped to zero.  Two deferred teardown cases
 * are handled: the xenbus device is in XenbusStateClosing (release the disk
 * and report the frontend closed), or info->xbdev is NULL (release the disk
 * and free @info).  @mode is not used.
 *
 * NOTE(review): info comes from disk->private_data and is dereferenced
 * without a NULL check, while this function itself stores NULL there on the
 * sudden-removal path - confirm release cannot run again afterwards.
 */
static void blkif_release(struct gendisk *disk, fmode_t mode)
{
struct blkfront_info *info = disk->private_data;
struct xenbus_device *xbdev;
mutex_lock(&blkfront_mutex);
/* Other openers remain: nothing to tear down yet. */
if (disk->part0->bd_openers)
goto out_mutex;
/*
* Check if we have been instructed to close. We will have
* deferred this request, because the bdev was still open.
*/
mutex_lock(&info->mutex);
/* Snapshot xbdev under info->mutex; it is tested again after unlock. */
xbdev = info->xbdev;
if (xbdev && xbdev->state == XenbusStateClosing) {
/* pending switch to state closed */
dev_info(disk_to_dev(disk), "releasing disk\n");
xlvbd_release_gendisk(info);
xenbus_frontend_closed(info->xbdev);
}
mutex_unlock(&info->mutex);
if (!xbdev) {
/* sudden device removal */
dev_info(disk_to_dev(disk), "releasing disk\n");
xlvbd_release_gendisk(info);
/* Detach info from the disk before freeing it. */
disk->private_data = NULL;
free_info(info);
}
out_mutex:
mutex_unlock(&blkfront_mutex);
}
static const struct block_device_operations xlvbd_block_fops = static const struct block_device_operations xlvbd_block_fops =
{ {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = blkif_open,
.release = blkif_release,
.getgeo = blkif_getgeo, .getgeo = blkif_getgeo,
.ioctl = blkif_ioctl, .ioctl = blkif_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl, .compat_ioctl = blkdev_compat_ptr_ioctl,
......
...@@ -1554,6 +1554,28 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) ...@@ -1554,6 +1554,28 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
wmb(); /* ensure the first interrupt sees the initialization */ wmb(); /* ensure the first interrupt sees the initialization */
} }
/*
 * Attempt to take dev->shutdown_lock for IO queue setup without blocking.
 *
 * Returns 0 with shutdown_lock held when the lock was free and the
 * controller state is NVME_CTRL_CONNECTING.  Returns -ENODEV, with the lock
 * not held, in every other case.
 */
static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
{
	/* Lock is busy (e.g. held by nvme_dev_disable): give up. */
	if (!mutex_trylock(&dev->shutdown_lock))
		return -ENODEV;

	/* Expected state: keep the lock and report success. */
	if (dev->ctrl.state == NVME_CTRL_CONNECTING)
		return 0;

	/* Controller is in the wrong state: drop the lock and fail early. */
	mutex_unlock(&dev->shutdown_lock);
	return -ENODEV;
}
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
{ {
struct nvme_dev *dev = nvmeq->dev; struct nvme_dev *dev = nvmeq->dev;
...@@ -1582,8 +1604,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) ...@@ -1582,8 +1604,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
goto release_cq; goto release_cq;
nvmeq->cq_vector = vector; nvmeq->cq_vector = vector;
nvme_init_queue(nvmeq, qid);
result = nvme_setup_io_queues_trylock(dev);
if (result)
return result;
nvme_init_queue(nvmeq, qid);
if (!polled) { if (!polled) {
result = queue_request_irq(nvmeq); result = queue_request_irq(nvmeq);
if (result < 0) if (result < 0)
...@@ -1591,10 +1616,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) ...@@ -1591,10 +1616,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
} }
set_bit(NVMEQ_ENABLED, &nvmeq->flags); set_bit(NVMEQ_ENABLED, &nvmeq->flags);
mutex_unlock(&dev->shutdown_lock);
return result; return result;
release_sq: release_sq:
dev->online_queues--; dev->online_queues--;
mutex_unlock(&dev->shutdown_lock);
adapter_delete_sq(dev, qid); adapter_delete_sq(dev, qid);
release_cq: release_cq:
adapter_delete_cq(dev, qid); adapter_delete_cq(dev, qid);
...@@ -2167,7 +2194,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2167,7 +2194,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0) if (nr_io_queues == 0)
return 0; return 0;
clear_bit(NVMEQ_ENABLED, &adminq->flags); /*
* Free IRQ resources as soon as NVMEQ_ENABLED bit transitions
* from set to unset. If there is a window before it is truly freed,
* pci_free_irq_vectors() jumping into this window will crash.
* Also take the lock to avoid racing with pci_free_irq_vectors() in
* the nvme_dev_disable() path.
*/
result = nvme_setup_io_queues_trylock(dev);
if (result)
return result;
if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
pci_free_irq(pdev, 0, adminq);
if (dev->cmb_use_sqes) { if (dev->cmb_use_sqes) {
result = nvme_cmb_qdepth(dev, nr_io_queues, result = nvme_cmb_qdepth(dev, nr_io_queues,
...@@ -2183,13 +2221,16 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2183,13 +2221,16 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
result = nvme_remap_bar(dev, size); result = nvme_remap_bar(dev, size);
if (!result) if (!result)
break; break;
if (!--nr_io_queues) if (!--nr_io_queues) {
return -ENOMEM; result = -ENOMEM;
goto out_unlock;
}
} while (1); } while (1);
adminq->q_db = dev->dbs; adminq->q_db = dev->dbs;
retry: retry:
/* Deregister the admin queue's interrupt */ /* Deregister the admin queue's interrupt */
if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
pci_free_irq(pdev, 0, adminq); pci_free_irq(pdev, 0, adminq);
/* /*
...@@ -2199,8 +2240,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2199,8 +2240,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
pci_free_irq_vectors(pdev); pci_free_irq_vectors(pdev);
result = nvme_setup_irqs(dev, nr_io_queues); result = nvme_setup_irqs(dev, nr_io_queues);
if (result <= 0) if (result <= 0) {
return -EIO; result = -EIO;
goto out_unlock;
}
dev->num_vecs = result; dev->num_vecs = result;
result = max(result - 1, 1); result = max(result - 1, 1);
...@@ -2214,8 +2257,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2214,8 +2257,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/ */
result = queue_request_irq(adminq); result = queue_request_irq(adminq);
if (result) if (result)
return result; goto out_unlock;
set_bit(NVMEQ_ENABLED, &adminq->flags); set_bit(NVMEQ_ENABLED, &adminq->flags);
mutex_unlock(&dev->shutdown_lock);
result = nvme_create_io_queues(dev); result = nvme_create_io_queues(dev);
if (result || dev->online_queues < 2) if (result || dev->online_queues < 2)
...@@ -2224,6 +2268,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2224,6 +2268,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (dev->online_queues - 1 < dev->max_qid) { if (dev->online_queues - 1 < dev->max_qid) {
nr_io_queues = dev->online_queues - 1; nr_io_queues = dev->online_queues - 1;
nvme_disable_io_queues(dev); nvme_disable_io_queues(dev);
result = nvme_setup_io_queues_trylock(dev);
if (result)
return result;
nvme_suspend_io_queues(dev); nvme_suspend_io_queues(dev);
goto retry; goto retry;
} }
...@@ -2232,6 +2279,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2232,6 +2279,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->io_queues[HCTX_TYPE_READ], dev->io_queues[HCTX_TYPE_READ],
dev->io_queues[HCTX_TYPE_POLL]); dev->io_queues[HCTX_TYPE_POLL]);
return 0; return 0;
out_unlock:
mutex_unlock(&dev->shutdown_lock);
return result;
} }
static void nvme_del_queue_end(struct request *req, blk_status_t error) static void nvme_del_queue_end(struct request *req, blk_status_t error)
...@@ -2962,7 +3012,6 @@ static void nvme_remove(struct pci_dev *pdev) ...@@ -2962,7 +3012,6 @@ static void nvme_remove(struct pci_dev *pdev)
if (!pci_device_is_present(pdev)) { if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
nvme_dev_disable(dev, true); nvme_dev_disable(dev, true);
nvme_dev_remove_admin(dev);
} }
flush_work(&dev->ctrl.reset_work); flush_work(&dev->ctrl.reset_work);
......
...@@ -123,7 +123,6 @@ struct nvme_tcp_ctrl { ...@@ -123,7 +123,6 @@ struct nvme_tcp_ctrl {
struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set admin_tag_set;
struct sockaddr_storage addr; struct sockaddr_storage addr;
struct sockaddr_storage src_addr; struct sockaddr_storage src_addr;
struct net_device *ndev;
struct nvme_ctrl ctrl; struct nvme_ctrl ctrl;
struct work_struct err_work; struct work_struct err_work;
...@@ -2533,8 +2532,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, ...@@ -2533,8 +2532,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
} }
if (opts->mask & NVMF_OPT_HOST_IFACE) { if (opts->mask & NVMF_OPT_HOST_IFACE) {
ctrl->ndev = dev_get_by_name(&init_net, opts->host_iface); if (!__dev_get_by_name(&init_net, opts->host_iface)) {
if (!ctrl->ndev) {
pr_err("invalid interface passed: %s\n", pr_err("invalid interface passed: %s\n",
opts->host_iface); opts->host_iface);
ret = -ENODEV; ret = -ENODEV;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment