Commit b7405176 authored by Jens Axboe

Merge branch 'nvme-4.18-2' of git://git.infradead.org/nvme into for-4.18/block

Pull NVMe changes from Christoph:

"Here is the current batch of nvme updates for 4.18, we have a few more
 patches in the queue, but I'd like to get this pile into your tree
 and linux-next ASAP.

 The biggest item is support for file-backed namespaces in the NVMe
 target from Chaitanya; in addition to that we mostly have small fixes from
 all the usual suspects."

* 'nvme-4.18-2' of git://git.infradead.org/nvme:
  nvme: fixup memory leak in nvme_init_identify()
  nvme: fix KASAN warning when parsing host nqn
  nvmet-loop: use nr_phys_segments when map rq to sgl
  nvmet-fc: increase LS buffer count per fc port
  nvmet: add simple file backed ns support
  nvmet: remove duplicate NULL initialization for req->ns
  nvmet: make a few error messages more generic
  nvme-fabrics: allow duplicate connections to the discovery controller
  nvme-fabrics: centralize discovery controller defaults
  nvme-fabrics: remove unnecessary controller subnqn validation
  nvme-fc: remove setting DNR on exception conditions
  nvme-rdma: stop admin queue before freeing it
  nvme-pci: Fix AER reset handling
  nvme-pci: set nvmeq->cq_vector after alloc cq/sq
  nvme: host: core: fix precedence of ternary operator
  nvme: fix lockdep warning in nvme_mpath_clear_current_path
parents 5afb7835 75c8b19a
......@@ -1578,7 +1578,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key,
static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
enum pr_type type, bool abort)
{
u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}
......@@ -1590,7 +1590,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key)
static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}
......@@ -2184,7 +2184,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
* Verify that the subsystem actually supports multiple
* controllers, else bail out.
*/
if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
if (!ctrl->opts->discovery_nqn &&
nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n",
found->subnqn);
......@@ -2315,7 +2316,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
ret = nvme_get_effects_log(ctrl);
if (ret < 0)
return ret;
goto out_free;
}
if (!ctrl->identified) {
......
......@@ -57,7 +57,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
goto out_unlock;
kref_init(&host->ref);
memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
......@@ -689,10 +689,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->discovery_nqn =
!(strcmp(opts->subsysnqn,
NVME_DISC_SUBSYS_NAME));
if (opts->discovery_nqn) {
opts->kato = 0;
opts->nr_io_queues = 0;
}
break;
case NVMF_OPT_TRADDR:
p = match_strdup(args);
......@@ -851,6 +847,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
}
if (opts->discovery_nqn) {
opts->kato = 0;
opts->nr_io_queues = 0;
opts->duplicate_connect = true;
}
if (ctrl_loss_tmo < 0)
opts->max_reconnects = -1;
else
......@@ -983,16 +984,6 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_module_put;
}
if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) {
dev_warn(ctrl->device,
"controller returned incorrect NQN: \"%s\".\n",
ctrl->subsys->subnqn);
module_put(ops->module);
up_read(&nvmf_transports_rwsem);
nvme_delete_ctrl_sync(ctrl);
return ERR_PTR(-EINVAL);
}
module_put(ops->module);
up_read(&nvmf_transports_rwsem);
return ctrl;
......
......@@ -1686,16 +1686,6 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
goto check_error;
}
/*
* Force failures of commands if we're killing the controller
* or have an error on a command used to create an new association
*/
if (status &&
(blk_queue_dying(rq->q) ||
ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_CONNECTING))
status |= cpu_to_le16(NVME_SC_DNR << 1);
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
nvme_end_request(rq, status, result);
......
......@@ -22,6 +22,7 @@
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
......@@ -449,7 +450,7 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
if (head && ns == srcu_dereference(head->current_path, &head->srcu))
if (head && ns == rcu_access_pointer(head->current_path))
rcu_assign_pointer(head->current_path, NULL);
}
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
......
......@@ -1076,7 +1076,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
}
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
struct nvme_queue *nvmeq)
struct nvme_queue *nvmeq, s16 vector)
{
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
......@@ -1091,7 +1091,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
c.create_cq.cqid = cpu_to_le16(qid);
c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
c.create_cq.cq_flags = cpu_to_le16(flags);
c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
c.create_cq.irq_vector = cpu_to_le16(vector);
return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
......@@ -1462,6 +1462,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
struct nvme_dev *dev = nvmeq->dev;
int result;
s16 vector;
if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
......@@ -1474,15 +1475,21 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
*/
nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid;
result = adapter_alloc_cq(dev, qid, nvmeq);
vector = dev->num_vecs == 1 ? 0 : qid;
result = adapter_alloc_cq(dev, qid, nvmeq, vector);
if (result < 0)
goto release_vector;
goto out;
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
goto release_cq;
/*
* Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will
* invoke free_irq for it and cause a 'Trying to free already-free IRQ
* xxx' warning if the create CQ/SQ command times out.
*/
nvmeq->cq_vector = vector;
nvme_init_queue(nvmeq, qid);
result = queue_request_irq(nvmeq);
if (result < 0)
......@@ -1490,13 +1497,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
release_sq:
release_sq:
nvmeq->cq_vector = -1;
dev->online_queues--;
adapter_delete_sq(dev, qid);
release_cq:
release_cq:
adapter_delete_cq(dev, qid);
release_vector:
nvmeq->cq_vector = -1;
out:
return result;
}
......@@ -2695,19 +2702,15 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
nvme_reset_ctrl_sync(&dev->ctrl);
switch (dev->ctrl.state) {
case NVME_CTRL_LIVE:
case NVME_CTRL_ADMIN_ONLY:
return PCI_ERS_RESULT_RECOVERED;
default:
return PCI_ERS_RESULT_DISCONNECT;
}
nvme_reset_ctrl(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
static void nvme_error_resume(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
flush_work(&dev->ctrl.reset_work);
pci_cleanup_aer_uncorrect_error_status(pdev);
}
......
......@@ -778,7 +778,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (error) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
goto out_cleanup_queue;
goto out_stop_queue;
}
ctrl->ctrl.sqsize =
......@@ -786,23 +786,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (error)
goto out_cleanup_queue;
goto out_stop_queue;
ctrl->ctrl.max_hw_sectors =
(ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
error = nvme_init_identify(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
goto out_stop_queue;
error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
&ctrl->async_event_sqe, sizeof(struct nvme_command),
DMA_TO_DEVICE);
if (error)
goto out_cleanup_queue;
goto out_stop_queue;
return 0;
out_stop_queue:
nvme_rdma_stop_queue(&ctrl->queues[0]);
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->ctrl.admin_q);
......
......@@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o
obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o
obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
discovery.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
nvme-loop-y += loop.o
nvmet-rdma-y += rdma.o
nvmet-fc-y += fc.o
......
......@@ -45,6 +45,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
return NVME_SC_INVALID_NS;
}
/* we don't have the right data for file backed ns */
if (!ns->bdev)
goto out;
host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
......@@ -54,6 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
put_unaligned_le64(host_writes, &slog->host_writes[0]);
put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
out:
nvmet_put_namespace(ns);
return NVME_SC_SUCCESS;
......@@ -71,6 +76,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
rcu_read_lock();
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
/* we don't have the right data for file backed ns */
if (!ns->bdev)
continue;
host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
data_units_read +=
part_stat_read(ns->bdev->bd_part, sectors[READ]);
......@@ -548,8 +556,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
struct nvme_command *cmd = req->cmd;
u16 ret;
req->ns = NULL;
ret = nvmet_check_ctrl_status(req, cmd);
if (unlikely(ret))
return ret;
......
......@@ -271,6 +271,12 @@ void nvmet_put_namespace(struct nvmet_ns *ns)
percpu_ref_put(&ns->ref);
}
/*
 * Drop whatever backing store the namespace holds.  Both helpers are
 * called unconditionally; each one only acts when its own handle
 * (ns->bdev or ns->file) is set.
 */
static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
......@@ -281,23 +287,16 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
if (ns->enabled)
goto out_unlock;
ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
NULL);
if (IS_ERR(ns->bdev)) {
pr_err("failed to open block device %s: (%ld)\n",
ns->device_path, PTR_ERR(ns->bdev));
ret = PTR_ERR(ns->bdev);
ns->bdev = NULL;
ret = nvmet_bdev_ns_enable(ns);
if (ret)
ret = nvmet_file_ns_enable(ns);
if (ret)
goto out_unlock;
}
ns->size = i_size_read(ns->bdev->bd_inode);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
0, GFP_KERNEL);
if (ret)
goto out_blkdev_put;
goto out_dev_put;
if (ns->nsid > subsys->max_nsid)
subsys->max_nsid = ns->nsid;
......@@ -328,9 +327,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
out_blkdev_put:
blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
ns->bdev = NULL;
out_dev_put:
nvmet_ns_dev_disable(ns);
goto out_unlock;
}
......@@ -366,8 +364,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
if (ns->bdev)
blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
nvmet_ns_dev_disable(ns);
out_unlock:
mutex_unlock(&subsys->lock);
}
......@@ -499,6 +496,25 @@ int nvmet_sq_init(struct nvmet_sq *sq)
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);
/*
 * Common front end for I/O command parsing: validate the controller
 * state, resolve the namespace addressed by the command, then hand the
 * request to the file or block device backend depending on which kind of
 * backing store the namespace has.
 *
 * Returns 0 on success or an NVMe status code on failure.
 */
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	u16 status = nvmet_check_ctrl_status(req, req->cmd);

	if (unlikely(status))
		return status;

	req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->rw.nsid);
	if (unlikely(!req->ns))
		return NVME_SC_INVALID_NS | NVME_SC_DNR;

	return req->ns->file ? nvmet_file_parse_io_cmd(req) :
			       nvmet_bdev_parse_io_cmd(req);
}
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
......@@ -710,15 +726,14 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
cmd->common.opcode, req->sq->qid);
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
cmd->common.opcode, req->sq->qid);
req->ns = NULL;
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
return 0;
......
......@@ -187,8 +187,6 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
req->ns = NULL;
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while not ready\n",
cmd->common.opcode);
......
......@@ -77,8 +77,6 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
req->ns = NULL;
switch (cmd->fabrics.fctype) {
case nvme_fabrics_type_property_set:
req->data_len = 0;
......@@ -242,8 +240,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
req->ns = NULL;
if (cmd->common.opcode != nvme_fabrics_command) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
......
......@@ -31,7 +31,7 @@
/* *************************** Data Structures/Defines ****************** */
#define NVMET_LS_CTX_COUNT 4
#define NVMET_LS_CTX_COUNT 256
/* for this implementation, assume small single frame rqst/rsp */
#define NVME_FC_MAX_LS_BUFFER_SIZE 2048
......
......@@ -16,6 +16,34 @@
#include <linux/module.h>
#include "nvmet.h"
/*
 * Try to open ns->device_path as a block device and, on success, record
 * its size and logical block size shift in the namespace.
 *
 * Returns 0 on success or a negative errno.  -ENOTBLK is returned without
 * logging; every other failure is logged.  ns->bdev is NULL on failure.
 */
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	long err;

	ns->bdev = blkdev_get_by_path(ns->device_path,
			FMODE_READ | FMODE_WRITE, NULL);
	if (!IS_ERR(ns->bdev)) {
		ns->size = i_size_read(ns->bdev->bd_inode);
		ns->blksize_shift =
			blksize_bits(bdev_logical_block_size(ns->bdev));
		return 0;
	}

	err = PTR_ERR(ns->bdev);
	ns->bdev = NULL;
	if (err != -ENOTBLK)
		pr_err("failed to open block device %s: (%ld)\n",
				ns->device_path, err);
	return err;
}
/*
 * Release the block device held by the namespace, if any, and clear the
 * pointer so a repeated call is a no-op.
 */
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (!ns->bdev)
		return;

	blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
	ns->bdev = NULL;
}
static void nvmet_bio_done(struct bio *bio)
{
struct nvmet_req *req = bio->bi_private;
......@@ -23,20 +51,14 @@ static void nvmet_bio_done(struct bio *bio)
nvmet_req_complete(req,
bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
if (bio != &req->inline_bio)
if (bio != &req->b.inline_bio)
bio_put(bio);
}
static inline u32 nvmet_rw_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
static void nvmet_execute_rw(struct nvmet_req *req)
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
struct bio *bio = &req->inline_bio;
struct bio *bio = &req->b.inline_bio;
struct scatterlist *sg;
sector_t sector;
blk_qc_t cookie;
......@@ -89,9 +111,9 @@ static void nvmet_execute_rw(struct nvmet_req *req)
blk_poll(bdev_get_queue(req->ns->bdev), cookie);
}
static void nvmet_execute_flush(struct nvmet_req *req)
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->inline_bio;
struct bio *bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
bio_set_dev(bio, req->ns->bdev);
......@@ -102,7 +124,7 @@ static void nvmet_execute_flush(struct nvmet_req *req)
submit_bio(bio);
}
static u16 nvmet_discard_range(struct nvmet_ns *ns,
static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
struct nvme_dsm_range *range, struct bio **bio)
{
int ret;
......@@ -116,7 +138,7 @@ static u16 nvmet_discard_range(struct nvmet_ns *ns,
return 0;
}
static void nvmet_execute_discard(struct nvmet_req *req)
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
struct nvme_dsm_range range;
struct bio *bio = NULL;
......@@ -129,7 +151,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
if (status)
break;
status = nvmet_discard_range(req->ns, &range, &bio);
status = nvmet_bdev_discard_range(req->ns, &range, &bio);
if (status)
break;
}
......@@ -148,11 +170,11 @@ static void nvmet_execute_discard(struct nvmet_req *req)
}
}
static void nvmet_execute_dsm(struct nvmet_req *req)
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
case NVME_DSMGMT_AD:
nvmet_execute_discard(req);
nvmet_bdev_execute_discard(req);
return;
case NVME_DSMGMT_IDR:
case NVME_DSMGMT_IDW:
......@@ -163,7 +185,7 @@ static void nvmet_execute_dsm(struct nvmet_req *req)
}
}
static void nvmet_execute_write_zeroes(struct nvmet_req *req)
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
struct bio *bio = NULL;
......@@ -189,38 +211,27 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
}
}
u16 nvmet_parse_io_cmd(struct nvmet_req *req)
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
u16 ret;
ret = nvmet_check_ctrl_status(req, cmd);
if (unlikely(ret)) {
req->ns = NULL;
return ret;
}
req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
if (unlikely(!req->ns))
return NVME_SC_INVALID_NS | NVME_SC_DNR;
switch (cmd->common.opcode) {
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_execute_rw;
req->execute = nvmet_bdev_execute_rw;
req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_execute_flush;
req->execute = nvmet_bdev_execute_flush;
req->data_len = 0;
return 0;
case nvme_cmd_dsm:
req->execute = nvmet_execute_dsm;
req->execute = nvmet_bdev_execute_dsm;
req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_execute_write_zeroes;
req->execute = nvmet_bdev_execute_write_zeroes;
return 0;
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
......
// SPDX-License-Identifier: GPL-2.0
/*
* NVMe Over Fabrics Target File I/O commands implementation.
* Copyright (c) 2017-2018 Western Digital Corporation or its
* affiliates.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/file.h>
#include "nvmet.h"
#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16
/*
 * Tear down the file backend of a namespace: destroy the bvec mempool and
 * its slab cache, then drop the file reference.  Does nothing when no
 * file is attached; all pointers are cleared afterwards.
 */
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
	if (!ns->file)
		return;

	/* the pool draws from the cache, so it has to go first */
	mempool_destroy(ns->bvec_pool);
	ns->bvec_pool = NULL;

	kmem_cache_destroy(ns->bvec_cache);
	ns->bvec_cache = NULL;

	fput(ns->file);
	ns->file = NULL;
}
/*
 * Open ns->device_path as a regular file (O_DIRECT) and set up the
 * namespace for file backed I/O: record size and block size shift, and
 * create the bio_vec slab cache plus the mempool used as an allocation
 * fallback by the read/write path.
 *
 * Returns 0 on success or a negative errno.  On failure the namespace is
 * left with ns->file == NULL and no cache/pool allocated.
 */
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
	struct kstat stat;
	int ret;

	ns->file = filp_open(ns->device_path,
			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
	if (IS_ERR(ns->file)) {
		/* report the error of the file open, not of ns->bdev */
		pr_err("failed to open file %s: (%ld)\n",
				ns->device_path, PTR_ERR(ns->file));
		ret = PTR_ERR(ns->file);
		/*
		 * ns->file is tested as a plain boolean elsewhere (I/O
		 * dispatch, disable); never leave an ERR_PTR behind.
		 */
		ns->file = NULL;
		return ret;
	}

	ret = vfs_getattr(&ns->file->f_path,
			&stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
	if (ret)
		goto err;

	ns->size = stat.size;
	ns->blksize_shift = file_inode(ns->file)->i_blkbits;

	ns->bvec_cache = kmem_cache_create("nvmet-bvec",
			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
			0, SLAB_HWCACHE_ALIGN, NULL);
	if (!ns->bvec_cache) {
		/* ret is still 0 from vfs_getattr(); make the failure visible */
		ret = -ENOMEM;
		goto err;
	}

	ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
			mempool_free_slab, ns->bvec_cache);
	if (!ns->bvec_pool) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;

err:
	ns->size = 0;
	ns->blksize_shift = 0;
	/* releases pool, cache and file; tolerates the ones not yet set up */
	nvmet_file_ns_disable(ns);
	return ret;
}
/*
 * Fill one bio_vec from the page the sg page iterator currently points
 * at.  Offset comes from the iterator's current scatterlist entry and the
 * length is the remainder of the page after that offset.
 */
static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
{
	const unsigned int sg_off = iter->sg->offset;

	bv->bv_page = sg_page_iter_page(iter);
	bv->bv_offset = sg_off;
	bv->bv_len = PAGE_SIZE - sg_off;
}
/*
 * Issue one direct I/O read or write on the namespace file using the
 * first @nr_segs entries of req->f.bvec, covering @count bytes starting
 * at file offset @pos.  Writes with the FUA bit set are issued with
 * IOCB_DSYNC.
 *
 * If the file operation did not return -EIOCBQUEUED and a completion
 * callback is installed in the kiocb, the callback is invoked here with
 * the result.  Returns whatever the read_iter/write_iter method returned.
 */
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
		unsigned long nr_segs, size_t count)
{
	struct file *file = req->ns->file;
	struct kiocb *kiocb = &req->f.iocb;
	const bool is_write = req->cmd->rw.opcode == nvme_cmd_write;
	ssize_t (*rw_iter)(struct kiocb *iocb, struct iov_iter *iter);
	struct iov_iter iter;
	int extra_flags = 0;
	ssize_t done;

	if (is_write && (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)))
		extra_flags = IOCB_DSYNC;
	rw_iter = is_write ? file->f_op->write_iter : file->f_op->read_iter;

	iov_iter_bvec(&iter, ITER_BVEC | (is_write ? WRITE : READ),
			req->f.bvec, nr_segs, count);

	kiocb->ki_pos = pos;
	kiocb->ki_filp = file;
	kiocb->ki_flags = IOCB_DIRECT | extra_flags;

	done = rw_iter(kiocb, &iter);
	if (done != -EIOCBQUEUED && kiocb->ki_complete)
		kiocb->ki_complete(kiocb, done, 0);

	return done;
}
static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
{
struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
if (req->f.bvec != req->inline_bvec) {
if (likely(req->f.mpool_alloc == false))
kfree(req->f.bvec);
else
mempool_free(req->f.bvec, req->ns->bvec_pool);
}
nvmet_req_complete(req, ret != req->data_len ?
NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}
/*
 * Execute a read or write against a file backed namespace.
 *
 * The request's scatterlist is converted page by page into a bio_vec
 * array which is then submitted through nvmet_file_submit_bvec().  The
 * array is the request's inline one when it is large enough, otherwise it
 * is allocated with kmalloc_array(); if that allocation fails the
 * namespace's mempool is used instead.  A mempool element only holds
 * NVMET_MAX_MPOOL_BVEC entries, so larger transfers are then issued in
 * several chunks ("sync" mode) and the request is completed from here
 * instead of from the kiocb completion callback.
 */
static void nvmet_file_execute_rw(struct nvmet_req *req)
{
/* number of PAGE_SIZE units needed to cover the transfer */
ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
struct sg_page_iter sg_pg_iter;
unsigned long bv_cnt = 0;
bool is_sync = false;
/* len: bytes in the current chunk; total_len: bytes seen overall */
size_t len = 0, total_len = 0;
ssize_t ret = 0;
loff_t pos;
/* nothing to transfer: complete successfully right away */
if (!req->sg_cnt || !nr_bvec) {
nvmet_req_complete(req, 0);
return;
}
if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
GFP_KERNEL);
else
req->f.bvec = req->inline_bvec;
req->f.mpool_alloc = false;
if (unlikely(!req->f.bvec)) {
/* fallback under memory pressure */
req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
req->f.mpool_alloc = true;
/* pool element is too small for the whole transfer: go chunked */
if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
is_sync = true;
}
/* starting byte offset in the file */
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
/* also leaves ki_complete NULL, so chunked submits do not complete the request */
memset(&req->f.iocb, 0, sizeof(struct kiocb));
/*
 * NOTE(review): bv_cnt is not checked against the size of the bvec
 * array; this appears to rely on the scatterlist yielding no more
 * pages than nr_bvec -- confirm against callers.
 */
for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
len += req->f.bvec[bv_cnt].bv_len;
total_len += req->f.bvec[bv_cnt].bv_len;
bv_cnt++;
WARN_ON_ONCE((nr_bvec - 1) < 0);
/*
 * Chunked mode: flush the array when this was the last expected
 * page or when the pool sized array is full, then start over.
 */
if (unlikely(is_sync) &&
(nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
if (ret < 0)
goto out;
pos += len;
bv_cnt = 0;
len = 0;
}
nr_bvec--;
}
/* the pages walked must add up to exactly the parsed data length */
if (WARN_ON_ONCE(total_len != req->data_len))
ret = -EIO;
out:
/*
 * Chunked mode or an error: finish the request here, passing either
 * the error or the total number of bytes processed.
 */
if (unlikely(is_sync || ret)) {
nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
return;
}
/* normal case: one submission, completed through the kiocb callback */
req->f.iocb.ki_complete = nvmet_file_io_done;
nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
}
static void nvmet_file_flush_work(struct work_struct *w)
{
struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
int ret;
ret = vfs_fsync(req->ns->file, 1);
nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}
/*
 * Execute handler for Flush on a file backed namespace: the actual fsync
 * runs from nvmet_file_flush_work(), queued here via schedule_work().
 */
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
	struct work_struct *work = &req->f.work;

	INIT_WORK(work, nvmet_file_flush_work);
	schedule_work(work);
}
static void nvmet_file_execute_discard(struct nvmet_req *req)
{
int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
struct nvme_dsm_range range;
loff_t offset;
loff_t len;
int i, ret;
for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
sizeof(range)))
break;
offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
ret = vfs_fallocate(req->ns->file, mode, offset, len);
if (ret)
break;
}
nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}
/*
 * Work item body for Dataset Management.  Only an attributes value of
 * exactly NVME_DSMGMT_AD (deallocate) is acted upon; every other value,
 * including the IDR/IDW hints, is not supported yet and is completed
 * successfully without doing anything.
 */
static void nvmet_file_dsm_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

	if (le32_to_cpu(req->cmd->dsm.attributes) == NVME_DSMGMT_AD) {
		nvmet_file_execute_discard(req);
		return;
	}

	/* Not supported yet */
	nvmet_req_complete(req, 0);
}
/*
 * Execute handler for Dataset Management on a file backed namespace: the
 * command is processed from nvmet_file_dsm_work(), queued here via
 * schedule_work().
 */
static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
	struct work_struct *work = &req->f.work;

	INIT_WORK(work, nvmet_file_dsm_work);
	schedule_work(work);
}
static void nvmet_file_write_zeroes_work(struct work_struct *w)
{
struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
loff_t offset;
loff_t len;
int ret;
offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
req->ns->blksize_shift);
ret = vfs_fallocate(req->ns->file, mode, offset, len);
nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}
/*
 * Execute handler for Write Zeroes on a file backed namespace: the
 * fallocate call runs from nvmet_file_write_zeroes_work(), queued here
 * via schedule_work().
 */
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
	struct work_struct *work = &req->f.work;

	INIT_WORK(work, nvmet_file_write_zeroes_work);
	schedule_work(work);
}
/*
 * Select the execute handler and expected data length for an I/O command
 * addressed to a file backed namespace.
 *
 * Returns 0 when the opcode is supported, otherwise logs the opcode and
 * returns NVME_SC_INVALID_OPCODE | NVME_SC_DNR.
 */
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_file_execute_rw;
		req->data_len = nvmet_rw_len(req);
		break;
	case nvme_cmd_flush:
		req->execute = nvmet_file_execute_flush;
		req->data_len = 0;
		break;
	case nvme_cmd_dsm:
		req->execute = nvmet_file_execute_dsm;
		/* one range descriptor per entry, dsm.nr is zero based */
		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
			sizeof(struct nvme_dsm_range);
		break;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_file_execute_write_zeroes;
		req->data_len = 0;
		break;
	default:
		pr_err("unhandled cmd for file ns %d on qid %d\n",
				cmd->common.opcode, req->sq->qid);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}

	return 0;
}
......@@ -174,7 +174,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
&queue->nvme_sq, &nvme_loop_ops))
return BLK_STS_OK;
if (blk_rq_payload_bytes(req)) {
if (blk_rq_nr_phys_segments(req)) {
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
......
......@@ -43,6 +43,7 @@ struct nvmet_ns {
struct list_head dev_link;
struct percpu_ref ref;
struct block_device *bdev;
struct file *file;
u32 nsid;
u32 blksize_shift;
loff_t size;
......@@ -57,6 +58,8 @@ struct nvmet_ns {
struct config_group group;
struct completion disable_done;
mempool_t *bvec_pool;
struct kmem_cache *bvec_cache;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
......@@ -222,8 +225,18 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
struct bio inline_bio;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
struct bio inline_bio;
} b;
struct {
bool mpool_alloc;
struct kiocb iocb;
struct bio_vec *bvec;
struct work_struct work;
} f;
};
int sg_cnt;
/* data length as parsed from the command: */
size_t data_len;
......@@ -263,7 +276,8 @@ struct nvmet_async_event {
};
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
u16 nvmet_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
......@@ -338,4 +352,14 @@ extern struct rw_semaphore nvmet_config_sem;
bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
const char *hostnqn);
int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
int nvmet_file_ns_enable(struct nvmet_ns *ns);
void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
void nvmet_file_ns_disable(struct nvmet_ns *ns);
static inline u32 nvmet_rw_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
#endif /* _NVMET_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment