Commit 5aadd5f9, authored by Arie Gershberg, committed by David S. Miller

nvme-tcp-offload: Add controller level implementation

In this patch we implement controller level functionality including:
- create_ctrl.
- delete_ctrl.
- free_ctrl.

The implementation is similar to other nvme fabrics modules, the main
difference being that the nvme-tcp-offload ULP calls the vendor specific
claim_dev() op with the given TCP/IP parameters to determine which device
will be used for this controller.
Once found, the vendor specific device and controller will be paired and
kept in a controller list managed by the ULP.
Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Arie Gershberg <agershberg@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4b8178ec
...@@ -12,6 +12,10 @@ ...@@ -12,6 +12,10 @@
static LIST_HEAD(nvme_tcp_ofld_devices); static LIST_HEAD(nvme_tcp_ofld_devices);
static DEFINE_MUTEX(nvme_tcp_ofld_devices_mutex); static DEFINE_MUTEX(nvme_tcp_ofld_devices_mutex);
/*
 * List of offload controllers known to this module. Every access in this
 * file (add, delete, lookup, module-exit walk) takes
 * nvme_tcp_ofld_ctrl_mutex around the list operation.
 */
static LIST_HEAD(nvme_tcp_ofld_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ofld_ctrl_mutex);
/*
 * Tentative definitions so the tag set setup code can take the address of
 * the blk-mq ops tables before their initializers appear further down.
 */
static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops;
static struct blk_mq_ops nvme_tcp_ofld_mq_ops;
static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl) static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)
{ {
...@@ -119,21 +123,439 @@ nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl) ...@@ -119,21 +123,439 @@ nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl)
return dev; return dev;
} }
/*
 * Initialise and allocate one of the controller's two blk-mq tag sets.
 *
 * @nctrl: generic NVMe controller embedded in the offload controller
 * @admin: true selects the admin tag set, false the I/O tag set
 *
 * The chosen tag set is zeroed, filled in and handed to
 * blk_mq_alloc_tag_set(). Returns the tag set on success, or an ERR_PTR()
 * carrying the error returned by blk_mq_alloc_tag_set().
 */
static struct blk_mq_tag_set *
nvme_tcp_ofld_alloc_tagset(struct nvme_ctrl *nctrl, bool admin)
{
	struct nvme_tcp_ofld_ctrl *ofld_ctrl = to_tcp_ofld_ctrl(nctrl);
	struct blk_mq_tag_set *tag_set;
	int err;

	tag_set = admin ? &ofld_ctrl->admin_tag_set : &ofld_ctrl->tag_set;
	memset(tag_set, 0, sizeof(*tag_set));

	/* Fields that are identical for the admin and the I/O tag set. */
	tag_set->reserved_tags = NVMF_RESERVED_TAGS;
	tag_set->numa_node = nctrl->numa_node;
	tag_set->cmd_size = sizeof(struct nvme_tcp_ofld_req);
	tag_set->driver_data = ofld_ctrl;

	if (admin) {
		/* Single hardware queue with the fixed admin queue depth. */
		tag_set->ops = &nvme_tcp_ofld_admin_mq_ops;
		tag_set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		tag_set->flags = BLK_MQ_F_BLOCKING;
		tag_set->nr_hw_queues = 1;
		tag_set->timeout = NVME_ADMIN_TIMEOUT;
	} else {
		/* queue_count includes the admin queue, hence the "- 1". */
		tag_set->ops = &nvme_tcp_ofld_mq_ops;
		tag_set->queue_depth = nctrl->sqsize + 1;
		tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
		tag_set->nr_hw_queues = nctrl->queue_count - 1;
		tag_set->timeout = NVME_IO_TIMEOUT;
		tag_set->nr_maps = nctrl->opts->nr_poll_queues ?
				   HCTX_MAX_TYPES : 2;
	}

	err = blk_mq_alloc_tag_set(tag_set);
	if (err)
		return ERR_PTR(err);

	return tag_set;
}
/*
 * Bring up the admin side of the controller.
 *
 * @nctrl: controller being set up
 * @new:   true on first-time creation; the admin tag set and the fabrics
 *         and admin request queues are then allocated here. When false,
 *         those objects are used as they already exist.
 *
 * After the (optional) allocations the controller is enabled, the admin
 * queue is unquiesced and nvme_init_ctrl_finish() is run.
 *
 * Returns 0 on success or a negative errno. On failure everything this
 * call allocated is released again, including the admin request queue.
 */
static int nvme_tcp_ofld_configure_admin_queue(struct nvme_ctrl *nctrl,
					       bool new)
{
	int rc;

	/* Placeholder - alloc_admin_queue */
	if (new) {
		nctrl->admin_tagset =
				nvme_tcp_ofld_alloc_tagset(nctrl, true);
		if (IS_ERR(nctrl->admin_tagset)) {
			rc = PTR_ERR(nctrl->admin_tagset);
			nctrl->admin_tagset = NULL;
			goto out_destroy_queue;
		}

		nctrl->fabrics_q = blk_mq_init_queue(nctrl->admin_tagset);
		if (IS_ERR(nctrl->fabrics_q)) {
			rc = PTR_ERR(nctrl->fabrics_q);
			nctrl->fabrics_q = NULL;
			goto out_free_tagset;
		}

		nctrl->admin_q = blk_mq_init_queue(nctrl->admin_tagset);
		if (IS_ERR(nctrl->admin_q)) {
			rc = PTR_ERR(nctrl->admin_q);
			nctrl->admin_q = NULL;
			goto out_cleanup_fabrics_q;
		}
	}

	/* Placeholder - nvme_tcp_ofld_start_queue */

	rc = nvme_enable_ctrl(nctrl);
	if (rc)
		goto out_stop_queue;

	blk_mq_unquiesce_queue(nctrl->admin_q);

	rc = nvme_init_ctrl_finish(nctrl);
	if (rc)
		goto out_quiesce_queue;

	return 0;

out_quiesce_queue:
	blk_mq_quiesce_queue(nctrl->admin_q);
	blk_sync_queue(nctrl->admin_q);
out_stop_queue:
	/* Placeholder - stop offload queue */
	nvme_cancel_admin_tagset(nctrl);
	/*
	 * admin_q was successfully created above when 'new' is set; it must
	 * be cleaned up before the tag set it was created from is freed.
	 */
	if (new)
		blk_cleanup_queue(nctrl->admin_q);
out_cleanup_fabrics_q:
	if (new)
		blk_cleanup_queue(nctrl->fabrics_q);
out_free_tagset:
	if (new)
		blk_mq_free_tag_set(nctrl->admin_tagset);
out_destroy_queue:
	/* Placeholder - free admin queue */
	return rc;
}
/*
 * Bring up the I/O side of the controller.
 *
 * @nctrl: controller being set up
 * @new:   true on first-time creation; the I/O tag set and the connect
 *         queue are then allocated here. When false, the existing queues
 *         are restarted, the freeze is waited for, and the number of
 *         hardware queues is updated to queue_count - 1.
 *
 * Returns 0 on success or a negative errno; -ENODEV if waiting for the
 * freeze times out.
 */
static int
nvme_tcp_ofld_configure_io_queues(struct nvme_ctrl *nctrl, bool new)
{
	int err;

	/* Placeholder - alloc_io_queues */

	if (new) {
		nctrl->tagset = nvme_tcp_ofld_alloc_tagset(nctrl, false);
		if (IS_ERR(nctrl->tagset)) {
			err = PTR_ERR(nctrl->tagset);
			nctrl->tagset = NULL;
			goto out_free_io_queues;
		}

		nctrl->connect_q = blk_mq_init_queue(nctrl->tagset);
		if (IS_ERR(nctrl->connect_q)) {
			err = PTR_ERR(nctrl->connect_q);
			nctrl->connect_q = NULL;
			goto out_free_tag_set;
		}
	}

	/* Placeholder - start_io_queues */

	/* A freshly created controller has nothing further to do. */
	if (new)
		return 0;

	nvme_start_queues(nctrl);
	if (!nvme_wait_freeze_timeout(nctrl, NVME_IO_TIMEOUT)) {
		/*
		 * If we timed out waiting for freeze we are likely to
		 * be stuck. Fail the controller initialization just
		 * to be safe.
		 */
		err = -ENODEV;
		goto out_wait_freeze_timed_out;
	}
	blk_mq_update_nr_hw_queues(nctrl->tagset, nctrl->queue_count - 1);
	nvme_unfreeze(nctrl);

	return 0;

out_wait_freeze_timed_out:
	nvme_stop_queues(nctrl);
	nvme_sync_io_queues(nctrl);

	/* Placeholder - Stop IO queues */

	if (new)
		blk_cleanup_queue(nctrl->connect_q);
out_free_tag_set:
	if (new)
		blk_mq_free_tag_set(nctrl->tagset);
out_free_io_queues:
	/* Placeholder - free_io_queues */
	return err;
}
/*
 * Run the full controller bring-up sequence.
 *
 * @nctrl: controller to set up
 * @new:   true when called for a newly created controller
 *
 * Calls the vendor setup_ctrl() op, configures the admin queue, validates
 * the icdoff and sgls values of the controller, clamps sqsize to maxcmd,
 * configures the I/O queues when queue_count > 1, moves the controller to
 * NVME_CTRL_LIVE and starts it.
 *
 * Returns 0 on success or a negative errno. Once the vendor setup_ctrl()
 * op has succeeded, every failure path calls the vendor release_ctrl() op.
 */
static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
{
	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
	struct nvmf_ctrl_options *opts = nctrl->opts;
	int rc;

	rc = ctrl->dev->ops->setup_ctrl(ctrl);
	if (rc)
		return rc;

	rc = nvme_tcp_ofld_configure_admin_queue(nctrl, new);
	if (rc)
		goto out_release_ctrl;

	if (nctrl->icdoff) {
		dev_err(nctrl->device, "icdoff is not supported!\n");
		rc = -EINVAL;
		goto destroy_admin;
	}

	if (!(nctrl->sgls & ((1 << 0) | (1 << 1)))) {
		dev_err(nctrl->device, "Mandatory sgls are not supported!\n");
		/*
		 * rc is still 0 from the successful admin queue setup; it
		 * must be set here or the caller would see success for a
		 * controller that has just been released.
		 */
		rc = -EINVAL;
		goto destroy_admin;
	}

	/* Only a warning is emitted here; opts->queue_size is not modified. */
	if (opts->queue_size > nctrl->sqsize + 1)
		dev_warn(nctrl->device,
			 "queue_size %zu > ctrl sqsize %u, clamping down\n",
			 opts->queue_size, nctrl->sqsize + 1);

	if (nctrl->sqsize + 1 > nctrl->maxcmd) {
		dev_warn(nctrl->device,
			 "sqsize %u > ctrl maxcmd %u, clamping down\n",
			 nctrl->sqsize + 1, nctrl->maxcmd);
		nctrl->sqsize = nctrl->maxcmd - 1;
	}

	/* queue_count == 1 means admin queue only; skip the I/O queues. */
	if (nctrl->queue_count > 1) {
		rc = nvme_tcp_ofld_configure_io_queues(nctrl, new);
		if (rc)
			goto destroy_admin;
	}

	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_LIVE)) {
		/*
		 * state change failure is ok if we started ctrl delete,
		 * unless we're during creation of a new controller to
		 * avoid races with teardown flow.
		 */
		WARN_ON_ONCE(nctrl->state != NVME_CTRL_DELETING &&
			     nctrl->state != NVME_CTRL_DELETING_NOIO);
		WARN_ON_ONCE(new);
		rc = -EINVAL;
		goto destroy_io;
	}

	nvme_start_ctrl(nctrl);

	return 0;

destroy_io:
	/* Placeholder - stop and destroy io queues*/
destroy_admin:
	/* Placeholder - stop and destroy admin queue*/
out_release_ctrl:
	ctrl->dev->ops->release_ctrl(ctrl);

	return rc;
}
static int
nvme_tcp_ofld_check_dev_opts(struct nvmf_ctrl_options *opts,
struct nvme_tcp_ofld_ops *ofld_ops)
{
unsigned int nvme_tcp_ofld_opt_mask = NVMF_ALLOWED_OPTS |
ofld_ops->allowed_opts | ofld_ops->required_opts;
struct nvmf_ctrl_options dev_opts_mask;
if (opts->mask & ~nvme_tcp_ofld_opt_mask) {
pr_warn("One or more nvmf options missing from ofld drvr %s.\n",
ofld_ops->name);
dev_opts_mask.mask = nvme_tcp_ofld_opt_mask;
return nvmf_check_required_opts(&dev_opts_mask, opts->mask);
}
return 0;
}
/*
 * free_ctrl callback: release everything owned by an offload controller.
 *
 * A controller that is linked on the module's controller list is first
 * released through the vendor release_ctrl() op, unlinked under
 * nvme_tcp_ofld_ctrl_mutex, and has its options freed. In all cases the
 * vendor module reference is dropped and the queue array and controller
 * memory are freed.
 */
static void nvme_tcp_ofld_free_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_tcp_ofld_ctrl *ofld_ctrl = to_tcp_ofld_ctrl(nctrl);
	struct nvme_tcp_ofld_dev *ofld_dev = ofld_ctrl->dev;

	if (!list_empty(&ofld_ctrl->list)) {
		ofld_dev->ops->release_ctrl(ofld_ctrl);

		mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
		list_del(&ofld_ctrl->list);
		mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);

		nvmf_free_options(nctrl->opts);
	}

	module_put(ofld_dev->ops->module);
	kfree(ofld_ctrl->queues);
	kfree(ofld_ctrl);
}
/*
 * Tear down the admin queue of a controller.
 *
 * Currently an empty stub: neither argument is used and nothing is torn
 * down yet.
 */
static void
nvme_tcp_ofld_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove)
{
/* Placeholder - teardown_admin_queue */
}
/*
 * Tear down the I/O queues of a controller.
 *
 * Currently an empty stub: neither argument is used and nothing is torn
 * down yet.
 */
static void
nvme_tcp_ofld_teardown_io_queues(struct nvme_ctrl *nctrl, bool remove)
{
/* Placeholder - teardown_io_queues */
}
/*
 * Tear a controller down in order: I/O queues first, then quiesce the
 * admin queue, shut down or disable the controller, and finally tear down
 * the admin queue.
 *
 * @nctrl:    controller to tear down
 * @shutdown: true selects nvme_shutdown_ctrl(), false nvme_disable_ctrl();
 *            the flag is also forwarded to both queue teardown helpers.
 */
static void
nvme_tcp_ofld_teardown_ctrl(struct nvme_ctrl *nctrl, bool shutdown)
{
	/* Placeholder - err_work and connect_work */

	nvme_tcp_ofld_teardown_io_queues(nctrl, shutdown);
	blk_mq_quiesce_queue(nctrl->admin_q);

	if (!shutdown)
		nvme_disable_ctrl(nctrl);
	else
		nvme_shutdown_ctrl(nctrl);

	nvme_tcp_ofld_teardown_admin_queue(nctrl, shutdown);
}
/*
 * delete_ctrl callback: tear the controller down with shutdown == true,
 * i.e. via the nvme_shutdown_ctrl() path of nvme_tcp_ofld_teardown_ctrl().
 */
static void nvme_tcp_ofld_delete_ctrl(struct nvme_ctrl *nctrl)
{
nvme_tcp_ofld_teardown_ctrl(nctrl, true);
}
/*
 * blk-mq init_request callback shared by the admin and I/O tag sets.
 *
 * Sets the 'done' callback of the per-request nvme_tcp_ofld_req (the
 * request's PDU area) to nvme_tcp_ofld_req_done. The set, hctx_idx and
 * numa_node arguments are not used yet. Always returns 0.
 */
static int
nvme_tcp_ofld_init_request(struct blk_mq_tag_set *set,
struct request *rq,
unsigned int hctx_idx,
unsigned int numa_node)
{
struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
/* Placeholder - init request */
req->done = nvme_tcp_ofld_req_done;
return 0;
}
/*
 * blk-mq queue_rq callback shared by the admin and I/O tag sets.
 *
 * Currently a stub: it ignores both arguments and returns BLK_STS_OK
 * without setting up or sending any command.
 * NOTE(review): the request is neither started nor completed here -
 * confirm this is never reached before the real implementation lands.
 */
static blk_status_t
nvme_tcp_ofld_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
/* Call nvme_setup_cmd(...) */
/* Call ops->send_req(...) */
return BLK_STS_OK;
}
/*
 * blk-mq operations installed on the I/O tag set. Only queue_rq and
 * init_request are provided so far.
 */
static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
.queue_rq = nvme_tcp_ofld_queue_rq,
.init_request = nvme_tcp_ofld_init_request,
/*
* All additional ops will be also implemented and registered similar to
* tcp.c
*/
};
/*
 * blk-mq operations installed on the admin tag set. At this point the
 * table has the same two callbacks as the I/O ops table.
 */
static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops = {
.queue_rq = nvme_tcp_ofld_queue_rq,
.init_request = nvme_tcp_ofld_init_request,
/*
* All additional ops will be also implemented and registered similar to
* tcp.c
*/
};
/*
 * Controller operations passed to nvme_init_ctrl() for offload
 * controllers. Register access and address formatting use the generic
 * nvmf_* fabrics helpers; free_ctrl and delete_ctrl are implemented in
 * this file.
 */
static const struct nvme_ctrl_ops nvme_tcp_ofld_ctrl_ops = {
.name = "tcp_offload",
.module = THIS_MODULE,
.flags = NVME_F_FABRICS,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
.free_ctrl = nvme_tcp_ofld_free_ctrl,
.delete_ctrl = nvme_tcp_ofld_delete_ctrl,
.get_address = nvmf_get_address,
};
/*
 * Report whether any controller on the module's controller list matches
 * @opts according to nvmf_ip_options_match(). The list is walked with
 * nvme_tcp_ofld_ctrl_mutex held and the walk stops at the first match.
 */
static bool
nvme_tcp_ofld_existing_controller(struct nvmf_ctrl_options *opts)
{
	struct nvme_tcp_ofld_ctrl *iter;
	bool match = false;

	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
	list_for_each_entry(iter, &nvme_tcp_ofld_ctrl_list, list) {
		if (nvmf_ip_options_match(&iter->nctrl, opts)) {
			match = true;
			break;
		}
	}
	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);

	return match;
}
static struct nvme_ctrl * static struct nvme_ctrl *
nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts) nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
{ {
struct nvme_tcp_ofld_queue *queue;
struct nvme_tcp_ofld_ctrl *ctrl; struct nvme_tcp_ofld_ctrl *ctrl;
struct nvme_tcp_ofld_dev *dev; struct nvme_tcp_ofld_dev *dev;
struct nvme_ctrl *nctrl; struct nvme_ctrl *nctrl;
int rc = 0; int i, rc = 0;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl) if (!ctrl)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&ctrl->list);
nctrl = &ctrl->nctrl; nctrl = &ctrl->nctrl;
nctrl->opts = opts;
nctrl->queue_count = opts->nr_io_queues + opts->nr_write_queues +
opts->nr_poll_queues + 1;
nctrl->sqsize = opts->queue_size - 1;
nctrl->kato = opts->kato;
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
opts->trsvcid =
kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
if (!opts->trsvcid) {
rc = -ENOMEM;
goto out_free_ctrl;
}
opts->mask |= NVMF_OPT_TRSVCID;
}
rc = inet_pton_with_scope(&init_net, AF_UNSPEC, opts->traddr,
opts->trsvcid,
&ctrl->conn_params.remote_ip_addr);
if (rc) {
pr_err("malformed address passed: %s:%s\n",
opts->traddr, opts->trsvcid);
goto out_free_ctrl;
}
if (opts->mask & NVMF_OPT_HOST_TRADDR) {
rc = inet_pton_with_scope(&init_net, AF_UNSPEC,
opts->host_traddr, NULL,
&ctrl->conn_params.local_ip_addr);
if (rc) {
pr_err("malformed src address passed: %s\n",
opts->host_traddr);
goto out_free_ctrl;
}
}
/* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */ if (!opts->duplicate_connect &&
nvme_tcp_ofld_existing_controller(opts)) {
rc = -EALREADY;
goto out_free_ctrl;
}
/* Find device that can reach the dest addr */ /* Find device that can reach the dest addr */
dev = nvme_tcp_ofld_lookup_dev(ctrl); dev = nvme_tcp_ofld_lookup_dev(ctrl);
...@@ -151,6 +573,10 @@ nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts) ...@@ -151,6 +573,10 @@ nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
goto out_free_ctrl; goto out_free_ctrl;
} }
rc = nvme_tcp_ofld_check_dev_opts(opts, dev->ops);
if (rc)
goto out_module_put;
ctrl->dev = dev; ctrl->dev = dev;
if (ctrl->dev->ops->max_hw_sectors) if (ctrl->dev->ops->max_hw_sectors)
...@@ -158,14 +584,51 @@ nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts) ...@@ -158,14 +584,51 @@ nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
if (ctrl->dev->ops->max_segments) if (ctrl->dev->ops->max_segments)
nctrl->max_segments = ctrl->dev->ops->max_segments; nctrl->max_segments = ctrl->dev->ops->max_segments;
/* Init queues */ ctrl->queues = kcalloc(nctrl->queue_count,
sizeof(struct nvme_tcp_ofld_queue),
GFP_KERNEL);
if (!ctrl->queues) {
rc = -ENOMEM;
goto out_module_put;
}
/* Call nvme_init_ctrl */ for (i = 0; i < nctrl->queue_count; ++i) {
queue = &ctrl->queues[i];
queue->ctrl = ctrl;
queue->dev = dev;
queue->report_err = nvme_tcp_ofld_report_queue_err;
}
rc = nvme_init_ctrl(nctrl, ndev, &nvme_tcp_ofld_ctrl_ops, 0);
if (rc)
goto out_free_queues;
if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_CONNECTING)) {
WARN_ON_ONCE(1);
rc = -EINTR;
goto out_uninit_ctrl;
}
rc = nvme_tcp_ofld_setup_ctrl(nctrl, true);
if (rc)
goto out_uninit_ctrl;
/* Setup ctrl */ dev_info(nctrl->device, "new ctrl: NQN \"%s\", addr %pISp\n",
opts->subsysnqn, &ctrl->conn_params.remote_ip_addr);
mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ofld_ctrl_list);
mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
return nctrl; return nctrl;
out_uninit_ctrl:
nvme_uninit_ctrl(nctrl);
nvme_put_ctrl(nctrl);
out_free_queues:
kfree(ctrl->queues);
out_module_put:
module_put(dev->ops->module);
out_free_ctrl: out_free_ctrl:
kfree(ctrl); kfree(ctrl);
...@@ -193,7 +656,15 @@ static int __init nvme_tcp_ofld_init_module(void) ...@@ -193,7 +656,15 @@ static int __init nvme_tcp_ofld_init_module(void)
/*
 * Module exit: unregister the fabrics transport, request deletion of
 * every controller still on the module's list (under
 * nvme_tcp_ofld_ctrl_mutex), then flush nvme_delete_wq.
 */
static void __exit nvme_tcp_ofld_cleanup_module(void)
{
	struct nvme_tcp_ofld_ctrl *ofld_ctrl;

	nvmf_unregister_transport(&nvme_tcp_ofld_transport);

	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
	list_for_each_entry(ofld_ctrl, &nvme_tcp_ofld_ctrl_list, list) {
		nvme_delete_ctrl(&ofld_ctrl->nctrl);
	}
	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);

	/* presumably waits for the deletions requested above - confirm */
	flush_workqueue(nvme_delete_wq);
}
module_init(nvme_tcp_ofld_init_module); module_init(nvme_tcp_ofld_init_module);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment