Commit 1caddfc5 authored by Martin K. Petersen's avatar Martin K. Petersen

Merge patch series "scsi: target: Allow userspace to config cmd submission"

Mike Christie <michael.christie@oracle.com> says:

The following patches were made over Linus's tree but apply over
Martin's branches. They allow userspace to configure how fabric
drivers submit cmds to backend drivers.

Right now loop and vhost use a worker thread, and the other drivers
submit from the contexts they receive/process the cmd from. For
multiple LUN cases where the target can queue more cmds than the
backend can handle then deferring to a worker thread is safest because
the backend driver can block when doing things like waiting for a free
request/tag. Deferring also helps when the target has to handle
transport level requests from the recv context.

For cases where the backend devices can queue everything the target
sends, then there is no need to defer to a workqueue and you can see a
perf boost of up to 26% for small IO workloads. For a nvme device and
vhost-scsi I can see with 4K IOs:

fio jobs        1       2       4       8       10
--------------------------------------------------
workqueue
submit        94K     190K    394K    770K    890K

direct
submit       128K     252K    488K    950K    -

Link: https://lore.kernel.org/r/1b1f7a5c-0988-45f9-b103-dfed2c0405b1@oracle.comSigned-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parents 9f4c887f 6dbc829d
......@@ -3867,6 +3867,9 @@ static const struct target_core_fabric_ops srpt_template = {
.tfc_discovery_attrs = srpt_da_attrs,
.tfc_wwn_attrs = srpt_wwn_attrs,
.tfc_tpg_attrib_attrs = srpt_tpg_attrib_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
/**
......
......@@ -1611,6 +1611,8 @@ static const struct target_core_fabric_ops efct_lio_ops = {
.sess_get_initiator_sid = NULL,
.tfc_tpg_base_attrs = efct_lio_tpg_attrs,
.tfc_tpg_attrib_attrs = efct_lio_tpg_attrib_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
static const struct target_core_fabric_ops efct_lio_npiv_ops = {
......@@ -1646,6 +1648,9 @@ static const struct target_core_fabric_ops efct_lio_npiv_ops = {
.sess_get_initiator_sid = NULL,
.tfc_tpg_base_attrs = efct_lio_npiv_tpg_attrs,
.tfc_tpg_attrib_attrs = efct_lio_npiv_tpg_attrib_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
int efct_scsi_tgt_driver_init(void)
......
......@@ -3975,6 +3975,9 @@ static const struct target_core_fabric_ops ibmvscsis_ops = {
.fabric_drop_tpg = ibmvscsis_drop_tpg,
.tfc_wwn_attrs = ibmvscsis_wwn_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
static void ibmvscsis_dev_release(struct device *dev) {};
......
......@@ -1822,6 +1822,9 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = {
.tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs,
.tfc_tpg_base_attrs = tcm_qla2xxx_tpg_attrs,
.tfc_tpg_attrib_attrs = tcm_qla2xxx_tpg_attrib_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
......@@ -1859,6 +1862,9 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
.fabric_init_nodeacl = tcm_qla2xxx_init_nodeacl,
.tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
static int tcm_qla2xxx_register_configfs(void)
......
......@@ -1234,12 +1234,6 @@ int iscsit_setup_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
spin_lock_bh(&conn->cmd_lock);
list_add_tail(&cmd->i_conn_node, &conn->conn_cmd_list);
spin_unlock_bh(&conn->cmd_lock);
/*
* Check if we need to delay processing because of ALUA
* Active/NonOptimized primary access state..
*/
core_alua_check_nonop_delay(&cmd->se_cmd);
return 0;
}
EXPORT_SYMBOL(iscsit_setup_scsi_cmd);
......
......@@ -1589,5 +1589,8 @@ const struct target_core_fabric_ops iscsi_ops = {
.tfc_tpg_nacl_auth_attrs = lio_target_nacl_auth_attrs,
.tfc_tpg_nacl_param_attrs = lio_target_nacl_param_attrs,
.write_pending_must_be_called = true,
.write_pending_must_be_called = 1,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
......@@ -948,7 +948,7 @@ int iscsit_execute_cmd(struct iscsit_cmd *cmd, int ooo)
iscsit_set_unsolicited_dataout(cmd);
}
return transport_handle_cdb_direct(&cmd->se_cmd);
return target_submit(&cmd->se_cmd);
case ISCSI_OP_NOOP_OUT:
case ISCSI_OP_TEXT:
......
......@@ -318,7 +318,7 @@ static int iscsit_task_reassign_complete_read(
pr_debug("READ ITT: 0x%08x: t_state: %d never sent to"
" transport\n", cmd->init_task_tag,
cmd->se_cmd.t_state);
transport_handle_cdb_direct(se_cmd);
target_submit(se_cmd);
return 0;
}
......
......@@ -154,7 +154,7 @@ static void tcm_loop_target_queue_cmd(struct tcm_loop_cmd *tl_cmd)
GFP_ATOMIC))
return;
target_queue_submission(se_cmd);
target_submit(se_cmd);
return;
out_done:
......@@ -1102,6 +1102,8 @@ static const struct target_core_fabric_ops loop_ops = {
.tfc_wwn_attrs = tcm_loop_wwn_attrs,
.tfc_tpg_base_attrs = tcm_loop_tpg_attrs,
.tfc_tpg_attrib_attrs = tcm_loop_tpg_attrib_attrs,
.default_submit_type = TARGET_QUEUE_SUBMIT,
.direct_submit_supp = 0,
};
static int __init tcm_loop_fabric_init(void)
......
......@@ -2278,6 +2278,9 @@ static const struct target_core_fabric_ops sbp_ops = {
.tfc_wwn_attrs = sbp_wwn_attrs,
.tfc_tpg_base_attrs = sbp_tpg_base_attrs,
.tfc_tpg_attrib_attrs = sbp_tpg_attrib_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
static int __init sbp_init(void)
......
......@@ -850,7 +850,6 @@ int core_alua_check_nonop_delay(
msleep_interruptible(cmd->alua_nonop_delay);
return 0;
}
EXPORT_SYMBOL(core_alua_check_nonop_delay);
static int core_alua_write_tpg_metadata(
const char *path,
......
......@@ -577,6 +577,7 @@ DEF_CONFIGFS_ATTRIB_SHOW(unmap_granularity_alignment);
DEF_CONFIGFS_ATTRIB_SHOW(unmap_zeroes_data);
DEF_CONFIGFS_ATTRIB_SHOW(max_write_same_len);
DEF_CONFIGFS_ATTRIB_SHOW(emulate_rsoc);
DEF_CONFIGFS_ATTRIB_SHOW(submit_type);
#define DEF_CONFIGFS_ATTRIB_STORE_U32(_name) \
static ssize_t _name##_store(struct config_item *item, const char *page,\
......@@ -1231,6 +1232,24 @@ static ssize_t emulate_rsoc_store(struct config_item *item,
return count;
}
static ssize_t submit_type_store(struct config_item *item, const char *page,
size_t count)
{
struct se_dev_attrib *da = to_attrib(item);
int ret;
u8 val;
ret = kstrtou8(page, 0, &val);
if (ret < 0)
return ret;
if (val > TARGET_QUEUE_SUBMIT)
return -EINVAL;
da->submit_type = val;
return count;
}
CONFIGFS_ATTR(, emulate_model_alias);
CONFIGFS_ATTR(, emulate_dpo);
CONFIGFS_ATTR(, emulate_fua_write);
......@@ -1266,6 +1285,7 @@ CONFIGFS_ATTR(, unmap_zeroes_data);
CONFIGFS_ATTR(, max_write_same_len);
CONFIGFS_ATTR(, alua_support);
CONFIGFS_ATTR(, pgr_support);
CONFIGFS_ATTR(, submit_type);
/*
* dev_attrib attributes for devices using the target core SBC/SPC
......@@ -1308,6 +1328,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
&attr_alua_support,
&attr_pgr_support,
&attr_emulate_rsoc,
&attr_submit_type,
NULL,
};
EXPORT_SYMBOL(sbc_attrib_attrs);
......@@ -1325,6 +1346,7 @@ struct configfs_attribute *passthrough_attrib_attrs[] = {
&attr_emulate_pr,
&attr_alua_support,
&attr_pgr_support,
&attr_submit_type,
NULL,
};
EXPORT_SYMBOL(passthrough_attrib_attrs);
......
......@@ -779,6 +779,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
dev->dev_attrib.unmap_zeroes_data =
DA_UNMAP_ZEROES_DATA_DEFAULT;
dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN;
dev->dev_attrib.submit_type = TARGET_FABRIC_DEFAULT_SUBMIT;
xcopy_lun = &dev->xcopy_lun;
rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
......
......@@ -1065,8 +1065,32 @@ target_fabric_wwn_cmd_completion_affinity_store(struct config_item *item,
}
CONFIGFS_ATTR(target_fabric_wwn_, cmd_completion_affinity);
static ssize_t
target_fabric_wwn_default_submit_type_show(struct config_item *item,
char *page)
{
struct se_wwn *wwn = container_of(to_config_group(item), struct se_wwn,
param_group);
return sysfs_emit(page, "%u\n",
wwn->wwn_tf->tf_ops->default_submit_type);
}
CONFIGFS_ATTR_RO(target_fabric_wwn_, default_submit_type);
static ssize_t
target_fabric_wwn_direct_submit_supported_show(struct config_item *item,
char *page)
{
struct se_wwn *wwn = container_of(to_config_group(item), struct se_wwn,
param_group);
return sysfs_emit(page, "%u\n",
wwn->wwn_tf->tf_ops->direct_submit_supp);
}
CONFIGFS_ATTR_RO(target_fabric_wwn_, direct_submit_supported);
static struct configfs_attribute *target_fabric_wwn_param_attrs[] = {
&target_fabric_wwn_attr_cmd_completion_affinity,
&target_fabric_wwn_attr_default_submit_type,
&target_fabric_wwn_attr_direct_submit_supported,
NULL,
};
......
......@@ -1576,17 +1576,39 @@ target_cmd_parse_cdb(struct se_cmd *cmd)
}
EXPORT_SYMBOL(target_cmd_parse_cdb);
/*
* Used by fabric module frontends to queue tasks directly.
* May only be used from process context.
*/
int transport_handle_cdb_direct(
struct se_cmd *cmd)
static int __target_submit(struct se_cmd *cmd)
{
sense_reason_t ret;
might_sleep();
/*
* Check if we need to delay processing because of ALUA
* Active/NonOptimized primary access state..
*/
core_alua_check_nonop_delay(cmd);
if (cmd->t_data_nents != 0) {
/*
* This is primarily a hack for udev and tcm loop which sends
* INQUIRYs with a single page and expects the data to be
* cleared.
*/
if (!(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) &&
cmd->data_direction == DMA_FROM_DEVICE) {
struct scatterlist *sgl = cmd->t_data_sg;
unsigned char *buf = NULL;
BUG_ON(!sgl);
buf = kmap_local_page(sg_page(sgl));
if (buf) {
memset(buf + sgl->offset, 0, sgl->length);
kunmap_local(buf);
}
}
}
if (!cmd->se_lun) {
dump_stack();
pr_err("cmd->se_lun is NULL\n");
......@@ -1614,7 +1636,6 @@ int transport_handle_cdb_direct(
transport_generic_request_failure(cmd, ret);
return 0;
}
EXPORT_SYMBOL(transport_handle_cdb_direct);
sense_reason_t
transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
......@@ -1781,53 +1802,6 @@ int target_submit_prep(struct se_cmd *se_cmd, unsigned char *cdb,
}
EXPORT_SYMBOL_GPL(target_submit_prep);
/**
* target_submit - perform final initialization and submit cmd to LIO core
* @se_cmd: command descriptor to submit
*
* target_submit_prep must have been called on the cmd, and this must be
* called from process context.
*/
void target_submit(struct se_cmd *se_cmd)
{
struct scatterlist *sgl = se_cmd->t_data_sg;
unsigned char *buf = NULL;
might_sleep();
if (se_cmd->t_data_nents != 0) {
BUG_ON(!sgl);
/*
* A work-around for tcm_loop as some userspace code via
* scsi-generic do not memset their associated read buffers,
* so go ahead and do that here for type non-data CDBs. Also
* note that this is currently guaranteed to be a single SGL
* for this case by target core in target_setup_cmd_from_cdb()
* -> transport_generic_cmd_sequencer().
*/
if (!(se_cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) &&
se_cmd->data_direction == DMA_FROM_DEVICE) {
if (sgl)
buf = kmap(sg_page(sgl)) + sgl->offset;
if (buf) {
memset(buf, 0, sgl->length);
kunmap(sg_page(sgl));
}
}
}
/*
* Check if we need to delay processing because of ALUA
* Active/NonOptimized primary access state..
*/
core_alua_check_nonop_delay(se_cmd);
transport_handle_cdb_direct(se_cmd);
}
EXPORT_SYMBOL_GPL(target_submit);
/**
* target_submit_cmd - lookup unpacked lun and submit uninitialized se_cmd
*
......@@ -1923,7 +1897,7 @@ void target_queued_submit_work(struct work_struct *work)
se_plug = target_plug_device(se_dev);
}
target_submit(se_cmd);
__target_submit(se_cmd);
}
if (se_plug)
......@@ -1934,7 +1908,7 @@ void target_queued_submit_work(struct work_struct *work)
* target_queue_submission - queue the cmd to run on the LIO workqueue
* @se_cmd: command descriptor to submit
*/
void target_queue_submission(struct se_cmd *se_cmd)
static void target_queue_submission(struct se_cmd *se_cmd)
{
struct se_device *se_dev = se_cmd->se_dev;
int cpu = se_cmd->cpuid;
......@@ -1944,7 +1918,35 @@ void target_queue_submission(struct se_cmd *se_cmd)
llist_add(&se_cmd->se_cmd_list, &sq->cmd_list);
queue_work_on(cpu, target_submission_wq, &sq->work);
}
EXPORT_SYMBOL_GPL(target_queue_submission);
/**
* target_submit - perform final initialization and submit cmd to LIO core
* @cmd: command descriptor to submit
*
* target_submit_prep or something similar must have been called on the cmd,
* and this must be called from process context.
*/
int target_submit(struct se_cmd *se_cmd)
{
const struct target_core_fabric_ops *tfo = se_cmd->se_sess->se_tpg->se_tpg_tfo;
struct se_dev_attrib *da = &se_cmd->se_dev->dev_attrib;
u8 submit_type;
if (da->submit_type == TARGET_FABRIC_DEFAULT_SUBMIT)
submit_type = tfo->default_submit_type;
else if (da->submit_type == TARGET_DIRECT_SUBMIT &&
tfo->direct_submit_supp)
submit_type = TARGET_DIRECT_SUBMIT;
else
submit_type = TARGET_QUEUE_SUBMIT;
if (submit_type == TARGET_DIRECT_SUBMIT)
return __target_submit(se_cmd);
target_queue_submission(se_cmd);
return 0;
}
EXPORT_SYMBOL_GPL(target_submit);
static void target_complete_tmr_failure(struct work_struct *work)
{
......
......@@ -432,6 +432,9 @@ static const struct target_core_fabric_ops ft_fabric_ops = {
.tfc_wwn_attrs = ft_wwn_attrs,
.tfc_tpg_nacl_base_attrs = ft_nacl_base_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
static struct notifier_block ft_notifier = {
......
......@@ -1687,6 +1687,9 @@ static const struct target_core_fabric_ops usbg_ops = {
.tfc_wwn_attrs = usbg_wwn_attrs,
.tfc_tpg_base_attrs = usbg_base_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
/* Start gadget.c code */
......
......@@ -909,7 +909,7 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd)
cmd->tvc_prot_sgl_count, GFP_KERNEL))
return;
target_queue_submission(se_cmd);
target_submit(se_cmd);
}
static void
......@@ -2598,6 +2598,9 @@ static const struct target_core_fabric_ops vhost_scsi_ops = {
.tfc_wwn_attrs = vhost_scsi_wwn_attrs,
.tfc_tpg_base_attrs = vhost_scsi_tpg_attrs,
.tfc_tpg_attrib_attrs = vhost_scsi_tpg_attrib_attrs,
.default_submit_type = TARGET_QUEUE_SUBMIT,
.direct_submit_supp = 1,
};
static int __init vhost_scsi_init(void)
......
......@@ -1832,6 +1832,9 @@ static const struct target_core_fabric_ops scsiback_ops = {
.tfc_wwn_attrs = scsiback_wwn_attrs,
.tfc_tpg_base_attrs = scsiback_tpg_attrs,
.tfc_tpg_param_attrs = scsiback_param_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
static const struct xenbus_device_id scsiback_ids[] = {
......
......@@ -108,6 +108,15 @@
#define SE_MODE_PAGE_BUF 512
#define SE_SENSE_BUF 96
enum target_submit_type {
/* Use the fabric driver's default submission type */
TARGET_FABRIC_DEFAULT_SUBMIT,
/* Submit from the calling context */
TARGET_DIRECT_SUBMIT,
/* Defer submission to the LIO workqueue */
TARGET_QUEUE_SUBMIT,
};
/* struct se_hba->hba_flags */
enum hba_flags_table {
HBA_FLAGS_INTERNAL_USE = 0x01,
......@@ -717,6 +726,7 @@ struct se_dev_attrib {
u32 unmap_granularity;
u32 unmap_granularity_alignment;
u32 max_write_same_len;
u8 submit_type;
struct se_device *da_dev;
struct config_group da_group;
};
......
......@@ -113,11 +113,20 @@ struct target_core_fabric_ops {
struct configfs_attribute **tfc_tpg_nacl_param_attrs;
/*
* Set this member variable to true if the SCSI transport protocol
* Set this member variable if the SCSI transport protocol
* (e.g. iSCSI) requires that the Data-Out buffer is transferred in
* its entirety before a command is aborted.
*/
bool write_pending_must_be_called;
unsigned int write_pending_must_be_called:1;
/*
* Set this if the driver supports submitting commands to the backend
* from target_submit/target_submit_cmd.
*/
unsigned int direct_submit_supp:1;
/*
* Set this to a target_submit_type value.
*/
u8 default_submit_type;
};
int target_register_template(const struct target_core_fabric_ops *fo);
......@@ -166,20 +175,18 @@ int target_submit_prep(struct se_cmd *se_cmd, unsigned char *cdb,
struct scatterlist *sgl, u32 sgl_count,
struct scatterlist *sgl_bidi, u32 sgl_bidi_count,
struct scatterlist *sgl_prot, u32 sgl_prot_count, gfp_t gfp);
void target_submit(struct se_cmd *se_cmd);
int target_submit(struct se_cmd *se_cmd);
sense_reason_t transport_lookup_cmd_lun(struct se_cmd *);
sense_reason_t target_cmd_init_cdb(struct se_cmd *se_cmd, unsigned char *cdb,
gfp_t gfp);
sense_reason_t target_cmd_parse_cdb(struct se_cmd *);
void target_submit_cmd(struct se_cmd *, struct se_session *, unsigned char *,
unsigned char *, u64, u32, int, int, int);
void target_queue_submission(struct se_cmd *se_cmd);
int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
unsigned char *sense, u64 unpacked_lun,
void *fabric_tmr_ptr, unsigned char tm_type,
gfp_t, u64, int);
int transport_handle_cdb_direct(struct se_cmd *);
sense_reason_t transport_generic_new_cmd(struct se_cmd *);
void target_put_cmd_and_wait(struct se_cmd *cmd);
......@@ -197,8 +204,6 @@ void target_stop_session(struct se_session *se_sess);
void target_wait_for_sess_cmds(struct se_session *);
void target_show_cmd(const char *pfx, struct se_cmd *cmd);
int core_alua_check_nonop_delay(struct se_cmd *);
int core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t);
void core_tmr_release_req(struct se_tmr_req *);
int transport_generic_handle_tmr(struct se_cmd *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment