Commit 56ae8866 authored by Nicolin Chen, committed by Will Deacon

iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq

The driver calls in different places the arm_smmu_get_cmdq() helper, and
it's fine to do so since the helper always returns the single SMMU CMDQ.
However, with NVIDIA CMDQV extension or SMMU ECMDQ, there can be multiple
cmdqs in the system to select one from. And either case requires a batch
of commands to be issued to the same cmdq. Thus, a cmdq has to be decided
in the higher-level callers.

Add a cmdq pointer in arm_smmu_cmdq_batch structure, and decide the cmdq
when initializing the batch. Pass its pointer down to the bottom function.
Update __arm_smmu_cmdq_issue_cmd() accordingly for single command issuers.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/2cbf5ddefb6ea611e48d67c642271bd24421eb21.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
parent 2d42d3ba
...@@ -592,11 +592,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq, ...@@ -592,11 +592,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
/* Wait for the command queue to become non-full */ /* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq) struct arm_smmu_ll_queue *llq)
{ {
unsigned long flags; unsigned long flags;
struct arm_smmu_queue_poll qp; struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
int ret = 0; int ret = 0;
/* /*
...@@ -627,11 +627,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, ...@@ -627,11 +627,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity. * Must be called with the cmdq lock held in some capacity.
*/ */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq) struct arm_smmu_ll_queue *llq)
{ {
int ret = 0; int ret = 0;
struct arm_smmu_queue_poll qp; struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod)); u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
queue_poll_init(smmu, &qp); queue_poll_init(smmu, &qp);
...@@ -651,10 +651,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, ...@@ -651,10 +651,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity. * Must be called with the cmdq lock held in some capacity.
*/ */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq) struct arm_smmu_ll_queue *llq)
{ {
struct arm_smmu_queue_poll qp; struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 prod = llq->prod; u32 prod = llq->prod;
int ret = 0; int ret = 0;
...@@ -701,12 +701,13 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, ...@@ -701,12 +701,13 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
} }
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq) struct arm_smmu_ll_queue *llq)
{ {
if (smmu->options & ARM_SMMU_OPT_MSIPOLL) if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
return __arm_smmu_cmdq_poll_until_msi(smmu, llq); return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, llq); return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
} }
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
...@@ -743,13 +744,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, ...@@ -743,13 +744,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
* CPU will appear before any of the commands from the other CPU. * CPU will appear before any of the commands from the other CPU.
*/ */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
u64 *cmds, int n, bool sync) u64 *cmds, int n, bool sync)
{ {
u64 cmd_sync[CMDQ_ENT_DWORDS]; u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod; u32 prod;
unsigned long flags; unsigned long flags;
bool owner; bool owner;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
struct arm_smmu_ll_queue llq, head; struct arm_smmu_ll_queue llq, head;
int ret = 0; int ret = 0;
...@@ -763,7 +764,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, ...@@ -763,7 +764,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
while (!queue_has_space(&llq, n + sync)) { while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags); local_irq_restore(flags);
if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq)) if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags); local_irq_save(flags);
} }
...@@ -839,7 +840,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, ...@@ -839,7 +840,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) { if (sync) {
llq.prod = queue_inc_prod_n(&llq, n); llq.prod = queue_inc_prod_n(&llq, n);
ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
if (ret) { if (ret) {
dev_err_ratelimited(smmu->dev, dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
...@@ -874,7 +875,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, ...@@ -874,7 +875,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
return -EINVAL; return -EINVAL;
} }
return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync); return arm_smmu_cmdq_issue_cmdlist(
smmu, arm_smmu_get_cmdq(smmu), cmd, 1, sync);
} }
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
...@@ -889,6 +891,13 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, ...@@ -889,6 +891,13 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true); return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
} }
/*
 * Initialize a command batch: mark it as empty and record the command queue
 * returned by arm_smmu_get_cmdq() in the batch, so that the cmdq is decided
 * once here rather than looked up again when the batch is issued.
 */
static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
cmds->num = 0;
cmds->cmdq = arm_smmu_get_cmdq(smmu);
}
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *cmd) struct arm_smmu_cmdq_ent *cmd)
...@@ -897,13 +906,15 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, ...@@ -897,13 +906,15 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num = 0; cmds->num, true);
arm_smmu_cmdq_batch_init(smmu, cmds);
} }
if (cmds->num == CMDQ_BATCH_ENTRIES) { if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num = 0; cmds->num, false);
arm_smmu_cmdq_batch_init(smmu, cmds);
} }
index = cmds->num * CMDQ_ENT_DWORDS; index = cmds->num * CMDQ_ENT_DWORDS;
...@@ -919,7 +930,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, ...@@ -919,7 +930,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds) struct arm_smmu_cmdq_batch *cmds)
{ {
return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, true);
} }
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused, static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
...@@ -1170,7 +1182,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, ...@@ -1170,7 +1182,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
}, },
}; };
cmds.num = 0; arm_smmu_cmdq_batch_init(smmu, &cmds);
for (i = 0; i < master->num_streams; i++) { for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id; cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
...@@ -2021,7 +2033,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, ...@@ -2021,7 +2033,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
cmds.num = 0; arm_smmu_cmdq_batch_init(master->smmu, &cmds);
for (i = 0; i < master->num_streams; i++) { for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id; cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
...@@ -2059,7 +2071,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, ...@@ -2059,7 +2071,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
if (!atomic_read(&smmu_domain->nr_ats_masters)) if (!atomic_read(&smmu_domain->nr_ats_masters))
return 0; return 0;
cmds.num = 0; arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds);
spin_lock_irqsave(&smmu_domain->devices_lock, flags); spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master_domain, &smmu_domain->devices, list_for_each_entry(master_domain, &smmu_domain->devices,
...@@ -2141,7 +2153,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, ...@@ -2141,7 +2153,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
num_pages++; num_pages++;
} }
cmds.num = 0; arm_smmu_cmdq_batch_init(smmu, &cmds);
while (iova < end) { while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
......
...@@ -570,6 +570,7 @@ struct arm_smmu_cmdq { ...@@ -570,6 +570,7 @@ struct arm_smmu_cmdq {
struct arm_smmu_cmdq_batch { struct arm_smmu_cmdq_batch {
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
struct arm_smmu_cmdq *cmdq;
int num; int num;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment