Commit eec91e22 authored by Linus Torvalds

Merge tag 'iommu-updates-v6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux

Pull iommu updates from Joerg Roedel:
 "Core changes:
   - Allow ATS on VF when parent device is identity mapped
   - Optimize unmap path on ARM io-pagetable implementation
   - Use of_property_present()

  ARM-SMMU changes:
   - SMMUv2:
       - Devicetree binding updates for Qualcomm MMU-500 implementations
       - Extend workarounds for broken Qualcomm hypervisor to avoid
         touching features that are not available (e.g. 16KiB page
         support, reserved context banks)
   - SMMUv3:
       - Support for NVIDIA's custom virtual command queue hardware
       - Fix Stage-2 stall configuration and extend tests to cover this
         area
       - A bunch of driver cleanups, including simplification of the
         master rbtree code
   - Minor cleanups and fixes across both drivers

  Intel VT-d changes:
   - Retire si_domain and convert to use static identity domain
   - Batched IOTLB/dev-IOTLB invalidation
   - Small code refactoring and cleanups

  AMD-Vi changes:
   - Cleanup and refactoring of io-pagetable code
   - Add parameter to limit the used io-pagesizes
   - Other cleanups and fixes"

* tag 'iommu-updates-v6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux: (77 commits)
  dt-bindings: arm-smmu: Add compatible for QCS8300 SoC
  iommu/amd: Test for PAGING domains before freeing a domain
  iommu/amd: Fix argument order in amd_iommu_dev_flush_pasid_all()
  iommu/amd: Add kernel parameters to limit V1 page-sizes
  iommu/arm-smmu-v3: Reorganize struct arm_smmu_ctx_desc_cfg
  iommu/arm-smmu-v3: Add types for each level of the CD table
  iommu/arm-smmu-v3: Shrink the cdtab l1_desc array
  iommu/arm-smmu-v3: Do not use devm for the cd table allocations
  iommu/arm-smmu-v3: Remove strtab_base/cfg
  iommu/arm-smmu-v3: Reorganize struct arm_smmu_strtab_cfg
  iommu/arm-smmu-v3: Add types for each level of the 2 level stream table
  iommu/arm-smmu-v3: Add arm_smmu_strtab_l1/2_idx()
  iommu/arm-smmu-qcom: apply num_context_bank fixes for SDM630 / SDM660
  iommu/arm-smmu-v3: Use the new rb tree helpers
  dt-bindings: arm-smmu: document the support on SA8255p
  iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent
  iommu/tegra241-cmdqv: Drop static at local variable
  iommu/tegra241-cmdqv: Fix ioremap() error handling in probe()
  iommu/amd: Do not set the D bit on AMD v2 table entries
  iommu/amd: Correct the reported page sizes from the V1 table
  ...
parents c27ea952 97162f60
@@ -339,6 +339,11 @@
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
+nohugepages - Limit page-sizes used for v1 page-tables
+              to 4 KiB.
+v2_pgsizes_only - Limit page-sizes used for v1 page-tables
+              to 4KiB/2Mib/1GiB.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
...
@@ -36,7 +36,9 @@ properties:
items:
- enum:
- qcom,qcm2290-smmu-500
+- qcom,qcs8300-smmu-500
- qcom,qdu1000-smmu-500
+- qcom,sa8255p-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sc7180-smmu-500
- qcom,sc7280-smmu-500

@@ -84,6 +86,7 @@ properties:
items:
- enum:
- qcom,qcm2290-smmu-500
+- qcom,sa8255p-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sc7280-smmu-500
- qcom,sc8180x-smmu-500

@@ -552,7 +555,9 @@ allOf:
- cavium,smmu-v2
- marvell,ap806-smmu-500
- nvidia,smmu-500
+- qcom,qcs8300-smmu-500
- qcom,qdu1000-smmu-500
+- qcom,sa8255p-smmu-500
- qcom,sc7180-smmu-500
- qcom,sdm670-smmu-500
- qcom,sdm845-smmu-500
...
@@ -22650,6 +22650,7 @@ M: Thierry Reding <thierry.reding@gmail.com>
R: Krishna Reddy <vdumpa@nvidia.com>
L: linux-tegra@vger.kernel.org
S: Supported
+F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
F: drivers/iommu/tegra*
...
@@ -424,6 +424,17 @@ config ARM_SMMU_V3_KUNIT_TEST
Enable this option to unit-test arm-smmu-v3 driver functions.
If unsure, say N.
+config TEGRA241_CMDQV
+bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
+depends on ACPI
+help
+Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
+CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
+support, except with virtualization capabilities.
+Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
+CMDQ-V extension.
endif
config S390_IOMMU
...
@@ -43,9 +43,10 @@ int amd_iommu_enable_faulting(unsigned int cpu);
extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
+extern unsigned long amd_iommu_pgsize_bitmap;
/* Protection domain ops */
-struct protection_domain *protection_domain_alloc(unsigned int type);
+struct protection_domain *protection_domain_alloc(unsigned int type, int nid);
void protection_domain_free(struct protection_domain *domain);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);

@@ -87,14 +88,10 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_update(struct protection_domain *domain);
-void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set);
-void amd_iommu_domain_flush_complete(struct protection_domain *domain);
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size);
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
ioasid_t pasid, u64 address, size_t size);
-void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
-ioasid_t pasid);
#ifdef CONFIG_IRQ_REMAP
int amd_iommu_create_irq_domain(struct amd_iommu *iommu);

@@ -121,11 +118,6 @@ static inline bool check_feature2(u64 mask)
return (amd_iommu_efr2 & mask);
}
-static inline int check_feature_gpt_level(void)
-{
-return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
-}
static inline bool amd_iommu_gt_ppr_supported(void)
{
return (check_feature(FEATURE_GT) &&

@@ -143,19 +135,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
}
-static inline
-void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
-{
-domain->iop.root = (u64 *)(root & PAGE_MASK);
-domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
-}
-static inline
-void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
-amd_iommu_domain_set_pt_root(domain, 0);
-}
static inline int get_pci_sbdf_id(struct pci_dev *pdev)
{
int seg = pci_domain_nr(pdev->bus);

@@ -185,7 +164,6 @@ static inline struct protection_domain *to_pdomain(struct iommu_domain *dom)
}
bool translation_pre_enabled(struct amd_iommu *iommu);
-bool amd_iommu_is_attach_deferred(struct device *dev);
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
#ifdef CONFIG_DMI
...
@@ -8,6 +8,7 @@
#ifndef _ASM_X86_AMD_IOMMU_TYPES_H
#define _ASM_X86_AMD_IOMMU_TYPES_H
+#include <linux/bitfield.h>
#include <linux/iommu.h>
#include <linux/types.h>
#include <linux/mmu_notifier.h>

@@ -95,26 +96,21 @@
#define FEATURE_GA BIT_ULL(7)
#define FEATURE_HE BIT_ULL(8)
#define FEATURE_PC BIT_ULL(9)
-#define FEATURE_GATS_SHIFT (12)
-#define FEATURE_GATS_MASK (3ULL)
+#define FEATURE_GATS GENMASK_ULL(13, 12)
+#define FEATURE_GLX GENMASK_ULL(15, 14)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
+#define FEATURE_PASMAX GENMASK_ULL(36, 32)
#define FEATURE_GIOSUP BIT_ULL(48)
#define FEATURE_HASUP BIT_ULL(49)
#define FEATURE_EPHSUP BIT_ULL(50)
#define FEATURE_HDSUP BIT_ULL(52)
#define FEATURE_SNP BIT_ULL(63)
-#define FEATURE_PASID_SHIFT 32
-#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
-#define FEATURE_GLXVAL_SHIFT 14
-#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT)
/* Extended Feature 2 Bits */
-#define FEATURE_SNPAVICSUP_SHIFT 5
-#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT)
+#define FEATURE_SNPAVICSUP GENMASK_ULL(7, 5)
#define FEATURE_SNPAVICSUP_GAM(x) \
-((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1)
+(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
/* Note:
* The current driver only support 16-bit PASID.

@@ -294,8 +290,13 @@
* that we support.
*
* 512GB Pages are not supported due to a hardware bug
+ * Page sizes >= the 52 bit max physical address of the CPU are not supported.
*/
-#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+#define AMD_IOMMU_PGSIZES (GENMASK_ULL(51, 12) ^ SZ_512G)
+/* Special mode where page-sizes are limited to 4 KiB */
+#define AMD_IOMMU_PGSIZES_4K (PAGE_SIZE)
/* 4K, 2MB, 1G page sizes are supported */
#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))

@@ -419,10 +420,6 @@
#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL)
-#define DTE_GCR3_INDEX_A 0
-#define DTE_GCR3_INDEX_B 1
-#define DTE_GCR3_INDEX_C 1
#define DTE_GCR3_SHIFT_A 58
#define DTE_GCR3_SHIFT_B 16
#define DTE_GCR3_SHIFT_C 43

@@ -527,7 +524,7 @@ struct amd_irte_ops;
#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0)
#define io_pgtable_to_data(x) \
-container_of((x), struct amd_io_pgtable, iop)
+container_of((x), struct amd_io_pgtable, pgtbl)
#define io_pgtable_ops_to_data(x) \
io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

@@ -537,7 +534,7 @@ struct amd_irte_ops;
struct protection_domain, iop)
#define io_pgtable_cfg_to_data(x) \
-container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+container_of((x), struct amd_io_pgtable, pgtbl.cfg)
struct gcr3_tbl_info {
u64 *gcr3_tbl; /* Guest CR3 table */

@@ -547,8 +544,7 @@ struct gcr3_tbl_info {
};
struct amd_io_pgtable {
-struct io_pgtable_cfg pgtbl_cfg;
-struct io_pgtable iop;
+struct io_pgtable pgtbl;
int mode;
u64 *root;
u64 *pgd; /* v2 pgtable pgd pointer */

@@ -580,7 +576,6 @@ struct protection_domain {
struct amd_io_pgtable iop;
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
-int nid; /* Node ID */
enum protection_domain_mode pd_mode; /* Track page table type */
bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
...
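For reference, the EFR field macros above trade open-coded shift/mask pairs for GENMASK_ULL()/FIELD_GET(), and the new AMD_IOMMU_PGSIZES keeps the broken 512 GiB size punched out while also capping the range at bit 51, per the added comment. A small userspace sketch (stand-in macros and a made-up EFR value, illustration only, not the kernel implementation) of the equivalence:

#include <stdio.h>

/* Userspace stand-ins for the kernel helpers; illustration only. */
#define GENMASK_ULL(h, l)  ((~0ULL >> (63 - (h))) & (~0ULL << (l)))
#define FIELD_GET(mask, v) (((v) & (mask)) >> __builtin_ctzll(mask))
#define SZ_512G            (1ULL << 39)

#define FEATURE_GATS   GENMASK_ULL(13, 12)
#define FEATURE_GLX    GENMASK_ULL(15, 14)
#define FEATURE_PASMAX GENMASK_ULL(36, 32)

int main(void)
{
	/* Made-up EFR with GATS=2, GLX=1, PASMAX=0xf (not real hardware values). */
	unsigned long long efr = 0xF00006000ULL;

	/* Old style: explicit shift and mask. */
	unsigned long long gats_old = (efr >> 12) & 0x3ULL;
	unsigned long long glx_old  = (efr & (0x03ULL << 14)) >> 14;
	unsigned long long pas_old  = (efr & (0x1fULL << 32)) >> 32;

	/* New style: FIELD_GET() over a GENMASK_ULL() field definition. */
	printf("GATS   %llu == %llu\n", gats_old, FIELD_GET(FEATURE_GATS, efr));
	printf("GLX    %llu == %llu\n", glx_old, FIELD_GET(FEATURE_GLX, efr));
	printf("PASMAX %llu == %llu\n", pas_old, FIELD_GET(FEATURE_PASMAX, efr));

	/* max_pasids as computed in iommu_init_pci(): PASMAX=0xf -> 16-bit PASIDs. */
	unsigned int pasmax = (unsigned int)FIELD_GET(FEATURE_PASMAX, efr);
	printf("max_pasids = %u\n", (1u << (pasmax + 1)) - 1);

	/* New bitmap: 4 KiB..2 PiB capped at bit 51, broken 512 GiB bit cleared. */
	unsigned long long pgsizes = GENMASK_ULL(51, 12) ^ SZ_512G;
	printf("AMD_IOMMU_PGSIZES = %#llx, 512G allowed: %d\n",
	       pgsizes, !!(pgsizes & SZ_512G));
	return 0;
}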
@@ -192,6 +192,8 @@ bool amdr_ivrs_remap_support __read_mostly;
bool amd_iommu_force_isolation __read_mostly;
+unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
/*
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
* to know which ones are already in use.

@@ -2042,14 +2044,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
int glxval;
u64 pasmax;
-pasmax = amd_iommu_efr & FEATURE_PASID_MASK;
-pasmax >>= FEATURE_PASID_SHIFT;
+pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr);
iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
-glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK;
-glxval >>= FEATURE_GLXVAL_SHIFT;
+glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr);
if (amd_iommu_max_glx_val == -1)
amd_iommu_max_glx_val = glxval;

@@ -3088,7 +3088,7 @@ static int __init early_amd_iommu_init(void)
/* 5 level guest page table */
if (cpu_feature_enabled(X86_FEATURE_LA57) &&
-check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
+FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
/* Disable any previously enabled IOMMUs */

@@ -3494,6 +3494,12 @@ static int __init parse_amd_iommu_options(char *str)
amd_iommu_pgtable = AMD_IOMMU_V2;
} else if (strncmp(str, "irtcachedis", 11) == 0) {
amd_iommu_irtcachedis = true;
+} else if (strncmp(str, "nohugepages", 11) == 0) {
+pr_info("Restricting V1 page-sizes to 4KiB");
+amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K;
+} else if (strncmp(str, "v2_pgsizes_only", 15) == 0) {
+pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB");
+amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
} else {
pr_notice("Unknown option - '%s'\n", str);
}
...
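The two new options feed amd_iommu_pgsize_bitmap directly: booting with amd_iommu=nohugepages selects AMD_IOMMU_PGSIZES_4K, amd_iommu=v2_pgsizes_only selects AMD_IOMMU_PGSIZES_V2, and the default stays at AMD_IOMMU_PGSIZES. A userspace sketch (stand-in constants mirroring the definitions above, illustration only) listing the page sizes each bitmap allows:

#include <stdio.h>

/* Stand-ins mirroring the kernel constants; illustration only. */
#define GENMASK_ULL(h, l) ((~0ULL >> (63 - (h))) & (~0ULL << (l)))
#define SZ_512G   (1ULL << 39)
#define PAGE_SIZE 4096ULL

#define AMD_IOMMU_PGSIZES    (GENMASK_ULL(51, 12) ^ SZ_512G)
#define AMD_IOMMU_PGSIZES_4K (PAGE_SIZE)
#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))

static void show(const char *opt, unsigned long long bitmap)
{
	printf("%-28s:", opt);
	for (int bit = 12; bit < 64; bit++)
		if (bitmap & (1ULL << bit))
			printf(" 2^%d", bit);   /* each set bit is one allowed page size */
	printf("\n");
}

int main(void)
{
	show("(default)", AMD_IOMMU_PGSIZES);
	show("amd_iommu=nohugepages", AMD_IOMMU_PGSIZES_4K);
	show("amd_iommu=v2_pgsizes_only", AMD_IOMMU_PGSIZES_V2);
	return 0;
}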
@@ -24,27 +24,6 @@
#include "amd_iommu.h"
#include "../iommu-pages.h"
-static void v1_tlb_flush_all(void *cookie)
-{
-}
-static void v1_tlb_flush_walk(unsigned long iova, size_t size,
-size_t granule, void *cookie)
-{
-}
-static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
-unsigned long iova, size_t granule,
-void *cookie)
-{
-}
-static const struct iommu_flush_ops v1_flush_ops = {
-.tlb_flush_all = v1_tlb_flush_all,
-.tlb_flush_walk = v1_tlb_flush_walk,
-.tlb_add_page = v1_tlb_add_page,
-};
/*
* Helper function to get the first pte of a large mapping
*/

@@ -132,56 +111,40 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
}
}
-void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
-u64 *root, int mode)
-{
-u64 pt_root;
-/* lowest 3 bits encode pgtable mode */
-pt_root = mode & 7;
-pt_root |= (u64)root;
-amd_iommu_domain_set_pt_root(domain, pt_root);
-}
/*
* This function is used to add another level to an IO page table. Adding
* another level increases the size of the address space by 9 bits to a size up
* to 64 bits.
*/
-static bool increase_address_space(struct protection_domain *domain,
+static bool increase_address_space(struct amd_io_pgtable *pgtable,
unsigned long address,
gfp_t gfp)
{
+struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
+struct protection_domain *domain =
+container_of(pgtable, struct protection_domain, iop);
unsigned long flags;
bool ret = true;
u64 *pte;
-pte = iommu_alloc_page_node(domain->nid, gfp);
+pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
if (!pte)
return false;
spin_lock_irqsave(&domain->lock, flags);
-if (address <= PM_LEVEL_SIZE(domain->iop.mode))
+if (address <= PM_LEVEL_SIZE(pgtable->mode))
goto out;
ret = false;
-if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
+if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL))
goto out;
-*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
-domain->iop.root = pte;
-domain->iop.mode += 1;
+*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
+pgtable->root = pte;
+pgtable->mode += 1;
amd_iommu_update_and_flush_device_table(domain);
-amd_iommu_domain_flush_complete(domain);
-/*
- * Device Table needs to be updated and flushed before the new root can
- * be published.
- */
-amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
pte = NULL;
ret = true;

@@ -193,30 +156,31 @@ static bool increase_address_space(struct protection_domain *domain,
return ret;
}
-static u64 *alloc_pte(struct protection_domain *domain,
+static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
unsigned long address,
unsigned long page_size,
u64 **pte_page,
gfp_t gfp,
bool *updated)
{
+struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
-while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
+while (address > PM_LEVEL_SIZE(pgtable->mode)) {
/*
* Return an error if there is no memory to update the
* page-table.
*/
-if (!increase_address_space(domain, address, gfp))
+if (!increase_address_space(pgtable, address, gfp))
return NULL;
}
-level = domain->iop.mode - 1;
-pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+level = pgtable->mode - 1;
+pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);

@@ -251,7 +215,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
if (!IOMMU_PTE_PRESENT(__pte) ||
pte_level == PAGE_MODE_NONE) {
-page = iommu_alloc_page_node(domain->nid, gfp);
+page = iommu_alloc_page_node(cfg->amd.nid, gfp);
if (!page)
return NULL;

@@ -365,7 +329,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
{
-struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
LIST_HEAD(freelist);
bool updated = false;
u64 __pte, *pte;

@@ -382,7 +346,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
while (pgcount > 0) {
count = PAGE_SIZE_PTE_COUNT(pgsize);
-pte = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated);
+pte = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);
ret = -ENOMEM;
if (!pte)

@@ -419,6 +383,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
out:
if (updated) {
+struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);

@@ -560,27 +525,17 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
*/
static void v1_free_pgtable(struct io_pgtable *iop)
{
-struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
-struct protection_domain *dom;
+struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
LIST_HEAD(freelist);
if (pgtable->mode == PAGE_MODE_NONE)
return;
-dom = container_of(pgtable, struct protection_domain, iop);
/* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
pgtable->mode > PAGE_MODE_6_LEVEL);
free_sub_pt(pgtable->root, pgtable->mode, &freelist);
-/* Update data structure */
-amd_iommu_domain_clr_pt_root(dom);
-/* Make changes visible to IOMMUs */
-amd_iommu_domain_update(dom);
iommu_put_pages_list(&freelist);
}

@@ -588,17 +543,21 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
{
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
-cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES;
+pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
+if (!pgtable->root)
+return NULL;
+pgtable->mode = PAGE_MODE_3_LEVEL;
+cfg->pgsize_bitmap = amd_iommu_pgsize_bitmap;
cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
-cfg->tlb = &v1_flush_ops;
-pgtable->iop.ops.map_pages = iommu_v1_map_pages;
-pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages;
-pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
-pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
+pgtable->pgtbl.ops.map_pages = iommu_v1_map_pages;
+pgtable->pgtbl.ops.unmap_pages = iommu_v1_unmap_pages;
+pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
+pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
-return &pgtable->iop;
+return &pgtable->pgtbl;
}
struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
...
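With struct io_pgtable now embedded as amd_io_pgtable::pgtbl, the io_pgtable_to_data()/io_pgtable_cfg_to_data() macros used above are plain container_of() walks from the cfg or io_pgtable pointer back to the AMD structures. A standalone sketch of the same pattern, using simplified stand-in structs rather than the real kernel definitions:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Simplified stand-ins for the kernel structures involved. */
struct io_pgtable_cfg    { unsigned long pgsize_bitmap; };
struct io_pgtable        { struct io_pgtable_cfg cfg; };
struct amd_io_pgtable    { struct io_pgtable pgtbl; int mode; };
struct protection_domain { struct amd_io_pgtable iop; int id; };

int main(void)
{
	struct protection_domain dom = { .iop.mode = 3, .id = 42 };
	struct io_pgtable_cfg *cfg = &dom.iop.pgtbl.cfg;
	struct io_pgtable *iop = &dom.iop.pgtbl;

	/* io_pgtable_cfg_to_data(): cfg -> amd_io_pgtable via the nested pgtbl.cfg member. */
	struct amd_io_pgtable *pgtable =
		container_of(cfg, struct amd_io_pgtable, pgtbl.cfg);

	/* v1_free_pgtable()-style: io_pgtable -> amd_io_pgtable, then up to the domain. */
	struct protection_domain *pdom =
		container_of(container_of(iop, struct amd_io_pgtable, pgtbl),
			     struct protection_domain, iop);

	printf("mode=%d id=%d\n", pgtable->mode, pdom->id);
	return 0;
}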
@@ -51,7 +51,7 @@ static inline u64 set_pgtable_attr(u64 *page)
u64 prot;
prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
-prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;
+prot |= IOMMU_PAGE_ACCESS;
return (iommu_virt_to_phys(page) | prot);
}

@@ -233,8 +233,8 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
{
-struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
-struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg;
+struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
u64 *pte;
unsigned long map_size;
unsigned long mapped_size = 0;

@@ -251,7 +251,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
while (mapped_size < size) {
map_size = get_alloc_page_size(pgsize);
-pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd,
+pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
iova, map_size, gfp, &updated);
if (!pte) {
ret = -EINVAL;

@@ -266,8 +266,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
}
out:
-if (updated)
+if (updated) {
+struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
amd_iommu_domain_flush_pages(pdom, o_iova, size);
+}
if (mapped)
*mapped += mapped_size;

@@ -281,7 +284,7 @@ static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
struct iommu_iotlb_gather *gather)
{
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-struct io_pgtable_cfg *cfg = &pgtable->iop.cfg;
+struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
unsigned long unmap_size;
unsigned long unmapped = 0;
size_t size = pgcount << __ffs(pgsize);

@@ -323,30 +326,9 @@ static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
/*
* ----------------------------------------------------
*/
-static void v2_tlb_flush_all(void *cookie)
-{
-}
-static void v2_tlb_flush_walk(unsigned long iova, size_t size,
-size_t granule, void *cookie)
-{
-}
-static void v2_tlb_add_page(struct iommu_iotlb_gather *gather,
-unsigned long iova, size_t granule,
-void *cookie)
-{
-}
-static const struct iommu_flush_ops v2_flush_ops = {
-.tlb_flush_all = v2_tlb_flush_all,
-.tlb_flush_walk = v2_tlb_flush_walk,
-.tlb_add_page = v2_tlb_add_page,
-};
static void v2_free_pgtable(struct io_pgtable *iop)
{
-struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
if (!pgtable || !pgtable->pgd)
return;

@@ -359,26 +341,24 @@ static void v2_free_pgtable(struct io_pgtable *iop)
static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
-struct protection_domain *pdom = (struct protection_domain *)cookie;
int ias = IOMMU_IN_ADDR_BIT_SIZE;
-pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_ATOMIC);
+pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
if (!pgtable->pgd)
return NULL;
if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
ias = 57;
-pgtable->iop.ops.map_pages = iommu_v2_map_pages;
-pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages;
-pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;
+pgtable->pgtbl.ops.map_pages = iommu_v2_map_pages;
+pgtable->pgtbl.ops.unmap_pages = iommu_v2_unmap_pages;
+pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys;
-cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2,
-cfg->ias = ias,
-cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
-cfg->tlb = &v2_flush_ops;
+cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
+cfg->ias = ias;
+cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
-return &pgtable->iop;
+return &pgtable->pgtbl;
}
struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
...
This diff is collapsed.
@@ -181,7 +181,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct protection_domain *pdom;
int ret;
-pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA);
+pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev));
if (!pdom)
return ERR_PTR(-ENOMEM);
...
@@ -2,5 +2,6 @@
obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
arm_smmu_v3-y := arm-smmu-v3.o
arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
+arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o
obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
@@ -30,6 +30,11 @@ static struct mm_struct sva_mm = {
.pgd = (void *)0xdaedbeefdeadbeefULL,
};
+enum arm_smmu_test_master_feat {
+ARM_SMMU_MASTER_TEST_ATS = BIT(0),
+ARM_SMMU_MASTER_TEST_STALL = BIT(1),
+};
static bool arm_smmu_entry_differs_in_used_bits(const __le64 *entry,
const __le64 *used_bits,
const __le64 *target,

@@ -164,16 +169,22 @@ static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0;
static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste,
unsigned int s1dss,
-const dma_addr_t dma_addr)
+const dma_addr_t dma_addr,
+enum arm_smmu_test_master_feat feat)
{
+bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS;
+bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL;
struct arm_smmu_master master = {
+.ats_enabled = ats_enabled,
.cd_table.cdtab_dma = dma_addr,
.cd_table.s1cdmax = 0xFF,
.cd_table.s1fmt = STRTAB_STE_0_S1FMT_64K_L2,
.smmu = &smmu,
+.stall_enabled = stall_enabled,
};
-arm_smmu_make_cdtable_ste(ste, &master, true, s1dss);
+arm_smmu_make_cdtable_ste(ste, &master, ats_enabled, s1dss);
}
static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test)

@@ -204,7 +215,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste,
NUM_EXPECTED_SYNCS(2));
}

@@ -214,7 +225,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste,
NUM_EXPECTED_SYNCS(2));
}

@@ -224,7 +235,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste,
NUM_EXPECTED_SYNCS(3));
}

@@ -234,7 +245,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste,
NUM_EXPECTED_SYNCS(3));
}

@@ -245,9 +256,9 @@ static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test)
struct arm_smmu_ste s1dss_bypass;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
/*
* Flipping s1dss on a CD table STE only involves changes to the second

@@ -265,7 +276,7 @@ arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test)
struct arm_smmu_ste s1dss_bypass;
arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(
test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2));
}

@@ -276,16 +287,20 @@ arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test)
struct arm_smmu_ste s1dss_bypass;
arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(
test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2));
}
static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste,
-bool ats_enabled)
+enum arm_smmu_test_master_feat feat)
{
+bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS;
+bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL;
struct arm_smmu_master master = {
+.ats_enabled = ats_enabled,
.smmu = &smmu,
+.stall_enabled = stall_enabled,
};
struct io_pgtable io_pgtable = {};
struct arm_smmu_domain smmu_domain = {

@@ -308,7 +323,7 @@ static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test)
{
struct arm_smmu_ste ste;
-arm_smmu_test_make_s2_ste(&ste, true);
+arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste,
NUM_EXPECTED_SYNCS(2));
}

@@ -317,7 +332,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_s2(struct kunit *test)
{
struct arm_smmu_ste ste;
-arm_smmu_test_make_s2_ste(&ste, true);
+arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste,
NUM_EXPECTED_SYNCS(2));
}

@@ -326,7 +341,7 @@ static void arm_smmu_v3_write_ste_test_s2_to_bypass(struct kunit *test)
{
struct arm_smmu_ste ste;
-arm_smmu_test_make_s2_ste(&ste, true);
+arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste,
NUM_EXPECTED_SYNCS(2));
}

@@ -335,7 +350,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test)
{
struct arm_smmu_ste ste;
-arm_smmu_test_make_s2_ste(&ste, true);
+arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste,
NUM_EXPECTED_SYNCS(2));
}

@@ -346,8 +361,8 @@ static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test)
struct arm_smmu_ste s2_ste;
arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
-arm_smmu_test_make_s2_ste(&s2_ste, true);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
+arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste,
NUM_EXPECTED_SYNCS(3));
}

@@ -358,8 +373,8 @@ static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test)
struct arm_smmu_ste s2_ste;
arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
-arm_smmu_test_make_s2_ste(&s2_ste, true);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
+arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste,
NUM_EXPECTED_SYNCS(3));
}

@@ -375,9 +390,9 @@ static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test)
* s1 dss field in the same update.
*/
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
-fake_cdtab_dma_addr);
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS,
-0x4B4B4b4B4B);
+0x4B4B4b4B4B, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_non_hitless_transition(
test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3));
}

@@ -503,6 +518,30 @@ static void arm_smmu_test_make_sva_release_cd(struct arm_smmu_cd *cd,
arm_smmu_make_sva_cd(cd, &master, NULL, asid);
}
+static void arm_smmu_v3_write_ste_test_s1_to_s2_stall(struct kunit *test)
+{
+struct arm_smmu_ste s1_ste;
+struct arm_smmu_ste s2_ste;
+arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL);
+arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL);
+arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste,
+NUM_EXPECTED_SYNCS(3));
+}
+static void arm_smmu_v3_write_ste_test_s2_to_s1_stall(struct kunit *test)
+{
+struct arm_smmu_ste s1_ste;
+struct arm_smmu_ste s2_ste;
+arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
+fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL);
+arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL);
+arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste,
+NUM_EXPECTED_SYNCS(3));
+}
static void arm_smmu_v3_write_cd_test_sva_clear(struct kunit *test)
{
struct arm_smmu_cd cd = {};

@@ -547,6 +586,8 @@ static struct kunit_case arm_smmu_v3_test_cases[] = {
KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless),
KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear),
KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid),
+KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2_stall),
+KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1_stall),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release),
{},
...
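The kunit change above replaces the single bool ats_enabled argument with a feature bitmask so the new stall cases can reuse the same helpers. A minimal standalone sketch of that pattern (hypothetical names, not the kunit code):

#include <stdbool.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

/* Hypothetical feature flags mirroring the pattern used by the test helpers. */
enum master_test_feat {
	MASTER_TEST_ATS   = BIT(0),
	MASTER_TEST_STALL = BIT(1),
};

/* One bitmask parameter scales better than adding a new bool per feature. */
static void make_ste(enum master_test_feat feat)
{
	bool ats_enabled = feat & MASTER_TEST_ATS;
	bool stall_enabled = feat & MASTER_TEST_STALL;

	printf("ats=%d stall=%d\n", ats_enabled, stall_enabled);
}

int main(void)
{
	make_ste(MASTER_TEST_ATS);
	make_ste(MASTER_TEST_STALL);
	make_ste(MASTER_TEST_ATS | MASTER_TEST_STALL);
	return 0;
}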
This diff is collapsed.
@@ -14,6 +14,8 @@
#include <linux/mmzone.h>
#include <linux/sizes.h>
+struct arm_smmu_device;
/* MMIO registers */
#define ARM_SMMU_IDR0 0x0
#define IDR0_ST_LVL GENMASK(28, 27)

@@ -202,10 +204,8 @@
* 2lvl: 128k L1 entries,
* 256 lazy entries per table (each table covers a PCI bus)
*/
-#define STRTAB_L1_SZ_SHIFT 20
#define STRTAB_SPLIT 8
-#define STRTAB_L1_DESC_DWORDS 1
#define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)

@@ -215,6 +215,26 @@ struct arm_smmu_ste {
__le64 data[STRTAB_STE_DWORDS];
};
+#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT)
+struct arm_smmu_strtab_l2 {
+struct arm_smmu_ste stes[STRTAB_NUM_L2_STES];
+};
+struct arm_smmu_strtab_l1 {
+__le64 l2ptr;
+};
+#define STRTAB_MAX_L1_ENTRIES (1 << 17)
+static inline u32 arm_smmu_strtab_l1_idx(u32 sid)
+{
+return sid / STRTAB_NUM_L2_STES;
+}
+static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
+{
+return sid % STRTAB_NUM_L2_STES;
+}
#define STRTAB_STE_0_V (1UL << 0)
#define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
#define STRTAB_STE_0_CFG_ABORT 0

@@ -267,6 +287,7 @@ struct arm_smmu_ste {
#define STRTAB_STE_2_S2AA64 (1UL << 51)
#define STRTAB_STE_2_S2ENDI (1UL << 52)
#define STRTAB_STE_2_S2PTW (1UL << 54)
+#define STRTAB_STE_2_S2S (1UL << 57)
#define STRTAB_STE_2_S2R (1UL << 58)
#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)

@@ -280,7 +301,6 @@ struct arm_smmu_ste {
*/
#define CTXDESC_L2_ENTRIES 1024
-#define CTXDESC_L1_DESC_DWORDS 1
#define CTXDESC_L1_DESC_V (1UL << 0)
#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12)

@@ -290,6 +310,24 @@ struct arm_smmu_cd {
__le64 data[CTXDESC_CD_DWORDS];
};
+struct arm_smmu_cdtab_l2 {
+struct arm_smmu_cd cds[CTXDESC_L2_ENTRIES];
+};
+struct arm_smmu_cdtab_l1 {
+__le64 l2ptr;
+};
+static inline unsigned int arm_smmu_cdtab_l1_idx(unsigned int ssid)
+{
+return ssid / CTXDESC_L2_ENTRIES;
+}
+static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
+{
+return ssid % CTXDESC_L2_ENTRIES;
+}
#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
#define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)

@@ -320,7 +358,7 @@ struct arm_smmu_cd {
* When the SMMU only supports linear context descriptor tables, pick a
* reasonable size limit (64kB).
*/
-#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
+#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / sizeof(struct arm_smmu_cd))
/* Command queue */
#define CMDQ_ENT_SZ_SHIFT 4

@@ -566,10 +604,18 @@ struct arm_smmu_cmdq {
atomic_long_t *valid_map;
atomic_t owner_prod;
atomic_t lock;
+bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent);
};
+static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq,
+struct arm_smmu_cmdq_ent *ent)
+{
+return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true;
+}
struct arm_smmu_cmdq_batch {
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
+struct arm_smmu_cmdq *cmdq;
int num;
};

@@ -584,24 +630,23 @@ struct arm_smmu_priq {
};
/* High-level stream table and context descriptor structures */
-struct arm_smmu_strtab_l1_desc {
-struct arm_smmu_ste *l2ptr;
-};
struct arm_smmu_ctx_desc {
u16 asid;
};
-struct arm_smmu_l1_ctx_desc {
-struct arm_smmu_cd *l2ptr;
-dma_addr_t l2ptr_dma;
-};
struct arm_smmu_ctx_desc_cfg {
-__le64 *cdtab;
-dma_addr_t cdtab_dma;
-struct arm_smmu_l1_ctx_desc *l1_desc;
+union {
+struct {
+struct arm_smmu_cd *table;
+unsigned int num_ents;
+} linear;
+struct {
+struct arm_smmu_cdtab_l1 *l1tab;
+struct arm_smmu_cdtab_l2 **l2ptrs;
unsigned int num_l1_ents;
+} l2;
+};
+dma_addr_t cdtab_dma;
unsigned int used_ssids;
u8 in_ste;
u8 s1fmt;

@@ -609,6 +654,12 @@ struct arm_smmu_ctx_desc_cfg {
u8 s1cdmax;
};
+static inline bool
+arm_smmu_cdtab_allocated(struct arm_smmu_ctx_desc_cfg *cfg)
+{
+return cfg->linear.table || cfg->l2.l1tab;
+}
/* True if the cd table has SSIDS > 0 in use. */
static inline bool arm_smmu_ssids_in_use(struct arm_smmu_ctx_desc_cfg *cd_table)
{

@@ -620,18 +671,35 @@ struct arm_smmu_s2_cfg {
};
struct arm_smmu_strtab_cfg {
-__le64 *strtab;
-dma_addr_t strtab_dma;
-struct arm_smmu_strtab_l1_desc *l1_desc;
+union {
+struct {
+struct arm_smmu_ste *table;
+dma_addr_t ste_dma;
+unsigned int num_ents;
+} linear;
+struct {
+struct arm_smmu_strtab_l1 *l1tab;
+struct arm_smmu_strtab_l2 **l2ptrs;
+dma_addr_t l1_dma;
unsigned int num_l1_ents;
+} l2;
+};
+};
-u64 strtab_base;
-u32 strtab_base_cfg;
+struct arm_smmu_impl_ops {
+int (*device_reset)(struct arm_smmu_device *smmu);
+void (*device_remove)(struct arm_smmu_device *smmu);
+int (*init_structures)(struct arm_smmu_device *smmu);
+struct arm_smmu_cmdq *(*get_secondary_cmdq)(
+struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
};
/* An SMMUv3 instance */
struct arm_smmu_device {
struct device *dev;
+struct device *impl_dev;
+const struct arm_smmu_impl_ops *impl_ops;
void __iomem *base;
void __iomem *page1;

@@ -664,6 +732,7 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
+#define ARM_SMMU_OPT_TEGRA241_CMDQV (1 << 4)
u32 options;
struct arm_smmu_cmdq cmdq;

@@ -815,6 +884,15 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size);
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+struct arm_smmu_cmdq *cmdq);
+int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+struct arm_smmu_queue *q, void __iomem *page,
+unsigned long prod_off, unsigned long cons_off,
+size_t dwords, const char *name);
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+struct arm_smmu_cmdq *cmdq);
#ifdef CONFIG_ARM_SMMU_V3_SVA
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);

@@ -860,10 +938,15 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {}
#define arm_smmu_sva_domain_alloc NULL
-static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
-struct device *dev,
-ioasid_t id)
+#endif /* CONFIG_ARM_SMMU_V3_SVA */
+#ifdef CONFIG_TEGRA241_CMDQV
+struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu);
+#else /* CONFIG_TEGRA241_CMDQV */
+static inline struct arm_smmu_device *
+tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
{
+return ERR_PTR(-ENODEV);
}
-#endif /* CONFIG_ARM_SMMU_V3_SVA */
+#endif /* CONFIG_TEGRA241_CMDQV */
#endif /* _ARM_SMMU_V3_H */
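The new two-level table types make the index arithmetic explicit: with STRTAB_SPLIT = 8 a StreamID picks one of up to 128k L1 descriptors and one of 256 STEs in the L2 table that descriptor points to, and an SSID splits the same way over 1024 CDs per leaf. A standalone sketch of the helpers above (illustration only):

#include <stdio.h>

#define STRTAB_SPLIT       8
#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT)   /* 256 STEs per L2 table */
#define CTXDESC_L2_ENTRIES 1024                  /* CDs per L2 table */

static unsigned int strtab_l1_idx(unsigned int sid)  { return sid / STRTAB_NUM_L2_STES; }
static unsigned int strtab_l2_idx(unsigned int sid)  { return sid % STRTAB_NUM_L2_STES; }
static unsigned int cdtab_l1_idx(unsigned int ssid)  { return ssid / CTXDESC_L2_ENTRIES; }
static unsigned int cdtab_l2_idx(unsigned int ssid)  { return ssid % CTXDESC_L2_ENTRIES; }

int main(void)
{
	unsigned int sid = 0x12345, ssid = 0x1801;

	printf("sid  %#x -> strtab L1 %u, STE %u\n",
	       sid, strtab_l1_idx(sid), strtab_l2_idx(sid));
	printf("ssid %#x -> cdtab  L1 %u, CD  %u\n",
	       ssid, cdtab_l1_idx(ssid), cdtab_l2_idx(ssid));
	return 0;
}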
This diff is collapsed.
...@@ -282,6 +282,20 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) ...@@ -282,6 +282,20 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
u32 smr; u32 smr;
int i; int i;
/*
* MSM8998 LPASS SMMU reports 13 context banks, but accessing
* the last context bank crashes the system.
*/
if (of_device_is_compatible(smmu->dev->of_node, "qcom,msm8998-smmu-v2") &&
smmu->num_context_banks == 13) {
smmu->num_context_banks = 12;
} else if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2")) {
if (smmu->num_context_banks == 21) /* SDM630 / SDM660 A2NOC SMMU */
smmu->num_context_banks = 7;
else if (smmu->num_context_banks == 14) /* SDM630 / SDM660 LPASS SMMU */
smmu->num_context_banks = 13;
}
/*
* Some platforms support more than the Arm SMMU architected maximum of
* 128 stream matching groups. For unknown reasons, the additional
...@@ -338,6 +352,19 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
return 0;
}
static int qcom_adreno_smmuv2_cfg_probe(struct arm_smmu_device *smmu)
{
/* Support for 16K pages is advertised on some SoCs, but it doesn't seem to work */
smmu->features &= ~ARM_SMMU_FEAT_FMT_AARCH64_16K;
/* TZ protects several last context banks, hide them from Linux */
if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2") &&
smmu->num_context_banks == 5)
smmu->num_context_banks = 2;
return 0;
}
static void qcom_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
...@@ -436,6 +463,7 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = {
static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
.init_context = qcom_adreno_smmu_init_context,
.cfg_probe = qcom_adreno_smmuv2_cfg_probe,
.def_domain_type = qcom_smmu_def_domain_type,
.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
.write_sctlr = qcom_adreno_smmu_write_sctlr,
......
...@@ -417,7 +417,7 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
const struct arm_smmu_context_fault_info *cfi)
{
-dev_dbg(smmu->dev,
dev_err(smmu->dev,
"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx);
......
...@@ -416,14 +416,12 @@ static struct iommu_group *fsl_pamu_device_group(struct device *dev)
static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
{
-int len;
/*
* uboot must fill the fsl,liodn for platform devices to be supported by
* the iommu.
*/
if (!dev_is_pci(dev) &&
-!of_get_property(dev->of_node, "fsl,liodn", &len))
!of_property_present(dev->of_node, "fsl,liodn"))
return ERR_PTR(-ENODEV);
return &pamu_iommu;
......
...@@ -1204,9 +1204,7 @@ static void free_iommu(struct intel_iommu *iommu)
*/
static inline void reclaim_free_desc(struct q_inval *qi)
{
-while (qi->desc_status[qi->free_tail] == QI_DONE ||
-qi->desc_status[qi->free_tail] == QI_ABORT) {
-qi->desc_status[qi->free_tail] = QI_FREE;
while (qi->desc_status[qi->free_tail] == QI_FREE && qi->free_tail != qi->free_head) {
qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
qi->free_cnt++;
}
...@@ -1463,8 +1461,16 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
raw_spin_lock(&qi->q_lock);
}
-for (i = 0; i < count; i++)
-qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
/*
* The reclaim code can free descriptors from multiple submissions
* starting from the tail of the queue. When count == 0, the
* status of the standalone wait descriptor at the tail of the queue
* must be set to QI_FREE to allow the reclaim code to proceed.
* It is also possible that descriptors from one of the previous
* submissions has to be reclaimed by a subsequent submission.
*/
for (i = 0; i <= count; i++)
qi->desc_status[(index + i) % QI_LENGTH] = QI_FREE;
reclaim_free_desc(qi);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
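The hunk above is easier to follow with the ring bookkeeping spelled out: free_head is where the next submission starts, free_tail is the oldest slot not yet reclaimed, and reclaim only advances over slots already marked QI_FREE. A self-contained userspace model of that bookkeeping (not kernel code; the values are chosen for illustration) shows why the standalone wait-descriptor slot must also be marked QI_FREE before reclaiming:

/* Standalone model of the reclaim logic above (not kernel code). */
#include <stdio.h>

#define QI_LENGTH 16
enum { QI_FREE, QI_IN_USE, QI_DONE, QI_ABORT };

static int status[QI_LENGTH];			/* all QI_FREE initially */
static int free_head, free_tail, free_cnt = QI_LENGTH;

/* Mirror of reclaim_free_desc(): only walk slots already marked QI_FREE. */
static void reclaim(void)
{
	while (status[free_tail] == QI_FREE && free_tail != free_head) {
		free_tail = (free_tail + 1) % QI_LENGTH;
		free_cnt++;
	}
}

int main(void)
{
	/* Submit 2 descriptors plus 1 wait descriptor starting at free_head. */
	int index = free_head, count = 2;

	for (int i = 0; i <= count; i++)	/* <= also covers the wait slot */
		status[(index + i) % QI_LENGTH] = QI_IN_USE;
	free_head = (free_head + count + 1) % QI_LENGTH;
	free_cnt -= count + 1;

	/* Hardware completes; the submitter marks every slot, wait slot
	 * included, QI_FREE -- otherwise free_tail would stall on it. */
	for (int i = 0; i <= count; i++)
		status[(index + i) % QI_LENGTH] = QI_FREE;
	reclaim();
	printf("free_tail=%d free_cnt=%d\n", free_tail, free_cnt); /* 3, 16 */
	return 0;
}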
...@@ -1520,24 +1526,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type)
{
-u8 dw = 0, dr = 0;
struct qi_desc desc;
-int ih = 0;
-if (cap_write_drain(iommu->cap))
-dw = 1;
-if (cap_read_drain(iommu->cap))
-dr = 1;
-desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
-| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
-desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
-| QI_IOTLB_AM(size_order);
-desc.qw2 = 0;
-desc.qw3 = 0;
qi_desc_iotlb(iommu, did, addr, size_order, type, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
...@@ -1555,20 +1546,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
-if (mask) {
-addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
-desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
-} else
-desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
-if (qdep >= QI_DEV_IOTLB_MAX_INVS)
-qdep = 0;
-desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
-QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
-desc.qw2 = 0;
-desc.qw3 = 0;
qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
...@@ -1588,28 +1566,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
return;
}
-if (npages == -1) {
-desc.qw0 = QI_EIOTLB_PASID(pasid) |
-QI_EIOTLB_DID(did) |
-QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
-QI_EIOTLB_TYPE;
-desc.qw1 = 0;
-} else {
-int mask = ilog2(__roundup_pow_of_two(npages));
-unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
-if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
-addr = ALIGN_DOWN(addr, align);
-desc.qw0 = QI_EIOTLB_PASID(pasid) |
-QI_EIOTLB_DID(did) |
-QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
-QI_EIOTLB_TYPE;
-desc.qw1 = QI_EIOTLB_ADDR(addr) |
-QI_EIOTLB_IH(ih) |
-QI_EIOTLB_AM(mask);
-}
qi_desc_piotlb(did, pasid, addr, npages, ih, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
...@@ -1617,7 +1574,6 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
{
-unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
/*
...@@ -1629,40 +1585,9 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
-desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
-QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
-QI_DEV_IOTLB_PFSID(pfsid);
-/*
-* If S bit is 0, we only flush a single page. If S bit is set,
-* The least significant zero bit indicates the invalidation address
-* range. VT-d spec 6.5.2.6.
-* e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
-* size order = 0 is PAGE_SIZE 4KB
-* Max Invs Pending (MIP) is set to 0 for now until we have DIT in
-* ECAP.
-*/
-if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
-pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
-addr, size_order);
-/* Take page address */
-desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
-if (size_order) {
-/*
-* Existing 0s in address below size_order may be the least
-* significant bit, we must set them to 1s to avoid having
-* smaller size than desired.
-*/
-desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
-VTD_PAGE_SHIFT);
-/* Clear size_order bit to indicate size */
-desc.qw1 &= ~mask;
-/* Set the S bit to indicate flushing more than 1 page */
-desc.qw1 |= QI_DEV_EIOTLB_SIZE;
-}
qi_desc_dev_iotlb_pasid(sid, pfsid, pasid,
qdep, addr, size_order,
&desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
......
...@@ -584,11 +584,23 @@ struct iommu_domain_info {
* to VT-d spec, section 9.3 */
};
/*
* We start simply by using a fixed size for the batched descriptors. This
* size is currently sufficient for our needs. Future improvements could
* involve dynamically allocating the batch buffer based on actual demand,
* allowing us to adjust the batch size for optimal performance in different
* scenarios.
*/
#define QI_MAX_BATCHED_DESC_COUNT 16
struct qi_batch {
struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];
unsigned int index;
};
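struct qi_batch is just a fixed array of descriptors plus a fill index. A plausible accumulate-and-flush pattern built on qi_submit_sync() could look like the sketch below; the helper name and the exact flush policy are invented here, and the real batching helpers live in the collapsed cache.c diff:

/* Hypothetical helper, for illustration only. */
static void my_qi_batch_add(struct intel_iommu *iommu, struct qi_batch *batch,
			    struct qi_desc *desc)
{
	batch->descs[batch->index++] = *desc;
	if (batch->index == QI_MAX_BATCHED_DESC_COUNT) {
		/* Batch is full: issue all descriptors in one submission. */
		qi_submit_sync(iommu, batch->descs, batch->index, 0);
		batch->index = 0;
	}
}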
struct dmar_domain {
int nid; /* node id */
struct xarray iommu_array; /* Attached IOMMU array */
-u8 has_iotlb_device: 1;
u8 iommu_coherency: 1; /* indicate coherency of iommu access */
u8 force_snooping : 1; /* Create IOPTEs with snoop control */
u8 set_pte_snp:1;
...@@ -609,6 +621,7 @@ struct dmar_domain {
spinlock_t cache_lock; /* Protect the cache tag list */
struct list_head cache_tags; /* Cache tag list */
struct qi_batch *qi_batch; /* Batched QI descriptors */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
...@@ -1067,6 +1080,115 @@ static inline unsigned long nrpages_to_size(unsigned long npages)
return npages << VTD_PAGE_SHIFT;
}
static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type,
struct qi_desc *desc)
{
u8 dw = 0, dr = 0;
int ih = 0;
if (cap_write_drain(iommu->cap))
dw = 1;
if (cap_read_drain(iommu->cap))
dr = 1;
desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
| QI_IOTLB_AM(size_order);
desc->qw2 = 0;
desc->qw3 = 0;
}
static inline void qi_desc_dev_iotlb(u16 sid, u16 pfsid, u16 qdep, u64 addr,
unsigned int mask, struct qi_desc *desc)
{
if (mask) {
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
desc->qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
} else {
desc->qw1 = QI_DEV_IOTLB_ADDR(addr);
}
if (qdep >= QI_DEV_IOTLB_MAX_INVS)
qdep = 0;
desc->qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
desc->qw2 = 0;
desc->qw3 = 0;
}
static inline void qi_desc_piotlb(u16 did, u32 pasid, u64 addr,
unsigned long npages, bool ih,
struct qi_desc *desc)
{
if (npages == -1) {
desc->qw0 = QI_EIOTLB_PASID(pasid) |
QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
QI_EIOTLB_TYPE;
desc->qw1 = 0;
} else {
int mask = ilog2(__roundup_pow_of_two(npages));
unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
addr = ALIGN_DOWN(addr, align);
desc->qw0 = QI_EIOTLB_PASID(pasid) |
QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
QI_EIOTLB_TYPE;
desc->qw1 = QI_EIOTLB_ADDR(addr) |
QI_EIOTLB_IH(ih) |
QI_EIOTLB_AM(mask);
}
}
static inline void qi_desc_dev_iotlb_pasid(u16 sid, u16 pfsid, u32 pasid,
u16 qdep, u64 addr,
unsigned int size_order,
struct qi_desc *desc)
{
unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
desc->qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
QI_DEV_IOTLB_PFSID(pfsid);
/*
* If S bit is 0, we only flush a single page. If S bit is set,
* The least significant zero bit indicates the invalidation address
* range. VT-d spec 6.5.2.6.
* e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
* size order = 0 is PAGE_SIZE 4KB
* Max Invs Pending (MIP) is set to 0 for now until we have DIT in
* ECAP.
*/
if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
addr, size_order);
/* Take page address */
desc->qw1 = QI_DEV_EIOTLB_ADDR(addr);
if (size_order) {
/*
* Existing 0s in address below size_order may be the least
* significant bit, we must set them to 1s to avoid having
* smaller size than desired.
*/
desc->qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
VTD_PAGE_SHIFT);
/* Clear size_order bit to indicate size */
desc->qw1 &= ~mask;
/* Set the S bit to indicate flushing more than 1 page */
desc->qw1 |= QI_DEV_EIOTLB_SIZE;
}
}
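The size encoding above is easiest to check with concrete numbers. A standalone userspace sketch (assumed example values, not kernel code) for size_order = 2, i.e. a 16 KiB invalidation range:

/* Standalone illustration of the address/size encoding above. */
#include <stdio.h>
#include <stdint.h>

#define VTD_PAGE_SHIFT	12
#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	uint64_t addr = 0x10000;	/* aligned example address */
	unsigned int size_order = 2;	/* 4 KiB << 2 = 16 KiB */
	uint64_t qw1 = addr;

	qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, VTD_PAGE_SHIFT);
	qw1 &= ~(1ULL << (VTD_PAGE_SHIFT + size_order - 1));

	/* Bit 12 ends up set and bit 13 clear: the least significant zero
	 * bit (13) encodes an invalidation range of 2^(13+1) = 16 KiB. */
	printf("qw1 address/size field: %#llx\n", (unsigned long long)qw1);
	return 0;
}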
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds) (((pds) & 0x7) << 9)
...@@ -1098,13 +1220,15 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options);
void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type);
/*
* Options used in qi_submit_sync:
* QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
-void domain_update_iotlb(struct dmar_domain *domain);
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void device_block_translation(struct device *dev);
......
...@@ -66,8 +66,6 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
list_add(&info->link, &dmar_domain->devices);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
-domain_update_iotlb(dmar_domain);
return 0;
unassign_tag:
cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID);
...@@ -85,6 +83,7 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
spin_lock(&s2_domain->s1_lock);
list_del(&dmar_domain->s2_link);
spin_unlock(&s2_domain->s1_lock);
kfree(dmar_domain->qi_batch);
kfree(dmar_domain);
}
......
...@@ -264,8 +264,6 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
else
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
-/* Device IOTLB doesn't need to be flushed in caching mode. */
-if (!cap_caching_mode(iommu->cap))
devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
...@@ -493,8 +491,6 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
-/* Device IOTLB doesn't need to be flushed in caching mode. */
-if (!cap_caching_mode(iommu->cap))
devtlb_invalidation_with_pasid(iommu, dev, pasid);
return 0;
...@@ -572,8 +568,6 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
-/* Device IOTLB doesn't need to be flushed in caching mode. */
-if (!cap_caching_mode(iommu->cap))
devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
......
...@@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static void intel_mm_free_notifier(struct mmu_notifier *mn)
{
-kfree(container_of(mn, struct dmar_domain, notifier));
struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
kfree(domain->qi_batch);
kfree(domain);
}
static const struct mmu_notifier_ops intel_mmuops = {
...@@ -311,7 +314,7 @@ void intel_drain_pasid_prq(struct device *dev, u32 pasid)
domain = info->domain;
pdev = to_pci_dev(dev);
sid = PCI_DEVID(info->bus, info->devfn);
-did = domain_id_iommu(domain, iommu);
did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
qdep = pci_ats_queue_depth(pdev);
/*
......
...@@ -274,13 +274,13 @@ static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}
-static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
{
for (int i = 0; i < num_entries; i++)
ptep[i] = 0;
-*ptep = 0;
-if (!cfg->coherent_walk)
-__arm_lpae_sync_pte(ptep, 1, cfg);
if (!cfg->coherent_walk && num_entries)
__arm_lpae_sync_pte(ptep, num_entries, cfg);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
...@@ -653,26 +653,29 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
num_entries = min_t(int, pgcount, max_entries);
-while (i < num_entries) {
-pte = READ_ONCE(*ptep);
/* Find and handle non-leaf entries */
for (i = 0; i < num_entries; i++) {
pte = READ_ONCE(ptep[i]);
if (WARN_ON(!pte))
break;
-__arm_lpae_clear_pte(ptep, &iop->cfg);
if (!iopte_leaf(pte, lvl, iop->fmt)) {
__arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);
/* Also flush any partial walks */
io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
ARM_LPAE_GRANULE(data));
__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
-} else if (!iommu_iotlb_gather_queued(gather)) {
-io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
}
-ptep++;
-i++;
}
/* Clear the remaining entries */
__arm_lpae_clear_pte(ptep, &iop->cfg, i);
if (gather && !iommu_iotlb_gather_queued(gather))
for (int j = 0; j < i; j++)
io_pgtable_tlb_add_page(iop, gather, iova + j * size, size);
return i * size;
} else if (iopte_leaf(pte, lvl, iop->fmt)) {
/*
......
...@@ -214,7 +214,7 @@ void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
* that represent reservations in the IOVA space, which are regions that should
* not be mapped.
*/
-if (of_find_property(it.node, "reg", NULL)) {
if (of_property_present(it.node, "reg")) {
err = of_address_to_resource(it.node, 0, &phys);
if (err < 0) {
dev_err(dev, "failed to parse memory region %pOF: %d\n",
......
...@@ -47,6 +47,39 @@ bool pci_ats_supported(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_ats_supported);
/**
* pci_prepare_ats - Setup the PS for ATS
* @dev: the PCI device
* @ps: the IOMMU page shift
*
* This must be done by the IOMMU driver on the PF before any VFs are created to
* ensure that the VF can have ATS enabled.
*
* Returns 0 on success, or negative on failure.
*/
int pci_prepare_ats(struct pci_dev *dev, int ps)
{
u16 ctrl;
if (!pci_ats_supported(dev))
return -EINVAL;
if (WARN_ON(dev->ats_enabled))
return -EBUSY;
if (ps < PCI_ATS_MIN_STU)
return -EINVAL;
if (dev->is_virtfn)
return 0;
dev->ats_stu = ps;
ctrl = PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU);
pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
return 0;
}
EXPORT_SYMBOL_GPL(pci_prepare_ats);
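A minimal sketch of a caller, assuming an IOMMU driver that wants the VFs of this PF to be able to turn on ATS later; the my_* name and the choice of PAGE_SHIFT as the STU are illustrative, not taken from this series:

/* Hypothetical caller, for illustration only. */
static void my_iommu_prepare_pf_ats(struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(dev))
		return;

	pdev = to_pci_dev(dev);
	/* Program the PF's Smallest Translation Unit while no VFs exist yet;
	 * pci_prepare_ats() itself returns early for VFs and rejects a page
	 * shift below PCI_ATS_MIN_STU. */
	if (pci_prepare_ats(pdev, PAGE_SHIFT))
		dev_dbg(dev, "could not prepare ATS STU\n");
}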
/**
* pci_enable_ats - enable the ATS capability
* @dev: the PCI device
......
...@@ -171,6 +171,10 @@ struct io_pgtable_cfg {
u64 ttbr[4];
u32 n_ttbrs;
} apple_dart_cfg;
struct {
int nid;
} amd;
};
};
......