Commit 21c1f902 authored by Niklas Schnelle, committed by Joerg Roedel

s390/pci: use lock-free I/O translation updates

I/O translation tables on s390 use 8 byte page table entries and tables
which are allocated lazily but only freed when the entire I/O
translation table is torn down. Also, each IOVA can at any time only
translate to one physical address. Furthermore, I/O table accesses by
the IOMMU hardware are cache coherent. With a bit of care we can thus
use atomic updates to manipulate the translation table without having
to use a global lock at all. This is done analogously to the existing
I/O translation table handling code used on Intel and AMD x86 systems.
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221109142903.4080275-6-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 08955af0
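
To make the scheme described above concrete, here is a minimal user-space sketch of the lazy, lock-free table population that dma_get_seg_table_origin() and dma_get_page_table_origin() implement in the diff below. C11 atomics stand in for the kernel's READ_ONCE() and cmpxchg(); the entry encoding, table size, and the helper name get_or_alloc_table() are simplified stand-ins, not the real s390 format:

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>

#define ENTRY_INVALID 0ULL	/* stand-in for ZPCI_TABLE_INVALID */

/*
 * Lazily install a lower-level table under an entry without taking a
 * lock: build the new entry in a local variable, then publish it with
 * a single compare-and-swap. The loser of a race frees its freshly
 * allocated table and uses the winner's instead.
 */
static uint64_t *get_or_alloc_table(_Atomic uint64_t *entryp)
{
	uint64_t entry = atomic_load_explicit(entryp, memory_order_acquire);
	uint64_t expected = ENTRY_INVALID;
	uint64_t *table;

	if (entry != ENTRY_INVALID)	/* already populated, reuse it */
		return (uint64_t *)(uintptr_t)entry;

	table = calloc(512, sizeof(*table));	/* new, all-invalid table */
	if (!table)
		return NULL;

	if (!atomic_compare_exchange_strong(entryp, &expected,
					    (uint64_t)(uintptr_t)table)) {
		/* Someone else was faster, use theirs */
		free(table);
		table = (uint64_t *)(uintptr_t)expected;
	}
	return table;
}

The key point, mirrored in the kernel code below: the new entry is fully assembled before the single compare-and-swap publishes it, so a concurrent walker either sees an invalid entry or a complete, valid one, never an intermediate state.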
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -157,7 +157,6 @@ struct zpci_dev {
 	/* DMA stuff */
 	unsigned long	*dma_table;
-	spinlock_t	dma_table_lock;
 	int		tlb_refresh;
 	spinlock_t	iommu_bitmap_lock;
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -63,37 +63,55 @@ static void dma_free_page_table(void *table)
 	kmem_cache_free(dma_page_table_cache, table);
 }
 
-static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
 {
+	unsigned long old_rte, rte;
 	unsigned long *sto;
 
-	if (reg_entry_isvalid(*entry))
-		sto = get_rt_sto(*entry);
-	else {
+	rte = READ_ONCE(*rtep);
+	if (reg_entry_isvalid(rte)) {
+		sto = get_rt_sto(rte);
+	} else {
 		sto = dma_alloc_cpu_table();
 		if (!sto)
 			return NULL;
 
-		set_rt_sto(entry, virt_to_phys(sto));
-		validate_rt_entry(entry);
-		entry_clr_protected(entry);
+		set_rt_sto(&rte, virt_to_phys(sto));
+		validate_rt_entry(&rte);
+		entry_clr_protected(&rte);
+
+		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
+		if (old_rte != ZPCI_TABLE_INVALID) {
+			/* Someone else was faster, use theirs */
+			dma_free_cpu_table(sto);
+			sto = get_rt_sto(old_rte);
+		}
 	}
 	return sto;
 }
 
-static unsigned long *dma_get_page_table_origin(unsigned long *entry)
+static unsigned long *dma_get_page_table_origin(unsigned long *step)
 {
+	unsigned long old_ste, ste;
 	unsigned long *pto;
 
-	if (reg_entry_isvalid(*entry))
-		pto = get_st_pto(*entry);
-	else {
+	ste = READ_ONCE(*step);
+	if (reg_entry_isvalid(ste)) {
+		pto = get_st_pto(ste);
+	} else {
 		pto = dma_alloc_page_table();
 		if (!pto)
 			return NULL;
-		set_st_pto(entry, virt_to_phys(pto));
-		validate_st_entry(entry);
-		entry_clr_protected(entry);
+
+		set_st_pto(&ste, virt_to_phys(pto));
+		validate_st_entry(&ste);
+		entry_clr_protected(&ste);
+
+		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
+		if (old_ste != ZPCI_TABLE_INVALID) {
+			/* Someone else was faster, use theirs */
+			dma_free_page_table(pto);
+			pto = get_st_pto(old_ste);
+		}
 	}
 	return pto;
 }
@@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
 	return &pto[px];
 }
 
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
+void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
 {
+	unsigned long pte;
+
+	pte = READ_ONCE(*ptep);
 	if (flags & ZPCI_PTE_INVALID) {
-		invalidate_pt_entry(entry);
+		invalidate_pt_entry(&pte);
 	} else {
-		set_pt_pfaa(entry, page_addr);
-		validate_pt_entry(entry);
+		set_pt_pfaa(&pte, page_addr);
+		validate_pt_entry(&pte);
 	}
 
 	if (flags & ZPCI_TABLE_PROTECTED)
-		entry_set_protected(entry);
+		entry_set_protected(&pte);
 	else
-		entry_clr_protected(entry);
+		entry_clr_protected(&pte);
+
+	xchg(ptep, pte);
 }
 
 static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
@@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
 {
 	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	phys_addr_t page_addr = (pa & PAGE_MASK);
-	unsigned long irq_flags;
 	unsigned long *entry;
 	int i, rc = 0;
 
 	if (!nr_pages)
 		return -EINVAL;
 
-	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
-	if (!zdev->dma_table) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
+	if (!zdev->dma_table)
+		return -EINVAL;
 
 	for (i = 0; i < nr_pages; i++) {
 		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
@@ -173,8 +192,6 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
 			dma_update_cpu_trans(entry, page_addr, flags);
 		}
 	}
-out_unlock:
-	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
 	return rc;
 }
@@ -558,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 	WARN_ON(zdev->s390_domain);
 
 	spin_lock_init(&zdev->iommu_bitmap_lock);
-	spin_lock_init(&zdev->dma_table_lock);
 	zdev->dma_table = dma_alloc_cpu_table();
 	if (!zdev->dma_table) {
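
A side note on the dma_update_cpu_trans() change above: an entry is now read once, modified in a local copy, and published with a single atomic store (xchg() in the kernel). A minimal user-space sketch of that read-modify-publish pattern; the flag bits and the helper name update_entry() are invented for illustration:

#include <stdatomic.h>
#include <stdint.h>

/* Invented entry layout, for illustration only */
#define PTE_VALID	(1ULL << 0)
#define PTE_PROTECTED	(1ULL << 1)
#define PTE_ADDR_MASK	(~0xfffULL)

/*
 * Update a single 64-bit entry without a lock: read it once, modify a
 * local copy, then publish the result with one atomic store so that
 * neither the hardware nor another CPU can observe a half-written
 * entry.
 */
static void update_entry(_Atomic uint64_t *ptep, uint64_t page_addr,
			 int protected)
{
	uint64_t pte = atomic_load_explicit(ptep, memory_order_relaxed);

	pte &= ~(PTE_ADDR_MASK | PTE_PROTECTED);
	pte |= (page_addr & PTE_ADDR_MASK) | PTE_VALID;
	if (protected)
		pte |= PTE_PROTECTED;

	atomic_store_explicit(ptep, pte, memory_order_release);
}

A plain release store is enough for this sketch; the kernel uses xchg(), which also returns the old entry value and acts as a full memory barrier.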
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -20,7 +20,6 @@ struct s390_domain {
 	struct iommu_domain	domain;
 	struct list_head	devices;
 	unsigned long		*dma_table;
-	spinlock_t		dma_table_lock;
 	spinlock_t		list_lock;
 	struct rcu_head		rcu;
 };
@@ -62,7 +61,6 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 	s390_domain->domain.geometry.aperture_start = 0;
 	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
 
-	spin_lock_init(&s390_domain->dma_table_lock);
 	spin_lock_init(&s390_domain->list_lock);
 	INIT_LIST_HEAD_RCU(&s390_domain->devices);
@@ -265,14 +263,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 		unsigned long nr_pages, int flags)
 {
 	phys_addr_t page_addr = pa & PAGE_MASK;
-	unsigned long irq_flags, i;
 	unsigned long *entry;
+	unsigned long i;
 	int rc;
 
-	if (!nr_pages)
-		return 0;
-
-	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
 	for (i = 0; i < nr_pages; i++) {
 		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
 		if (unlikely(!entry)) {
@@ -283,7 +277,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 		page_addr += PAGE_SIZE;
 		dma_addr += PAGE_SIZE;
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
 	return 0;
 
@@ -296,7 +289,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 			break;
 		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
 	return rc;
 }
@@ -304,14 +296,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
 		dma_addr_t dma_addr, unsigned long nr_pages)
 {
-	unsigned long irq_flags, i;
 	unsigned long *entry;
+	unsigned long i;
 	int rc = 0;
 
-	if (!nr_pages)
-		return 0;
-
-	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
 	for (i = 0; i < nr_pages; i++) {
 		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
 		if (unlikely(!entry)) {
@@ -321,7 +309,6 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
 		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
 		dma_addr += PAGE_SIZE;
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
 	return rc;
 }
@@ -363,7 +350,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 					   dma_addr_t iova)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
-	unsigned long *sto, *pto, *rto, flags;
+	unsigned long *rto, *sto, *pto;
+	unsigned long ste, pte, rte;
 	unsigned int rtx, sx, px;
 	phys_addr_t phys = 0;
 
@@ -376,16 +364,17 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 	px = calc_px(iova);
 	rto = s390_domain->dma_table;
 
-	spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
-	if (rto && reg_entry_isvalid(rto[rtx])) {
-		sto = get_rt_sto(rto[rtx]);
-		if (sto && reg_entry_isvalid(sto[sx])) {
-			pto = get_st_pto(sto[sx]);
-			if (pto && pt_entry_isvalid(pto[px]))
-				phys = pto[px] & ZPCI_PTE_ADDR_MASK;
+	rte = READ_ONCE(rto[rtx]);
+	if (reg_entry_isvalid(rte)) {
+		sto = get_rt_sto(rte);
+		ste = READ_ONCE(sto[sx]);
+		if (reg_entry_isvalid(ste)) {
+			pto = get_st_pto(ste);
+			pte = READ_ONCE(pto[px]);
+			if (pt_entry_isvalid(pte))
+				phys = pte & ZPCI_PTE_ADDR_MASK;
 		}
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);
 
 	return phys;
 }
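
Finally, the lockless lookup in s390_iommu_iova_to_phys() above is safe without a reader lock because tables, once installed, are only freed when the entire I/O translation table is torn down, so a walker merely needs tearing-free reads of each entry. A simplified two-level walk in the same user-space style (the entry layout and names are illustrative, not the s390 encoding):

#include <stdatomic.h>
#include <stdint.h>

#define ENTRY_VALID	(1ULL << 0)	/* invented bit, for illustration */
#define ENTRY_ADDR(e)	((_Atomic uint64_t *)(uintptr_t)((e) & ~0xfffULL))

/*
 * Walk a two-level table without locking: each level is read exactly
 * once with an atomic load. Following a valid entry is safe because
 * lower-level tables are never freed while the translation table
 * exists.
 */
static uint64_t lookup(_Atomic uint64_t *root, unsigned int idx1,
		       unsigned int idx2)
{
	uint64_t e1 = atomic_load_explicit(&root[idx1], memory_order_acquire);
	uint64_t e2;

	if (!(e1 & ENTRY_VALID))
		return 0;

	e2 = atomic_load_explicit(&ENTRY_ADDR(e1)[idx2], memory_order_acquire);
	return (e2 & ENTRY_VALID) ? (e2 & ~0xfffULL) : 0;
}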