Commit 2b681faf authored by Joerg Roedel's avatar Joerg Roedel

Merge branch 'amd-iommu/pagetable' into amd-iommu/2.6.32

Conflicts:
	arch/x86/kernel/amd_iommu.c
parents 03362a05 abdc5eb3
......@@ -147,19 +147,25 @@
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
#define PAGE_MODE_3_LEVEL 0x03
#define IOMMU_PDE_NL_0 0x000ULL
#define IOMMU_PDE_NL_1 0x200ULL
#define IOMMU_PDE_NL_2 0x400ULL
#define IOMMU_PDE_NL_3 0x600ULL
#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL)
#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL)
#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL)
#define IOMMU_MAP_SIZE_L1 (1ULL << 21)
#define IOMMU_MAP_SIZE_L2 (1ULL << 30)
#define IOMMU_MAP_SIZE_L3 (1ULL << 39)
#define PAGE_MODE_4_LEVEL 0x04
#define PAGE_MODE_5_LEVEL 0x05
#define PAGE_MODE_6_LEVEL 0x06
#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
(0xffffffffffffffffULL))
#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL)
#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL)
#define PM_MAP_4k 0
#define PM_ADDR_MASK 0x000ffffffffff000ULL
#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \
(~((1ULL << (12 + ((lvl) * 9))) - 1)))
#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr))
#define IOMMU_PTE_P (1ULL << 0)
#define IOMMU_PTE_TV (1ULL << 1)
......@@ -168,11 +174,6 @@
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
#define IOMMU_L1_PDE(address) \
((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
#define IOMMU_L2_PDE(address) \
((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
......@@ -230,6 +231,7 @@ struct protection_domain {
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
unsigned long flags; /* flags to find out type of domain */
bool updated; /* complete domain flush required */
unsigned dev_cnt; /* devices assigned to this domain */
void *priv; /* private data */
};
......
......@@ -47,7 +47,6 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock);
*/
static struct protection_domain *pt_domain;
#ifdef CONFIG_IOMMU_API
static struct iommu_ops amd_iommu_ops;
/*
......@@ -60,13 +59,16 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);
static u64* alloc_pte(struct protection_domain *dom,
unsigned long address, u64
**pte_page, gfp_t gfp);
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address, int end_lvl,
u64 **pte_page, gfp_t gfp);
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages);
static void reset_iommu_command_buffer(struct amd_iommu *iommu);
static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address, int map_size);
static void update_domain(struct protection_domain *domain);
#ifdef CONFIG_AMD_IOMMU_STATS
......@@ -535,12 +537,15 @@ static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
}
}
void amd_iommu_flush_all_devices(void)
static void flush_devices_by_domain(struct protection_domain *domain)
{
struct amd_iommu *iommu;
int i;
for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
(amd_iommu_pd_table[i] != domain))
continue;
iommu = amd_iommu_rlookup_table[i];
if (!iommu)
......@@ -567,6 +572,11 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu)
iommu->reset_in_progress = false;
}
void amd_iommu_flush_all_devices(void)
{
flush_devices_by_domain(NULL);
}
/****************************************************************************
*
* The functions below are used the create the page table mappings for
......@@ -584,18 +594,21 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu)
static int iommu_map_page(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
int prot,
int map_size)
{
u64 __pte, *pte;
bus_addr = PAGE_ALIGN(bus_addr);
phys_addr = PAGE_ALIGN(phys_addr);
/* only support 512GB address spaces for now */
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
BUG_ON(!PM_ALIGNED(map_size, bus_addr));
BUG_ON(!PM_ALIGNED(map_size, phys_addr));
if (!(prot & IOMMU_PROT_MASK))
return -EINVAL;
pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL);
if (IOMMU_PTE_PRESENT(*pte))
return -EBUSY;
......@@ -608,28 +621,17 @@ static int iommu_map_page(struct protection_domain *dom,
*pte = __pte;
update_domain(dom);
return 0;
}
static void iommu_unmap_page(struct protection_domain *dom,
unsigned long bus_addr)
unsigned long bus_addr, int map_size)
{
u64 *pte;
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
u64 *pte = fetch_pte(dom, bus_addr, map_size);
if (pte)
*pte = 0;
}
......@@ -685,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
for (addr = e->address_start; addr < e->address_end;
addr += PAGE_SIZE) {
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
PM_MAP_4k);
if (ret)
return ret;
/*
......@@ -740,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
* This function checks if there is a PTE for a given dma address. If
* there is one, it returns the pointer to it.
*/
static u64* fetch_pte(struct protection_domain *domain,
unsigned long address)
static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address, int map_size)
{
int level;
u64 *pte;
pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
while (level > map_size) {
if (!IOMMU_PTE_PRESENT(*pte))
return NULL;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(address)];
if (!IOMMU_PTE_PRESENT(*pte))
return NULL;
level -= 1;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(address)];
pte = &pte[PM_LEVEL_INDEX(level, address)];
if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
pte = NULL;
break;
}
}
return pte;
}
......@@ -797,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu,
u64 *pte, *pte_page;
for (i = 0; i < num_ptes; ++i) {
pte = alloc_pte(&dma_dom->domain, address,
pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k,
&pte_page, gfp);
if (!pte)
goto out_free;
......@@ -830,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu,
for (i = dma_dom->aperture[index]->offset;
i < dma_dom->aperture_size;
i += PAGE_SIZE) {
u64 *pte = fetch_pte(&dma_dom->domain, i);
u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
continue;
dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
}
update_domain(&dma_dom->domain);
return 0;
out_free:
update_domain(&dma_dom->domain);
free_page((unsigned long)dma_dom->aperture[index]->bitmap);
kfree(dma_dom->aperture[index]);
......@@ -1079,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
dma_dom->domain.id = domain_id_alloc();
if (dma_dom->domain.id == 0)
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
dma_dom->domain.priv = dma_dom;
......@@ -1133,20 +1145,9 @@ static struct protection_domain *domain_for_device(u16 devid)
return dom;
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
static void __attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
static void set_dte_entry(u16 devid, struct protection_domain *domain)
{
u64 pte_root;
/* lock domain */
spin_lock(&domain->lock);
pte_root = virt_to_phys(domain->pt_root);
u64 pte_root = virt_to_phys(domain->pt_root);
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
......@@ -1157,6 +1158,21 @@ static void __attach_device(struct amd_iommu *iommu,
amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
amd_iommu_pd_table[devid] = domain;
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
static void __attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
{
/* lock domain */
spin_lock(&domain->lock);
/* update DTE entry */
set_dte_entry(devid, domain);
domain->dev_cnt += 1;
......@@ -1164,6 +1180,10 @@ static void __attach_device(struct amd_iommu *iommu,
spin_unlock(&domain->lock);
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
static void attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
......@@ -1389,39 +1409,91 @@ static int get_device_resources(struct device *dev,
return 1;
}
static void update_device_table(struct protection_domain *domain)
{
unsigned long flags;
int i;
for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (amd_iommu_pd_table[i] != domain)
continue;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
set_dte_entry(i, domain);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
}
static void update_domain(struct protection_domain *domain)
{
if (!domain->updated)
return;
update_device_table(domain);
flush_devices_by_domain(domain);
iommu_flush_domain(domain->id);
domain->updated = false;
}
/*
* If the pte_page is not yet allocated this function is called
* This function is used to add another level to an IO page table. Adding
* another level increases the size of the address space by 9 bits to a size up
* to 64 bits.
*/
static u64* alloc_pte(struct protection_domain *dom,
unsigned long address, u64 **pte_page, gfp_t gfp)
static bool increase_address_space(struct protection_domain *domain,
gfp_t gfp)
{
u64 *pte, *page;
u64 *pte;
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
if (domain->mode == PAGE_MODE_6_LEVEL)
/* address space already 64 bit large */
return false;
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = IOMMU_L2_PDE(virt_to_phys(page));
}
pte = (void *)get_zeroed_page(gfp);
if (!pte)
return false;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(address)];
*pte = PM_LEVEL_PDE(domain->mode,
virt_to_phys(domain->pt_root));
domain->pt_root = pte;
domain->mode += 1;
domain->updated = true;
return true;
}
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address,
int end_lvl,
u64 **pte_page,
gfp_t gfp)
{
u64 *pte, *page;
int level;
while (address > PM_LEVEL_SIZE(domain->mode))
increase_address_space(domain, gfp);
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
while (level > end_lvl) {
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = IOMMU_L1_PDE(virt_to_phys(page));
*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
}
level -= 1;
pte = IOMMU_PTE_PAGE(*pte);
if (pte_page)
if (pte_page && level == end_lvl)
*pte_page = pte;
pte = &pte[IOMMU_PTE_L0_INDEX(address)];
pte = &pte[PM_LEVEL_INDEX(level, address)];
}
return pte;
}
......@@ -1441,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
if (!pte) {
pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page,
GFP_ATOMIC);
aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
} else
pte += IOMMU_PTE_L0_INDEX(address);
pte += PM_LEVEL_INDEX(0, address);
update_domain(&dom->domain);
return pte;
}
......@@ -1506,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
if (!pte)
return;
pte += IOMMU_PTE_L0_INDEX(address);
pte += PM_LEVEL_INDEX(0, address);
WARN_ON(!*pte);
......@@ -2240,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
paddr &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
ret = iommu_map_page(domain, iova, paddr, prot);
ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
if (ret)
return ret;
......@@ -2261,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
iova &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
iommu_unmap_page(domain, iova);
iommu_unmap_page(domain, iova, PM_MAP_4k);
iova += PAGE_SIZE;
}
......@@ -2276,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
phys_addr_t paddr;
u64 *pte;
pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = fetch_pte(domain, iova, PM_MAP_4k);
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
if (!pte || !IOMMU_PTE_PRESENT(*pte))
return 0;
paddr = *pte & IOMMU_PAGE_MASK;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment