Commit 3b6411c2, authored by Ping Gao, committed by Zhenyu Wang

drm/i915/gvt: implement scratch page table tree for shadow PPGTT

All unused entries in the page table tree (PML4E->PDPE->PDE->PTE)
should point to a scratch page table or scratch page, to avoid page walk
errors caused by page prefetching.
When an entry is removed from the shadow PPGTT, it also needs to be mapped
to a scratch page. The older implementation assigned a single scratch page
to the entries at every level, which does not match the page walk behavior
when the removed entry is in a PML4, PDP or PD. To avoid potential page
walk errors, this patch implements a scratch page tree to replace the
single scratch page.

v2: add more details to the commit message to address Kevin's comments.
Signed-off-by: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
parent 2fb39fad
...@@ -138,36 +138,6 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, ...@@ -138,36 +138,6 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
memcpy(&(e)->val64, &v, sizeof(v)); \ memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0) } while (0)
/*
 * Type tags for GTT entries and PPGTT page tables.  The numeric values
 * are assigned sequentially starting from GTT_TYPE_GGTT_PTE == 0, with
 * GTT_TYPE_INVALID reserved as -1 and GTT_TYPE_MAX as the terminator.
 */
enum {
	GTT_TYPE_INVALID = -1,

	/* GGTT entry */
	GTT_TYPE_GGTT_PTE = 0,

	/* PPGTT entry types */
	GTT_TYPE_PPGTT_PTE_4K_ENTRY,
	GTT_TYPE_PPGTT_PTE_2M_ENTRY,
	GTT_TYPE_PPGTT_PTE_1G_ENTRY,
	GTT_TYPE_PPGTT_PTE_ENTRY,
	GTT_TYPE_PPGTT_PDE_ENTRY,
	GTT_TYPE_PPGTT_PDP_ENTRY,
	GTT_TYPE_PPGTT_PML4_ENTRY,
	GTT_TYPE_PPGTT_ROOT_ENTRY,
	GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
	GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
	GTT_TYPE_PPGTT_ENTRY,

	/* PPGTT page table types */
	GTT_TYPE_PPGTT_PTE_PT,
	GTT_TYPE_PPGTT_PDE_PT,
	GTT_TYPE_PPGTT_PDP_PT,
	GTT_TYPE_PPGTT_PML4_PT,

	/* Terminator: one past the last valid type. */
	GTT_TYPE_MAX,
};
/* /*
* Mappings between GTT_TYPE* enumerations. * Mappings between GTT_TYPE* enumerations.
* Following information can be found according to the given type: * Following information can be found according to the given type:
...@@ -842,13 +812,18 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, ...@@ -842,13 +812,18 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
{ {
struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
struct intel_vgpu_ppgtt_spt *s; struct intel_vgpu_ppgtt_spt *s;
intel_gvt_gtt_type_t cur_pt_type;
if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type)))) if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type))))
return -EINVAL; return -EINVAL;
if (ops->get_pfn(e) == vgpu->gtt.scratch_page_mfn) if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
cur_pt_type = get_next_pt_type(e->type) + 1;
if (ops->get_pfn(e) ==
vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
return 0; return 0;
}
s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
if (!s) { if (!s) {
gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n", gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
...@@ -1015,7 +990,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, ...@@ -1015,7 +990,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
if (!ops->test_present(&e)) if (!ops->test_present(&e))
return 0; return 0;
if (ops->get_pfn(&e) == vgpu->gtt.scratch_page_mfn) if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
return 0; return 0;
if (gtt_type_is_pt(get_next_pt_type(we->type))) { if (gtt_type_is_pt(get_next_pt_type(we->type))) {
...@@ -1030,7 +1005,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, ...@@ -1030,7 +1005,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
if (ret) if (ret)
goto fail; goto fail;
} }
ops->set_pfn(&e, vgpu->gtt.scratch_page_mfn); ops->set_pfn(&e, vgpu->gtt.scratch_pt[sp->type].page_mfn);
ppgtt_set_shadow_entry(spt, &e, index); ppgtt_set_shadow_entry(spt, &e, index);
return 0; return 0;
fail: fail:
...@@ -1921,47 +1896,101 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ...@@ -1921,47 +1896,101 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
return ret; return ret;
} }
/*
 * alloc_scratch_pages - set up the scratch page used by one page-table level
 * @vgpu: vGPU whose scratch page tree is being built
 * @type: page-table type that will use this scratch page; must lie in
 *        [GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_MAX)
 *
 * Allocates one zeroed page and records it, together with its MFN, in
 * vgpu->gtt.scratch_pt[@type].  For every level above
 * GTT_TYPE_PPGTT_PTE_PT the page is then filled with entries that point at
 * scratch_pt[@type - 1], so that lower slot must already be populated when
 * this function is called.
 *
 * On success the page stays referenced from gtt->scratch_pt[@type]; on
 * failure nothing is left allocated by this call.
 *
 * Return: 0 on success, -EINVAL if @type is out of range, -ENOMEM if the
 * page cannot be allocated, -EFAULT if its address cannot be translated to
 * an MFN.
 */
static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t type)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	struct page *scratch_pt;
	unsigned long mfn;
	int i;
	void *p;

	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	/*
	 * Use plain GFP_KERNEL: GFP_KERNEL and GFP_ATOMIC describe
	 * contradictory allocation contexts and must not be OR-ed together.
	 * NOTE(review): assumes this is only reached from process context
	 * (vGPU GTT initialisation) - confirm against callers.
	 */
	scratch_pt = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!scratch_pt) {
		gvt_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	p = kmap_atomic(scratch_pt);
	mfn = intel_gvt_hypervisor_virt_to_mfn(p);
	if (mfn == INTEL_GVT_INVALID_ADDR) {
		gvt_err("fail to translate vaddr:0x%llx\n", (u64)p);
		kunmap_atomic(p);
		__free_page(scratch_pt);
		return -EFAULT;
	}
	gtt->scratch_pt[type].page_mfn = mfn;
	gtt->scratch_pt[type].page = scratch_pt;
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
			vgpu->id, type, mfn);

	/*
	 * Build the tree by completely filling the scratch pt with entries
	 * that point to the next lower level scratch pt or scratch page.
	 * scratch_pt[type] is the scratch pt/scratch page used by a page
	 * table of that 'type'.
	 * E.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
	 * GTT_TYPE_PPGTT_PDE_PT level pt, which means this scratch_pt itself
	 * acts as a GTT_TYPE_PPGTT_PTE_PT and is filled with the scratch
	 * page mfn.
	 *
	 * The upper bound of 'type' was already validated above, so only the
	 * lowest level (which stays all-zero) needs to be excluded here.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(se));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/*
		 * The entry parameters (present/writeable/cache type) are
		 * set the same way as in i915's scratch page tree.
		 */
		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED_INDEX;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(p, &se, i, false, 0, vgpu);
	}

	kunmap_atomic(p);

	return 0;
}
static void release_scratch_page(struct intel_vgpu *vgpu) static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{ {
if (vgpu->gtt.scratch_page != NULL) { int i;
__free_page(vgpu->gtt.scratch_page);
vgpu->gtt.scratch_page = NULL; for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
vgpu->gtt.scratch_page_mfn = 0; if (vgpu->gtt.scratch_pt[i].page != NULL) {
__free_page(vgpu->gtt.scratch_pt[i].page);
vgpu->gtt.scratch_pt[i].page = NULL;
vgpu->gtt.scratch_pt[i].page_mfn = 0;
}
} }
return 0;
}
/*
 * create_scratch_page_tree - allocate the scratch page for every PT level
 * @vgpu: vGPU that receives the scratch page tree
 *
 * Calls alloc_scratch_pages() once per type, starting at
 * GTT_TYPE_PPGTT_PTE_PT and going upwards; the ascending order matters
 * because each level is filled with entries that reference the slot of
 * the level below it.  If any allocation fails, everything allocated so
 * far is released again.
 *
 * Return: 0 on success, otherwise the error returned by
 * alloc_scratch_pages().
 */
static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int type;

	for (type = GTT_TYPE_PPGTT_PTE_PT; type < GTT_TYPE_MAX; type++) {
		int err = alloc_scratch_pages(vgpu, type);

		if (err) {
			/* Undo the levels that were already set up. */
			release_scratch_page_tree(vgpu);
			return err;
		}
	}

	return 0;
}
/** /**
...@@ -1995,7 +2024,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) ...@@ -1995,7 +2024,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
gtt->ggtt_mm = ggtt_mm; gtt->ggtt_mm = ggtt_mm;
return create_scratch_page(vgpu); return create_scratch_page_tree(vgpu);
} }
/** /**
...@@ -2014,7 +2043,7 @@ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) ...@@ -2014,7 +2043,7 @@ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
struct intel_vgpu_mm *mm; struct intel_vgpu_mm *mm;
ppgtt_free_all_shadow_page(vgpu); ppgtt_free_all_shadow_page(vgpu);
release_scratch_page(vgpu); release_scratch_page_tree(vgpu);
list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
mm = container_of(pos, struct intel_vgpu_mm, list); mm = container_of(pos, struct intel_vgpu_mm, list);
......
...@@ -88,6 +88,36 @@ enum { ...@@ -88,6 +88,36 @@ enum {
INTEL_GVT_MM_PPGTT, INTEL_GVT_MM_PPGTT,
}; };
/*
 * Type tags for GTT entries and PPGTT page tables.
 *
 * Numeric order matters: the scratch page tree code indexes scratch_pt[]
 * with these values, iterates from GTT_TYPE_PPGTT_PTE_PT up to
 * GTT_TYPE_MAX, and moves between page-table levels with "type - 1" and
 * "+ 1".  Keep the *_PT values consecutive, ordered from the lowest level
 * (PTE) to the highest (PML4), and keep GTT_TYPE_MAX last.
 */
typedef enum {
	GTT_TYPE_INVALID = -1,

	/* GGTT entry */
	GTT_TYPE_GGTT_PTE = 0,

	/* PPGTT entry types */
	GTT_TYPE_PPGTT_PTE_4K_ENTRY,
	GTT_TYPE_PPGTT_PTE_2M_ENTRY,
	GTT_TYPE_PPGTT_PTE_1G_ENTRY,
	GTT_TYPE_PPGTT_PTE_ENTRY,
	GTT_TYPE_PPGTT_PDE_ENTRY,
	GTT_TYPE_PPGTT_PDP_ENTRY,
	GTT_TYPE_PPGTT_PML4_ENTRY,
	GTT_TYPE_PPGTT_ROOT_ENTRY,
	GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
	GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
	GTT_TYPE_PPGTT_ENTRY,

	/* PPGTT page table types, lowest level first */
	GTT_TYPE_PPGTT_PTE_PT,
	GTT_TYPE_PPGTT_PDE_PT,
	GTT_TYPE_PPGTT_PDP_PT,
	GTT_TYPE_PPGTT_PML4_PT,

	/* One past the last valid type; also used to size per-type arrays. */
	GTT_TYPE_MAX,
} intel_gvt_gtt_type_t;
struct intel_vgpu_mm { struct intel_vgpu_mm {
int type; int type;
bool initialized; bool initialized;
...@@ -151,6 +181,12 @@ extern void intel_vgpu_destroy_mm(struct kref *mm_ref); ...@@ -151,6 +181,12 @@ extern void intel_vgpu_destroy_mm(struct kref *mm_ref);
struct intel_vgpu_guest_page; struct intel_vgpu_guest_page;
/*
 * One slot of a vGPU's scratch page table tree.  struct intel_vgpu_gtt
 * holds an array of these indexed by GTT type (scratch_pt[GTT_TYPE_MAX]).
 */
struct intel_vgpu_scratch_pt {
/* Page obtained from alloc_page(); NULL while the slot is unused. */
struct page *page;
/*
 * MFN of @page as returned by intel_gvt_hypervisor_virt_to_mfn(); this is
 * the value written into shadow entries that must point at the scratch
 * page.  Reset to 0 when the slot is released.
 */
unsigned long page_mfn;
};
struct intel_vgpu_gtt { struct intel_vgpu_gtt {
struct intel_vgpu_mm *ggtt_mm; struct intel_vgpu_mm *ggtt_mm;
unsigned long active_ppgtt_mm_bitmap; unsigned long active_ppgtt_mm_bitmap;
...@@ -160,8 +196,8 @@ struct intel_vgpu_gtt { ...@@ -160,8 +196,8 @@ struct intel_vgpu_gtt {
atomic_t n_write_protected_guest_page; atomic_t n_write_protected_guest_page;
struct list_head oos_page_list_head; struct list_head oos_page_list_head;
struct list_head post_shadow_list_head; struct list_head post_shadow_list_head;
struct page *scratch_page; struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX];
unsigned long scratch_page_mfn;
}; };
extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment