drm/i915/gvt: implement scratch page table tree for shadow PPGTT

All the unused entries in the page table tree(PML4E->PDPE->PDE->PTE) should point to scratch page table/scratch page to avoid page walk error due to the page prefetching. When removing an entry in shadow PPGTT, it need map to scratch page also, the older implementation use single scratch page to assign to all level entries, it doesn't align the page walk behavior when removed entry is in PML, PDP, PD. To avoid potential page walk error this patch implement a scratch page tree to replace the single scratch page. v2: more details in commit message address Kevin's comments. Signed-off-by: Ping Gao <ping.a.gao@intel.com> Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>

drm/i915/gvt: implement scratch page table tree for shadow PPGTT
All the unused entries in the page table tree(PML4E->PDPE->PDE->PTE) should point to scratch page table/scratch page to avoid page walk error due to the page prefetching. When removing an entry in shadow PPGTT, it need map to scratch page also, the older implementation use single scratch page to assign to all level entries, it doesn't align the page walk behavior when removed entry is in PML, PDP, PD. To avoid potential page walk error this patch implement a scratch page tree to replace the single scratch page. v2: more details in commit message address Kevin's comments. Signed-off-by: Ping Gao <ping.a.gao@intel.com> Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
3b6411c2 · Ping Gao · Zhenyu Wang · 2fb39fad · 3b6411c2 · 3b6411c2
Commit 3b6411c2 authored Nov 04, 2016 by Ping Gao Committed by Zhenyu Wang Nov 07, 2016
Show whitespace changes
Inline Side-by-side

Showing with 128 additions and 63 deletions

drivers/gpu/drm/i915/gvt/gtt.c drivers/gpu/drm/i915/gvt/gtt.c +90 -61

drivers/gpu/drm/i915/gvt/gtt.h drivers/gpu/drm/i915/gvt/gtt.h +38 -2

No files found.
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -138,36 +138,6 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
 	memcpy(&(e)->val64, &v, sizeof(v)); \
 } while (0)
-enum {
-	GTT_TYPE_INVALID = -1,
-	GTT_TYPE_GGTT_PTE,
-	GTT_TYPE_PPGTT_PTE_4K_ENTRY,
-	GTT_TYPE_PPGTT_PTE_2M_ENTRY,
-	GTT_TYPE_PPGTT_PTE_1G_ENTRY,
-	GTT_TYPE_PPGTT_PTE_ENTRY,
-	GTT_TYPE_PPGTT_PDE_ENTRY,
-	GTT_TYPE_PPGTT_PDP_ENTRY,
-	GTT_TYPE_PPGTT_PML4_ENTRY,
-	GTT_TYPE_PPGTT_ROOT_ENTRY,
-	GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
-	GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
-	GTT_TYPE_PPGTT_ENTRY,
-	GTT_TYPE_PPGTT_PTE_PT,
-	GTT_TYPE_PPGTT_PDE_PT,
-	GTT_TYPE_PPGTT_PDP_PT,
-	GTT_TYPE_PPGTT_PML4_PT,
-	GTT_TYPE_MAX,
-};
 /*
 * Mappings between GTT_TYPE* enumerations.
 * Following information can be found according to the given type:
@@ -842,13 +812,18 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
 {
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	struct intel_vgpu_ppgtt_spt *s;
+	intel_gvt_gtt_type_t cur_pt_type;
 	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type))))
 		return -EINVAL;
-	if (ops->get_pfn(e) == vgpu->gtt.scratch_page_mfn)
+	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
+		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+		cur_pt_type = get_next_pt_type(e->type) + 1;
+		if (ops->get_pfn(e) ==
+			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
 			return 0;
+	}
 	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
 	if (!s) {
 		gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
@@ -1015,7 +990,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 	if (!ops->test_present(&e))
 		return 0;
-	if (ops->get_pfn(&e) == vgpu->gtt.scratch_page_mfn)
+	if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
 		return 0;
 	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
@@ -1030,7 +1005,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 		if (ret)
 			goto fail;
 	}
-	ops->set_pfn(&e, vgpu->gtt.scratch_page_mfn);
+	ops->set_pfn(&e, vgpu->gtt.scratch_pt[sp->type].page_mfn);
 	ppgtt_set_shadow_entry(spt, &e, index);
 	return 0;
 fail:
@@ -1921,47 +1896,101 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 	return ret;
 }
-static int create_scratch_page(struct intel_vgpu *vgpu)
+static int alloc_scratch_pages(struct intel_vgpu *vgpu,
+		intel_gvt_gtt_type_t type)
 {
 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
-	void *p;
+	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
-	void *vaddr;
+	int page_entry_num = GTT_PAGE_SIZE >>
+				vgpu->gvt->device_info.gtt_entry_size_shift;
+	struct page *scratch_pt;
 	unsigned long mfn;
+	int i;
+	void *p;
+	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
+		return -EINVAL;
-	gtt->scratch_page = alloc_page(GFP_KERNEL);
+	scratch_pt = alloc_page(GFP_KERNEL | GFP_ATOMIC | __GFP_ZERO);
-	if (!gtt->scratch_page) {
+	if (!scratch_pt) {
-		gvt_err("Failed to allocate scratch page.\n");
+		gvt_err("fail to allocate scratch page\n");
 		return -ENOMEM;
 	}
-	/* set to zero */
+	p = kmap_atomic(scratch_pt);
-	p = kmap_atomic(gtt->scratch_page);
+	mfn = intel_gvt_hypervisor_virt_to_mfn(p);
-	memset(p, 0, PAGE_SIZE);
+	if (mfn == INTEL_GVT_INVALID_ADDR) {
+		gvt_err("fail to translate vaddr:0x%llx\n", (u64)p);
 		kunmap_atomic(p);
+		__free_page(scratch_pt);
+		return -EFAULT;
+	}
+	gtt->scratch_pt[type].page_mfn = mfn;
+	gtt->scratch_pt[type].page = scratch_pt;
+	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
+			vgpu->id, type, mfn);
+	/* Build the tree by full filled the scratch pt with the entries which
+	 * point to the next level scratch pt or scratch page. The
+	 * scratch_pt[type] indicate the scratch pt/scratch page used by the
+	 * 'type' pt.
+	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by
+	 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scatch_pt it self
+	 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn.
+	 */
+	if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
+		struct intel_gvt_gtt_entry se;
-	/* translate page to mfn */
+		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
-	vaddr = page_address(gtt->scratch_page);
+		se.type = get_entry_type(type - 1);
-	mfn = intel_gvt_hypervisor_virt_to_mfn(vaddr);
+		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
-	if (mfn == INTEL_GVT_INVALID_ADDR) {
+		/* The entry parameters like present/writeable/cache type
-		gvt_err("fail to translate vaddr: 0x%p\n", vaddr);
+		 * set to the same as i915's scratch page tree.
-		__free_page(gtt->scratch_page);
+		 */
-		gtt->scratch_page = NULL;
+		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
-		return -ENXIO;
+		if (type == GTT_TYPE_PPGTT_PDE_PT)
+			se.val64 |= PPAT_CACHED_INDEX;
+		for (i = 0; i < page_entry_num; i++)
+			ops->set_entry(p, &se, i, false, 0, vgpu);
 	}
-	gtt->scratch_page_mfn = mfn;
+	kunmap_atomic(p);
-	gvt_dbg_core("vgpu%d create scratch page: mfn=0x%lx\n", vgpu->id, mfn);
 	return 0;
 }
-static void release_scratch_page(struct intel_vgpu *vgpu)
+static int release_scratch_page_tree(struct intel_vgpu *vgpu)
 {
-	if (vgpu->gtt.scratch_page != NULL) {
+	int i;
-		__free_page(vgpu->gtt.scratch_page);
-		vgpu->gtt.scratch_page = NULL;
+	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
-		vgpu->gtt.scratch_page_mfn = 0;
+		if (vgpu->gtt.scratch_pt[i].page != NULL) {
+			__free_page(vgpu->gtt.scratch_pt[i].page);
+			vgpu->gtt.scratch_pt[i].page = NULL;
+			vgpu->gtt.scratch_pt[i].page_mfn = 0;
+		}
 	}
+	return 0;
+}
+static int create_scratch_page_tree(struct intel_vgpu *vgpu)
+{
+	int i, ret;
+	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
+		ret = alloc_scratch_pages(vgpu, i);
+		if (ret)
+			goto err;
+	}
+	return 0;
+err:
+	release_scratch_page_tree(vgpu);
+	return ret;
 }
 /**
@@ -1995,7 +2024,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
 	gtt->ggtt_mm = ggtt_mm;
-	return create_scratch_page(vgpu);
+	return create_scratch_page_tree(vgpu);
 }
 /**
@@ -2014,7 +2043,7 @@ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
 	struct intel_vgpu_mm *mm;
 	ppgtt_free_all_shadow_page(vgpu);
-	release_scratch_page(vgpu);
+	release_scratch_page_tree(vgpu);
 	list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
 		mm = container_of(pos, struct intel_vgpu_mm, list);

--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -88,6 +88,36 @@ enum {
 	INTEL_GVT_MM_PPGTT,
 };
+typedef enum {
+	GTT_TYPE_INVALID = -1,
+	GTT_TYPE_GGTT_PTE,
+	GTT_TYPE_PPGTT_PTE_4K_ENTRY,
+	GTT_TYPE_PPGTT_PTE_2M_ENTRY,
+	GTT_TYPE_PPGTT_PTE_1G_ENTRY,
+	GTT_TYPE_PPGTT_PTE_ENTRY,
+	GTT_TYPE_PPGTT_PDE_ENTRY,
+	GTT_TYPE_PPGTT_PDP_ENTRY,
+	GTT_TYPE_PPGTT_PML4_ENTRY,
+	GTT_TYPE_PPGTT_ROOT_ENTRY,
+	GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
+	GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
+	GTT_TYPE_PPGTT_ENTRY,
+	GTT_TYPE_PPGTT_PTE_PT,
+	GTT_TYPE_PPGTT_PDE_PT,
+	GTT_TYPE_PPGTT_PDP_PT,
+	GTT_TYPE_PPGTT_PML4_PT,
+	GTT_TYPE_MAX,
+} intel_gvt_gtt_type_t;
 struct intel_vgpu_mm {
 	int type;
 	bool initialized;
@@ -151,6 +181,12 @@ extern void intel_vgpu_destroy_mm(struct kref *mm_ref);
 struct intel_vgpu_guest_page;
+struct intel_vgpu_scratch_pt {
+	struct page *page;
+	unsigned long page_mfn;
+};
 struct intel_vgpu_gtt {
 	struct intel_vgpu_mm *ggtt_mm;
 	unsigned long active_ppgtt_mm_bitmap;
@@ -160,8 +196,8 @@ struct intel_vgpu_gtt {
 	atomic_t n_write_protected_guest_page;
 	struct list_head oos_page_list_head;
 	struct list_head post_shadow_list_head;
-	struct page *scratch_page;
+	struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX];
-	unsigned long scratch_page_mfn;
 };
 extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);