Commit 4f183645 authored by Chris Wilson

drm/i915/gtt: Use optimised memset32/64 for clearing PTE

Replace the open-coded memset loops with the memset32/64 routines that
reduce to a single instruction or two:

add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-83 (-83)
Function                                     old     new   delta
gen6_ppgtt_clear_range                       371     344     -27
gen8_ppgtt_clear_pd                          575     519     -56
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190304230646.23714-1-chris@chris-wilson.co.uk
parent f139da13
...@@ -809,8 +809,6 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm, ...@@ -809,8 +809,6 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
u64 start, u64 length) u64 start, u64 length)
{ {
unsigned int num_entries = gen8_pte_count(start, length); unsigned int num_entries = gen8_pte_count(start, length);
unsigned int pte = gen8_pte_index(start);
unsigned int pte_end = pte + num_entries;
gen8_pte_t *vaddr; gen8_pte_t *vaddr;
GEM_BUG_ON(num_entries > pt->used_ptes); GEM_BUG_ON(num_entries > pt->used_ptes);
...@@ -820,8 +818,7 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm, ...@@ -820,8 +818,7 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
return true; return true;
vaddr = kmap_atomic_px(pt); vaddr = kmap_atomic_px(pt);
while (pte < pte_end) memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
vaddr[pte++] = vm->scratch_pte;
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
return false; return false;
...@@ -1672,8 +1669,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, ...@@ -1672,8 +1669,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
while (num_entries) { while (num_entries) {
struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++]; struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
const unsigned int end = min(pte + num_entries, GEN6_PTES); const unsigned int count = min(num_entries, GEN6_PTES - pte);
const unsigned int count = end - pte;
gen6_pte_t *vaddr; gen6_pte_t *vaddr;
GEM_BUG_ON(pt == vm->scratch_pt); GEM_BUG_ON(pt == vm->scratch_pt);
...@@ -1693,9 +1689,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, ...@@ -1693,9 +1689,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
*/ */
vaddr = kmap_atomic_px(pt); vaddr = kmap_atomic_px(pt);
do { memset32(vaddr + pte, scratch_pte, count);
vaddr[pte++] = scratch_pte;
} while (pte < end);
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
pte = 0; pte = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment