Commit 7c2fa7fa authored by Chris Wilson

drm/i915: Stop caching the "golden" renderstate

As we now record the default HW state and so only emit the "golden"
renderstate once to prepare the HW, there is no advantage in keeping the
renderstate batch around as it will never be used again.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-8-chris@chris-wilson.co.uk
parent d2b4b979
...@@ -67,7 +67,6 @@ ...@@ -67,7 +67,6 @@
#include "i915_gem_fence_reg.h" #include "i915_gem_fence_reg.h"
#include "i915_gem_object.h" #include "i915_gem_object.h"
#include "i915_gem_gtt.h" #include "i915_gem_gtt.h"
#include "i915_gem_render_state.h"
#include "i915_gem_request.h" #include "i915_gem_request.h"
#include "i915_gem_timeline.h" #include "i915_gem_timeline.h"
......
...@@ -26,10 +26,12 @@ ...@@ -26,10 +26,12 @@
*/ */
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_gem_render_state.h"
#include "intel_renderstate.h" #include "intel_renderstate.h"
struct intel_render_state { struct intel_render_state {
const struct intel_renderstate_rodata *rodata; const struct intel_renderstate_rodata *rodata;
struct drm_i915_gem_object *obj;
struct i915_vma *vma; struct i915_vma *vma;
u32 batch_offset; u32 batch_offset;
u32 batch_size; u32 batch_size;
...@@ -40,6 +42,9 @@ struct intel_render_state { ...@@ -40,6 +42,9 @@ struct intel_render_state {
static const struct intel_renderstate_rodata * static const struct intel_renderstate_rodata *
render_state_get_rodata(const struct intel_engine_cs *engine) render_state_get_rodata(const struct intel_engine_cs *engine)
{ {
if (engine->id != RCS)
return NULL;
switch (INTEL_GEN(engine->i915)) { switch (INTEL_GEN(engine->i915)) {
case 6: case 6:
return &gen6_null_state; return &gen6_null_state;
...@@ -74,17 +79,16 @@ static int render_state_setup(struct intel_render_state *so, ...@@ -74,17 +79,16 @@ static int render_state_setup(struct intel_render_state *so,
struct drm_i915_private *i915) struct drm_i915_private *i915)
{ {
const struct intel_renderstate_rodata *rodata = so->rodata; const struct intel_renderstate_rodata *rodata = so->rodata;
struct drm_i915_gem_object *obj = so->vma->obj;
unsigned int i = 0, reloc_index = 0; unsigned int i = 0, reloc_index = 0;
unsigned int needs_clflush; unsigned int needs_clflush;
u32 *d; u32 *d;
int ret; int ret;
ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
if (ret) if (ret)
return ret; return ret;
d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0));
while (i < rodata->batch_items) { while (i < rodata->batch_items) {
u32 s = rodata->batch[i]; u32 s = rodata->batch[i];
...@@ -112,7 +116,7 @@ static int render_state_setup(struct intel_render_state *so, ...@@ -112,7 +116,7 @@ static int render_state_setup(struct intel_render_state *so,
goto err; goto err;
} }
so->batch_offset = so->vma->node.start; so->batch_offset = i915_ggtt_offset(so->vma);
so->batch_size = rodata->batch_items * sizeof(u32); so->batch_size = rodata->batch_items * sizeof(u32);
while (i % CACHELINE_DWORDS) while (i % CACHELINE_DWORDS)
...@@ -160,9 +164,9 @@ static int render_state_setup(struct intel_render_state *so, ...@@ -160,9 +164,9 @@ static int render_state_setup(struct intel_render_state *so,
drm_clflush_virt_range(d, i * sizeof(u32)); drm_clflush_virt_range(d, i * sizeof(u32));
kunmap_atomic(d); kunmap_atomic(d);
ret = i915_gem_object_set_to_gtt_domain(obj, false); ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
out: out:
i915_gem_obj_finish_shmem_access(obj); i915_gem_obj_finish_shmem_access(so->obj);
return ret; return ret;
err: err:
...@@ -173,112 +177,61 @@ static int render_state_setup(struct intel_render_state *so, ...@@ -173,112 +177,61 @@ static int render_state_setup(struct intel_render_state *so,
#undef OUT_BATCH #undef OUT_BATCH
int i915_gem_render_state_init(struct intel_engine_cs *engine) int i915_gem_render_state_emit(struct drm_i915_gem_request *rq)
{ {
struct intel_render_state *so; struct intel_engine_cs *engine = rq->engine;
const struct intel_renderstate_rodata *rodata; struct intel_render_state so = {}; /* keep the compiler happy */
struct drm_i915_gem_object *obj; int err;
int ret;
if (engine->id != RCS) so.rodata = render_state_get_rodata(engine);
if (!so.rodata)
return 0; return 0;
rodata = render_state_get_rodata(engine); if (so.rodata->batch_items * 4 > PAGE_SIZE)
if (!rodata)
return 0;
if (rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL; return -EINVAL;
so = kmalloc(sizeof(*so), GFP_KERNEL); so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (!so) if (IS_ERR(so.obj))
return -ENOMEM; return PTR_ERR(so.obj);
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_free;
}
so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.base, NULL);
if (IS_ERR(so->vma)) { if (IS_ERR(so.vma)) {
ret = PTR_ERR(so->vma); err = PTR_ERR(so.vma);
goto err_obj; goto err_obj;
} }
so->rodata = rodata; err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
engine->render_state = so; if (err)
return 0; goto err_vma;
err_obj:
i915_gem_object_put(obj);
err_free:
kfree(so);
return ret;
}
int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
{
struct intel_render_state *so;
int ret;
lockdep_assert_held(&req->i915->drm.struct_mutex);
so = req->engine->render_state;
if (!so)
return 0;
/* Recreate the page after shrinking */
if (!i915_gem_object_has_pages(so->vma->obj))
so->batch_offset = -1;
ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (ret)
return ret;
if (so->vma->node.start != so->batch_offset) { err = render_state_setup(&so, rq->i915);
ret = render_state_setup(so, req->i915); if (err)
if (ret) goto err_unpin;
goto err_unpin;
}
ret = req->engine->emit_flush(req, EMIT_INVALIDATE); err = engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret) if (err)
goto err_unpin; goto err_unpin;
ret = req->engine->emit_bb_start(req, err = engine->emit_bb_start(rq,
so->batch_offset, so->batch_size, so.batch_offset, so.batch_size,
I915_DISPATCH_SECURE); I915_DISPATCH_SECURE);
if (ret) if (err)
goto err_unpin; goto err_unpin;
if (so->aux_size > 8) { if (so.aux_size > 8) {
ret = req->engine->emit_bb_start(req, err = engine->emit_bb_start(rq,
so->aux_offset, so->aux_size, so.aux_offset, so.aux_size,
I915_DISPATCH_SECURE); I915_DISPATCH_SECURE);
if (ret) if (err)
goto err_unpin; goto err_unpin;
} }
i915_vma_move_to_active(so->vma, req, 0); i915_vma_move_to_active(so.vma, rq, 0);
err_unpin: err_unpin:
i915_vma_unpin(so->vma); i915_vma_unpin(so.vma);
return ret; err_vma:
} i915_vma_close(so.vma);
err_obj:
void i915_gem_render_state_fini(struct intel_engine_cs *engine) __i915_gem_object_release_unless_active(so.obj);
{ return err;
struct intel_render_state *so;
struct drm_i915_gem_object *obj;
so = fetch_and_zero(&engine->render_state);
if (!so)
return;
obj = so->vma->obj;
i915_vma_close(so->vma);
__i915_gem_object_release_unless_active(obj);
kfree(so);
} }
...@@ -26,8 +26,6 @@ ...@@ -26,8 +26,6 @@
struct drm_i915_gem_request; struct drm_i915_gem_request;
int i915_gem_render_state_init(struct intel_engine_cs *engine); int i915_gem_render_state_emit(struct drm_i915_gem_request *rq);
int i915_gem_render_state_emit(struct drm_i915_gem_request *req);
void i915_gem_render_state_fini(struct intel_engine_cs *engine);
#endif /* _I915_GEM_RENDER_STATE_H_ */ #endif /* _I915_GEM_RENDER_STATE_H_ */
...@@ -641,21 +641,15 @@ int intel_engine_init_common(struct intel_engine_cs *engine) ...@@ -641,21 +641,15 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret) if (ret)
goto err_unpin_preempt; goto err_unpin_preempt;
ret = i915_gem_render_state_init(engine);
if (ret)
goto err_breadcrumbs;
if (HWS_NEEDS_PHYSICAL(engine->i915)) if (HWS_NEEDS_PHYSICAL(engine->i915))
ret = init_phys_status_page(engine); ret = init_phys_status_page(engine);
else else
ret = init_status_page(engine); ret = init_status_page(engine);
if (ret) if (ret)
goto err_rs_fini; goto err_breadcrumbs;
return 0; return 0;
err_rs_fini:
i915_gem_render_state_fini(engine);
err_breadcrumbs: err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine); intel_engine_fini_breadcrumbs(engine);
err_unpin_preempt: err_unpin_preempt:
...@@ -682,7 +676,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) ...@@ -682,7 +676,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
else else
cleanup_status_page(engine); cleanup_status_page(engine);
i915_gem_render_state_fini(engine);
intel_engine_fini_breadcrumbs(engine); intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine); intel_engine_cleanup_cmd_parser(engine);
i915_gem_batch_pool_fini(&engine->batch_pool); i915_gem_batch_pool_fini(&engine->batch_pool);
......
...@@ -136,6 +136,7 @@ ...@@ -136,6 +136,7 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include <drm/i915_drm.h> #include <drm/i915_drm.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_gem_render_state.h"
#include "intel_mocs.h" #include "intel_mocs.h"
#define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST_QFULL (1 << 0x2)
......
...@@ -28,9 +28,12 @@ ...@@ -28,9 +28,12 @@
*/ */
#include <linux/log2.h> #include <linux/log2.h>
#include <drm/drmP.h> #include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h> #include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_render_state.h"
#include "i915_trace.h" #include "i915_trace.h"
#include "intel_drv.h" #include "intel_drv.h"
......
...@@ -165,7 +165,6 @@ struct i915_ctx_workarounds { ...@@ -165,7 +165,6 @@ struct i915_ctx_workarounds {
}; };
struct drm_i915_gem_request; struct drm_i915_gem_request;
struct intel_render_state;
/* /*
* Engine IDs definitions. * Engine IDs definitions.
...@@ -307,7 +306,6 @@ struct intel_engine_cs { ...@@ -307,7 +306,6 @@ struct intel_engine_cs {
struct intel_timeline *timeline; struct intel_timeline *timeline;
struct drm_i915_gem_object *default_state; struct drm_i915_gem_object *default_state;
struct intel_render_state *render_state;
atomic_t irq_count; atomic_t irq_count;
unsigned long irq_posted; unsigned long irq_posted;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment