Commit f153d286, authored by Christian König, committed by Alex Deucher

drm/amdgpu: move context switch handling into common code v2

It was a source of bugs to repeat that in each IP version.

v2: rename parameter
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 9f8fb5a2
...@@ -283,7 +283,7 @@ struct amdgpu_ring_funcs { ...@@ -283,7 +283,7 @@ struct amdgpu_ring_funcs {
int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
/* command emit functions */ /* command emit functions */
void (*emit_ib)(struct amdgpu_ring *ring, void (*emit_ib)(struct amdgpu_ring *ring,
struct amdgpu_ib *ib); struct amdgpu_ib *ib, bool ctx_switch);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags); uint64_t seq, unsigned flags);
void (*emit_pipeline_sync)(struct amdgpu_ring *ring); void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
...@@ -2221,7 +2221,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) ...@@ -2221,7 +2221,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib)) #define amdgpu_ring_emit_ib(r, ib, c) (r)->funcs->emit_ib((r), (ib), (c))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
......
...@@ -121,18 +121,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -121,18 +121,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0]; struct amdgpu_ib *ib = &ibs[0];
uint64_t ctx, old_ctx;
struct fence *hwf; struct fence *hwf;
struct amdgpu_vm *vm = NULL; struct amdgpu_vm *vm = NULL;
unsigned i, patch_offset = ~0; unsigned i, patch_offset = ~0;
bool skip_preamble; bool skip_preamble, need_ctx_switch;
int r = 0; int r = 0;
if (num_ibs == 0) if (num_ibs == 0)
return -EINVAL; return -EINVAL;
ctx = ibs->ctx;
if (job) /* for domain0 job like ring test, ibs->job is not assigned */ if (job) /* for domain0 job like ring test, ibs->job is not assigned */
vm = job->vm; vm = job->vm;
...@@ -156,7 +154,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -156,7 +154,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
patch_offset = amdgpu_ring_init_cond_exec(ring); patch_offset = amdgpu_ring_init_cond_exec(ring);
if (vm) { if (vm) {
/* do context switch */
r = amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, r = amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
ib->gds_base, ib->gds_size, ib->gds_base, ib->gds_size,
ib->gws_base, ib->gws_size, ib->gws_base, ib->gws_size,
...@@ -173,16 +170,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -173,16 +170,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* always set cond_exec_polling to CONTINUE */ /* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1; *ring->cond_exe_cpu_addr = 1;
skip_preamble = ring->current_ctx == ctx; skip_preamble = ring->current_ctx == ib->ctx;
old_ctx = ring->current_ctx; need_ctx_switch = ring->current_ctx != ib->ctx;
for (i = 0; i < num_ibs; ++i) { for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
/* drop preamble IBs if we don't have a context switch */ /* drop preamble IBs if we don't have a context switch */
if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
continue; continue;
amdgpu_ring_emit_ib(ring, ib); amdgpu_ring_emit_ib(ring, ib, need_ctx_switch);
ring->current_ctx = ctx; need_ctx_switch = false;
} }
if (ring->funcs->emit_hdp_invalidate) if (ring->funcs->emit_hdp_invalidate)
...@@ -191,7 +189,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -191,7 +189,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
r = amdgpu_fence_emit(ring, &hwf); r = amdgpu_fence_emit(ring, &hwf);
if (r) { if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r); dev_err(adev->dev, "failed to emit fence (%d)\n", r);
ring->current_ctx = old_ctx;
if (ib->vm_id) if (ib->vm_id)
amdgpu_vm_reset_id(adev, ib->vm_id); amdgpu_vm_reset_id(adev, ib->vm_id);
amdgpu_ring_undo(ring); amdgpu_ring_undo(ring);
...@@ -212,6 +209,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -212,6 +209,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (patch_offset != ~0 && ring->funcs->patch_cond_exec) if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset); amdgpu_ring_patch_cond_exec(ring, patch_offset);
ring->current_ctx = ibs->ctx;
amdgpu_ring_commit(ring); amdgpu_ring_commit(ring);
return 0; return 0;
} }
......
...@@ -762,7 +762,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) ...@@ -762,7 +762,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
* @ib: the IB to execute * @ib: the IB to execute
* *
*/ */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch)
{ {
amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, VCE_CMD_IB);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
......
...@@ -34,7 +34,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, ...@@ -34,7 +34,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct fence **fence); bool direct, struct fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags); unsigned flags);
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
......
...@@ -210,7 +210,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) ...@@ -210,7 +210,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (CIK). * Schedule an IB in the DMA ring (CIK).
*/ */
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
u32 extra_bits = ib->vm_id & 0xf; u32 extra_bits = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
......
...@@ -2030,13 +2030,12 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, ...@@ -2030,13 +2030,12 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
* on the gfx ring for execution by the GPU. * on the gfx ring for execution by the GPU.
*/ */
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
bool need_ctx_switch = ring->current_ctx != ib->ctx;
u32 header, control = 0; u32 header, control = 0;
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
if (need_ctx_switch) if (ctx_switch)
next_rptr += 2; next_rptr += 2;
next_rptr += 4; next_rptr += 4;
...@@ -2047,7 +2046,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, ...@@ -2047,7 +2046,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr); amdgpu_ring_write(ring, next_rptr);
/* insert SWITCH_BUFFER packet before first IB in the ring frame */ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (need_ctx_switch) { if (ctx_switch) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0);
} }
...@@ -2070,7 +2069,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, ...@@ -2070,7 +2069,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
} }
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
u32 header, control = 0; u32 header, control = 0;
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
......
...@@ -5646,13 +5646,12 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) ...@@ -5646,13 +5646,12 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
} }
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
bool need_ctx_switch = ring->current_ctx != ib->ctx;
u32 header, control = 0; u32 header, control = 0;
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
if (need_ctx_switch) if (ctx_switch)
next_rptr += 2; next_rptr += 2;
next_rptr += 4; next_rptr += 4;
...@@ -5663,7 +5662,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, ...@@ -5663,7 +5662,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr); amdgpu_ring_write(ring, next_rptr);
/* insert SWITCH_BUFFER packet before first IB in the ring frame */ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (need_ctx_switch) { if (ctx_switch) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0);
} }
...@@ -5686,7 +5685,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, ...@@ -5686,7 +5685,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
} }
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
u32 header, control = 0; u32 header, control = 0;
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
......
...@@ -242,7 +242,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) ...@@ -242,7 +242,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VI). * Schedule an IB in the DMA ring (VI).
*/ */
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
u32 vmid = ib->vm_id & 0xf; u32 vmid = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
......
...@@ -400,7 +400,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) ...@@ -400,7 +400,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VI). * Schedule an IB in the DMA ring (VI).
*/ */
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
u32 vmid = ib->vm_id & 0xf; u32 vmid = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
......
...@@ -489,7 +489,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) ...@@ -489,7 +489,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer * Write ring commands to execute the indirect buffer
*/ */
static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
amdgpu_ring_write(ring, ib->gpu_addr); amdgpu_ring_write(ring, ib->gpu_addr);
......
...@@ -539,7 +539,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) ...@@ -539,7 +539,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer * Write ring commands to execute the indirect buffer
*/ */
static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
......
...@@ -631,7 +631,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) ...@@ -631,7 +631,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer * Write ring commands to execute the indirect buffer
*/ */
static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib, bool ctx_switch)
{ {
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment