Commit 3f4c175d authored by Jiadong.Zhu's avatar Jiadong.Zhu Committed by Alex Deucher

drm/amdgpu: MCBP based on DRM scheduler (v9)

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packages in preemption scenarios by coping the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.
v7: Solve conflict for removing amdgpu_sw_ring.c.
v8: Add time threshold to judge if preemption request is needed.
v9: Correct comment spelling. Set fence emit timestamp before rsu assignment.

Cc: Christian Koenig <Christian.Koenig@amd.com>
Cc: Luben Tuikov <Luben.Tuikov@amd.com>
Cc: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
Cc: Michel Dänzer <michel@daenzer.net>
Signed-off-by: default avatarJiadong.Zhu <Jiadong.Zhu@amd.com>
Acked-by: default avatarLuben Tuikov <luben.tuikov@amd.com>
Acked-by: default avatarHuang Rui <ray.huang@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent be254550
...@@ -55,6 +55,7 @@ struct amdgpu_fence { ...@@ -55,6 +55,7 @@ struct amdgpu_fence {
/* RB, DMA, etc. */ /* RB, DMA, etc. */
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
ktime_t start_timestamp;
}; };
static struct kmem_cache *amdgpu_fence_slab; static struct kmem_cache *amdgpu_fence_slab;
...@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd ...@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
} }
} }
to_amdgpu_fence(fence)->start_timestamp = ktime_get();
/* This function can't be called concurrently anyway, otherwise /* This function can't be called concurrently anyway, otherwise
* emitting the fence would mess up the hardware ring buffer. * emitting the fence would mess up the hardware ring buffer.
*/ */
...@@ -406,6 +409,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) ...@@ -406,6 +409,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
return lower_32_bits(emitted); return lower_32_bits(emitted);
} }
/**
* amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
* @ring: ring the fence is associated with
*
* Find the earliest fence unsignaled until now, calculate the time delta
* between the time fence emitted and now.
*/
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct dma_fence *fence;
uint32_t last_seq, sync_seq;
last_seq = atomic_read(&ring->fence_drv.last_seq);
sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
if (last_seq == sync_seq)
return 0;
++last_seq;
last_seq &= drv->num_fences_mask;
fence = drv->fences[last_seq];
if (!fence)
return 0;
return ktime_us_delta(ktime_get(),
to_amdgpu_fence(fence)->start_timestamp);
}
/**
* amdgpu_fence_update_start_timestamp - update the timestamp of the fence
* @ring: ring the fence is associated with
* @seq: the fence seq number to update.
* @timestamp: the start timestamp to update.
*
* The function called at the time the fence and related ib is about to
* resubmit to gpu in MCBP scenario. Thus we do not consider race condition
* with amdgpu_fence_process to modify the same fence.
*/
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct dma_fence *fence;
seq &= drv->num_fences_mask;
fence = drv->fences[seq];
if (!fence)
return;
to_amdgpu_fence(fence)->start_timestamp = timestamp;
}
/** /**
* amdgpu_fence_driver_start_ring - make the fence driver * amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring. * ready for use on the requested ring.
......
...@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
} }
} }
amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec) if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring); patch_offset = amdgpu_ring_init_cond_exec(ring);
...@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false); ring->funcs->emit_wave_limit(ring, false);
amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring); amdgpu_ring_commit(ring);
return 0; return 0;
} }
......
...@@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring) ...@@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop); return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
} }
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
{
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_begin(ring);
}
void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
{
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_end(ring);
}
...@@ -145,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, ...@@ -145,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq, uint32_t wait_seq,
signed long timeout); signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop); void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
ktime_t timestamp);
/* /*
* Rings. * Rings.
*/ */
...@@ -313,6 +318,9 @@ struct amdgpu_ring { ...@@ -313,6 +318,9 @@ struct amdgpu_ring {
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_commit(struct amdgpu_ring *ring);
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "amdgpu_ring.h" #include "amdgpu_ring.h"
struct amdgpu_ring; struct amdgpu_ring;
/** /**
* struct amdgpu_mux_entry - the entry recording software rings copying information. * struct amdgpu_mux_entry - the entry recording software rings copying information.
* @ring: the pointer to the software ring. * @ring: the pointer to the software ring.
...@@ -37,6 +38,7 @@ struct amdgpu_ring; ...@@ -37,6 +38,7 @@ struct amdgpu_ring;
* @sw_cptr: the position of the copy pointer in the sw ring. * @sw_cptr: the position of the copy pointer in the sw ring.
* @sw_rptr: the read pointer in software ring. * @sw_rptr: the read pointer in software ring.
* @sw_wptr: the write pointer in software ring. * @sw_wptr: the write pointer in software ring.
* @list: list head for amdgpu_mux_chunk
*/ */
struct amdgpu_mux_entry { struct amdgpu_mux_entry {
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
...@@ -45,6 +47,7 @@ struct amdgpu_mux_entry { ...@@ -45,6 +47,7 @@ struct amdgpu_mux_entry {
u64 sw_cptr; u64 sw_cptr;
u64 sw_rptr; u64 sw_rptr;
u64 sw_wptr; u64 sw_wptr;
struct list_head list;
}; };
struct amdgpu_ring_mux { struct amdgpu_ring_mux {
...@@ -55,6 +58,26 @@ struct amdgpu_ring_mux { ...@@ -55,6 +58,26 @@ struct amdgpu_ring_mux {
unsigned int ring_entry_size; unsigned int ring_entry_size;
/*the lock for copy data from different software rings*/ /*the lock for copy data from different software rings*/
spinlock_t lock; spinlock_t lock;
bool s_resubmit;
uint32_t seqno_to_resubmit;
u64 wptr_resubmit;
struct timer_list resubmit_timer;
bool pending_trailing_fence_signaled;
};
/**
* struct amdgpu_mux_chunk - save the location of indirect buffer's package on softare rings.
* @entry: the list entry.
* @sync_seq: the fence seqno related with the saved IB.
* @start:- start location on the software ring.
* @end:- end location on the software ring.
*/
struct amdgpu_mux_chunk {
struct list_head entry;
uint32_t sync_seq;
u64 start;
u64 end;
}; };
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
...@@ -64,15 +87,17 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring ...@@ -64,15 +87,17 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring
void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr); void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
const char *amdgpu_sw_ring_name(int idx); const char *amdgpu_sw_ring_name(int idx);
unsigned int amdgpu_sw_ring_priority(int idx); unsigned int amdgpu_sw_ring_priority(int idx);
#endif #endif
...@@ -541,6 +541,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, ...@@ -541,6 +541,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
return 0; return 0;
amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec) if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring); patch_offset = amdgpu_ring_init_cond_exec(ring);
...@@ -601,6 +602,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, ...@@ -601,6 +602,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring);
} }
amdgpu_ring_ib_end(ring);
return 0; return 0;
} }
......
...@@ -5456,7 +5456,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) ...@@ -5456,7 +5456,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
ring->trail_seq += 1; ring->trail_seq += 1;
amdgpu_ring_alloc(ring, 13); amdgpu_ring_alloc(ring, 13);
gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC); ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
/*reset the CP_VMID_PREEMPT after trailing fence*/ /*reset the CP_VMID_PREEMPT after trailing fence*/
amdgpu_ring_emit_wreg(ring, amdgpu_ring_emit_wreg(ring,
SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
...@@ -5882,8 +5882,9 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, ...@@ -5882,8 +5882,9 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
switch (me_id) { switch (me_id) {
case 0: case 0:
/* Fence signals are handled on the software rings*/ if (adev->gfx.num_gfx_rings &&
if (adev->gfx.num_gfx_rings) { !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
/* Fence signals are handled on the software rings*/
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment