Commit 4d75658b authored by Alex Deucher's avatar Alex Deucher

drm/radeon/kms: Add initial support for async DMA on r6xx/r7xx

Uses the new multi-ring infrastucture.  6xx/7xx has a single
async DMA ring.
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 71bfe916
This diff is collapsed.
......@@ -590,9 +590,59 @@
#define WAIT_2D_IDLECLEAN_bit (1 << 16)
#define WAIT_3D_IDLECLEAN_bit (1 << 17)
/* async DMA */
#define DMA_TILING_CONFIG 0x3ec4
#define DMA_CONFIG 0x3e4c
#define DMA_RB_CNTL 0xd000
# define DMA_RB_ENABLE (1 << 0)
# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define DMA_RB_BASE 0xd004
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
#define DMA_RB_RPTR_ADDR_HI 0xd01c
#define DMA_RB_RPTR_ADDR_LO 0xd020
#define DMA_IB_CNTL 0xd024
# define DMA_IB_ENABLE (1 << 0)
# define DMA_IB_SWAP_ENABLE (1 << 4)
#define DMA_IB_RPTR 0xd028
#define DMA_CNTL 0xd02c
# define TRAP_ENABLE (1 << 0)
# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
# define SEM_WAIT_INT_ENABLE (1 << 2)
# define DATA_SWAP_ENABLE (1 << 3)
# define FENCE_SWAP_ENABLE (1 << 4)
# define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_STATUS_REG 0xd034
# define DMA_IDLE (1 << 0)
#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
#define DMA_MODE 0xd0bc
/* async DMA packets */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */
#define DMA_PACKET_NOP 0xf
#define IH_RB_CNTL 0x3e00
# define IH_RB_ENABLE (1 << 0)
# define IH_IB_SIZE(x) ((x) << 1) /* log2 */
# define IH_RB_SIZE(x) ((x) << 1) /* log2 */
# define IH_RB_FULL_DRAIN_ENABLE (1 << 6)
# define IH_WPTR_WRITEBACK_ENABLE (1 << 8)
# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */
......@@ -637,7 +687,9 @@
#define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20
#define SRBM_SOFT_RESET 0xe60
# define SOFT_RESET_DMA (1 << 12)
# define SOFT_RESET_RLC (1 << 13)
# define RV770_SOFT_RESET_DMA (1 << 20)
#define CP_INT_CNTL 0xc124
# define CNTX_BUSY_INT_ENABLE (1 << 19)
......
......@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
#define RADEON_BIOS_NUM_SCRATCH 8
/* max number of rings */
#define RADEON_NUM_RINGS 3
#define RADEON_NUM_RINGS 4
/* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL
......@@ -122,6 +122,9 @@ extern int radeon_lockup_timeout;
#define CAYMAN_RING_TYPE_CP1_INDEX 1
#define CAYMAN_RING_TYPE_CP2_INDEX 2
/* R600+ has an async dma ring */
#define R600_RING_TYPE_DMA_INDEX 3
/* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20)
......@@ -787,6 +790,11 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
/* r600 async dma */
void r600_dma_stop(struct radeon_device *rdev);
int r600_dma_resume(struct radeon_device *rdev);
void r600_dma_fini(struct radeon_device *rdev);
/*
* CS.
*/
......@@ -883,6 +891,7 @@ struct radeon_wb {
#define RADEON_WB_CP_RPTR_OFFSET 1024
#define RADEON_WB_CP1_RPTR_OFFSET 1280
#define RADEON_WB_CP2_RPTR_OFFSET 1536
#define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048
#define R600_WB_EVENT_OFFSET 3072
......
......@@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &r600_dma_ring_ib_execute,
.emit_fence = &r600_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
......@@ -963,8 +972,8 @@ static struct radeon_asic r600_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = &r600_copy_dma,
.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = &r600_copy_blit,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
},
......@@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &r600_dma_ring_ib_execute,
.emit_fence = &r600_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
......@@ -1038,8 +1056,8 @@ static struct radeon_asic rs780_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = &r600_copy_dma,
.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = &r600_copy_blit,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
},
......@@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &r600_dma_ring_ib_execute,
.emit_fence = &r600_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
......@@ -1113,8 +1140,8 @@ static struct radeon_asic rv770_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = &r600_copy_dma,
.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = &r600_copy_blit,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
},
......
......@@ -309,6 +309,14 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *cp,
struct radeon_semaphore *semaphore,
bool emit_wait);
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *ring,
struct radeon_semaphore *semaphore,
bool emit_wait);
void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_asic_reset(struct radeon_device *rdev);
int r600_set_surface_reg(struct radeon_device *rdev, int reg,
......@@ -316,11 +324,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t offset, uint32_t obj_size);
void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
int r600_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
void r600_hpd_init(struct radeon_device *rdev);
void r600_hpd_fini(struct radeon_device *rdev);
bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
......
......@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}
static int rv770_cp_load_microcode(struct radeon_device *rdev)
......@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev)
WREG32(GB_TILING_CONFIG, gb_tiling_config);
WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
WREG32(CGTS_SYS_TCC_DISABLE, 0);
WREG32(CGTS_TCC_DISABLE, 0);
......@@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev)
static int rv770_startup(struct radeon_device *rdev)
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
struct radeon_ring *ring;
int r;
/* enable pcie gen2 link */
......@@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev)
return r;
}
r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
return r;
}
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
......@@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev)
}
r600_irq_set(rdev);
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
if (r)
return r;
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
DMA_RB_RPTR, DMA_RB_WPTR,
2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
if (r)
return r;
r = rv770_cp_load_microcode(rdev);
if (r)
return r;
......@@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev)
if (r)
return r;
r = r600_dma_resume(rdev);
if (r)
return r;
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
......@@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
r700_cp_stop(rdev);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
r600_dma_stop(rdev);
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
rv770_pcie_gart_disable(rdev);
......@@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
......@@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
r700_cp_fini(rdev);
r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
......@@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev)
{
r600_blit_fini(rdev);
r700_cp_fini(rdev);
r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
......
......@@ -109,6 +109,9 @@
#define PIPE_TILING__SHIFT 1
#define PIPE_TILING__MASK 0x0000000e
#define DMA_TILING_CONFIG 0x3ec8
#define DMA_TILING_CONFIG2 0xd0b8
#define GC_USER_SHADER_PIPE_CONFIG 0x8954
#define INACTIVE_QD_PIPES(x) ((x) << 8)
#define INACTIVE_QD_PIPES_MASK 0x0000FF00
......@@ -358,6 +361,26 @@
#define WAIT_UNTIL 0x8040
/* async DMA */
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
/* async DMA packets */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
#define SRBM_STATUS 0x0E50
/* DCE 3.2 HDMI */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment