Commit 306eca05 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'amd-drm-fixes-6.9-2024-05-01' of...

Merge tag 'amd-drm-fixes-6.9-2024-05-01' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.9-2024-05-01:

amdgpu:
- Fix VRAM memory accounting
- DCN 3.1 fixes
- DCN 2.0 fix
- DCN 3.1.5 fix
- DCN 3.5 fix
- DCN 3.2.1 fix
- DP fixes
- Seamless boot fix
- Fix call order in amdgpu_ttm_move()
- Fix doorbell regression
- Disable panel replay temporarily

amdkfd:
- Flush wq before creating kfd process
Signed-off-by: default avatarDave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240501135054.1919108-1-alexander.deucher@amd.com
parents e67572cd ab72d594
......@@ -220,7 +220,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
vram_size - reserved_for_pt)) {
vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) {
ret = -ENOMEM;
goto release;
}
......
......@@ -1243,14 +1243,18 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
* amdgpu_bo_move_notify - notification about a memory move
* @bo: pointer to a buffer object
* @evict: if this move is evicting the buffer from the graphics address space
* @new_mem: new resource for backing the BO
*
* Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
* bookkeeping.
* TTM driver callback which is called when ttm moves a buffer.
*/
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_resource *new_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_resource *old_mem = bo->resource;
struct amdgpu_bo *abo;
if (!amdgpu_bo_is_amdgpu_bo(bo))
......@@ -1262,12 +1266,12 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
amdgpu_bo_kunmap(abo);
if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach &&
bo->resource->mem_type != TTM_PL_SYSTEM)
old_mem && old_mem->mem_type != TTM_PL_SYSTEM)
dma_buf_move_notify(abo->tbo.base.dma_buf);
/* remember the eviction */
if (evict)
atomic64_inc(&adev->num_evictions);
/* move_notify is called before move happens */
trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
old_mem ? old_mem->mem_type : -1);
}
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
......
......@@ -328,7 +328,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
size_t buffer_size, uint32_t *metadata_size,
uint64_t *flags);
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_resource *new_mem);
void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
......
......@@ -419,7 +419,7 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
return false;
if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
res->mem_type == AMDGPU_PL_PREEMPT)
res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL)
return true;
if (res->mem_type != TTM_PL_VRAM)
......@@ -481,14 +481,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
bo->ttm == NULL)) {
amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
goto out;
return 0;
}
if (old_mem->mem_type == TTM_PL_SYSTEM &&
(new_mem->mem_type == TTM_PL_TT ||
new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
goto out;
return 0;
}
if ((old_mem->mem_type == TTM_PL_TT ||
old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
......@@ -498,9 +500,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_resource_free(bo, &bo->resource);
ttm_bo_assign_mem(bo, new_mem);
goto out;
return 0;
}
if (old_mem->mem_type == AMDGPU_PL_GDS ||
......@@ -512,8 +515,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
new_mem->mem_type == AMDGPU_PL_OA ||
new_mem->mem_type == AMDGPU_PL_DOORBELL) {
/* Nothing to save here */
amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
goto out;
return 0;
}
if (bo->type == ttm_bo_type_device &&
......@@ -525,22 +529,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
}
if (adev->mman.buffer_funcs_enabled) {
if (((old_mem->mem_type == TTM_PL_SYSTEM &&
new_mem->mem_type == TTM_PL_VRAM) ||
(old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM))) {
hop->fpfn = 0;
hop->lpfn = 0;
hop->mem_type = TTM_PL_TT;
hop->flags = TTM_PL_FLAG_TEMPORARY;
return -EMULTIHOP;
}
if (adev->mman.buffer_funcs_enabled &&
((old_mem->mem_type == TTM_PL_SYSTEM &&
new_mem->mem_type == TTM_PL_VRAM) ||
(old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM))) {
hop->fpfn = 0;
hop->lpfn = 0;
hop->mem_type = TTM_PL_TT;
hop->flags = TTM_PL_FLAG_TEMPORARY;
return -EMULTIHOP;
}
amdgpu_bo_move_notify(bo, evict, new_mem);
if (adev->mman.buffer_funcs_enabled)
r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
} else {
else
r = -ENODEV;
}
if (r) {
/* Check that all memory is CPU accessible */
......@@ -555,11 +560,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
out:
/* update statistics */
/* update statistics after the move */
if (evict)
atomic64_inc(&adev->num_evictions);
atomic64_add(bo->base.size, &adev->num_bytes_moved);
amdgpu_bo_move_notify(bo, evict);
return 0;
}
......@@ -1559,7 +1563,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
static void
amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
{
amdgpu_bo_move_notify(bo, false);
amdgpu_bo_move_notify(bo, false, NULL);
}
static struct ttm_device_funcs amdgpu_bo_driver = {
......
......@@ -829,6 +829,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
if (process) {
pr_debug("Process already found\n");
} else {
/* If the process just called exec(3), it is possible that the
* cleanup of the kfd_process (following the release of the mm
* of the old process image) is still in the cleanup work queue.
* Make sure to drain any job before trying to recreate any
* resource for this process.
*/
flush_workqueue(kfd_process_wq);
process = create_process(thread);
if (IS_ERR(process))
goto out;
......
......@@ -4537,15 +4537,18 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
/* Determine whether to enable Replay support by default. */
if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 1, 4):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
case IP_VERSION(3, 5, 0):
case IP_VERSION(3, 5, 1):
replay_feature_enabled = true;
break;
/*
* Disabled by default due to https://gitlab.freedesktop.org/drm/amd/-/issues/3344
* case IP_VERSION(3, 1, 4):
* case IP_VERSION(3, 1, 5):
* case IP_VERSION(3, 1, 6):
* case IP_VERSION(3, 2, 0):
* case IP_VERSION(3, 2, 1):
* case IP_VERSION(3, 5, 0):
* case IP_VERSION(3, 5, 1):
* replay_feature_enabled = true;
* break;
*/
default:
replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
break;
......
......@@ -1495,7 +1495,9 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -1596,7 +1598,9 @@ static ssize_t dp_dsc_clock_en_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -1681,7 +1685,9 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -1780,7 +1786,9 @@ static ssize_t dp_dsc_slice_width_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -1865,7 +1873,9 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -1964,7 +1974,9 @@ static ssize_t dp_dsc_slice_height_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -2045,7 +2057,9 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -2141,7 +2155,9 @@ static ssize_t dp_dsc_bits_per_pixel_write(struct file *f, const char __user *bu
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -2220,7 +2236,9 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -2276,7 +2294,9 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -2347,7 +2367,9 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......@@ -2418,7 +2440,9 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
pipe_ctx->stream->link == aconnector->dc_link)
pipe_ctx->stream->link == aconnector->dc_link &&
pipe_ctx->stream->sink &&
pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
......
......@@ -2948,6 +2948,7 @@ static enum bp_result construct_integrated_info(
result = get_integrated_info_v2_1(bp, info);
break;
case 2:
case 3:
result = get_integrated_info_v2_2(bp, info);
break;
default:
......
......@@ -145,6 +145,10 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
*/
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
if (safe_to_lower) {
if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
dcn315_smu_set_dtbclk(clk_mgr, false);
clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
}
/* check that we're not already in lower */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
display_count = dcn315_get_active_display_cnt_wa(dc, context);
......@@ -160,6 +164,10 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
}
}
} else {
if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
dcn315_smu_set_dtbclk(clk_mgr, true);
clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
}
/* check that we're not already in D0 */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
union display_idle_optimization_u idle_info = { 0 };
......
......@@ -712,8 +712,12 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
* since we calculate mode support based on softmax being the max UCLK
* frequency.
*/
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
if (dc->debug.disable_dc_mode_overwrite) {
dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
} else
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
} else {
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
}
......@@ -746,8 +750,13 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
/* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
if (clk_mgr_base->clks.p_state_change_support &&
(update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) &&
!dc->work_arounds.clock_update_disable_mask.uclk)
!dc->work_arounds.clock_update_disable_mask.uclk) {
if (dc->clk_mgr->dc_mode_softmax_enabled && dc->debug.disable_dc_mode_overwrite)
dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK,
max((int)dc->clk_mgr->bw_params->dc_mode_softmax_memclk, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)));
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz));
}
if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
clk_mgr_base->clks.num_ways > new_clocks->num_ways) {
......
......@@ -1801,6 +1801,9 @@ bool dc_validate_boot_timing(const struct dc *dc,
return false;
}
if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED)
return false;
if (dc->link_srv->edp_is_ilr_optimization_required(link, crtc_timing)) {
DC_LOG_EVENT_LINK_TRAINING("Seamless boot disabled to optimize eDP link rate\n");
return false;
......
......@@ -395,6 +395,12 @@ void dcn31_hpo_dp_link_enc_set_throttled_vcp_size(
x),
25));
// If y rounds up to integer, carry it over to x.
if (y >> 25) {
x += 1;
y = 0;
}
switch (stream_encoder_inst) {
case 0:
REG_SET_2(DP_DPHY_SYM32_VC_RATE_CNTL0, 0,
......
......@@ -291,6 +291,7 @@ static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
.do_urgent_latency_adjustment = false,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
.dispclk_dppclk_vco_speed_mhz = 2400.0,
.num_chans = 4,
.dummy_pstate_latency_us = 10.0
};
......@@ -438,6 +439,7 @@ static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
.do_urgent_latency_adjustment = false,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
.dispclk_dppclk_vco_speed_mhz = 2500.0,
};
void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
......
......@@ -270,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
/* Error check whether requested and allocated are equal */
req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
if (req_bw == link->dpia_bw_alloc_config.allocated_bw) {
if (req_bw && (req_bw == link->dpia_bw_alloc_config.allocated_bw)) {
DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
__func__, link->link_index);
}
......@@ -341,6 +341,14 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link)
ret = true;
init_usb4_bw_struct(link);
link->dpia_bw_alloc_config.bw_alloc_enabled = true;
/*
* During DP tunnel creation, CM preallocates BW and reduces estimated BW of other
* DPIA. CM release preallocation only when allocation is complete. Do zero alloc
* to make the CM to release preallocation and update estimated BW correctly for
* all DPIAs per host router
*/
link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, 0);
}
}
......
......@@ -2449,6 +2449,7 @@ static bool dcn20_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment