Commit 0990ca23 authored by Dave Airlie

Merge tag 'drm-next-5.5-2019-11-08' of git://people.freedesktop.org/~agd5f/linux into drm-next

drm-next-5.5-2019-11-08:

amdgpu:
- Enable VCN dynamic powergating on RV/RV2
- Fixes for Navi14
- Misc Navi fixes
- Fix MSI-X tear down
- Misc Arcturus fixes
- Fix xgmi powerstate handling
- Documentation fixes

scheduler:
- Fix static code checker warning
- Fix possible thread reactivation while thread is stopped
- Avoid cleanup if thread is parked

radeon:
- SI dpm fix ported from amdgpu
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191108212713.5078-1-alexander.deucher@amd.com
parents 77e0723b 53dbc27a
......@@ -82,12 +82,21 @@ AMDGPU XGMI Support
AMDGPU RAS Support
==================
The AMDGPU RAS interfaces are exposed via sysfs (for informational queries) and
debugfs (for error injection).
RAS debugfs/sysfs Control and Error Injection Interfaces
--------------------------------------------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:doc: AMDGPU RAS debugfs control interface
RAS Reboot Behavior for Unrecoverable Errors
--------------------------------------------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:doc: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
RAS Error Count sysfs Interface
-------------------------------
......@@ -109,6 +118,32 @@ RAS VRAM Bad Pages sysfs Interface
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:internal:
Sample Code
-----------
Sample code for testing error injection can be found here:
https://cgit.freedesktop.org/mesa/drm/tree/tests/amdgpu/ras_tests.c
This is part of the libdrm amdgpu unit tests which cover several areas of the GPU.
There are four sets of tests:
RAS Basic Test
This test verifies the RAS feature enabled status and makes sure the necessary sysfs and debugfs
files are present; a minimal user-space sketch of such a check follows this list.
RAS Query Test
This test checks the RAS availability and enablement status for each supported IP block as well as
the error counts.
RAS Inject Test
This test injects errors for each IP.
RAS Disable Test
This test exercises disabling of RAS features for each IP block.
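As a rough illustration of what the RAS Basic and Query tests exercise, the user-space sketch below probes for the RAS control and error-count nodes and dumps one count file. The card/DRI indices, the choice of the gfx block, and the exact output lines are assumptions based on the interfaces documented on this page; the authoritative tests remain ras_tests.c in libdrm.

.. code-block:: c

    /* Minimal, illustrative probe of the AMDGPU RAS interfaces. */
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        const char *ctrl = "/sys/kernel/debug/dri/0/ras/ras_ctrl";         /* debugfs control node */
        const char *cnt = "/sys/class/drm/card0/device/ras/gfx_err_count"; /* sysfs error counts */
        char buf[256];
        FILE *f;

        /* Basic-test style check: are the interfaces present at all? */
        if (access(ctrl, F_OK))
            printf("RAS debugfs control node not found (RAS disabled or debugfs not mounted?)\n");
        if (access(cnt, F_OK)) {
            printf("gfx_err_count not found; GFX RAS may be unsupported on this ASIC\n");
            return 0;
        }

        /* Query-test style check: dump the ue/ce counts reported by the kernel. */
        f = fopen(cnt, "r");
        if (!f)
            return 1;
        while (fgets(buf, sizeof(buf), f))
            fputs(buf, stdout);
        fclose(f);
        return 0;
    }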
GPU Power/Thermal Controls and Monitoring
=========================================
......
......@@ -977,6 +977,9 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
/* device pstate */
int pstate;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
......
......@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{
unsigned long start_jiffies;
unsigned long end_jiffies;
struct dma_fence *fence = NULL;
struct dma_fence *fence;
int i, r;
start_jiffies = jiffies;
......@@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
dma_fence_put(fence);
if (r)
goto exit_do_move;
dma_fence_put(fence);
}
end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
if (fence)
dma_fence_put(fence);
return r;
}
......
......@@ -859,6 +859,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
int r = 0, i;
/* Avoid accidentally unparking the sched thread during GPU reset */
mutex_lock(&adev->lock_reset);
/* hold on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
......@@ -884,6 +887,8 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
kthread_unpark(ring->sched.thread);
}
mutex_unlock(&adev->lock_reset);
return 0;
}
......@@ -1036,6 +1041,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
if (!fences)
return -ENOMEM;
/* Avoid accidentally unparking the sched thread during GPU reset */
mutex_lock(&adev->lock_reset);
/* stop the scheduler */
kthread_park(ring->sched.thread);
......@@ -1075,6 +1083,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
/* restart the scheduler */
kthread_unpark(ring->sched.thread);
mutex_unlock(&adev->lock_reset);
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
kfree(fences);
......
......@@ -2057,6 +2057,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
......@@ -2082,8 +2083,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/* set to low pstate by default */
amdgpu_xgmi_set_pstate(adev, 0);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
mutex_lock(&mgpu_info.mutex);
/*
* Reset device p-state to low as it was booted with high.
*
* This should be performed only after all devices from the same
* hive get initialized.
*
* However, the number of devices in the hive is not known in advance,
* as they are counted one by one during device initialization.
*
* So, we wait until all XGMI interlinked devices are initialized.
* This may bring some delay as those devices may come from
* different hives, but that should be OK.
*/
if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
for (i = 0; i < mgpu_info.num_gpu; i++) {
gpu_instance = &(mgpu_info.gpu_ins[i]);
if (gpu_instance->adev->flags & AMD_IS_APU)
continue;
r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
if (r) {
DRM_ERROR("pstate setting failed (%d).\n", r);
break;
}
}
}
mutex_unlock(&mgpu_info.mutex);
}
return 0;
}
......
......@@ -127,18 +127,48 @@ struct amdgpu_xgmi {
};
struct amdgpu_gmc {
/* FB's physical address in MMIO space (for CPU to
* map FB). This is different compared to the agp/
* gart/vram_start/end fields as the latter are from the
* GPU's view and aper_base is from the CPU's view.
*/
resource_size_t aper_size;
resource_size_t aper_base;
/* for some chips with <= 32MB we need to lie
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
/* AGP aperture start and end in MC address space
* Driver finds a hole in the MC address space
* to place AGP by setting the MC_VM_AGP_BOT/TOP registers.
* Under VMID0, logical address == MC address. The AGP
* aperture maps to physical bus or IOVA addresses.
* The AGP aperture is used to simulate FB in the ZFB case.
* It is also used for page tables in system
* memory (mainly for APUs).
*
*/
u64 agp_size;
u64 agp_start;
u64 agp_end;
/* GART aperture start and end in MC address space
* Driver finds a hole in the MC address space
* to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR
* registers
* Under VMID0, logical address inside GART aperture will
* be translated through the GPUVM GART page table to access
* paged system memory
*/
u64 gart_size;
u64 gart_start;
u64 gart_end;
/* Frame buffer aperture of this GPU device. Different from
* fb_start (see below), this only covers the local GPU device.
* Driver gets fb_start from MC_VM_FB_LOCATION_BASE (set by vbios)
* and calculates vram_start of this local device by adding an
* offset inside the XGMI hive.
* Under VMID0, logical address == MC address
*/
u64 vram_start;
u64 vram_end;
/* FB region, it's the same as the local vram region in a single GPU; in XGMI
......
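To make the aperture comments above concrete, here is a small illustrative helper that classifies a VMID0 logical (MC) address against the VRAM, GART and AGP windows described in struct amdgpu_gmc. It is only a sketch derived from the fields shown in this hunk; no such helper exists in the driver, and the window layout is assumed to be exactly as documented.

/* Illustrative only: name the aperture a VMID0 logical address falls into. */
static const char *example_gmc_aperture_name(struct amdgpu_gmc *gmc, u64 addr)
{
	if (addr >= gmc->vram_start && addr <= gmc->vram_end)
		return "VRAM";	/* local FB; the CPU reaches it via aper_base instead */
	if (addr >= gmc->gart_start && addr <= gmc->gart_end)
		return "GART";	/* translated through the GART page table */
	if (addr >= gmc->agp_start && addr <= gmc->agp_end)
		return "AGP";	/* maps straight to bus/IOVA addresses */
	return "unmapped";
}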
......@@ -311,7 +311,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
drm_irq_uninstall(adev->ddev);
adev->irq.installed = false;
if (adev->irq.msi_enabled)
pci_disable_msi(adev->pdev);
pci_free_irq_vectors(adev->pdev);
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
}
......
......@@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* As their names indicate, inject operation will write the
* value to the address.
*
* Second member: struct ras_debug_if::op.
* The second member: struct ras_debug_if::op.
* It has three kinds of operations.
*
* - 0: disable RAS on the block. Take ::head as its data.
......@@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* - 2: inject errors on the block. Take ::inject as its data.
*
* How to use the interface?
* programs:
* copy the struct ras_debug_if in your codes and initialize it.
* write the struct to the control node.
*
* Programs
*
* Copy the struct ras_debug_if into your code and initialize it.
* Write the struct to the control node.
*
* Shells
*
* .. code-block:: bash
*
* echo op block [error [sub_block address value]] > .../ras/ras_ctrl
*
* Parameters:
*
* op: disable, enable, inject
* disable: only block is needed
* enable: block and error are needed
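For completeness, a user-space program can drive the same control node as the shell example above by writing the command string directly. The sketch below is illustrative only: the DRI index, the gfx block, the ue error type and the zeroed sub_block/address/value are placeholder arguments, not values recommended for any particular ASIC.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Same format as the documented shell usage:
	 *   echo op block [error [sub_block address value]] > .../ras/ras_ctrl
	 */
	const char *cmd = "inject gfx ue 0x0 0x0 0x0";
	int fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);

	if (fd < 0) {
		perror("open ras_ctrl");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write ras_ctrl");
	close(fd);
	return 0;
}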
......@@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* .. note::
* Operation is only allowed on blocks which are supported.
* Operations are only allowed on blocks which are supported.
* Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
* to see which blocks support RAS on a particular asic.
*
*/
static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
......@@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
* Some boards contain an EEPROM which is used to persistently store a list of
* bad pages containing ECC errors detected in vram. This interface provides
* bad pages which experienced ECC errors in vram. This interface provides
* a way to reset the EEPROM, e.g., after testing error injection.
*
* Usage:
......@@ -362,7 +370,7 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
/**
* DOC: AMDGPU RAS sysfs Error Count Interface
*
* It allows user to read the error count for each IP block on the gpu through
* It allows the user to read the error count for each IP block on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* It outputs multiple lines which report the uncorrected (ue) and corrected
......@@ -1027,6 +1035,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
}
/* sysfs end */
/**
* DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
*
* Normally when there is an uncorrectable error, the driver will reset
* the GPU to recover. However, in the event of an unrecoverable error,
* the driver provides an interface to reboot the system automatically.
*
* The following file in debugfs provides that interface:
* /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
*
* Usage:
*
* .. code-block:: bash
*
* echo true > .../ras/auto_reboot
*
*/
/* debugfs begin */
static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
......
......@@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
fence = NULL;
r = amdgpu_bo_kmap(vram_obj, &vram_map);
if (r) {
......@@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
fence = NULL;
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
......
......@@ -1906,9 +1906,6 @@ void amdgpu_ttm_late_init(struct amdgpu_device *adev)
void *stolen_vga_buf;
/* return the VGA stolen memory (if any) back to VRAM */
amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
/* return the IP Discovery TMR memory back to VRAM */
amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
}
/**
......@@ -1921,7 +1918,10 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
amdgpu_ttm_debugfs_fini(adev);
amdgpu_ttm_training_reserve_vram_fini(adev);
/* return the IP Discovery TMR memory back to VRAM */
amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
if (adev->mman.aper_base_kaddr)
iounmap(adev->mman.aper_base_kaddr);
adev->mman.aper_base_kaddr = NULL;
......
......@@ -1418,6 +1418,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
/* make sure that the page tables covering the address range are
* actually allocated
*/
r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor,
params->direct);
if (r)
......@@ -1491,7 +1494,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
} while (frag_start < entry_end);
if (amdgpu_vm_pt_descendant(adev, &cursor)) {
/* Free all child entries */
/* Free all child entries.
* Update the tables with the flags and addresses and free up subsequent
* tables in the case of huge pages or freed up areas.
* This is the maximum you can free, because all other page tables are not
* completely covered by the range and so potentially still in use.
*/
while (cursor.pfn < frag_start) {
amdgpu_vm_free_pts(adev, params->vm, &cursor);
amdgpu_vm_pt_next(adev, &cursor);
......
......@@ -274,22 +274,55 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
struct amdgpu_device *tmp_adev;
bool update_hive_pstate = true;
bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
if (!hive)
return 0;
if (hive->pstate == pstate)
return 0;
mutex_lock(&hive->hive_lock);
if (hive->pstate == pstate) {
adev->pstate = is_high_pstate ? pstate : adev->pstate;
goto out;
}
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
if (is_support_sw_smu_xgmi(adev))
ret = smu_set_xgmi_pstate(&adev->smu, pstate);
if (ret)
else if (adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->set_xgmi_pstate)
ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
pstate);
if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
goto out;
}
/* Update device pstate */
adev->pstate = pstate;
/*
* Update the hive pstate only if all devices of the hive
* are in the same pstate.
*/
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
if (tmp_adev->pstate != adev->pstate) {
update_hive_pstate = false;
break;
}
}
if (update_hive_pstate || is_high_pstate)
hive->pstate = pstate;
out:
mutex_unlock(&hive->hive_lock);
return ret;
}
......@@ -364,6 +397,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
/* Set default device pstate */
adev->pstate = -1;
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
......
......@@ -2738,7 +2738,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
* And it's needed by gfxoff feature.
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
gfx_v9_1_init_rlc_save_restore_list(adev);
if (adev->asic_type == CHIP_VEGA12 ||
(adev->asic_type == CHIP_RAVEN &&
adev->rev_id >= 8))
gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
}
......@@ -3889,9 +3892,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
uint64_t clock;
mutex_lock(&adev->gfx.gpu_clock_mutex);
WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
uint32_t tmp, lsb, msb, i = 0;
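/*
 * Read the free-running REFCLOCK timestamp rather than triggering a GPU
 * clock capture: sample MSB, then LSB, then MSB again and retry if the
 * MSB moved, so the combined 64-bit value is consistent across rollover.
 */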
do {
if (i != 0)
udelay(1);
tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
i++;
} while (unlikely(tmp != msb) && (i < adev->usec_timeout));
clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
} else {
WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
}
mutex_unlock(&adev->gfx.gpu_clock_mutex);
return clock;
}
......
......@@ -539,6 +539,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
{
/* TODO
* dummy implementation for the pcie_replay_count sysfs interface
*/
return 0;
}
static void nv_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
......@@ -586,6 +596,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.need_full_reset = &nv_need_full_reset,
.get_pcie_usage = &nv_get_pcie_usage,
.need_reset_on_init = &nv_need_reset_on_init,
.get_pcie_replay_count = &nv_get_pcie_replay_count,
};
static int nv_common_early_init(void *handle)
......
......@@ -1145,7 +1145,9 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG;
} else if (adev->pdev->device == 0x15d8) {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
......@@ -1188,7 +1190,9 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG;
}
break;
case CHIP_ARCTURUS:
......
......@@ -4239,8 +4239,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
result = MODE_OK;
else
DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n",
mode->vdisplay,
mode->hdisplay,
mode->vdisplay,
mode->clock,
dc_result);
......
......@@ -3027,15 +3027,6 @@ void core_link_enable_stream(
CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
COLOR_DEPTH_UNDEFINED);
/* This second call is needed to reconfigure the DIG
* as a workaround for the incorrect value being applied
* from transmitter control.
*/
if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
stream->link->link_enc->funcs->setup(
stream->link->link_enc,
pipe_ctx->stream->signal);
#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
if (pipe_ctx->stream->timing.flags.DSC) {
if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
......
......@@ -220,6 +220,9 @@ enum pp_df_cstate {
((group) << PP_GROUP_SHIFT | (block) << PP_BLOCK_SHIFT | \
(support) << PP_STATE_SUPPORT_SHIFT | (state) << PP_STATE_SHIFT)
#define XGMI_MODE_PSTATE_D3 0
#define XGMI_MODE_PSTATE_D0 1
struct seq_file;
enum amd_pp_clock_type;
struct amd_pp_simple_clock_info;
......@@ -318,6 +321,7 @@ struct amd_pm_funcs {
int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks);
int (*asic_reset_mode_2)(void *handle);
int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
};
#endif
......@@ -969,6 +969,14 @@ static int pp_dpm_switch_power_profile(void *handle,
workload = hwmgr->workload_setting[index];
}
if (type == PP_SMC_POWER_PROFILE_COMPUTE &&
hwmgr->hwmgr_func->disable_power_features_for_compute_performance) {
if (hwmgr->hwmgr_func->disable_power_features_for_compute_performance(hwmgr, en)) {
mutex_unlock(&hwmgr->smu_lock);
return -EINVAL;
}
}
if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0);
mutex_unlock(&hwmgr->smu_lock);
......@@ -1566,6 +1574,23 @@ static int pp_set_df_cstate(void *handle, enum pp_df_cstate state)
return 0;
}
static int pp_set_xgmi_pstate(void *handle, uint32_t pstate)
{
struct pp_hwmgr *hwmgr = handle;
if (!hwmgr)
return -EINVAL;
if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_xgmi_pstate)
return 0;
mutex_lock(&hwmgr->smu_lock);
hwmgr->hwmgr_func->set_xgmi_pstate(hwmgr, pstate);
mutex_unlock(&hwmgr->smu_lock);
return 0;
}
static const struct amd_pm_funcs pp_dpm_funcs = {
.load_firmware = pp_dpm_load_fw,
.wait_for_fw_loading_complete = pp_dpm_fw_loading_complete,
......@@ -1625,4 +1650,5 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
.asic_reset_mode_2 = pp_asic_reset_mode_2,
.smu_i2c_bus_access = pp_smu_i2c_bus_access,
.set_df_cstate = pp_set_df_cstate,
.set_xgmi_pstate = pp_set_xgmi_pstate,
};
......@@ -383,14 +383,25 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type)
return true;
}
/**
* smu_dpm_set_power_gate - power gate/ungate the specific IP block
*
* @smu: smu_context pointer
* @block_type: the IP block to power gate/ungate
* @gate: to power gate if true, ungate otherwise
*
* This API uses no smu->mutex lock protection due to:
* 1. It is either called by other IP blocks (gfx/sdma/vcn/uvd/vce),
*    which is guarded to be race-condition free by the caller.
* 2. Or it is called on a user setting request of power_dpm_force_performance_level.
*    In that case, the smu->mutex lock protection is already enforced in
*    the parent API smu_force_performance_level of the call path.
*/
int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
bool gate)
{
int ret = 0;
mutex_lock(&smu->mutex);
switch (block_type) {
case AMD_IP_BLOCK_TYPE_UVD:
ret = smu_dpm_set_uvd_enable(smu, gate);
......@@ -408,8 +419,6 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
break;
}
mutex_unlock(&smu->mutex);
return ret;
}
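As a usage illustration of the locking contract documented above, a hypothetical IP-block suspend path could gate its engine as sketched below. The wrapper function and the choice of AMD_IP_BLOCK_TYPE_VCN are assumptions for the example; only the smu_dpm_set_power_gate() call itself comes from the code above.

/* Hypothetical caller: gate VCN on suspend, relying on caller-side
 * serialization as described in the kerneldoc instead of smu->mutex. */
static int example_vcn_power_gate(struct amdgpu_device *adev)
{
	int ret;

	ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCN, true);
	if (ret)
		dev_err(adev->dev, "failed to power gate VCN (%d)\n", ret);
	return ret;
}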
......@@ -526,7 +535,7 @@ bool is_support_sw_smu(struct amdgpu_device *adev)
bool is_support_sw_smu_xgmi(struct amdgpu_device *adev)
{
if (amdgpu_dpm != 1)
if (!is_support_sw_smu(adev))
return false;
if (adev->asic_type == CHIP_VEGA20)
......@@ -705,6 +714,9 @@ static int smu_set_funcs(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
if (adev->pm.pp_feature & PP_OVERDRIVE_MASK)
smu->od_enabled = true;
switch (adev->asic_type) {
case CHIP_VEGA20:
vega20_set_ppt_funcs(smu);
......@@ -716,6 +728,8 @@ static int smu_set_funcs(struct amdgpu_device *adev)
break;
case CHIP_ARCTURUS:
arcturus_set_ppt_funcs(smu);
/* OD is not supported on Arcturus */
smu->od_enabled = false;
break;
case CHIP_RENOIR:
renoir_set_ppt_funcs(smu);
......@@ -724,9 +738,6 @@ static int smu_set_funcs(struct amdgpu_device *adev)
return -EINVAL;
}
if (adev->pm.pp_feature & PP_OVERDRIVE_MASK)
smu->od_enabled = true;
return 0;
}
......
......@@ -3689,6 +3689,13 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,
PP_ASSERT_WITH_CODE(!result,
"Failed to upload PPtable!", return result);
/*
* If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag.
* That effectively disables the AVFS feature.
*/
if (hwmgr->hardcode_pp_table != NULL)
data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
vega10_update_avfs(hwmgr);
/*
......@@ -5263,6 +5270,59 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_
return 0;
}
static int vega10_disable_power_features_for_compute_performance(struct pp_hwmgr *hwmgr, bool disable)
{
struct vega10_hwmgr *data = hwmgr->backend;
uint32_t feature_mask = 0;
if (disable) {
feature_mask |= data->smu_features[GNLD_ULV].enabled ?
data->smu_features[GNLD_ULV].smu_feature_bitmap : 0;
feature_mask |= data->smu_features[GNLD_DS_GFXCLK].enabled ?
data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0;
feature_mask |= data->smu_features[GNLD_DS_SOCCLK].enabled ?
data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0;
feature_mask |= data->smu_features[GNLD_DS_LCLK].enabled ?
data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0;
feature_mask |= data->smu_features[GNLD_DS_DCEFCLK].enabled ?
data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0;
} else {
feature_mask |= (!data->smu_features[GNLD_ULV].enabled) ?
data->smu_features[GNLD_ULV].smu_feature_bitmap : 0;
feature_mask |= (!data->smu_features[GNLD_DS_GFXCLK].enabled) ?
data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0;
feature_mask |= (!data->smu_features[GNLD_DS_SOCCLK].enabled) ?
data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0;
feature_mask |= (!data->smu_features[GNLD_DS_LCLK].enabled) ?
data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0;
feature_mask |= (!data->smu_features[GNLD_DS_DCEFCLK].enabled) ?
data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0;
}
if (feature_mask)
PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
!disable, feature_mask),
"enable/disable power features for compute performance Failed!",
return -EINVAL);
if (disable) {
data->smu_features[GNLD_ULV].enabled = false;
data->smu_features[GNLD_DS_GFXCLK].enabled = false;
data->smu_features[GNLD_DS_SOCCLK].enabled = false;
data->smu_features[GNLD_DS_LCLK].enabled = false;
data->smu_features[GNLD_DS_DCEFCLK].enabled = false;
} else {
data->smu_features[GNLD_ULV].enabled = true;
data->smu_features[GNLD_DS_GFXCLK].enabled = true;
data->smu_features[GNLD_DS_SOCCLK].enabled = true;
data->smu_features[GNLD_DS_LCLK].enabled = true;
data->smu_features[GNLD_DS_DCEFCLK].enabled = true;
}
return 0;
}
static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.backend_init = vega10_hwmgr_backend_init,
.backend_fini = vega10_hwmgr_backend_fini,
......@@ -5330,6 +5390,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.get_ppfeature_status = vega10_get_ppfeature_status,
.set_ppfeature_status = vega10_set_ppfeature_status,
.set_mp1_state = vega10_set_mp1_state,
.disable_power_features_for_compute_performance =
vega10_disable_power_features_for_compute_performance,
};
int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
......
......@@ -4176,6 +4176,20 @@ static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr,
return ret;
}
static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr,
uint32_t pstate)
{
int ret;
ret = smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetXgmiMode,
pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3);
if (ret)
pr_err("SetXgmiPstate failed!\n");
return ret;
}
static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
/* init/fini related */
.backend_init = vega20_hwmgr_backend_init,
......@@ -4245,6 +4259,7 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
.set_mp1_state = vega20_set_mp1_state,
.smu_i2c_bus_access = vega20_smu_i2c_bus_access,
.set_df_cstate = vega20_set_df_cstate,
.set_xgmi_pstate = vega20_set_xgmi_pstate,
};
int vega20_hwmgr_init(struct pp_hwmgr *hwmgr)
......
......@@ -356,6 +356,9 @@ struct pp_hwmgr_func {
int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode);
int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire);
int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state);
int (*set_xgmi_pstate)(struct pp_hwmgr *hwmgr, uint32_t pstate);
int (*disable_power_features_for_compute_performance)(struct pp_hwmgr *hwmgr,
bool disable);
};
struct pp_table_func {
......
......@@ -159,7 +159,7 @@
//FIXME need updating
// Debug Overrides Bitmask
#define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000001
#define DPM_OVERRIDE_ENABLE_VOLT_LINK_VCN_FCLK 0x00000002
#define DPM_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK 0x00000002
// I2C Config Bit Defines
#define I2C_CONTROLLER_ENABLED 1
......
......@@ -27,7 +27,7 @@
#define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU11_DRIVER_IF_VERSION_VG20 0x13
#define SMU11_DRIVER_IF_VERSION_ARCT 0x0F
#define SMU11_DRIVER_IF_VERSION_ARCT 0x10
#define SMU11_DRIVER_IF_VERSION_NV10 0x33
#define SMU11_DRIVER_IF_VERSION_NV14 0x34
......
......@@ -180,11 +180,13 @@ static int renoir_print_clk_levels(struct smu_context *smu,
int i, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0;
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
SmuMetrics_t metrics = {0};
SmuMetrics_t metrics;
if (!clk_table || clk_type >= SMU_CLK_COUNT)
return -EINVAL;
memset(&metrics, 0, sizeof(metrics));
ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
(void *)&metrics, false);
if (ret)
......
......@@ -368,6 +368,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
version_major = le16_to_cpu(hdr->header.header_version_major);
version_minor = le16_to_cpu(hdr->header.header_version_minor);
if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) {
pr_info("use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id);
switch (version_minor) {
case 0:
ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
......@@ -384,6 +385,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
return ret;
} else {
pr_info("use vbios provided pptable\n");
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
powerplayinfo);
......@@ -1463,16 +1465,13 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
return ret;
}
#define XGMI_STATE_D0 1
#define XGMI_STATE_D3 0
int smu_v11_0_set_xgmi_pstate(struct smu_context *smu,
uint32_t pstate)
{
int ret = 0;
ret = smu_send_smc_msg_with_param(smu,
SMU_MSG_SetXgmiMode,
pstate ? XGMI_STATE_D0 : XGMI_STATE_D3);
pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3);
return ret;
}
......
......@@ -23,6 +23,7 @@
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/completion.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
......@@ -68,6 +69,8 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
if (!entity->rq_list)
return -ENOMEM;
init_completion(&entity->entity_idle);
for (i = 0; i < num_rq_list; ++i)
entity->rq_list[i] = rq_list[i];
......@@ -286,11 +289,12 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
*/
if (spsc_queue_count(&entity->job_queue)) {
if (sched) {
/* Park the kernel for a moment to make sure it isn't processing
* our entity.
/*
* Wait for thread to idle to make sure it isn't processing
* this entity.
*/
kthread_park(sched->thread);
kthread_unpark(sched->thread);
wait_for_completion(&entity->entity_idle);
}
if (entity->dependency) {
dma_fence_remove_callback(entity->dependency,
......
......@@ -47,6 +47,7 @@
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>
#include <drm/drm_print.h>
......@@ -134,6 +135,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
list_for_each_entry_continue(entity, &rq->entities, list) {
if (drm_sched_entity_is_ready(entity)) {
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
return entity;
}
......@@ -144,6 +146,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
if (drm_sched_entity_is_ready(entity)) {
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
return entity;
}
......@@ -496,8 +499,10 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
fence = sched->ops->run_job(s_job);
if (IS_ERR_OR_NULL(fence)) {
if (IS_ERR(fence))
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
s_job->s_fence->parent = NULL;
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
} else {
s_job->s_fence->parent = fence;
}
......@@ -645,9 +650,13 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
struct drm_sched_job *job;
unsigned long flags;
/* Don't destroy jobs while the timeout worker is running */
if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
!cancel_delayed_work(&sched->work_tdr))
/*
* Don't destroy jobs while the timeout worker is running OR thread
* is being parked and hence assumed to not touch ring_mirror_list
*/
if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
!cancel_delayed_work(&sched->work_tdr)) ||
__kthread_should_park(sched->thread))
return NULL;
spin_lock_irqsave(&sched->job_list_lock, flags);
......@@ -724,6 +733,9 @@ static int drm_sched_main(void *param)
continue;
sched_job = drm_sched_entity_pop_job(entity);
complete(&entity->entity_idle);
if (!sched_job)
continue;
......@@ -746,8 +758,9 @@ static int drm_sched_main(void *param)
r);
dma_fence_put(fence);
} else {
if (IS_ERR(fence))
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
drm_sched_process_job(NULL, &sched_job->cb);
}
......
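The entity_idle handshake introduced by the scheduler hunks above is the standard kernel completion pattern: drm_sched_rq_select_entity() re-arms the completion when an entity is picked, drm_sched_main() completes it once the job has been popped, and drm_sched_entity_fini() sleeps on it instead of parking and unparking the scheduler thread (the wait is still gated on the entity having queued jobs). A condensed, stand-alone sketch of the pattern, using only the generic completion API with illustrative names, looks like this:

#include <linux/completion.h>

struct demo_entity {
	struct completion idle;	/* completed while no worker is inside the entity */
};

static void demo_entity_init(struct demo_entity *e)
{
	init_completion(&e->idle);
	complete(&e->idle);	/* start out idle so teardown never blocks */
}

/* Worker side: mark the entity busy only while one of its jobs is handled. */
static void demo_worker_touch(struct demo_entity *e)
{
	reinit_completion(&e->idle);	/* busy */
	/* ... pop and process one job from the entity ... */
	complete(&e->idle);		/* idle again */
}

/* Teardown side: sleep until the worker is guaranteed to be out of the entity. */
static void demo_entity_fini(struct demo_entity *e)
{
	wait_for_completion(&e->idle);
}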
......@@ -26,6 +26,7 @@
#include <drm/spsc_queue.h>
#include <linux/dma-fence.h>
#include <linux/completion.h>
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
......@@ -71,6 +72,7 @@ enum drm_sched_priority {
* @last_scheduled: points to the finished fence of the last scheduled job.
* @last_user: last group leader pushing a job into the entity.
* @stopped: Marks the enity as removed from rq and destined for termination.
* @entity_idle: Signals when the entity is not in use
*
* Entities will emit jobs in order to their corresponding hardware
* ring, and the scheduler will alternate between entities based on
......@@ -94,6 +96,7 @@ struct drm_sched_entity {
struct dma_fence *last_scheduled;
struct task_struct *last_user;
bool stopped;
struct completion entity_idle;
};
/**
......