Commit 61109488 authored by Dave Airlie's avatar Dave Airlie

Merge branch 'drm-next-3.10-2' of git://people.freedesktop.org/~agd5f/linux into drm-next

Just some fixes that have accumulated over the last couple of
weeks and some new PCI ids.

* 'drm-next-3.10-2' of git://people.freedesktop.org/~agd5f/linux:
  drm/radeon: fix handling of v6 power tables
  drm/radeon: clarify family checks in pm table parsing
  drm/radeon: consolidate UVD clock programming
  drm/radeon: fix UPLL_REF_DIV_MASK definition
  radeon: add bo tracking debugfs
  drm/radeon: add new richland pci ids
  drm/radeon: add some new SI PCI ids
  drm/radeon: fix scratch reg handling for UVD fence
  drm/radeon: allocate SA bo in the requested domain
  drm/radeon: fix possible segfault when parsing pm tables
  drm/radeon: fix endian bugs in atom_allocate_fb_scratch()
parents f49e7259 441e76ca
......@@ -1394,10 +1394,10 @@ int atom_allocate_fb_scratch(struct atom_context *ctx)
firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
DRM_DEBUG("atom firmware requested %08x %dkb\n",
firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware,
firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb);
le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
usage_bytes = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb * 1024;
usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
}
ctx->scratch_size_bytes = 0;
if (usage_bytes == 0)
......
......@@ -989,62 +989,10 @@ int sumo_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
return r;
}
static int evergreen_uvd_calc_post_div(unsigned target_freq,
unsigned vco_freq,
unsigned *div)
{
/* target larger than vco frequency ? */
if (vco_freq < target_freq)
return -1; /* forget it */
/* Fclk = Fvco / PDIV */
*div = vco_freq / target_freq;
/* we alway need a frequency less than or equal the target */
if ((vco_freq / *div) > target_freq)
*div += 1;
/* dividers above 5 must be even */
if (*div > 5 && *div % 2)
*div += 1;
/* out of range ? */
if (*div >= 128)
return -1; /* forget it */
return vco_freq / *div;
}
static int evergreen_uvd_send_upll_ctlreq(struct radeon_device *rdev)
{
unsigned i;
/* assert UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
/* wait for CTLACK and CTLACK2 to get asserted */
for (i = 0; i < 100; ++i) {
uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
break;
mdelay(10);
}
if (i == 100)
return -ETIMEDOUT;
/* deassert UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
return 0;
}
int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
/* start off with something large */
int optimal_diff_score = 0x7FFFFFF;
unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
unsigned vco_freq;
unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
int r;
/* bypass vclk and dclk with bclk */
......@@ -1061,40 +1009,11 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
return 0;
}
/* loop through vco from low to high */
for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
int calc_clk, diff_score, diff_vclk, diff_dclk;
unsigned vclk_div, dclk_div;
/* fb div out of range ? */
if (fb_div > 0x03FFFFFF)
break; /* it can oly get worse */
/* calc vclk with current vco freq. */
calc_clk = evergreen_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
if (calc_clk == -1)
break; /* vco is too big, it has to stop. */
diff_vclk = vclk - calc_clk;
/* calc dclk with current vco freq. */
calc_clk = evergreen_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
if (calc_clk == -1)
break; /* vco is too big, it has to stop. */
diff_dclk = dclk - calc_clk;
/* determine if this vco setting is better than current optimal settings */
diff_score = abs(diff_vclk) + abs(diff_dclk);
if (diff_score < optimal_diff_score) {
optimal_fb_div = fb_div;
optimal_vclk_div = vclk_div;
optimal_dclk_div = dclk_div;
optimal_vco_freq = vco_freq;
optimal_diff_score = diff_score;
if (optimal_diff_score == 0)
break; /* it can't get better than this */
}
}
r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
16384, 0x03FFFFFF, 0, 128, 5,
&fb_div, &vclk_div, &dclk_div);
if (r)
return r;
/* set VCO_MODE to 1 */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
......@@ -1108,7 +1027,7 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
mdelay(1);
r = evergreen_uvd_send_upll_ctlreq(rdev);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
......@@ -1119,19 +1038,19 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
/* set feedback divider */
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
/* set ref divider to 0 */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
if (optimal_vco_freq < 187500)
if (fb_div < 307200)
WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
else
WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
/* set PDIV_A and PDIV_B */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
/* give the PLL some time to settle */
......@@ -1145,7 +1064,7 @@ int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
/* switch from bypass mode to normal mode */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
r = evergreen_uvd_send_upll_ctlreq(rdev);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
......
......@@ -59,7 +59,7 @@
# define UPLL_SLEEP_MASK 0x00000002
# define UPLL_BYPASS_EN_MASK 0x00000004
# define UPLL_CTLREQ_MASK 0x00000008
# define UPLL_REF_DIV_MASK 0x001F0000
# define UPLL_REF_DIV_MASK 0x003F0000
# define UPLL_VCO_MODE_MASK 0x00000200
# define UPLL_CTLACK_MASK 0x40000000
# define UPLL_CTLACK2_MASK 0x80000000
......
......@@ -749,7 +749,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
(rdev->pdev->device == 0x990F) ||
(rdev->pdev->device == 0x9910) ||
(rdev->pdev->device == 0x9917) ||
(rdev->pdev->device == 0x9999)) {
(rdev->pdev->device == 0x9999) ||
(rdev->pdev->device == 0x999C)) {
rdev->config.cayman.max_simds_per_se = 6;
rdev->config.cayman.max_backends_per_se = 2;
} else if ((rdev->pdev->device == 0x9903) ||
......@@ -758,7 +759,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
(rdev->pdev->device == 0x990D) ||
(rdev->pdev->device == 0x990E) ||
(rdev->pdev->device == 0x9913) ||
(rdev->pdev->device == 0x9918)) {
(rdev->pdev->device == 0x9918) ||
(rdev->pdev->device == 0x999D)) {
rdev->config.cayman.max_simds_per_se = 4;
rdev->config.cayman.max_backends_per_se = 2;
} else if ((rdev->pdev->device == 0x9919) ||
......
......@@ -1208,6 +1208,10 @@
#define UVD_CONTEXT_ID 0xf6f4
# define UPLL_CTLREQ_MASK 0x00000008
# define UPLL_CTLACK_MASK 0x40000000
# define UPLL_CTLACK2_MASK 0x80000000
/*
* PM4
*/
......
......@@ -358,7 +358,8 @@ struct radeon_bo {
struct radeon_device *rdev;
struct drm_gem_object gem_base;
struct ttm_bo_kmap_obj dma_buf_vmap;
struct ttm_bo_kmap_obj dma_buf_vmap;
pid_t pid;
};
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
......@@ -372,6 +373,8 @@ struct radeon_bo_list {
u32 tiling_flags;
};
int radeon_gem_debugfs_init(struct radeon_device *rdev);
/* sub-allocation manager, it has to be protected by another lock.
* By conception this is an helper for other part of the driver
* like the indirect buffer or semaphore, which both have their
......@@ -1159,6 +1162,17 @@ void radeon_uvd_free_handles(struct radeon_device *rdev,
struct drm_file *filp);
int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
void radeon_uvd_note_usage(struct radeon_device *rdev);
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
unsigned vclk, unsigned dclk,
unsigned vco_min, unsigned vco_max,
unsigned fb_factor, unsigned fb_mask,
unsigned pd_min, unsigned pd_max,
unsigned pd_even,
unsigned *optimal_fb_div,
unsigned *optimal_vclk_div,
unsigned *optimal_dclk_div);
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
unsigned cg_upll_func_cntl);
struct r600_audio {
int channels;
......
......@@ -2028,6 +2028,8 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
num_modes = power_info->info.ucNumOfPowerModeEntries;
if (num_modes > ATOM_MAX_NUMBEROF_POWER_BLOCK)
num_modes = ATOM_MAX_NUMBEROF_POWER_BLOCK;
if (num_modes == 0)
return state_index;
rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) * num_modes, GFP_KERNEL);
if (!rdev->pm.power_state)
return state_index;
......@@ -2307,7 +2309,7 @@ static void radeon_atombios_parse_pplib_non_clock_info(struct radeon_device *rde
rdev->pm.default_power_state_index = state_index;
rdev->pm.power_state[state_index].default_clock_mode =
&rdev->pm.power_state[state_index].clock_info[mode_index - 1];
if (ASIC_IS_DCE5(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
if ((rdev->family >= CHIP_BARTS) && !(rdev->flags & RADEON_IS_IGP)) {
/* NI chips post without MC ucode, so default clocks are strobe mode only */
rdev->pm.default_sclk = rdev->pm.power_state[state_index].clock_info[0].sclk;
rdev->pm.default_mclk = rdev->pm.power_state[state_index].clock_info[0].mclk;
......@@ -2345,7 +2347,7 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev,
sclk |= clock_info->rs780.ucLowEngineClockHigh << 16;
rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk;
}
} else if (ASIC_IS_DCE6(rdev)) {
} else if (rdev->family >= CHIP_TAHITI) {
sclk = le16_to_cpu(clock_info->si.usEngineClockLow);
sclk |= clock_info->si.ucEngineClockHigh << 16;
mclk = le16_to_cpu(clock_info->si.usMemoryClockLow);
......@@ -2358,7 +2360,7 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev,
le16_to_cpu(clock_info->si.usVDDC);
rdev->pm.power_state[state_index].clock_info[mode_index].voltage.vddci =
le16_to_cpu(clock_info->si.usVDDCI);
} else if (ASIC_IS_DCE4(rdev)) {
} else if (rdev->family >= CHIP_CEDAR) {
sclk = le16_to_cpu(clock_info->evergreen.usEngineClockLow);
sclk |= clock_info->evergreen.ucEngineClockHigh << 16;
mclk = le16_to_cpu(clock_info->evergreen.usMemoryClockLow);
......@@ -2432,6 +2434,8 @@ static int radeon_atombios_parse_power_table_4_5(struct radeon_device *rdev)
power_info = (union power_info *)(mode_info->atom_context->bios + data_offset);
radeon_atombios_add_pplib_thermal_controller(rdev, &power_info->pplib.sThermalController);
if (power_info->pplib.ucNumStates == 0)
return state_index;
rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) *
power_info->pplib.ucNumStates, GFP_KERNEL);
if (!rdev->pm.power_state)
......@@ -2514,6 +2518,7 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
u16 data_offset;
u8 frev, crev;
u8 *power_state_offset;
if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
&frev, &crev, &data_offset))
......@@ -2530,15 +2535,17 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
non_clock_info_array = (struct _NonClockInfoArray *)
(mode_info->atom_context->bios + data_offset +
le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
if (state_array->ucNumEntries == 0)
return state_index;
rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) *
state_array->ucNumEntries, GFP_KERNEL);
if (!rdev->pm.power_state)
return state_index;
power_state_offset = (u8 *)state_array->states;
for (i = 0; i < state_array->ucNumEntries; i++) {
mode_index = 0;
power_state = (union pplib_power_state *)&state_array->states[i];
/* XXX this might be an inagua bug... */
non_clock_array_index = i; /* power_state->v2.nonClockInfoIndex */
power_state = (union pplib_power_state *)power_state_offset;
non_clock_array_index = power_state->v2.nonClockInfoIndex;
non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
&non_clock_info_array->nonClockInfo[non_clock_array_index];
rdev->pm.power_state[i].clock_info = kzalloc(sizeof(struct radeon_pm_clock_info) *
......@@ -2550,9 +2557,6 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
if (power_state->v2.ucNumDPMLevels) {
for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) {
clock_array_index = power_state->v2.clockInfoIndex[j];
/* XXX this might be an inagua bug... */
if (clock_array_index >= clock_info_array->ucNumEntries)
continue;
clock_info = (union pplib_clock_info *)
&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
valid = radeon_atombios_parse_pplib_clock_info(rdev,
......@@ -2574,6 +2578,7 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
non_clock_info);
state_index++;
}
power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
}
/* if multiple clock modes, mark the lowest as no display */
for (i = 0; i < state_index; i++) {
......@@ -2620,7 +2625,9 @@ void radeon_atombios_get_power_modes(struct radeon_device *rdev)
default:
break;
}
} else {
}
if (state_index == 0) {
rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state), GFP_KERNEL);
if (rdev->pm.power_state) {
rdev->pm.power_state[0].clock_info =
......
......@@ -1178,6 +1178,11 @@ int radeon_device_init(struct radeon_device *rdev,
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
r = radeon_gem_debugfs_init(rdev);
if (r) {
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
}
if (rdev->flags & RADEON_IS_AGP && !rdev->accel_working) {
/* Acceleration not working on AGP card try again
* with fallback to PCI or PCIE GART
......
......@@ -767,8 +767,8 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
rdev->fence_drv[ring].scratch_reg = 0;
if (ring != R600_RING_TYPE_UVD_INDEX) {
rdev->fence_drv[ring].scratch_reg = 0;
index = R600_WB_EVENT_OFFSET + ring * 4;
rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
......
......@@ -84,6 +84,7 @@ int radeon_gem_object_create(struct radeon_device *rdev, int size,
return r;
}
*obj = &robj->gem_base;
robj->pid = task_pid_nr(current);
mutex_lock(&rdev->gem.mutex);
list_add_tail(&robj->list, &rdev->gem.objects);
......@@ -575,3 +576,52 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
{
return drm_gem_handle_delete(file_priv, handle);
}
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_gem_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_bo *rbo;
unsigned i = 0;
mutex_lock(&rdev->gem.mutex);
list_for_each_entry(rbo, &rdev->gem.objects, list) {
unsigned domain;
const char *placement;
domain = radeon_mem_type_to_domain(rbo->tbo.mem.mem_type);
switch (domain) {
case RADEON_GEM_DOMAIN_VRAM:
placement = "VRAM";
break;
case RADEON_GEM_DOMAIN_GTT:
placement = " GTT";
break;
case RADEON_GEM_DOMAIN_CPU:
default:
placement = " CPU";
break;
}
seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n",
i, radeon_bo_size(rbo) >> 10, radeon_bo_size(rbo) >> 20,
placement, (unsigned long)rbo->pid);
i++;
}
mutex_unlock(&rdev->gem.mutex);
return 0;
}
static struct drm_info_list radeon_debugfs_gem_list[] = {
{"radeon_gem_info", &radeon_debugfs_gem_info, 0, NULL},
};
#endif
int radeon_gem_debugfs_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
return radeon_debugfs_add_files(rdev, radeon_debugfs_gem_list, 1);
#endif
return 0;
}
......@@ -64,7 +64,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
}
r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
RADEON_GEM_DOMAIN_CPU, NULL, &sa_manager->bo);
domain, NULL, &sa_manager->bo);
if (r) {
dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
......
......@@ -692,3 +692,140 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
if (set_clocks)
radeon_set_uvd_clocks(rdev, 53300, 40000);
}
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
unsigned target_freq,
unsigned pd_min,
unsigned pd_even)
{
unsigned post_div = vco_freq / target_freq;
/* adjust to post divider minimum value */
if (post_div < pd_min)
post_div = pd_min;
/* we alway need a frequency less than or equal the target */
if ((vco_freq / post_div) > target_freq)
post_div += 1;
/* post dividers above a certain value must be even */
if (post_div > pd_even && post_div % 2)
post_div += 1;
return post_div;
}
/**
* radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
*
* @rdev: radeon_device pointer
* @vclk: wanted VCLK
* @dclk: wanted DCLK
* @vco_min: minimum VCO frequency
* @vco_max: maximum VCO frequency
* @fb_factor: factor to multiply vco freq with
* @fb_mask: limit and bitmask for feedback divider
* @pd_min: post divider minimum
* @pd_max: post divider maximum
* @pd_even: post divider must be even above this value
* @optimal_fb_div: resulting feedback divider
* @optimal_vclk_div: resulting vclk post divider
* @optimal_dclk_div: resulting dclk post divider
*
* Calculate dividers for UVDs UPLL (R6xx-SI, except APUs).
* Returns zero on success -EINVAL on error.
*/
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
unsigned vclk, unsigned dclk,
unsigned vco_min, unsigned vco_max,
unsigned fb_factor, unsigned fb_mask,
unsigned pd_min, unsigned pd_max,
unsigned pd_even,
unsigned *optimal_fb_div,
unsigned *optimal_vclk_div,
unsigned *optimal_dclk_div)
{
unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;
/* start off with something large */
unsigned optimal_score = ~0;
/* loop through vco from low to high */
vco_min = max(max(vco_min, vclk), dclk);
for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
unsigned vclk_div, dclk_div, score;
do_div(fb_div, ref_freq);
/* fb div out of range ? */
if (fb_div > fb_mask)
break; /* it can oly get worse */
fb_div &= fb_mask;
/* calc vclk divider with current vco freq */
vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
pd_min, pd_even);
if (vclk_div > pd_max)
break; /* vco is too big, it has to stop */
/* calc dclk divider with current vco freq */
dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
pd_min, pd_even);
if (vclk_div > pd_max)
break; /* vco is too big, it has to stop */
/* calc score with current vco freq */
score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
/* determine if this vco setting is better than current optimal settings */
if (score < optimal_score) {
*optimal_fb_div = fb_div;
*optimal_vclk_div = vclk_div;
*optimal_dclk_div = dclk_div;
optimal_score = score;
if (optimal_score == 0)
break; /* it can't get better than this */
}
}
/* did we found a valid setup ? */
if (optimal_score == ~0)
return -EINVAL;
return 0;
}
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
unsigned cg_upll_func_cntl)
{
unsigned i;
/* make sure UPLL_CTLREQ is deasserted */
WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
mdelay(10);
/* assert UPLL_CTLREQ */
WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
/* wait for CTLACK and CTLACK2 to get asserted */
for (i = 0; i < 100; ++i) {
uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
if ((RREG32(cg_upll_func_cntl) & mask) == mask)
break;
mdelay(10);
}
/* deassert UPLL_CTLREQ */
WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
if (i == 100) {
DRM_ERROR("Timeout setting UVD clocks!\n");
return -ETIMEDOUT;
}
return 0;
}
......@@ -44,56 +44,9 @@ void rv770_fini(struct radeon_device *rdev);
static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
static int rv770_uvd_calc_post_div(unsigned target_freq,
unsigned vco_freq,
unsigned *div)
{
/* Fclk = Fvco / PDIV */
*div = vco_freq / target_freq;
/* we alway need a frequency less than or equal the target */
if ((vco_freq / *div) > target_freq)
*div += 1;
/* out of range ? */
if (*div > 30)
return -1; /* forget it */
*div -= 1;
return vco_freq / (*div + 1);
}
static int rv770_uvd_send_upll_ctlreq(struct radeon_device *rdev)
{
unsigned i;
/* assert UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
/* wait for CTLACK and CTLACK2 to get asserted */
for (i = 0; i < 100; ++i) {
uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
break;
mdelay(10);
}
if (i == 100)
return -ETIMEDOUT;
/* deassert UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
return 0;
}
int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
/* start off with something large */
int optimal_diff_score = 0x7FFFFFF;
unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
unsigned vco_freq, vco_min = 50000, vco_max = 160000;
unsigned ref_freq = rdev->clock.spll.reference_freq;
unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
int r;
/* RV740 uses evergreen uvd clk programming */
......@@ -111,44 +64,15 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
return 0;
}
/* loop through vco from low to high */
vco_min = max(max(vco_min, vclk), dclk);
for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 500) {
uint64_t fb_div = (uint64_t)vco_freq * 43663;
int calc_clk, diff_score, diff_vclk, diff_dclk;
unsigned vclk_div, dclk_div;
do_div(fb_div, ref_freq);
fb_div |= 1;
/* fb div out of range ? */
if (fb_div > 0x03FFFFFF)
break; /* it can oly get worse */
/* calc vclk with current vco freq. */
calc_clk = rv770_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
if (calc_clk == -1)
break; /* vco is too big, it has to stop. */
diff_vclk = vclk - calc_clk;
/* calc dclk with current vco freq. */
calc_clk = rv770_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
if (calc_clk == -1)
break; /* vco is too big, it has to stop. */
diff_dclk = dclk - calc_clk;
/* determine if this vco setting is better than current optimal settings */
diff_score = abs(diff_vclk) + abs(diff_dclk);
if (diff_score < optimal_diff_score) {
optimal_fb_div = fb_div;
optimal_vclk_div = vclk_div;
optimal_dclk_div = dclk_div;
optimal_vco_freq = vco_freq;
optimal_diff_score = diff_score;
if (optimal_diff_score == 0)
break; /* it can't get better than this */
}
}
r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000,
43663, 0x03FFFFFE, 1, 30, ~0,
&fb_div, &vclk_div, &dclk_div);
if (r)
return r;
fb_div |= 1;
vclk_div -= 1;
dclk_div -= 1;
/* set UPLL_FB_DIV to 0x50000 */
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(0x50000), ~UPLL_FB_DIV_MASK);
......@@ -160,7 +84,7 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(1), ~UPLL_FB_DIV(1));
r = rv770_uvd_send_upll_ctlreq(rdev);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
......@@ -170,13 +94,13 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
/* set the required FB_DIV, REF_DIV, Post divder values */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REF_DIV(1), ~UPLL_REF_DIV_MASK);
WREG32_P(CG_UPLL_FUNC_CNTL_2,
UPLL_SW_HILEN(optimal_vclk_div >> 1) |
UPLL_SW_LOLEN((optimal_vclk_div >> 1) + (optimal_vclk_div & 1)) |
UPLL_SW_HILEN2(optimal_dclk_div >> 1) |
UPLL_SW_LOLEN2((optimal_dclk_div >> 1) + (optimal_dclk_div & 1)),
UPLL_SW_HILEN(vclk_div >> 1) |
UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) |
UPLL_SW_HILEN2(dclk_div >> 1) |
UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)),
~UPLL_SW_MASK);
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div),
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div),
~UPLL_FB_DIV_MASK);
/* give the PLL some time to settle */
......@@ -191,7 +115,7 @@ int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
WREG32_P(CG_UPLL_FUNC_CNTL_3, 0, ~UPLL_FB_DIV(1));
r = rv770_uvd_send_upll_ctlreq(rdev);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
......
......@@ -45,7 +45,7 @@
# define UPLL_BYPASS_EN_MASK 0x00000004
# define UPLL_CTLREQ_MASK 0x00000008
# define UPLL_REF_DIV(x) ((x) << 16)
# define UPLL_REF_DIV_MASK 0x001F0000
# define UPLL_REF_DIV_MASK 0x003F0000
# define UPLL_CTLACK_MASK 0x40000000
# define UPLL_CTLACK2_MASK 0x80000000
#define CG_UPLL_FUNC_CNTL_2 0x71c
......
......@@ -5415,62 +5415,9 @@ uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
return clock;
}
static int si_uvd_calc_post_div(unsigned target_freq,
unsigned vco_freq,
unsigned *div)
{
/* target larger than vco frequency ? */
if (vco_freq < target_freq)
return -1; /* forget it */
/* Fclk = Fvco / PDIV */
*div = vco_freq / target_freq;
/* we alway need a frequency less than or equal the target */
if ((vco_freq / *div) > target_freq)
*div += 1;
/* dividers above 5 must be even */
if (*div > 5 && *div % 2)
*div += 1;
/* out of range ? */
if (*div >= 128)
return -1; /* forget it */
return vco_freq / *div;
}
static int si_uvd_send_upll_ctlreq(struct radeon_device *rdev)
{
unsigned i;
/* assert UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
/* wait for CTLACK and CTLACK2 to get asserted */
for (i = 0; i < 100; ++i) {
uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
break;
mdelay(10);
}
if (i == 100)
return -ETIMEDOUT;
/* deassert UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
return 0;
}
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
/* start off with something large */
int optimal_diff_score = 0x7FFFFFF;
unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
unsigned vco_freq;
unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
int r;
/* bypass vclk and dclk with bclk */
......@@ -5487,40 +5434,11 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
return 0;
}
/* loop through vco from low to high */
for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
int calc_clk, diff_score, diff_vclk, diff_dclk;
unsigned vclk_div, dclk_div;
/* fb div out of range ? */
if (fb_div > 0x03FFFFFF)
break; /* it can oly get worse */
/* calc vclk with current vco freq. */
calc_clk = si_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
if (calc_clk == -1)
break; /* vco is too big, it has to stop. */
diff_vclk = vclk - calc_clk;
/* calc dclk with current vco freq. */
calc_clk = si_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
if (calc_clk == -1)
break; /* vco is too big, it has to stop. */
diff_dclk = dclk - calc_clk;
/* determine if this vco setting is better than current optimal settings */
diff_score = abs(diff_vclk) + abs(diff_dclk);
if (diff_score < optimal_diff_score) {
optimal_fb_div = fb_div;
optimal_vclk_div = vclk_div;
optimal_dclk_div = dclk_div;
optimal_vco_freq = vco_freq;
optimal_diff_score = diff_score;
if (optimal_diff_score == 0)
break; /* it can't get better than this */
}
}
r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
16384, 0x03FFFFFF, 0, 128, 5,
&fb_div, &vclk_div, &dclk_div);
if (r)
return r;
/* set RESET_ANTI_MUX to 0 */
WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
......@@ -5537,7 +5455,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
mdelay(1);
r = si_uvd_send_upll_ctlreq(rdev);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
......@@ -5548,19 +5466,19 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
/* set feedback divider */
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
/* set ref divider to 0 */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
if (optimal_vco_freq < 187500)
if (fb_div < 307200)
WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
else
WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
/* set PDIV_A and PDIV_B */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
/* give the PLL some time to settle */
......@@ -5574,7 +5492,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
/* switch from bypass mode to normal mode */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
r = si_uvd_send_upll_ctlreq(rdev);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
......
......@@ -36,7 +36,7 @@
# define UPLL_BYPASS_EN_MASK 0x00000004
# define UPLL_CTLREQ_MASK 0x00000008
# define UPLL_VCO_MODE_MASK 0x00000600
# define UPLL_REF_DIV_MASK 0x001F0000
# define UPLL_REF_DIV_MASK 0x003F0000
# define UPLL_CTLACK_MASK 0x40000000
# define UPLL_CTLACK2_MASK 0x80000000
#define CG_UPLL_FUNC_CNTL_2 0x638
......
......@@ -240,6 +240,7 @@
{0x1002, 0x6819, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6821, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6822, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6823, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6824, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
......@@ -247,11 +248,13 @@
{0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
{0x1002, 0x682A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
{0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
......@@ -603,6 +606,8 @@
{0x1002, 0x9999, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
{0x1002, 0x999A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
{0x1002, 0x999B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
{0x1002, 0x999C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
{0x1002, 0x999D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
{0x1002, 0x99A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
{0x1002, 0x99A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
{0x1002, 0x99A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment