From cec2a270dbaafba7e2340e9489a5658c67960962 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 17 Jun 2011 16:33:13 +1000
Subject: [PATCH] drm/nva3/pm: tidy and add some comments here and there

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nva3_pm.c | 125 +++++++++++++++++++-----------
 1 file changed, 78 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nva3_pm.c b/drivers/gpu/drm/nouveau/nva3_pm.c
index a9e3de4a9520..98ea3aa0bb65 100644
--- a/drivers/gpu/drm/nouveau/nva3_pm.c
+++ b/drivers/gpu/drm/nouveau/nva3_pm.c
@@ -28,15 +28,15 @@
 #include "nouveau_pm.h"
 
 static u32 read_clk(struct drm_device *, int, bool);
-static u32 read_pll(struct drm_device *, u32, int);
+static u32 read_pll(struct drm_device *, int, u32);
 
 static u32
 read_vco(struct drm_device *dev, int clk)
 {
 	u32 sctl = nv_rd32(dev, 0x4120 + (clk * 4));
 	if ((sctl & 0x00000030) != 0x00000030)
-		return read_pll(dev, 0x00e820, 0x41);
-	return read_pll(dev, 0x00e8a0, 0x42);
+		return read_pll(dev, 0x41, 0x00e820);
+	return read_pll(dev, 0x42, 0x00e8a0);
 }
 
 static u32
@@ -44,6 +44,7 @@ read_clk(struct drm_device *dev, int clk, bool ignore_en)
 {
 	u32 sctl, sdiv, sclk;
 
+	/* refclk for the 0xe8xx plls is always 27MHz (27000 KHz) */
 	if (clk >= 0x40)
 		return 27000;
 
@@ -68,7 +69,7 @@ read_clk(struct drm_device *dev, int clk, bool ignore_en)
 }
 
 static u32
-read_pll(struct drm_device *dev, u32 pll, int clk)
+read_pll(struct drm_device *dev, int clk, u32 pll)
 {
 	u32 ctrl = nv_rd32(dev, pll + 0);
 	u32 sclk, P = 1, N = 1, M = 1;
@@ -78,6 +79,8 @@ read_pll(struct drm_device *dev, u32 pll, int clk)
 		M = (coef & 0x000000ff) >> 0;
 		N = (coef & 0x0000ff00) >> 8;
 		P = (coef & 0x003f0000) >> 16;
+
+		/* no post-divider on these.. */
 		if ((pll & 0x00ff00) == 0x00e800)
 			P = 1;
 
@@ -95,7 +98,7 @@ struct creg {
 };
 
 static int
-calc_clk(struct drm_device *dev, u32 pll, int clk, u32 khz, struct creg *reg)
+calc_clk(struct drm_device *dev, int clk, u32 pll, u32 khz, struct creg *reg)
 {
 	struct pll_lims limits;
 	u32 oclk, sclk, sdiv;
@@ -104,6 +107,10 @@ calc_clk(struct drm_device *dev, u32 pll, int clk, u32 khz, struct creg *reg)
 
 	reg->pll = 0;
 	reg->clk = 0;
+	if (!khz) {
+		NV_DEBUG(dev, "no clock for 0x%04x/0x%02x\n", pll, clk);
+		return 0;
+	}
 
 	switch (khz) {
 	case 27000:
@@ -118,6 +125,14 @@ calc_clk(struct drm_device *dev, u32 pll, int clk, u32 khz, struct creg *reg)
 	default:
 		sclk = read_vco(dev, clk);
 		sdiv = min((sclk * 2) / (khz - 2999), (u32)65);
+		/* if the clock has a PLL attached, and we can get within
+		 * [-2, 3) MHz of the target using a divider, we'll disable
+		 * the PLL and use the divider instead.
+		 *
+		 * divider can go as low as 2, limited here because NVIDIA
+		 * and the VBIOS on my NVA8 seem to prefer using the PLL
+		 * for 810MHz - is there a good reason?
+		 */
 		if (sdiv > 4) {
 			oclk = (sclk * 2) / sdiv;
 			diff = khz - oclk;
@@ -126,6 +141,12 @@ calc_clk(struct drm_device *dev, u32 pll, int clk, u32 khz, struct creg *reg)
 				return oclk;
 			}
 		}
+
+		if (!pll) {
+			NV_ERROR(dev, "bad freq %02x: %d %d\n", clk, khz, sclk);
+			return -ERANGE;
+		}
+
 		break;
 	}
 
@@ -145,12 +166,53 @@ calc_clk(struct drm_device *dev, u32 pll, int clk, u32 khz, struct creg *reg)
 	return ret;
 }
 
+static void
+prog_pll(struct drm_device *dev, int clk, u32 pll, struct creg *reg)
+{
+	const u32 src0 = 0x004120 + (clk * 4);
+	const u32 src1 = 0x004160 + (clk * 4);
+	const u32 ctrl = pll + 0;
+	const u32 coef = pll + 4;
+	u32 cntl;
+
+	if (!reg->clk && !reg->pll) {
+		NV_DEBUG(dev, "no clock for %02x\n", clk);
+		return;
+	}
+
+	cntl = nv_rd32(dev, ctrl) & 0xfffffff2;
+	if (reg->pll) {
+		nv_mask(dev, src0, 0x00000101, 0x00000101);
+		nv_wr32(dev, coef, reg->pll);
+		nv_wr32(dev, ctrl, cntl | 0x00000015);
+		nv_mask(dev, src1, 0x00000100, 0x00000000);
+		nv_mask(dev, src1, 0x00000001, 0x00000000);
+	} else {
+		nv_mask(dev, src1, 0x003f3141, 0x00000101 | reg->clk);
+		nv_wr32(dev, ctrl, cntl | 0x0000001d);
+		nv_mask(dev, ctrl, 0x00000001, 0x00000000);
+		nv_mask(dev, src0, 0x00000100, 0x00000000);
+		nv_mask(dev, src0, 0x00000001, 0x00000000);
+	}
+}
+
+static void
+prog_clk(struct drm_device *dev, int clk, struct creg *reg)
+{
+	if (!reg->clk) {
+		NV_DEBUG(dev, "no clock for %02x\n", clk);
+		return;
+	}
+
+	nv_mask(dev, 0x004120 + (clk * 4), 0x003f3141, 0x00000101 | reg->clk);
+}
+
 int
 nva3_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 {
-	perflvl->core   = read_pll(dev, 0x4200, 0);
-	perflvl->shader = read_pll(dev, 0x4220, 1);
-	perflvl->memory = read_pll(dev, 0x4000, 2);
+	perflvl->core   = read_pll(dev, 0x00, 0x4200);
+	perflvl->shader = read_pll(dev, 0x01, 0x4220);
+	perflvl->memory = read_pll(dev, 0x02, 0x4000);
 	perflvl->unka0  = read_clk(dev, 0x20, false);
 	perflvl->vdec   = read_clk(dev, 0x21, false);
 	return 0;
@@ -174,23 +236,23 @@ nva3_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	ret = calc_clk(dev, 0x4200, 0x10, perflvl->core, &info->nclk);
+	ret = calc_clk(dev, 0x10, 0x4200, perflvl->core, &info->nclk);
 	if (ret < 0)
 		goto out;
 
-	ret = calc_clk(dev, 0x4220, 0x11, perflvl->shader, &info->sclk);
+	ret = calc_clk(dev, 0x11, 0x4220, perflvl->shader, &info->sclk);
 	if (ret < 0)
 		goto out;
 
-	ret = calc_clk(dev, 0x4000, 0x12, perflvl->memory, &info->mclk);
+	ret = calc_clk(dev, 0x12, 0x4000, perflvl->memory, &info->mclk);
 	if (ret < 0)
 		goto out;
 
-	ret = calc_clk(dev, 0x0000, 0x20, perflvl->unka0, &info->unka0);
+	ret = calc_clk(dev, 0x20, 0x0000, perflvl->unka0, &info->unka0);
 	if (ret < 0)
 		goto out;
 
-	ret = calc_clk(dev, 0x0000, 0x21, perflvl->vdec, &info->vdec);
+	ret = calc_clk(dev, 0x21, 0x0000, perflvl->vdec, &info->vdec);
 	if (ret < 0)
 		goto out;
 
@@ -202,51 +264,20 @@ nva3_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 	return info;
 }
 
-static void
-prog_pll(struct drm_device *dev, u32 pll, int clk, struct creg *reg)
-{
-	const u32 src0 = 0x004120 + (clk * 4);
-	const u32 src1 = 0x004160 + (clk * 4);
-	const u32 ctrl = pll + 0;
-	const u32 coef = pll + 4;
-	u32 cntl;
-
-	cntl = nv_rd32(dev, ctrl) & 0xfffffff2;
-	if (reg->pll) {
-		nv_mask(dev, src0, 0x00000101, 0x00000101);
-		nv_wr32(dev, coef, reg->pll);
-		nv_wr32(dev, ctrl, cntl | 0x00000015);
-		nv_mask(dev, src1, 0x00000100, 0x00000000);
-		nv_mask(dev, src1, 0x00000001, 0x00000000);
-	} else {
-		nv_mask(dev, src1, 0x003f3141, 0x00000101 | reg->clk);
-		nv_wr32(dev, ctrl, cntl | 0x0000001d);
-		nv_mask(dev, ctrl, 0x00000001, 0x00000000);
-		nv_mask(dev, src0, 0x00000100, 0x00000000);
-		nv_mask(dev, src0, 0x00000001, 0x00000000);
-	}
-}
-
-static void
-prog_clk(struct drm_device *dev, int clk, struct creg *reg)
-{
-	nv_mask(dev, 0x004120 + (clk * 4), 0x003f3141, 0x00000101 | reg->clk);
-}
-
 void
 nva3_pm_clocks_set(struct drm_device *dev, void *pre_state)
 {
 	struct nva3_pm_state *info = pre_state;
 
-	prog_pll(dev, 0x004200, 0, &info->nclk);
-	prog_pll(dev, 0x004220, 1, &info->sclk);
+	prog_pll(dev, 0x00, 0x004200, &info->nclk);
+	prog_pll(dev, 0x01, 0x004220, &info->sclk);
 	prog_clk(dev, 0x20, &info->unka0);
 	prog_clk(dev, 0x21, &info->vdec);
 
 	nv_wr32(dev, 0x100210, 0);
 	nv_wr32(dev, 0x1002dc, 1);
 	nv_wr32(dev, 0x004018, 0x00001000);
-	prog_pll(dev, 0x004000, 2, &info->mclk);
+	prog_pll(dev, 0x02, 0x004000, &info->mclk);
 	if (nv_rd32(dev, 0x4000) & 0x00000008)
 		nv_wr32(dev, 0x004018, 0x1000d000);
 	else
-- 
2.30.9