Commit 58361aae authored by Oded Gabbay

habanalabs: set max power according to card type

In Gaudi, the default max power setting differs between PCI and PMC
cards. Therefore, the driver needs to set the default only after it knows
the card type.

The current code has a bug where it limits the maximum power of the PMC
card to 200W after a reset occurs.
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 36545279
...@@ -1069,7 +1069,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, ...@@ -1069,7 +1069,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
goto out_err; goto out_err;
} }
hl_set_max_power(hdev, hdev->max_power); hl_set_max_power(hdev);
} else { } else {
rc = hdev->asic_funcs->soft_reset_late_init(hdev); rc = hdev->asic_funcs->soft_reset_late_init(hdev);
if (rc) { if (rc) {
...@@ -1318,6 +1318,11 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -1318,6 +1318,11 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto out_disabled; goto out_disabled;
} }
/* Need to call this again because the max power might change,
* depending on card type for certain ASICs
*/
hl_set_max_power(hdev);
/* /*
* hl_hwmon_init() must be called after device_late_init(), because only * hl_hwmon_init() must be called after device_late_init(), because only
* there we get the information from the device about which * there we get the information from the device about which
......
...@@ -1462,6 +1462,8 @@ struct hl_device_idle_busy_ts { ...@@ -1462,6 +1462,8 @@ struct hl_device_idle_busy_ts {
* details. * details.
* @in_reset: is device in reset flow. * @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile. * @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card
* type of the current device.
* @cs_active_cnt: number of active command submissions on this device (active * @cs_active_cnt: number of active command submissions on this device (active
* means already in H/W queues) * means already in H/W queues)
* @major: habanalabs kernel driver major. * @major: habanalabs kernel driver major.
...@@ -1566,6 +1568,7 @@ struct hl_device { ...@@ -1566,6 +1568,7 @@ struct hl_device {
u64 clock_gating_mask; u64 clock_gating_mask;
atomic_t in_reset; atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile; enum hl_pll_frequency curr_pll_profile;
enum armcp_card_types card_type;
int cs_active_cnt; int cs_active_cnt;
u32 major; u32 major;
u32 high_pll; u32 high_pll;
...@@ -1858,7 +1861,7 @@ int hl_get_pwm_info(struct hl_device *hdev, ...@@ -1858,7 +1861,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value); long value);
u64 hl_get_max_power(struct hl_device *hdev); u64 hl_get_max_power(struct hl_device *hdev);
void hl_set_max_power(struct hl_device *hdev, u64 value); void hl_set_max_power(struct hl_device *hdev);
int hl_set_voltage(struct hl_device *hdev, int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value); int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev, int hl_set_current(struct hl_device *hdev,
......
...@@ -81,7 +81,7 @@ u64 hl_get_max_power(struct hl_device *hdev) ...@@ -81,7 +81,7 @@ u64 hl_get_max_power(struct hl_device *hdev)
return result; return result;
} }
void hl_set_max_power(struct hl_device *hdev, u64 value) void hl_set_max_power(struct hl_device *hdev)
{ {
struct armcp_packet pkt; struct armcp_packet pkt;
int rc; int rc;
...@@ -90,7 +90,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value) ...@@ -90,7 +90,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value)
pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET << pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT); ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(value); pkt.value = cpu_to_le64(hdev->max_power);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL); 0, NULL);
...@@ -316,7 +316,7 @@ static ssize_t max_power_store(struct device *dev, ...@@ -316,7 +316,7 @@ static ssize_t max_power_store(struct device *dev,
} }
hdev->max_power = value; hdev->max_power = value;
hl_set_max_power(hdev, value); hl_set_max_power(hdev);
out: out:
return count; return count;
...@@ -422,6 +422,7 @@ int hl_sysfs_init(struct hl_device *hdev) ...@@ -422,6 +422,7 @@ int hl_sysfs_init(struct hl_device *hdev)
hdev->pm_mng_profile = PM_AUTO; hdev->pm_mng_profile = PM_AUTO;
else else
hdev->pm_mng_profile = PM_MANUAL; hdev->pm_mng_profile = PM_MANUAL;
hdev->max_power = hdev->asic_prop.max_power_default; hdev->max_power = hdev->asic_prop.max_power_default;
hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group); hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
......
...@@ -456,7 +456,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) ...@@ -456,7 +456,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->num_of_events = GAUDI_EVENT_SIZE; prop->num_of_events = GAUDI_EVENT_SIZE;
prop->tpc_enabled_mask = TPC_ENABLED_MASK; prop->tpc_enabled_mask = TPC_ENABLED_MASK;
prop->max_power_default = MAX_POWER_DEFAULT; prop->max_power_default = MAX_POWER_DEFAULT_PCI;
prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
...@@ -6055,6 +6055,15 @@ static int gaudi_armcp_info_get(struct hl_device *hdev) ...@@ -6055,6 +6055,15 @@ static int gaudi_armcp_info_get(struct hl_device *hdev)
strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN); CARD_NAME_MAX_LEN);
hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type);
if (hdev->card_type == armcp_card_type_pci)
prop->max_power_default = MAX_POWER_DEFAULT_PCI;
else if (hdev->card_type == armcp_card_type_pmc)
prop->max_power_default = MAX_POWER_DEFAULT_PMC;
hdev->max_power = prop->max_power_default;
return 0; return 0;
} }
......
...@@ -41,7 +41,8 @@ ...@@ -41,7 +41,8 @@
#define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */ #define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */
#define MAX_POWER_DEFAULT 200000 /* 200W */ #define MAX_POWER_DEFAULT_PCI 200000 /* 200W */
#define MAX_POWER_DEFAULT_PMC 350000 /* 350W */
#define GAUDI_CPU_TIMEOUT_USEC 15000000 /* 15s */ #define GAUDI_CPU_TIMEOUT_USEC 15000000 /* 15s */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment