Commit 6c245386 authored by yipechai, committed by Alex Deucher

drm/amdgpu: Modify xgmi block to fit for the unified ras block data and ops

1. Modify the xgmi block to fit the unified ras block data and ops.
2. Rename amdgpu_xgmi_ras_funcs to amdgpu_xgmi_ras, and drop the _funcs suffix from the corresponding variable names.
3. Remove the const qualifier from the xgmi ras variable so that the xgmi ras block can be inserted into the amdgpu device ras block linked list.
4. Invoke amdgpu_ras_register_ras_block to register the xgmi ras block in the amdgpu device ras block linked list.
5. Remove the redundant xgmi code in amdgpu_ras.c now that the unified ras block is used.
Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8b0fb0e9
...@@ -454,12 +454,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) ...@@ -454,12 +454,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r; return r;
} }
if (!adev->gmc.xgmi.connected_to_cpu) if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs; adev->gmc.xgmi.ras = &xgmi_ras;
amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
}
if (adev->gmc.xgmi.ras_funcs && if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
adev->gmc.xgmi.ras_funcs->ras_late_init) { r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
if (r) if (r)
return r; return r;
} }
...@@ -505,9 +506,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) ...@@ -505,9 +506,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
adev->mmhub.ras_funcs->ras_fini) adev->mmhub.ras_funcs->ras_fini)
adev->mmhub.ras_funcs->ras_fini(adev); adev->mmhub.ras_funcs->ras_fini(adev);
if (adev->gmc.xgmi.ras_funcs && if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini) adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
if (adev->hdp.ras_funcs && if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->ras_fini) adev->hdp.ras_funcs->ras_fini)
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/types.h> #include <linux/types.h>
#include "amdgpu_irq.h" #include "amdgpu_irq.h"
#include "amdgpu_ras.h"
/* VA hole for 48bit addresses on Vega10 */ /* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL #define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
...@@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs { ...@@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs {
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
}; };
struct amdgpu_xgmi_ras_funcs { struct amdgpu_xgmi_ras {
int (*ras_late_init)(struct amdgpu_device *adev); struct amdgpu_ras_block_object ras_block;
void (*ras_fini)(struct amdgpu_device *adev);
int (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
}; };
struct amdgpu_xgmi { struct amdgpu_xgmi {
...@@ -159,7 +156,7 @@ struct amdgpu_xgmi { ...@@ -159,7 +156,7 @@ struct amdgpu_xgmi {
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
bool connected_to_cpu; bool connected_to_cpu;
bool pending_reset; bool pending_reset;
const struct amdgpu_xgmi_ras_funcs *ras_funcs; struct amdgpu_xgmi_ras *ras;
}; };
struct amdgpu_gmc { struct amdgpu_gmc {
......
...@@ -1012,9 +1012,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, ...@@ -1012,9 +1012,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data); adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
break; break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL: case AMDGPU_RAS_BLOCK__XGMI_WAFL:
if (adev->gmc.xgmi.ras_funcs && if (!block_obj || !block_obj->hw_ops) {
adev->gmc.xgmi.ras_funcs->query_ras_error_count) dev_info(adev->dev, "%s doesn't config ras function \n",
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data); get_ras_block_str(&info->head));
return -EINVAL;
}
if (block_obj->hw_ops->query_ras_error_count)
block_obj->hw_ops->query_ras_error_count(adev, &err_data);
break; break;
case AMDGPU_RAS_BLOCK__HDP: case AMDGPU_RAS_BLOCK__HDP:
if (adev->hdp.ras_funcs && if (adev->hdp.ras_funcs &&
......
...@@ -732,7 +732,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) ...@@ -732,7 +732,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
return psp_xgmi_terminate(&adev->psp); return psp_xgmi_terminate(&adev->psp);
} }
static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void *ras_info)
{ {
int r; int r;
struct ras_ih_if ih_info = { struct ras_ih_if ih_info = {
...@@ -746,7 +746,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) ...@@ -746,7 +746,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
adev->gmc.xgmi.num_physical_nodes == 0) adev->gmc.xgmi.num_physical_nodes == 0)
return 0; return 0;
adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev); adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
if (!adev->gmc.xgmi.ras_if) { if (!adev->gmc.xgmi.ras_if) {
adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
...@@ -865,7 +865,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, ...@@ -865,7 +865,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
return 0; return 0;
} }
static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status) void *ras_error_status)
{ {
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
...@@ -874,7 +874,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, ...@@ -874,7 +874,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
uint32_t ue_cnt = 0, ce_cnt = 0; uint32_t ue_cnt = 0, ce_cnt = 0;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
return -EINVAL; return ;
err_data->ue_count = 0; err_data->ue_count = 0;
err_data->ce_count = 0; err_data->ce_count = 0;
...@@ -940,17 +940,23 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, ...@@ -940,17 +940,23 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
break; break;
} }
adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev); adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
err_data->ue_count += ue_cnt; err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt; err_data->ce_count += ce_cnt;
return 0;
} }
const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = { struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
.ras_late_init = amdgpu_xgmi_ras_late_init,
.ras_fini = amdgpu_xgmi_ras_fini,
.query_ras_error_count = amdgpu_xgmi_query_ras_error_count, .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count, .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
}; };
struct amdgpu_xgmi_ras xgmi_ras = {
.ras_block = {
.name = "xgmi",
.block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
.hw_ops = &xgmi_ras_hw_ops,
.ras_late_init = amdgpu_xgmi_ras_late_init,
.ras_fini = amdgpu_xgmi_ras_fini,
},
};
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include <drm/task_barrier.h> #include <drm/task_barrier.h>
#include "amdgpu_psp.h" #include "amdgpu_psp.h"
#include "amdgpu_ras.h"
struct amdgpu_hive_info { struct amdgpu_hive_info {
struct kobject kobj; struct kobject kobj;
...@@ -50,7 +50,7 @@ struct amdgpu_pcs_ras_field { ...@@ -50,7 +50,7 @@ struct amdgpu_pcs_ras_field {
uint32_t pcs_err_shift; uint32_t pcs_err_shift;
}; };
extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs; extern struct amdgpu_xgmi_ras xgmi_ras;
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment