Commit a564808e authored by xinhui pan, committed by Alex Deucher

drm/amdgpu: handle ras reset

Add another flag to allow an IP to do a GPU reset after device init.
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 7af23ebe
...@@ -119,6 +119,7 @@ const char *ras_block_string[] = { ...@@ -119,6 +119,7 @@ const char *ras_block_string[] = {
#define ras_block_str(i) (ras_block_string[i]) #define ras_block_str(i) (ras_block_string[i])
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
static void amdgpu_ras_self_test(struct amdgpu_device *adev) static void amdgpu_ras_self_test(struct amdgpu_device *adev)
...@@ -1358,6 +1359,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) ...@@ -1358,6 +1359,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
} }
/* recovery end */ /* recovery end */
/*
 * Request a GPU reset to be carried out after device init.
 *
 * Sets AMDGPU_RAS_FLAG_INIT_NEED_RESET in the RAS context flags of @adev.
 * @block is accepted for the caller's convenience but is not consulted here.
 *
 * Returns 0 if the request was recorded (ras will reset the gpu and repost),
 * or -EINVAL if @adev has no RAS context.
 */
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
		unsigned int block)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	if (con) {
		/* remember the request in the RAS context flags */
		con->flags = con->flags | AMDGPU_RAS_FLAG_INIT_NEED_RESET;
		return 0;
	}

	/* no RAS context: nothing to record the request in */
	return -EINVAL;
}
/* /*
* check hardware's ras ability which will be saved in hw_supported. * check hardware's ras ability which will be saved in hw_supported.
* if hardware does not support ras, we can skip some ras initialization and * if hardware does not support ras, we can skip some ras initialization and
...@@ -1433,7 +1447,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev) ...@@ -1433,7 +1447,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
return -EINVAL; return -EINVAL;
} }
/* do some init work after IP late init as dependence */ /* do some init work after IP late init as dependence.
* TODO
* gpu reset will re-enable ras, need to find a way to run it again.
* for now, if a gpu reset happened, unless the IP enables its ras, the ras
* state will be shown as disabled.
*/
void amdgpu_ras_post_init(struct amdgpu_device *adev) void amdgpu_ras_post_init(struct amdgpu_device *adev)
{ {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
...@@ -1462,6 +1481,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) ...@@ -1462,6 +1481,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
} }
} }
} }
if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
/* setup ras obj state as disabled.
* for init_by_vbios case.
* if we want to enable ras, just enable it in a normal way.
* If we want to disable it, we need to set up the ras obj as enabled,
* then issue another TA disable cmd.
* See feature_enable_on_boot
*/
amdgpu_ras_disable_all_features(adev, 1);
amdgpu_ras_reset_gpu(adev, 0);
}
} }
/* do some fini work before IP fini as dependence */ /* do some fini work before IP fini as dependence */
......
...@@ -175,6 +175,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, ...@@ -175,6 +175,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
return ras && (ras->supported & (1 << block)); return ras && (ras->supported & (1 << block));
} }
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
unsigned int block);
int amdgpu_ras_query_error_count(struct amdgpu_device *adev, int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce); bool is_ce);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment