Commit 8f3013e0 authored by Himal Prasad Ghimiray, committed by Rodrigo Vivi

drm/xe: Introduce fault injection for gt reset

To trigger gt reset failure:
 echo 100 >  /sys/kernel/debug/dri/<cardX>/fail_gt_reset/probability
 echo 2 >  /sys/kernel/debug/dri/<cardX>/fail_gt_reset/times

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent 4f027e30
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "xe_debugfs.h" #include "xe_debugfs.h"
#include <linux/fault-inject.h>
#include <linux/string_helpers.h> #include <linux/string_helpers.h>
#include <drm/drm_debugfs.h> #include <drm/drm_debugfs.h>
...@@ -20,6 +21,10 @@ ...@@ -20,6 +21,10 @@
#include "xe_vm.h" #include "xe_vm.h"
#endif #endif
/*
 * Fault-injection attribute used to force GT reset failures. It is exposed
 * in debugfs under the name "fail_gt_reset" by xe_debugfs_register() and is
 * only defined when CONFIG_FAULT_INJECTION is enabled.
 */
#ifdef CONFIG_FAULT_INJECTION
DECLARE_FAULT_ATTR(gt_reset_failure);
#endif
static struct xe_device *node_to_xe(struct drm_info_node *node) static struct xe_device *node_to_xe(struct drm_info_node *node)
{ {
return to_xe_device(node->minor->dev); return to_xe_device(node->minor->dev);
...@@ -135,4 +140,9 @@ void xe_debugfs_register(struct xe_device *xe) ...@@ -135,4 +140,9 @@ void xe_debugfs_register(struct xe_device *xe)
for_each_gt(gt, xe, id) for_each_gt(gt, xe, id)
xe_gt_debugfs_register(gt); xe_gt_debugfs_register(gt);
#ifdef CONFIG_FAULT_INJECTION
fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
#endif
} }
...@@ -524,6 +524,11 @@ static int gt_reset(struct xe_gt *gt) ...@@ -524,6 +524,11 @@ static int gt_reset(struct xe_gt *gt)
xe_gt_info(gt, "reset started\n"); xe_gt_info(gt, "reset started\n");
if (xe_fault_inject_gt_reset()) {
err = -ECANCELED;
goto err_fail;
}
xe_gt_sanitize(gt); xe_gt_sanitize(gt);
xe_device_mem_access_get(gt_to_xe(gt)); xe_device_mem_access_get(gt_to_xe(gt));
...@@ -562,6 +567,7 @@ static int gt_reset(struct xe_gt *gt) ...@@ -562,6 +567,7 @@ static int gt_reset(struct xe_gt *gt)
err_msg: err_msg:
XE_WARN_ON(xe_uc_start(&gt->uc)); XE_WARN_ON(xe_uc_start(&gt->uc));
xe_device_mem_access_put(gt_to_xe(gt)); xe_device_mem_access_put(gt_to_xe(gt));
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
/* Notify userspace about gt reset failure */ /* Notify userspace about gt reset failure */
...@@ -583,7 +589,7 @@ void xe_gt_reset_async(struct xe_gt *gt) ...@@ -583,7 +589,7 @@ void xe_gt_reset_async(struct xe_gt *gt)
xe_gt_info(gt, "trying reset\n"); xe_gt_info(gt, "trying reset\n");
/* Don't do a reset while one is already in flight */ /* Don't do a reset while one is already in flight */
if (xe_uc_reset_prepare(&gt->uc)) if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
return; return;
xe_gt_info(gt, "reset queued\n"); xe_gt_info(gt, "reset queued\n");
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#define _XE_GT_H_ #define _XE_GT_H_
#include <drm/drm_util.h> #include <drm/drm_util.h>
#include <linux/fault-inject.h>
#include "xe_device_types.h" #include "xe_device_types.h"
#include "xe_hw_engine.h" #include "xe_hw_engine.h"
...@@ -16,6 +17,19 @@ ...@@ -16,6 +17,19 @@
for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
xe_hw_engine_is_valid((hwe__))) xe_hw_engine_is_valid((hwe__)))
#ifdef CONFIG_FAULT_INJECTION
extern struct fault_attr gt_reset_failure;
#endif

/*
 * xe_fault_inject_gt_reset - query whether a GT reset failure should be injected
 *
 * With CONFIG_FAULT_INJECTION enabled, the decision is delegated to
 * should_fail() on the gt_reset_failure attribute. Without it, no fault is
 * ever injected and the function always returns false.
 *
 * Return: true if the caller should simulate a GT reset failure, false otherwise.
 */
static inline bool xe_fault_inject_gt_reset(void)
{
#ifdef CONFIG_FAULT_INJECTION
	return should_fail(&gt_reset_failure, 1);
#else
	return false;
#endif
}
struct xe_gt *xe_gt_alloc(struct xe_tile *tile); struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment