Commit da361dd1 authored by shaoyunl, committed by Alex Deucher

drm/amdgpu: Implement get num of hops between two xgmi device

KFD needs to provide this info to the upper level so it can determine the data path.
Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent d8e408a8
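The new query lets an upper-level consumer such as KFD choose between a direct xGMI transfer and a fallback path. A minimal sketch of a caller on top of this interface; the helper name use_xgmi_path and the decision rule are illustrative assumptions, not part of this patch:

/* Illustrative caller only: prefer the xGMI path when a link is reported.
 * amdgpu_amdkfd_get_xgmi_hops_count() returns 0 both when no link is found
 * and when the query fails, so 0 means "take the fallback path".
 */
static bool use_xgmi_path(struct kgd_dev *dst, struct kgd_dev *src)
{
        return amdgpu_amdkfd_get_xgmi_hops_count(dst, src) > 0;
}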
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -27,6 +27,7 @@
 #include "amdgpu_gfx.h"
 #include <linux/module.h>
 #include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
@@ -518,6 +519,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
 	return adev->gmc.xgmi.hive_id;
 }
 
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{
+	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
+	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
+	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+
+	if (ret < 0) {
+		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
+			adev->gmc.xgmi.physical_node_id,
+			peer_adev->gmc.xgmi.physical_node_id, ret);
+		ret = 0;
+	}
+
+	return (uint8_t)ret;
+}
+
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
 {
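Note the error handling in the wrapper above: the return type is uint8_t, so a negative errno cannot be passed through, and failures are collapsed to 0 hops after logging. A small standalone userspace check (not kernel code) of why the ret = 0 clamp matters:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int ret = -22;  /* -EINVAL coming back from the lookup */

        /* Without the clamp, the uint8_t cast would turn -22 into 234
         * and a failed query would masquerade as a huge hop count.
         */
        printf("%d\n", (uint8_t)ret);   /* prints 234 */
        return 0;
}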
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -170,6 +170,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
 
 #define read_user_wptr(mmptr, wptr, dst)	\
 	({ \
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -95,12 +95,26 @@ struct psp_funcs
 	int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
 };
 
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
+struct psp_xgmi_node_info {
+	uint64_t				node_id;
+	uint8_t					num_hops;
+	uint8_t					is_sharing_enabled;
+	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
+};
+
+struct psp_xgmi_topology_info {
+	uint32_t			num_nodes;
+	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
+
 struct psp_xgmi_context {
 	uint8_t				initialized;
 	uint32_t			session_id;
 	struct amdgpu_bo		*xgmi_shared_bo;
 	uint64_t			xgmi_shared_mc_addr;
 	void				*xgmi_shared_buf;
+	struct psp_xgmi_topology_info	top_info;
 };
 
 struct psp_ras_context {
@@ -181,18 +195,6 @@ struct amdgpu_psp_funcs {
 	enum AMDGPU_UCODE_ID);
 };
 
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
-struct psp_xgmi_node_info {
-	uint64_t				node_id;
-	uint8_t					num_hops;
-	uint8_t					is_sharing_enabled;
-	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
-};
-
-struct psp_xgmi_topology_info {
-	uint32_t			num_nodes;
-	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
-
 #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
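The psp_xgmi_node_info and psp_xgmi_topology_info definitions are moved above psp_xgmi_context rather than duplicated: top_info is now embedded in psp_xgmi_context by value, and C requires a complete type at the point such a member is declared, whereas a forward declaration would only suffice for a pointer member. A minimal illustration of that rule, with made-up struct names:

struct topo;                    /* forward declaration: incomplete type */

struct ctx {
        /* struct topo top;        would not compile: incomplete type */
        struct topo *top_ptr;   /* fine: pointers to incomplete types are allowed */
};

struct topo { int num_nodes; }; /* complete definition */

struct ctx2 {
        struct topo top;        /* fine: the full layout is now known */
};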
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -238,7 +238,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 	/* Each psp need to set the latest topology */
 	ret = psp_xgmi_set_topology_info(&adev->psp,
 					 hive->number_devices,
-					 &hive->topology_info);
+					 &adev->psp.xgmi_context.top_info);
 	if (ret)
 		dev_err(adev->dev,
 			"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +248,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 	return ret;
 }
 
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+		struct amdgpu_device *peer_adev)
+{
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i;
+
+	for (i = 0 ; i < top->num_nodes; ++i)
+		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+			return top->nodes[i].num_hops;
+	return -EINVAL;
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
-	struct psp_xgmi_topology_info *hive_topology;
+	struct psp_xgmi_topology_info *top_info;
 	struct amdgpu_hive_info *hive;
 	struct amdgpu_xgmi *entry;
 	struct amdgpu_device *tmp_adev = NULL;
@@ -283,16 +296,16 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 		goto exit;
 	}
 
-	hive_topology = &hive->topology_info;
+	top_info = &adev->psp.xgmi_context.top_info;
 
 	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
 	list_for_each_entry(entry, &hive->device_list, head)
-		hive_topology->nodes[count++].node_id = entry->node_id;
+		top_info->nodes[count++].node_id = entry->node_id;
 	hive->number_devices = count;
 
 	/* Each psp need to get the latest topology */
 	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
+		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info);
 		if (ret) {
 			dev_err(tmp_adev->dev,
 				"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
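The underlying design change is visible here: the single hive-wide topology_info is replaced by a per-device copy in psp.xgmi_context.top_info, since the PSP reports num_hops relative to the queried device and one shared table cannot represent every node's view. A self-contained sketch of the same lookup that amdgpu_xgmi_get_hops_count() performs, using simplified stand-ins for the kernel structs and made-up node IDs and hop counts:

#include <stdint.h>
#include <stdio.h>

#define MAX_NODES 64
#define EINVAL 22

/* Simplified stand-ins for psp_xgmi_node_info / psp_xgmi_topology_info. */
struct node_info {
        uint64_t node_id;
        uint8_t num_hops;
};

struct topology_info {
        uint32_t num_nodes;
        struct node_info nodes[MAX_NODES];
};

/* Same linear scan as amdgpu_xgmi_get_hops_count(): look the peer up in
 * this device's view of the hive and return its hop count.
 */
static int get_hops_count(const struct topology_info *top, uint64_t peer_id)
{
        uint32_t i;

        for (i = 0; i < top->num_nodes; ++i)
                if (top->nodes[i].node_id == peer_id)
                        return top->nodes[i].num_hops;
        return -EINVAL;
}

int main(void)
{
        /* Device A's view of a three-device hive. */
        struct topology_info top_a = {
                .num_nodes = 2,
                .nodes = {
                        { .node_id = 0xB, .num_hops = 1 },      /* direct link */
                        { .node_id = 0xC, .num_hops = 2 },      /* routed via B */
                },
        };

        printf("A -> C: %d hops\n", get_hops_count(&top_a, 0xC));       /* 2 */
        printf("A -> D: %d\n", get_hops_count(&top_a, 0xD));            /* -22 */
        return 0;
}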
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -27,7 +27,6 @@
 struct amdgpu_hive_info {
 	uint64_t		hive_id;
 	struct list_head	device_list;
-	struct psp_xgmi_topology_info	topology_info;
 	int number_devices;
 	struct mutex hive_lock, reset_lock;
 	struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
 void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+		struct amdgpu_device *peer_adev);
 
 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 		struct amdgpu_device *bo_adev)