Commit ada2b29c authored by Felix Kuehling, committed by Oded Gabbay

drm/amdkfd: Make doorbell size ASIC-dependent

This prepares for GFXv9 (Vega10), which has 64-bit doorbells.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 642a0e80
...@@ -41,6 +41,7 @@ static const struct kfd_device_info kaveri_device_info = { ...@@ -41,6 +41,7 @@ static const struct kfd_device_info kaveri_device_info = {
.max_pasid_bits = 16, .max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */ /* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -55,6 +56,7 @@ static const struct kfd_device_info carrizo_device_info = { ...@@ -55,6 +56,7 @@ static const struct kfd_device_info carrizo_device_info = {
.max_pasid_bits = 16, .max_pasid_bits = 16,
/* max num of queues for CZ.TODO should be a dynamic value */ /* max num of queues for CZ.TODO should be a dynamic value */
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -70,6 +72,7 @@ static const struct kfd_device_info hawaii_device_info = { ...@@ -70,6 +72,7 @@ static const struct kfd_device_info hawaii_device_info = {
.max_pasid_bits = 16, .max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */ /* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -83,6 +86,7 @@ static const struct kfd_device_info tonga_device_info = { ...@@ -83,6 +86,7 @@ static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA, .asic_family = CHIP_TONGA,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -96,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = { ...@@ -96,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
.asic_family = CHIP_TONGA, .asic_family = CHIP_TONGA,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -109,6 +114,7 @@ static const struct kfd_device_info fiji_device_info = { ...@@ -109,6 +114,7 @@ static const struct kfd_device_info fiji_device_info = {
.asic_family = CHIP_FIJI, .asic_family = CHIP_FIJI,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -122,6 +128,7 @@ static const struct kfd_device_info fiji_vf_device_info = { ...@@ -122,6 +128,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.asic_family = CHIP_FIJI, .asic_family = CHIP_FIJI,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -136,6 +143,7 @@ static const struct kfd_device_info polaris10_device_info = { ...@@ -136,6 +143,7 @@ static const struct kfd_device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10, .asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -149,6 +157,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { ...@@ -149,6 +157,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.asic_family = CHIP_POLARIS10, .asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
...@@ -162,6 +171,7 @@ static const struct kfd_device_info polaris11_device_info = { ...@@ -162,6 +171,7 @@ static const struct kfd_device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11, .asic_family = CHIP_POLARIS11,
.max_pasid_bits = 16, .max_pasid_bits = 16,
.max_no_of_hqd = 24, .max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t), .ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik, .event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4, .num_of_watch_points = 4,
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
static DEFINE_IDA(doorbell_ida); static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices; static unsigned int max_doorbell_slices;
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
/* /*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that * Each device exposes a doorbell aperture, a PCI MMIO aperture that
...@@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices; ...@@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices;
*/ */
/* # of doorbell bytes allocated for each process. */ /* # of doorbell bytes allocated for each process. */
static inline size_t doorbell_process_allocation(void) static size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{ {
return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES * return roundup(kfd->device_info->doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE); PAGE_SIZE);
} }
...@@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd) ...@@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
doorbell_start_offset = doorbell_start_offset =
roundup(kfd->shared_resources.doorbell_start_offset, roundup(kfd->shared_resources.doorbell_start_offset,
doorbell_process_allocation()); kfd_doorbell_process_slice(kfd));
doorbell_aperture_size = doorbell_aperture_size =
rounddown(kfd->shared_resources.doorbell_aperture_size, rounddown(kfd->shared_resources.doorbell_aperture_size,
doorbell_process_allocation()); kfd_doorbell_process_slice(kfd));
if (doorbell_aperture_size > doorbell_start_offset) if (doorbell_aperture_size > doorbell_start_offset)
doorbell_process_limit = doorbell_process_limit =
(doorbell_aperture_size - doorbell_start_offset) / (doorbell_aperture_size - doorbell_start_offset) /
doorbell_process_allocation(); kfd_doorbell_process_slice(kfd);
else else
return -ENOSPC; return -ENOSPC;
...@@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) ...@@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
doorbell_process_allocation()); kfd_doorbell_process_slice(kfd));
if (!kfd->doorbell_kernel_ptr) if (!kfd->doorbell_kernel_ptr)
return -ENOMEM; return -ENOMEM;
...@@ -132,16 +131,16 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) ...@@ -132,16 +131,16 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
phys_addr_t address; phys_addr_t address;
struct kfd_dev *dev; struct kfd_dev *dev;
/* Find kfd device according to gpu id */
dev = kfd_device_by_id(vma->vm_pgoff);
if (!dev)
return -EINVAL;
/* /*
* For simplicitly we only allow mapping of the entire doorbell * For simplicitly we only allow mapping of the entire doorbell
* allocation of a single device & process. * allocation of a single device & process.
*/ */
if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL;
/* Find kfd device according to gpu id */
dev = kfd_device_by_id(vma->vm_pgoff);
if (!dev)
return -EINVAL; return -EINVAL;
/* Calculate physical address of doorbell */ /* Calculate physical address of doorbell */
...@@ -158,19 +157,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) ...@@ -158,19 +157,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
" vm_flags == 0x%04lX\n" " vm_flags == 0x%04lX\n"
" size == 0x%04lX\n", " size == 0x%04lX\n",
(unsigned long long) vma->vm_start, address, vma->vm_flags, (unsigned long long) vma->vm_start, address, vma->vm_flags,
doorbell_process_allocation()); kfd_doorbell_process_slice(dev));
return io_remap_pfn_range(vma, return io_remap_pfn_range(vma,
vma->vm_start, vma->vm_start,
address >> PAGE_SHIFT, address >> PAGE_SHIFT,
doorbell_process_allocation(), kfd_doorbell_process_slice(dev),
vma->vm_page_prot); vma->vm_page_prot);
} }
/* get kernel iomem pointer for a doorbell */ /* get kernel iomem pointer for a doorbell */
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off) unsigned int *doorbell_off)
{ {
u32 inx; u32 inx;
...@@ -185,6 +184,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, ...@@ -185,6 +184,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL; return NULL;
inx *= kfd->device_info->doorbell_size / sizeof(u32);
/* /*
* Calculating the kernel doorbell offset using the first * Calculating the kernel doorbell offset using the first
* doorbell page. * doorbell page.
...@@ -210,7 +211,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) ...@@ -210,7 +211,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
mutex_unlock(&kfd->doorbell_mutex); mutex_unlock(&kfd->doorbell_mutex);
} }
inline void write_kernel_doorbell(u32 __iomem *db, u32 value) void write_kernel_doorbell(void __iomem *db, u32 value)
{ {
if (db) { if (db) {
writel(value, db); writel(value, db);
...@@ -228,20 +229,21 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, ...@@ -228,20 +229,21 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
{ {
/* /*
* doorbell_id_offset accounts for doorbells taken by KGD. * doorbell_id_offset accounts for doorbells taken by KGD.
* index * doorbell_process_allocation/sizeof(u32) adjusts to * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
* the process's doorbells. * the process's doorbells. The offset returned is in dword
* units regardless of the ASIC-dependent doorbell size.
*/ */
return kfd->doorbell_id_offset + return kfd->doorbell_id_offset +
process->doorbell_index process->doorbell_index
* doorbell_process_allocation() / sizeof(u32) + * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
queue_id; queue_id * kfd->device_info->doorbell_size / sizeof(u32);
} }
uint64_t kfd_get_number_elems(struct kfd_dev *kfd) uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{ {
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
kfd->shared_resources.doorbell_start_offset) / kfd->shared_resources.doorbell_start_offset) /
doorbell_process_allocation() + 1; kfd_doorbell_process_slice(kfd) + 1;
return num_of_elems; return num_of_elems;
...@@ -251,7 +253,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, ...@@ -251,7 +253,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process) struct kfd_process *process)
{ {
return dev->doorbell_base + return dev->doorbell_base +
process->doorbell_index * doorbell_process_allocation(); process->doorbell_index * kfd_doorbell_process_slice(dev);
} }
int kfd_alloc_process_doorbells(struct kfd_process *process) int kfd_alloc_process_doorbells(struct kfd_process *process)
......
...@@ -160,6 +160,7 @@ struct kfd_device_info { ...@@ -160,6 +160,7 @@ struct kfd_device_info {
const struct kfd_event_interrupt_class *event_interrupt_class; const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits; unsigned int max_pasid_bits;
unsigned int max_no_of_hqd; unsigned int max_no_of_hqd;
unsigned int doorbell_size;
size_t ih_ring_entry_size; size_t ih_ring_entry_size;
uint8_t num_of_watch_points; uint8_t num_of_watch_points;
uint16_t mqd_size_aligned; uint16_t mqd_size_aligned;
...@@ -364,7 +365,7 @@ struct queue_properties { ...@@ -364,7 +365,7 @@ struct queue_properties {
uint32_t queue_percent; uint32_t queue_percent;
uint32_t *read_ptr; uint32_t *read_ptr;
uint32_t *write_ptr; uint32_t *write_ptr;
uint32_t __iomem *doorbell_ptr; void __iomem *doorbell_ptr;
uint32_t doorbell_off; uint32_t doorbell_off;
bool is_interop; bool is_interop;
bool is_evicted; bool is_evicted;
...@@ -728,11 +729,11 @@ void kfd_pasid_free(unsigned int pasid); ...@@ -728,11 +729,11 @@ void kfd_pasid_free(unsigned int pasid);
int kfd_doorbell_init(struct kfd_dev *kfd); int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd); void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off); unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db); u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(u32 __iomem *db, u32 value); void write_kernel_doorbell(void __iomem *db, u32 value);
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
struct kfd_process *process, struct kfd_process *process,
unsigned int queue_id); unsigned int queue_id);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment