Commit fcd1b2b9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'drm-fixes-2022-07-15' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "This is the regular fixes pull for this week. This has a bunch of
  amdgpu fixes, major one reverts the buddy allocator until it can be
  tested more, otherwise just small ones, then i915 has a bunch of
  fixes.

  The outstanding firmware regressions reported by phoronix will
  hopefully be dealt with ASAP.

  amdgpu:
   - revert buddy allocator support for now
   - DP MST blank screen fix for specific platforms
   - MEC firmware check fix for GC 10.3.7
   - Deep color fix for DCE
   - Fix possible divide by 0
   - Coverage blend mode fix
   - Fix cursor only commit timestamps

  i915:
   - Selftest fix
   - TTM fix sg_table construction
   - Error return fixes
   - Fix a performance regression related to waitboost
   - Fix GT resets"

* tag 'drm-fixes-2022-07-15' of git://anongit.freedesktop.org/drm/drm:
  drm/amd/display: Ensure valid event timestamp for cursor-only commits
  drm/amd/display: correct check of coverage blend mode
  drm/amd/pm: Prevent divide by zero
  drm/amd/display: Only use depth 36 bpp linebuffers on DCN display engines.
  drm/amdkfd: correct the MEC atomic support firmware checking for GC 10.3.7
  drm/amd/display: Ignore First MST Sideband Message Return Error
  drm/i915/selftests: fix subtraction overflow bug
  drm/i915/gem: Look for waitboosting across the whole object prior to individual waits
  drm/i915/gt: Serialize TLB invalidates with GT resets
  drm/i915/gt: Serialize GRDOM access between multiple engine resets
  drm/i915/ttm: fix sg_table construction
  drm/i915/selftests: fix a couple IS_ERR() vs NULL tests
  drm/i915: Fix vm use-after-free in vma destruction
  drm/i915/guc: ADL-N should use the same GuC FW as ADL-S
  drm/i915: fix a possible refcount leak in intel_dp_add_mst_connector()
  drm/i915/gvt: IS_ERR() vs NULL bug in intel_gvt_update_reg_whitelist()
  Revert "drm/amdgpu: add drm buddy support to amdgpu"
parents 862161e8 093f8d8f
......@@ -256,7 +256,6 @@ config DRM_AMDGPU
select HWMON
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
select DRM_BUDDY
help
Choose this option if you have a recent AMD Radeon graphics card.
......
......@@ -30,15 +30,12 @@
#include <drm/ttm/ttm_resource.h>
#include <drm/ttm/ttm_range_manager.h>
#include "amdgpu_vram_mgr.h"
/* state back for walking over vram_mgr and gtt_mgr allocations */
struct amdgpu_res_cursor {
uint64_t start;
uint64_t size;
uint64_t remaining;
void *node;
uint32_t mem_type;
struct drm_mm_node *node;
};
/**
......@@ -55,41 +52,19 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
{
struct drm_buddy_block *block;
struct list_head *head, *next;
struct drm_mm_node *node;
if (!res)
goto fallback;
if (!res || res->mem_type == TTM_PL_SYSTEM) {
cur->start = start;
cur->size = size;
cur->remaining = size;
cur->node = NULL;
WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
return;
}
BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
cur->mem_type = res->mem_type;
switch (cur->mem_type) {
case TTM_PL_VRAM:
head = &to_amdgpu_vram_mgr_resource(res)->blocks;
block = list_first_entry_or_null(head,
struct drm_buddy_block,
link);
if (!block)
goto fallback;
while (start >= amdgpu_vram_mgr_block_size(block)) {
start -= amdgpu_vram_mgr_block_size(block);
next = block->link.next;
if (next != head)
block = list_entry(next, struct drm_buddy_block, link);
}
cur->start = amdgpu_vram_mgr_block_start(block) + start;
cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
cur->remaining = size;
cur->node = block;
break;
case TTM_PL_TT:
node = to_ttm_range_mgr_node(res)->mm_nodes;
while (start >= node->size << PAGE_SHIFT)
start -= node++->size << PAGE_SHIFT;
......@@ -98,20 +73,6 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
cur->size = min((node->size << PAGE_SHIFT) - start, size);
cur->remaining = size;
cur->node = node;
break;
default:
goto fallback;
}
return;
fallback:
cur->start = start;
cur->size = size;
cur->remaining = size;
cur->node = NULL;
WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
return;
}
/**
......@@ -124,9 +85,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
*/
static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
{
struct drm_buddy_block *block;
struct drm_mm_node *node;
struct list_head *next;
struct drm_mm_node *node = cur->node;
BUG_ON(size > cur->remaining);
......@@ -140,27 +99,9 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
return;
}
switch (cur->mem_type) {
case TTM_PL_VRAM:
block = cur->node;
next = block->link.next;
block = list_entry(next, struct drm_buddy_block, link);
cur->node = block;
cur->start = amdgpu_vram_mgr_block_start(block);
cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
break;
case TTM_PL_TT:
node = cur->node;
cur->node = ++node;
cur->start = node->start << PAGE_SHIFT;
cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
break;
default:
return;
}
}
#endif
......@@ -26,7 +26,6 @@
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
#include "amdgpu_vram_mgr.h"
#include "amdgpu.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
......@@ -39,6 +38,15 @@
#define AMDGPU_POISON 0xd0bed0be
/*
 * VRAM manager state: a TTM resource manager backed by a drm_mm range
 * allocator, plus bookkeeping for reserved VRAM ranges.
 */
struct amdgpu_vram_mgr {
/* TTM resource manager embedded in this VRAM manager */
struct ttm_resource_manager manager;
/* drm_mm range allocator that VRAM nodes are inserted into */
struct drm_mm mm;
/* held around mm updates and walks of the two reservation lists below */
spinlock_t lock;
/* reservations queued until drm_mm_reserve_node() succeeds for them */
struct list_head reservations_pending;
/* reservations whose node has been reserved in mm */
struct list_head reserved_pages;
/* running total of allocated bytes that lie inside CPU-visible VRAM */
atomic64_t vis_usage;
};
struct amdgpu_gtt_mgr {
struct ttm_resource_manager manager;
struct drm_mm mm;
......
......@@ -32,10 +32,8 @@
#include "atom.h"
struct amdgpu_vram_reservation {
u64 start;
u64 size;
struct list_head allocated;
struct list_head blocks;
struct list_head node;
struct drm_mm_node mm_node;
};
static inline struct amdgpu_vram_mgr *
......@@ -188,18 +186,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = {
};
/**
* amdgpu_vram_mgr_vis_size - Calculate visible block size
* amdgpu_vram_mgr_vis_size - Calculate visible node size
*
* @adev: amdgpu_device pointer
* @block: DRM BUDDY block structure
* @node: MM node structure
*
* Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
* Calculate how many bytes of the MM node are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
struct drm_buddy_block *block)
struct drm_mm_node *node)
{
u64 start = amdgpu_vram_mgr_block_start(block);
u64 end = start + amdgpu_vram_mgr_block_size(block);
uint64_t start = node->start << PAGE_SHIFT;
uint64_t end = (node->size + node->start) << PAGE_SHIFT;
if (start >= adev->gmc.visible_vram_size)
return 0;
......@@ -220,9 +218,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_resource *res = bo->tbo.resource;
struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
struct drm_buddy_block *block;
u64 usage = 0;
unsigned pages = res->num_pages;
struct drm_mm_node *mm;
u64 usage;
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
return amdgpu_bo_size(bo);
......@@ -230,8 +228,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
return 0;
list_for_each_entry(block, &vres->blocks, link)
usage += amdgpu_vram_mgr_vis_size(adev, block);
mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
for (usage = 0; pages; pages -= mm->size, mm++)
usage += amdgpu_vram_mgr_vis_size(adev, mm);
return usage;
}
......@@ -241,30 +240,23 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
struct drm_buddy *mm = &mgr->mm;
struct drm_mm *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
struct drm_buddy_block *block;
uint64_t vis_usage;
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
rsv->size, mm->chunk_size, &rsv->allocated,
DRM_BUDDY_RANGE_ALLOCATION))
continue;
block = amdgpu_vram_mgr_first_block(&rsv->allocated);
if (!block)
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
if (drm_mm_reserve_node(mm, &rsv->mm_node))
continue;
dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
rsv->start, rsv->size);
rsv->mm_node.start, rsv->mm_node.size);
vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
atomic64_add(vis_usage, &mgr->vis_usage);
spin_lock(&man->bdev->lru_lock);
man->usage += rsv->size;
man->usage += rsv->mm_node.size << PAGE_SHIFT;
spin_unlock(&man->bdev->lru_lock);
list_move(&rsv->blocks, &mgr->reserved_pages);
list_move(&rsv->node, &mgr->reserved_pages);
}
}
......@@ -286,16 +278,14 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
if (!rsv)
return -ENOMEM;
INIT_LIST_HEAD(&rsv->allocated);
INIT_LIST_HEAD(&rsv->blocks);
rsv->start = start;
rsv->size = size;
INIT_LIST_HEAD(&rsv->node);
rsv->mm_node.start = start >> PAGE_SHIFT;
rsv->mm_node.size = size >> PAGE_SHIFT;
mutex_lock(&mgr->lock);
list_add_tail(&rsv->blocks, &mgr->reservations_pending);
spin_lock(&mgr->lock);
list_add_tail(&rsv->node, &mgr->reservations_pending);
amdgpu_vram_mgr_do_reserve(&mgr->manager);
mutex_unlock(&mgr->lock);
spin_unlock(&mgr->lock);
return 0;
}
......@@ -317,19 +307,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
struct amdgpu_vram_reservation *rsv;
int ret;
mutex_lock(&mgr->lock);
spin_lock(&mgr->lock);
list_for_each_entry(rsv, &mgr->reservations_pending, blocks) {
if (rsv->start <= start &&
(start < (rsv->start + rsv->size))) {
list_for_each_entry(rsv, &mgr->reservations_pending, node) {
if ((rsv->mm_node.start <= start) &&
(start < (rsv->mm_node.start + rsv->mm_node.size))) {
ret = -EBUSY;
goto out;
}
}
list_for_each_entry(rsv, &mgr->reserved_pages, blocks) {
if (rsv->start <= start &&
(start < (rsv->start + rsv->size))) {
list_for_each_entry(rsv, &mgr->reserved_pages, node) {
if ((rsv->mm_node.start <= start) &&
(start < (rsv->mm_node.start + rsv->mm_node.size))) {
ret = 0;
goto out;
}
......@@ -337,10 +327,32 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
ret = -ENOENT;
out:
mutex_unlock(&mgr->lock);
spin_unlock(&mgr->lock);
return ret;
}
/**
 * amdgpu_vram_mgr_virt_start - update virtual start address
 *
 * @mem: ttm_resource to update
 * @node: just allocated node
 *
 * Raise @mem->start to the "virtual" start implied by @node: the node's end
 * page minus the resource's page count, clamped at zero. Keeping the maximum
 * over all nodes gives a virtual BO start address that makes it easy to check
 * if everything is CPU accessible.
 */
static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
				       struct drm_mm_node *node)
{
	unsigned long node_end = node->start + node->size;
	unsigned long virt_start = 0;

	/* Only nodes ending beyond num_pages yield a non-zero virtual start. */
	if (node_end > mem->num_pages)
		virt_start = node_end - mem->num_pages;

	mem->start = max(mem->start, virt_start);
}
/**
* amdgpu_vram_mgr_new - allocate new ranges
*
......@@ -356,44 +368,46 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource **res)
{
u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
struct drm_buddy *mm = &mgr->mm;
struct drm_buddy_block *block;
unsigned long pages_per_block;
uint64_t vis_usage = 0, mem_bytes, max_bytes;
struct ttm_range_mgr_node *node;
struct drm_mm *mm = &mgr->mm;
enum drm_mm_insert_mode mode;
unsigned i;
int r;
lpfn = place->lpfn << PAGE_SHIFT;
lpfn = place->lpfn;
if (!lpfn)
lpfn = man->size;
fpfn = place->fpfn << PAGE_SHIFT;
lpfn = man->size >> PAGE_SHIFT;
max_bytes = adev->gmc.mc_vram_size;
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
mem_bytes = tbo->base.size;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
pages_per_block = ~0ul;
pages_per_node = ~0ul;
num_nodes = 1;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
pages_per_block = HPAGE_PMD_NR;
pages_per_node = HPAGE_PMD_NR;
#else
/* default to 2MB */
pages_per_block = 2UL << (20UL - PAGE_SHIFT);
pages_per_node = 2UL << (20UL - PAGE_SHIFT);
#endif
pages_per_block = max_t(uint32_t, pages_per_block,
pages_per_node = max_t(uint32_t, pages_per_node,
tbo->page_alignment);
num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
}
vres = kzalloc(sizeof(*vres), GFP_KERNEL);
if (!vres)
node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
GFP_KERNEL | __GFP_ZERO);
if (!node)
return -ENOMEM;
ttm_resource_init(tbo, place, &vres->base);
ttm_resource_init(tbo, place, &node->base);
/* bail out quickly if there's likely not enough VRAM for this BO */
if (ttm_resource_manager_usage(man) > max_bytes) {
......@@ -401,130 +415,66 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
goto error_fini;
}
INIT_LIST_HEAD(&vres->blocks);
mode = DRM_MM_INSERT_BEST;
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
if (fpfn || lpfn != man->size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
remaining_size = vres->base.num_pages << PAGE_SHIFT;
mutex_lock(&mgr->lock);
while (remaining_size) {
if (tbo->page_alignment)
min_block_size = tbo->page_alignment << PAGE_SHIFT;
else
min_block_size = mgr->default_page_size;
mode = DRM_MM_INSERT_HIGH;
BUG_ON(min_block_size < mm->chunk_size);
pages_left = node->base.num_pages;
/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
/* Limit maximum size to 2GB due to SG table limitations */
pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
if (size >= pages_per_block << PAGE_SHIFT)
min_block_size = pages_per_block << PAGE_SHIFT;
i = 0;
spin_lock(&mgr->lock);
while (pages_left) {
uint32_t alignment = tbo->page_alignment;
cur_size = size;
if (fpfn + size != place->lpfn << PAGE_SHIFT) {
/*
* Except for actual range allocation, modify the size and
* min_block_size conforming to continuous flag enablement
*/
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
size = roundup_pow_of_two(size);
min_block_size = size;
/*
* Modify the size value if size is not
* aligned with min_block_size
*/
} else if (!IS_ALIGNED(size, min_block_size)) {
size = round_up(size, min_block_size);
}
}
if (pages >= pages_per_node)
alignment = pages_per_node;
r = drm_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
min_block_size,
&vres->blocks,
vres->flags);
if (unlikely(r))
goto error_free_blocks;
if (size > remaining_size)
remaining_size = 0;
r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
alignment, 0, place->fpfn,
lpfn, mode);
if (unlikely(r)) {
if (pages > pages_per_node) {
if (is_power_of_2(pages))
pages = pages / 2;
else
remaining_size -= size;
}
mutex_unlock(&mgr->lock);
if (cur_size != size) {
struct drm_buddy_block *block;
struct list_head *trim_list;
u64 original_size;
LIST_HEAD(temp);
trim_list = &vres->blocks;
original_size = vres->base.num_pages << PAGE_SHIFT;
/*
* If size value is rounded up to min_block_size, trim the last
* block to the required size
*/
if (!list_is_singular(&vres->blocks)) {
block = list_last_entry(&vres->blocks, typeof(*block), link);
list_move_tail(&block->link, &temp);
trim_list = &temp;
/*
* Compute the original_size value by subtracting the
* last block size with (aligned size - original size)
*/
original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
pages = rounddown_pow_of_two(pages);
continue;
}
mutex_lock(&mgr->lock);
drm_buddy_block_trim(mm,
original_size,
trim_list);
mutex_unlock(&mgr->lock);
if (!list_empty(&temp))
list_splice_tail(trim_list, &vres->blocks);
goto error_free;
}
list_for_each_entry(block, &vres->blocks, link)
vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
pages_left -= pages;
++i;
block = amdgpu_vram_mgr_first_block(&vres->blocks);
if (!block) {
r = -EINVAL;
goto error_fini;
if (pages > pages_left)
pages = pages_left;
}
spin_unlock(&mgr->lock);
vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
if (i == 1)
node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
if (adev->gmc.xgmi.connected_to_cpu)
vres->base.bus.caching = ttm_cached;
node->base.bus.caching = ttm_cached;
else
vres->base.bus.caching = ttm_write_combined;
node->base.bus.caching = ttm_write_combined;
atomic64_add(vis_usage, &mgr->vis_usage);
*res = &vres->base;
*res = &node->base;
return 0;
error_free_blocks:
drm_buddy_free_list(mm, &vres->blocks);
mutex_unlock(&mgr->lock);
error_free:
while (i--)
drm_mm_remove_node(&node->mm_nodes[i]);
spin_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
kfree(vres);
ttm_resource_fini(man, &node->base);
kvfree(node);
return r;
}
......@@ -540,26 +490,27 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
struct drm_buddy *mm = &mgr->mm;
struct drm_buddy_block *block;
uint64_t vis_usage = 0;
unsigned i, pages;
mutex_lock(&mgr->lock);
list_for_each_entry(block, &vres->blocks, link)
vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
spin_lock(&mgr->lock);
for (i = 0, pages = res->num_pages; pages;
pages -= node->mm_nodes[i].size, ++i) {
struct drm_mm_node *mm = &node->mm_nodes[i];
drm_mm_remove_node(mm);
vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
}
amdgpu_vram_mgr_do_reserve(man);
drm_buddy_free_list(mm, &vres->blocks);
mutex_unlock(&mgr->lock);
spin_unlock(&mgr->lock);
atomic64_sub(vis_usage, &mgr->vis_usage);
ttm_resource_fini(man, res);
kfree(vres);
kvfree(node);
}
/**
......@@ -591,7 +542,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
if (!*sgt)
return -ENOMEM;
/* Determine the number of DRM_BUDDY blocks to export */
/* Determine the number of DRM_MM nodes to export */
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
......@@ -607,10 +558,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg->length = 0;
/*
* Walk down DRM_BUDDY blocks to populate scatterlist nodes
* @note: Use iterator api to get first the DRM_BUDDY block
* Walk down DRM_MM nodes to populate scatterlist nodes
* @note: Use iterator api to get first the DRM_MM node
* and the number of bytes from it. Access the following
* DRM_BUDDY block(s) if more buffer needs to exported
* DRM_MM node(s) if more buffer needs to be exported
*/
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
......@@ -697,22 +648,13 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct drm_buddy *mm = &mgr->mm;
struct drm_buddy_block *block;
drm_printf(printer, " vis usage:%llu\n",
amdgpu_vram_mgr_vis_usage(mgr));
mutex_lock(&mgr->lock);
drm_printf(printer, "default_page_size: %lluKiB\n",
mgr->default_page_size >> 10);
drm_buddy_print(mm, printer);
drm_printf(printer, "reserved:\n");
list_for_each_entry(block, &mgr->reserved_pages, link)
drm_buddy_block_print(mm, block, printer);
mutex_unlock(&mgr->lock);
spin_lock(&mgr->lock);
drm_mm_print(&mgr->mm, printer);
spin_unlock(&mgr->lock);
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
......@@ -732,21 +674,16 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
int err;
ttm_resource_manager_init(man, &adev->mman.bdev,
adev->gmc.real_vram_size);
man->func = &amdgpu_vram_mgr_func;
err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
if (err)
return err;
mutex_init(&mgr->lock);
drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT);
spin_lock_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
mgr->default_page_size = PAGE_SIZE;
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
......@@ -774,16 +711,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
if (ret)
return;
mutex_lock(&mgr->lock);
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks)
spin_lock(&mgr->lock);
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
drm_buddy_free_list(&mgr->mm, &rsv->blocks);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
drm_mm_remove_node(&rsv->mm_node);
kfree(rsv);
}
drm_buddy_fini(&mgr->mm);
mutex_unlock(&mgr->lock);
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
......
/* SPDX-License-Identifier: MIT
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_VRAM_MGR_H__
#define __AMDGPU_VRAM_MGR_H__
#include <drm/drm_buddy.h>
/*
 * VRAM manager state for the DRM buddy allocator backend.
 */
struct amdgpu_vram_mgr {
/* TTM resource manager embedded in this VRAM manager */
struct ttm_resource_manager manager;
/* DRM buddy allocator instance that VRAM blocks are allocated from */
struct drm_buddy mm;
/* protects access to buffer objects */
struct mutex lock;
/* reservations queued until their range can be allocated from mm */
struct list_head reservations_pending;
/* reservations whose range has been allocated from mm */
struct list_head reserved_pages;
/* running total of allocated bytes that lie inside CPU-visible VRAM */
atomic64_t vis_usage;
/* fallback minimum block size used when a BO specifies no page alignment */
u64 default_page_size;
};
/*
 * Per-allocation VRAM resource: wraps a ttm_resource together with the list
 * of DRM buddy blocks backing it.
 */
struct amdgpu_vram_mgr_resource {
/* embedded TTM resource; to_amdgpu_vram_mgr_resource() maps back from it */
struct ttm_resource base;
/* list of struct drm_buddy_block, chained through their link member */
struct list_head blocks;
/* DRM_BUDDY_* allocation flags handed to drm_buddy_alloc_blocks() */
unsigned long flags;
};
/*
 * Return the start offset of @block as reported by drm_buddy_block_offset().
 * NOTE(review): callers shift the result by PAGE_SHIFT to get a page number,
 * so the value is presumably a byte offset — confirm against drm_buddy.
 */
static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
{
return drm_buddy_block_offset(block);
}
static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
{
return PAGE_SIZE << drm_buddy_block_order(block);
}
/*
 * Return the first drm_buddy_block on @list (entries are chained through
 * their link member), or NULL when the list is empty.
 */
static inline struct drm_buddy_block *
amdgpu_vram_mgr_first_block(struct list_head *list)
{
return list_first_entry_or_null(list, struct drm_buddy_block, link);
}
/*
 * Check whether the buddy blocks on @head form one contiguous range.
 *
 * Walks the list pairwise and requires every block to begin exactly where
 * its predecessor ends (start + size). Returns false for an empty list and
 * true for a list holding a single block.
 */
static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
{
	struct drm_buddy_block *cur, *succ;

	cur = amdgpu_vram_mgr_first_block(head);
	if (!cur)
		return false;

	/* Stop once the current block is the last one on the list. */
	for (; cur->link.next != head; cur = succ) {
		u64 expected_start = amdgpu_vram_mgr_block_start(cur) +
				     amdgpu_vram_mgr_block_size(cur);

		succ = list_entry(cur->link.next, struct drm_buddy_block, link);
		if (amdgpu_vram_mgr_block_start(succ) != expected_start)
			return false;
	}

	return true;
}
/*
 * Map a ttm_resource back to the amdgpu_vram_mgr_resource that embeds it as
 * its base member. Only valid for resources that really are embedded in an
 * amdgpu_vram_mgr_resource; the conversion itself performs no check.
 */
static inline struct amdgpu_vram_mgr_resource *
to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
{
return container_of(res, struct amdgpu_vram_mgr_resource, base);
}
#endif
......@@ -184,6 +184,8 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
kfd->device_info.no_atomic_fw_version = 14;
else if (gc_version == IP_VERSION(10, 3, 7))
kfd->device_info.no_atomic_fw_version = 3;
else if (gc_version >= IP_VERSION(10, 3, 0))
kfd->device_info.no_atomic_fw_version = 92;
else if (gc_version >= IP_VERSION(10, 1, 1))
......
......@@ -72,6 +72,7 @@
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/component.h>
#include <linux/dmi.h>
#include <drm/display/drm_dp_mst_helper.h>
#include <drm/display/drm_hdmi_helper.h>
......@@ -462,6 +463,26 @@ static void dm_pflip_high_irq(void *interrupt_params)
vrr_active, (int) !e);
}
/*
 * Run the DRM core vblank handling for @acrtc and, when an event is pending
 * on the CRTC that does not belong to a submitted page flip, deliver it.
 *
 * The pending-event check and delivery happen under the device's event_lock.
 */
static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
{
	struct drm_crtc *base = &acrtc->base;
	struct drm_device *ddev = base->dev;
	unsigned long irqflags;

	drm_crtc_handle_vblank(base);

	spin_lock_irqsave(&ddev->event_lock, irqflags);

	/* Nothing to send, or the event is owned by an in-flight page flip. */
	if (!acrtc->event || acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED)
		goto unlock;

	/* Send completion event for cursor-only commits */
	drm_crtc_send_vblank_event(base, acrtc->event);
	/*
	 * Drop a vblank reference — presumably the one taken when the event
	 * was armed by the commit path; confirm against the caller.
	 */
	drm_crtc_vblank_put(base);
	acrtc->event = NULL;

unlock:
	spin_unlock_irqrestore(&ddev->event_lock, irqflags);
}
static void dm_vupdate_high_irq(void *interrupt_params)
{
struct common_irq_params *irq_params = interrupt_params;
......@@ -500,7 +521,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
* if a pageflip happened inside front-porch.
*/
if (vrr_active) {
drm_crtc_handle_vblank(&acrtc->base);
dm_crtc_handle_vblank(acrtc);
/* BTR processing for pre-DCE12 ASICs */
if (acrtc->dm_irq_params.stream &&
......@@ -552,7 +573,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
* to dm_vupdate_high_irq after end of front-porch.
*/
if (!vrr_active)
drm_crtc_handle_vblank(&acrtc->base);
dm_crtc_handle_vblank(acrtc);
/**
* Following stuff must happen at start of vblank, for crc
......@@ -1382,6 +1403,41 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev)
return false;
}
/*
 * DMI match table of systems for which retrieve_dmi_info() enables
 * dm->aux_hpd_discon_quirk. Each entry matches on system vendor and product
 * name; the trailing empty entry terminates the table.
 */
static const struct dmi_system_id hpd_disconnect_quirk_table[] = {
{
/* Dell Precision 3660 */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"),
},
},
{
/* Dell Precision 3260 */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"),
},
},
{
/* Dell Precision 3460 */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
},
},
{}
};
/*
 * Decide from DMI data whether this system needs the AUX HPD-disconnect
 * quirk and record the answer in @dm->aux_hpd_discon_quirk. An info message
 * is logged when the quirk is enabled.
 */
static void retrieve_dmi_info(struct amdgpu_display_manager *dm)
{
	const bool quirk_needed = !!dmi_first_match(hpd_disconnect_quirk_table);

	dm->aux_hpd_discon_quirk = quirk_needed;

	if (quirk_needed)
		DRM_INFO("aux_hpd_discon_quirk attached\n");
}
static int amdgpu_dm_init(struct amdgpu_device *adev)
{
struct dc_init_data init_data;
......@@ -1508,6 +1564,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
INIT_LIST_HEAD(&adev->dm.da_list);
retrieve_dmi_info(&adev->dm);
/* Display Core create. */
adev->dm.dc = dc_create(&init_data);
......@@ -5407,7 +5466,7 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
}
}
if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
*pre_multiplied_alpha = false;
}
......@@ -9135,6 +9194,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct amdgpu_bo *abo;
uint32_t target_vblank, last_flip_vblank;
bool vrr_active = amdgpu_dm_vrr_active(acrtc_state);
bool cursor_update = false;
bool pflip_present = false;
struct {
struct dc_surface_update surface_updates[MAX_SURFACES];
......@@ -9170,8 +9230,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state);
/* Cursor plane is handled after stream updates */
if (plane->type == DRM_PLANE_TYPE_CURSOR)
if (plane->type == DRM_PLANE_TYPE_CURSOR) {
if ((fb && crtc == pcrtc) ||
(old_plane_state->fb && old_plane_state->crtc == pcrtc))
cursor_update = true;
continue;
}
if (!fb || !crtc || pcrtc != crtc)
continue;
......@@ -9334,6 +9399,17 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->stream_update.vrr_infopacket =
&acrtc_state->stream->vrr_infopacket;
}
} else if (cursor_update && acrtc_state->active_planes > 0 &&
!acrtc_state->force_dpms_off &&
acrtc_attach->base.state->event) {
drm_crtc_vblank_get(pcrtc);
spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
acrtc_attach->event = acrtc_attach->base.state->event;
acrtc_attach->base.state->event = NULL;
spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
}
/* Update the planes if changed or disable if we don't have any. */
......
......@@ -540,6 +540,14 @@ struct amdgpu_display_manager {
* last successfully applied backlight values.
*/
u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
/**
* @aux_hpd_discon_quirk:
*
* Quirk for an HPD disconnect reported while an AUX transaction is
* ongoing. Observed on certain Intel platforms.
*/
bool aux_hpd_discon_quirk;
};
enum dsc_clock_force_state {
......
......@@ -56,6 +56,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
ssize_t result = 0;
struct aux_payload payload;
enum aux_return_code_type operation_result;
struct amdgpu_device *adev;
struct ddc_service *ddc;
if (WARN_ON(msg->size > 16))
return -E2BIG;
......@@ -74,6 +76,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
&operation_result);
/*
* Workaround for certain Intel platforms where HPD is unexpectedly pulled
* low during the first sideband message transaction, so the transfer
* returns AUX_RET_ERROR_HPD_DISCON. The AUX transaction actually succeeds
* in that case, therefore bypass the error.
*/
ddc = TO_DM_AUX(aux)->ddc_service;
adev = ddc->ctx->driver_context;
if (adev->dm.aux_hpd_discon_quirk) {
if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE &&
operation_result == AUX_RET_ERROR_HPD_DISCON) {
result = 0;
operation_result = AUX_RET_SUCCESS;
}
}
if (payload.write && result >= 0)
result = msg->size;
......
......@@ -1117,12 +1117,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
* on certain displays, such as the Sharp 4k. 36bpp is needed
* to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and
* SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc
* precision on at least DCN display engines. However, at least
* Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
* so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
* did not show such problems, so this seems to be the exception.
* precision on DCN display engines, but apparently not for DCE, as
* far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have
* problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
* neither does DCE-8 at 4k resolution, nor DCE-11.2 (broken identity pixel
* passthrough). Therefore only use 36 bpp on DCN where it is actually needed.
*/
if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
else
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
......
......@@ -1228,6 +1228,8 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
uint32_t crystal_clock_freq = 2500;
uint32_t tach_period;
if (speed == 0)
return -EINVAL;
/*
* To prevent from possible overheat, some ASICs may have requirement
* for minimum fan speed:
......
......@@ -620,10 +620,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
struct ttm_resource *res)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
u64 page_alignment;
if (!i915_ttm_gtt_binds_lmem(res))
return i915_ttm_tt_get_st(bo->ttm);
page_alignment = bo->page_alignment << PAGE_SHIFT;
if (!page_alignment)
page_alignment = obj->mm.region->min_page_size;
/*
* If CPU mapping differs, we need to add the ttm_tt pages to
* the resulting st. Might make sense for GGTT.
......@@ -634,7 +639,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
struct i915_refct_sgt *rsgt;
rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
res);
res,
page_alignment);
if (IS_ERR(rsgt))
return rsgt;
......@@ -643,7 +649,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
}
return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
return intel_region_ttm_resource_to_rsgt(obj->mm.region, res,
page_alignment);
}
static int i915_ttm_truncate(struct drm_i915_gem_object *obj)
......
......@@ -9,6 +9,7 @@
#include <linux/jiffies.h>
#include "gt/intel_engine.h"
#include "gt/intel_rps.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
......@@ -31,6 +32,37 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
timeout);
}
static void
i915_gem_object_boost(struct dma_resv *resv, unsigned int flags)
{
struct dma_resv_iter cursor;
struct dma_fence *fence;
/*
* Prescan all fences for potential boosting before we begin waiting.
*
* When we wait, we wait on outstanding fences serially. If the
* dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced
* form 1:2, then as we look at each wait in turn we see that each
* request is currently executing and not worthy of boosting. But if
* we only happen to look at the final fence in the sequence (because
* of request coalescing or splitting between read/write arrays by
* the iterator), then we would boost. As such our decision to boost
* or not is delicately balanced on the order we wait on fences.
*
* So instead of looking for boosts sequentially, look for all boosts
* upfront and then wait on the outstanding fences.
*/
dma_resv_iter_begin(&cursor, resv,
dma_resv_usage_rw(flags & I915_WAIT_ALL));
dma_resv_for_each_fence_unlocked(&cursor, fence)
if (dma_fence_is_i915(fence) &&
!i915_request_started(to_request(fence)))
intel_rps_boost(to_request(fence));
dma_resv_iter_end(&cursor);
}
static long
i915_gem_object_wait_reservation(struct dma_resv *resv,
unsigned int flags,
......@@ -40,6 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
struct dma_fence *fence;
long ret = timeout ?: 1;
i915_gem_object_boost(resv, flags);
dma_resv_iter_begin(&cursor, resv,
dma_resv_usage_rw(flags & I915_WAIT_ALL));
dma_resv_for_each_fence_unlocked(&cursor, fence) {
......
......@@ -1209,6 +1209,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(&gt->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
for_each_engine(engine, gt, id) {
struct reg_and_bit rb;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
}
spin_unlock_irq(&uncore->lock);
for_each_engine(engine, gt, id) {
/*
* HW architecture suggests a typical invalidation time of 40us,
......@@ -1223,7 +1237,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
......
......@@ -300,7 +300,7 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
static int gen6_reset_engines(struct intel_gt *gt,
static int __gen6_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
......@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
/*
 * Locked entry point for the gen6 engine reset.
 *
 * Runs __gen6_reset_engines() with gt->uncore->lock held (interrupt state
 * saved and restored around it) and returns that helper's result unchanged.
 */
static int gen6_reset_engines(struct intel_gt *gt,
			      intel_engine_mask_t engine_mask,
			      unsigned int retry)
{
	unsigned long irqflags;
	int err;

	spin_lock_irqsave(&gt->uncore->lock, irqflags);
	err = __gen6_reset_engines(gt, engine_mask, retry);
	spin_unlock_irqrestore(&gt->uncore->lock, irqflags);

	return err;
}
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
......@@ -487,7 +501,7 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
static int gen11_reset_engines(struct intel_gt *gt,
static int __gen11_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
......@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
unsigned long flags;
int ret;
spin_lock_irqsave(&gt->uncore->lock, flags);
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
......@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
gen11_reset_engines(gt, gt->info.engine_mask, 0);
__gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
ret = gen11_reset_engines(gt, engine_mask, retry);
ret = __gen11_reset_engines(gt, engine_mask, retry);
else
ret = gen6_reset_engines(gt, engine_mask, retry);
ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
spin_unlock_irqrestore(&gt->uncore->lock, flags);
return ret;
}
......
......@@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg)
continue;
hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
if (!hw) {
err = -ENOMEM;
break;
}
hw += LRC_STATE_OFFSET / sizeof(*hw);
......@@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg)
continue;
hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
if (!hw) {
err = -ENOMEM;
break;
}
hw += LRC_STATE_OFFSET / sizeof(*hw);
......
......@@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
continue;
vaddr = shmem_pin_map(engine->default_state);
if (IS_ERR(vaddr)) {
gvt_err("failed to map %s->default state, err:%zd\n",
engine->name, PTR_ERR(vaddr));
if (!vaddr) {
gvt_err("failed to map %s->default state\n",
engine->name);
return;
}
......
......@@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
* drm_mm_node
* @node: The drm_mm_node.
* @region_start: An offset to add to the dma addresses of the sg list.
* @page_alignment: Required page alignment for each sg entry. Power of two.
*
* Create a struct sg_table, initializing it from a struct drm_mm_node,
* taking a maximum segment length into account, splitting into segments
......@@ -77,15 +78,18 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
* error code cast to an error pointer on failure.
*/
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
u64 region_start)
u64 region_start,
u64 page_alignment)
{
const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
const u64 max_segment = round_down(UINT_MAX, page_alignment);
u64 segment_pages = max_segment >> PAGE_SHIFT;
u64 block_size, offset, prev_end;
struct i915_refct_sgt *rsgt;
struct sg_table *st;
struct scatterlist *sg;
GEM_BUG_ON(!max_segment);
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
if (!rsgt)
return ERR_PTR(-ENOMEM);
......@@ -112,6 +116,8 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
sg = __sg_next(sg);
sg_dma_address(sg) = region_start + offset;
GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
page_alignment));
sg_dma_len(sg) = 0;
sg->length = 0;
st->nents++;
......@@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
* i915_buddy_block list
* @res: The struct i915_ttm_buddy_resource.
* @region_start: An offset to add to the dma addresses of the sg list.
* @page_alignment: Required page alignment for each sg entry. Power of two.
*
* Create a struct sg_table, initializing it from struct i915_buddy_block list,
* taking a maximum segment length into account, splitting into segments
......@@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
* error code cast to an error pointer on failure.
*/
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
u64 region_start)
u64 region_start,
u64 page_alignment)
{
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
const u64 size = res->num_pages << PAGE_SHIFT;
const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE);
const u64 max_segment = round_down(UINT_MAX, page_alignment);
struct drm_buddy *mm = bman_res->mm;
struct list_head *blocks = &bman_res->blocks;
struct drm_buddy_block *block;
......@@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
resource_size_t prev_end;
GEM_BUG_ON(list_empty(blocks));
GEM_BUG_ON(!max_segment);
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
if (!rsgt)
......@@ -191,6 +200,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
sg = __sg_next(sg);
sg_dma_address(sg) = region_start + offset;
GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
page_alignment));
sg_dma_len(sg) = 0;
sg->length = 0;
st->nents++;
......
......@@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt,
void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
u64 region_start);
u64 region_start,
u64 page_alignment);
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
u64 region_start);
u64 region_start,
u64 page_alignment);
#endif
......@@ -152,6 +152,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
* Convert an opaque TTM resource manager resource to a refcounted sg_table.
* @mem: The memory region.
* @res: The resource manager resource obtained from the TTM resource manager.
* @page_alignment: Required page alignment for each sg entry. Power of two.
*
* The gem backends typically use sg-tables for operations on the underlying
* io_memory. So provide a way for the backends to translate the
......@@ -161,16 +162,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
*/
struct i915_refct_sgt *
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
struct ttm_resource *res)
struct ttm_resource *res,
u64 page_alignment)
{
if (mem->is_range_manager) {
struct ttm_range_mgr_node *range_node =
to_ttm_range_mgr_node(res);
return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
mem->region.start);
mem->region.start,
page_alignment);
} else {
return i915_rsgt_from_buddy_resource(res, mem->region.start);
return i915_rsgt_from_buddy_resource(res, mem->region.start,
page_alignment);
}
}
......
......@@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem);
struct i915_refct_sgt *
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
struct ttm_resource *res);
struct ttm_resource *res,
u64 page_alignment);
void intel_region_ttm_resource_free(struct intel_memory_region *mem,
struct ttm_resource *res);
......
......@@ -742,7 +742,7 @@ static int pot_hole(struct i915_address_space *vm,
u64 addr;
for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment;
hole_end > addr && hole_end - addr >= 2 * min_alignment;
addr += step) {
err = i915_vma_pin(vma, 0, 0, addr | flags);
if (err) {
......
......@@ -451,7 +451,6 @@ static int igt_mock_splintered_region(void *arg)
static int igt_mock_max_segment(void *arg)
{
const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE);
struct intel_memory_region *mem = arg;
struct drm_i915_private *i915 = mem->i915;
struct i915_ttm_buddy_resource *res;
......@@ -460,7 +459,10 @@ static int igt_mock_max_segment(void *arg)
struct drm_buddy *mm;
struct list_head *blocks;
struct scatterlist *sg;
I915_RND_STATE(prng);
LIST_HEAD(objects);
unsigned int max_segment;
unsigned int ps;
u64 size;
int err = 0;
......@@ -472,7 +474,13 @@ static int igt_mock_max_segment(void *arg)
*/
size = SZ_8G;
mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
ps = PAGE_SIZE;
if (i915_prandom_u64_state(&prng) & 1)
ps = SZ_64K; /* For something like DG2 */
max_segment = round_down(UINT_MAX, ps);
mem = mock_region_create(i915, 0, size, ps, 0, 0);
if (IS_ERR(mem))
return PTR_ERR(mem);
......@@ -498,12 +506,21 @@ static int igt_mock_max_segment(void *arg)
}
for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
dma_addr_t daddr = sg_dma_address(sg);
if (sg->length > max_segment) {
pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
__func__, sg->length, max_segment);
err = -EINVAL;
goto out_close;
}
if (!IS_ALIGNED(daddr, ps)) {
pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n",
__func__, &daddr, ps);
err = -EINVAL;
goto out_close;
}
}
out_close:
......
......@@ -33,7 +33,8 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj)
return PTR_ERR(obj->mm.res);
obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
obj->mm.res);
obj->mm.res,
obj->mm.region->min_page_size);
if (IS_ERR(obj->mm.rsgt)) {
err = PTR_ERR(obj->mm.rsgt);
goto err_free_resource;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment