Commit f30b8eaa authored by Dave Airlie's avatar Dave Airlie

Merge branch 'drm-next-4.14' of git://people.freedesktop.org/~agd5f/linux into drm-next

More changes for 4.14.  Highlights:
- command submission overhead improvements
- Huge page support for vega10
- physical mode support for MJPEG decode on ASICs that don't support UVD VM
- improve ttm_mem_type_manager_func debug
- misc ttm fixes, cleanups
- misc gpuvm cleanups

* 'drm-next-4.14' of git://people.freedesktop.org/~agd5f/linux: (26 commits)
  drm/ttm: use reservation_object_trylock in ttm_bo_individualize_resv v2
  drm/amdgpu: fix vega10 graphic hang issue in S3 test
  drm/amdgpu: bump version for support of UVD MJPEG decode
  drm/amdgpu: add MJPEG check for UVD physical mode msg buffer
  drm/ttm: Fix accounting error when fail to get pages for pool
  drm/amd/amdgpu: expose fragment size as module parameter (v2)
  drm/amd/amdgpu: store fragment_size in vm_manager
  drm/amdgpu: rename VM invalidated to moved
  drm/amdgpu: separate bo_va structure
  drm/amdgpu: drop the extra VM huge page flag v2
  drm/amdgpu: remove superflous amdgpu_bo_kmap in the VM
  drm/amdgpu: cleanup static CSA handling
  drm/amdgpu: SHADOW and VRAM_CONTIGUOUS flags shouldn't be used by userspace
  drm/amdgpu: save list length when fence is signaled
  drm/amdgpu: move vram usage tracking into the vram manager v2
  drm/amdgpu: move gtt usage tracking into the gtt manager v2
  drm/amdgpu: move debug print into the MM managers
  drm/amdgpu: fix incorrect use of the lru_lock
  drm/radeon: fix incorrect use of the lru_lock
  drm/ttm: make ttm_mem_type_manager_func debug more useful
  ...
parents 54e0aa64 df9bcb06
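
A recurring theme in the diff below is that the per-device usage counters (adev->vram_usage, vram_vis_usage, gtt_usage) go away and callers now ask the TTM memory managers directly via amdgpu_vram_mgr_usage(), amdgpu_vram_mgr_vis_usage() and amdgpu_gtt_mgr_usage(). As a rough, non-authoritative sketch of how a caller uses the new helpers (the reporting function below is hypothetical and not part of this series; it assumes a fully initialised struct amdgpu_device):

	/* Hypothetical example: query memory usage through the manager helpers. */
	static void amdgpu_report_mem_usage(struct amdgpu_device *adev)
	{
		u64 vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
		u64 vis = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
		u64 gtt = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);

		DRM_INFO("VRAM %llu MB (visible %llu MB), GTT %llu MB in use\n",
			 vram >> 20, vis >> 20, gtt >> 20);
	}

The INFO ioctl and the command-submission throttling code in the diff follow exactly this pattern.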
@@ -96,6 +96,7 @@ extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fragment_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
 extern int amdgpu_vm_update_mode;
@@ -748,6 +749,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 struct amdgpu_fpriv {
	struct amdgpu_vm vm;
	struct amdgpu_bo_va *prt_va;
+	struct amdgpu_bo_va *csa_va;
	struct mutex bo_list_lock;
	struct idr bo_list_handles;
	struct amdgpu_ctx_mgr ctx_mgr;
@@ -1482,9 +1484,6 @@ struct amdgpu_device {
	struct amdgpu_mman mman;
	struct amdgpu_vram_scratch vram_scratch;
	struct amdgpu_wb wb;
-	atomic64_t vram_usage;
-	atomic64_t vram_vis_usage;
-	atomic64_t gtt_usage;
	atomic64_t num_bytes_moved;
	atomic64_t num_evictions;
	atomic64_t num_vram_cpu_page_faults;
......
@@ -246,7 +246,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	}
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
-	used_vram = atomic64_read(&adev->vram_usage);
+	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 	spin_lock(&adev->mm_stats.lock);
@@ -292,7 +292,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	/* Do the same for visible VRAM if half of it is free */
 	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
		u64 total_vis_vram = adev->mc.visible_vram_size;
-		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+		u64 used_vis_vram =
+			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -673,10 +674,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 error_validate:
-	if (r) {
-		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
+	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-	}
 error_free_pages:
@@ -724,21 +723,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
  * If error is set than unvalidate buffer, otherwise just free memory
  * used by parsing context.
  **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
+				  bool backoff)
 {
-	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	unsigned i;
-	if (!error) {
-		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+	if (!error)
		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->fence);
-	} else if (backoff) {
+	else if (backoff)
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
-	}
	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
		drm_syncobj_put(parser->post_dep_syncobjs[i]);
@@ -791,7 +787,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (amdgpu_sriov_vf(adev)) {
		struct dma_fence *f;
-		bo_va = vm->csa_bo_va;
+		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
@@ -828,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	}
-	r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);
+	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
 	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
@@ -1490,7 +1487,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		    addr > mapping->last)
			continue;
-		*bo = lobj->bo_va->bo;
+		*bo = lobj->bo_va->base.bo;
		return mapping;
	}
@@ -1499,7 +1496,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		    addr > mapping->last)
			continue;
-		*bo = lobj->bo_va->bo;
+		*bo = lobj->bo_va->base.bo;
		return mapping;
	}
 }
......
@@ -1076,6 +1076,13 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
		amdgpu_gtt_size = -1;
	}
+	/* valid range is between 4 and 9 inclusive */
+	if (amdgpu_vm_fragment_size != -1 &&
+	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
+		dev_warn(adev->dev, "valid range is between 4 and 9\n");
+		amdgpu_vm_fragment_size = -1;
+	}
	amdgpu_check_vm_size(adev);
	amdgpu_check_block_size(adev);
......
@@ -68,9 +68,10 @@
  * - 3.16.0 - Add reserved vmid support
  * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
  * - 3.18.0 - Export gpu always on cu bitmap
+ * - 3.19.0 - Add support for UVD MJPEG decode
  */
 #define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 18
+#define KMS_DRIVER_MINOR 19
 #define KMS_DRIVER_PATCHLEVEL 0
 int amdgpu_vram_limit = 0;
@@ -94,6 +95,7 @@ unsigned amdgpu_ip_block_mask = 0xffffffff;
 int amdgpu_bapm = -1;
 int amdgpu_deep_color = 0;
 int amdgpu_vm_size = -1;
+int amdgpu_vm_fragment_size = -1;
 int amdgpu_vm_block_size = -1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
@@ -183,6 +185,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 0444);
 MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
 module_param_named(vm_size, amdgpu_vm_size, int, 0444);
+MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
+module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
 MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
 module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
......
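
The new vm_fragment_size parameter is the log2 of the number of GPU pages per fragment, and a GPU page (AMDGPU_GPU_PAGE_SIZE) is 4096 bytes, so one fragment covers (1 << vm_fragment_size) * 4096 bytes: 4 gives 64KB and 9 gives 2MB, which is the range amdgpu_check_arguments() enforces above and what the kms code further below reports as dev_info.pte_fragment_size. A minimal worked sketch of that arithmetic (the helper name is made up purely for illustration):

	/* Illustration only: bytes covered by one VM fragment for a given setting. */
	static inline u64 example_vm_fragment_bytes(unsigned int vm_fragment_size)
	{
		return (1ULL << vm_fragment_size) * 4096;	/* 4 -> 64KB, 9 -> 2MB */
	}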
@@ -225,9 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
				      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
				      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-				      AMDGPU_GEM_CREATE_VRAM_CLEARED|
-				      AMDGPU_GEM_CREATE_SHADOW |
-				      AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
+				      AMDGPU_GEM_CREATE_VRAM_CLEARED))
		return -EINVAL;
 	/* reject invalid gem domains */
@@ -623,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	switch (args->operation) {
	case AMDGPU_VA_OP_MAP:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
					args->map_size);
		if (r)
			goto error_backoff;
@@ -643,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
					args->map_size);
		break;
	case AMDGPU_VA_OP_REPLACE:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address,
+		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
					args->map_size);
		if (r)
			goto error_backoff;
......
@@ -28,7 +28,7 @@
 struct amdgpu_gtt_mgr {
	struct drm_mm mm;
	spinlock_t lock;
-	uint64_t available;
+	atomic64_t available;
 };
 /**
@@ -54,7 +54,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
 	size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
	drm_mm_init(&mgr->mm, start, size);
	spin_lock_init(&mgr->lock);
-	mgr->available = p_size;
+	atomic64_set(&mgr->available, p_size);
	man->priv = mgr;
	return 0;
 }
@@ -153,15 +153,6 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 	return r;
 }
-void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man)
-{
-	struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
-	struct amdgpu_gtt_mgr *mgr = man->priv;
-	seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
-		   man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20);
-}
 /**
  * amdgpu_gtt_mgr_new - allocate a new node
  *
@@ -182,11 +173,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
 	int r;
 	spin_lock(&mgr->lock);
-	if (mgr->available < mem->num_pages) {
+	if (atomic64_read(&mgr->available) < mem->num_pages) {
		spin_unlock(&mgr->lock);
		return 0;
	}
-	mgr->available -= mem->num_pages;
+	atomic64_sub(mem->num_pages, &mgr->available);
	spin_unlock(&mgr->lock);
	node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -213,9 +204,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
 	return 0;
 err_out:
-	spin_lock(&mgr->lock);
-	mgr->available += mem->num_pages;
-	spin_unlock(&mgr->lock);
+	atomic64_add(mem->num_pages, &mgr->available);
	return r;
 }
@@ -242,30 +231,47 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man,
 	spin_lock(&mgr->lock);
	if (node->start != AMDGPU_BO_INVALID_OFFSET)
		drm_mm_remove_node(node);
-	mgr->available += mem->num_pages;
	spin_unlock(&mgr->lock);
+	atomic64_add(mem->num_pages, &mgr->available);
	kfree(node);
	mem->mm_node = NULL;
 }
+/**
+ * amdgpu_gtt_mgr_usage - return usage of GTT domain
+ *
+ * @man: TTM memory type manager
+ *
+ * Return how many bytes are used in the GTT domain
+ */
+uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man)
+{
+	struct amdgpu_gtt_mgr *mgr = man->priv;
+	return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE;
+}
 /**
  * amdgpu_gtt_mgr_debug - dump VRAM table
  *
  * @man: TTM memory type manager
- * @prefix: text prefix
+ * @printer: DRM printer to use
  *
  * Dump the table content using printk.
  */
 static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man,
-				 const char *prefix)
+				 struct drm_printer *printer)
 {
	struct amdgpu_gtt_mgr *mgr = man->priv;
-	struct drm_printer p = drm_debug_printer(prefix);
	spin_lock(&mgr->lock);
-	drm_mm_print(&mgr->mm, &p);
+	drm_mm_print(&mgr->mm, printer);
	spin_unlock(&mgr->lock);
+	drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
+		   man->size, (u64)atomic64_read(&mgr->available),
+		   amdgpu_gtt_mgr_usage(man) >> 20);
 }
 const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = {
......
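
With available stored as an atomic64_t, amdgpu_gtt_mgr_usage() above can be called without taking mgr->lock; the used byte count is simply (man->size - available) * PAGE_SIZE. A small, hedged example of a caller turning that into the same MB figure the drm_printf() line prints (sketch only, assuming a valid TTM_PL_TT manager; the helper name is invented for illustration):

	/* Sketch: GTT usage in MB through the new helper. */
	static u64 example_gtt_usage_mb(struct ttm_mem_type_manager *man)
	{
		return amdgpu_gtt_mgr_usage(man) >> 20;	/* bytes -> MB */
	}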
@@ -455,13 +455,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
		ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
	case AMDGPU_INFO_VRAM_USAGE:
-		ui64 = atomic64_read(&adev->vram_usage);
+		ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
	case AMDGPU_INFO_VIS_VRAM_USAGE:
-		ui64 = atomic64_read(&adev->vram_vis_usage);
+		ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
	case AMDGPU_INFO_GTT_USAGE:
-		ui64 = atomic64_read(&adev->gtt_usage);
+		ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
	case AMDGPU_INFO_GDS_CONFIG: {
		struct drm_amdgpu_info_gds gds_info;
@@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
		mem.vram.total_heap_size = adev->mc.real_vram_size;
		mem.vram.usable_heap_size =
			adev->mc.real_vram_size - adev->vram_pin_size;
-		mem.vram.heap_usage = atomic64_read(&adev->vram_usage);
+		mem.vram.heap_usage =
+			amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
		mem.cpu_accessible_vram.total_heap_size =
@@ -506,7 +507,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
			adev->mc.visible_vram_size -
			(adev->vram_pin_size - adev->invisible_pin_size);
		mem.cpu_accessible_vram.heap_usage =
-			atomic64_read(&adev->vram_vis_usage);
+			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
		mem.cpu_accessible_vram.max_allocation =
			mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
@@ -514,7 +515,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
		mem.gtt.total_heap_size *= PAGE_SIZE;
		mem.gtt.usable_heap_size = mem.gtt.total_heap_size
			- adev->gart_pin_size;
-		mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage);
+		mem.gtt.heap_usage =
+			amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
		mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
		return copy_to_user(out, &mem,
@@ -588,11 +590,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
		dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
		dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
-		dev_info.pte_fragment_size =
-			(1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
-			AMDGPU_GPU_PAGE_SIZE;
+		dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
		dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
		dev_info.cu_active_number = adev->gfx.cu_info.number;
		dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
		dev_info.ce_ram_size = adev->gfx.ce_ram_size;
@@ -841,7 +840,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	}
 	if (amdgpu_sriov_vf(adev)) {
-		r = amdgpu_map_static_csa(adev, &fpriv->vm);
+		r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
		if (r)
			goto out_suspend;
	}
@@ -894,8 +893,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	if (amdgpu_sriov_vf(adev)) {
		/* TODO: how to handle reserve failure */
		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
-		amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
-		fpriv->vm.csa_bo_va = NULL;
+		amdgpu_vm_bo_rmv(adev, fpriv->csa_va);
+		fpriv->csa_va = NULL;
		amdgpu_bo_unreserve(adev->virt.csa_obj);
	}
......
@@ -37,55 +37,6 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
-static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
-				    struct ttm_mem_reg *mem)
-{
-	if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
-		return 0;
-	return ((mem->start << PAGE_SHIFT) + mem->size) >
-		adev->mc.visible_vram_size ?
-		adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
-		mem->size;
-}
-static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
-				       struct ttm_mem_reg *old_mem,
-				       struct ttm_mem_reg *new_mem)
-{
-	u64 vis_size;
-	if (!adev)
-		return;
-	if (new_mem) {
-		switch (new_mem->mem_type) {
-		case TTM_PL_TT:
-			atomic64_add(new_mem->size, &adev->gtt_usage);
-			break;
-		case TTM_PL_VRAM:
-			atomic64_add(new_mem->size, &adev->vram_usage);
-			vis_size = amdgpu_get_vis_part_size(adev, new_mem);
-			atomic64_add(vis_size, &adev->vram_vis_usage);
-			break;
-		}
-	}
-	if (old_mem) {
-		switch (old_mem->mem_type) {
-		case TTM_PL_TT:
-			atomic64_sub(old_mem->size, &adev->gtt_usage);
-			break;
-		case TTM_PL_VRAM:
-			atomic64_sub(old_mem->size, &adev->vram_usage);
-			vis_size = amdgpu_get_vis_part_size(adev, old_mem);
-			atomic64_sub(vis_size, &adev->vram_vis_usage);
-			break;
-		}
-	}
-}
 static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
@@ -94,7 +45,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	bo = container_of(tbo, struct amdgpu_bo, tbo);
	amdgpu_bo_kunmap(bo);
-	amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
	drm_gem_object_release(&bo->gem_base);
	amdgpu_bo_unref(&bo->parent);
@@ -992,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
		return;
	/* move_notify is called before move happens */
-	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }
......
@@ -33,6 +33,7 @@
 #define AMDGPU_BO_INVALID_OFFSET LONG_MAX
+/* bo virtual addresses in a vm */
 struct amdgpu_bo_va_mapping {
	struct list_head list;
	struct rb_node rb;
@@ -43,26 +44,19 @@ struct amdgpu_bo_va_mapping {
	uint64_t flags;
 };
-/* bo virtual addresses in a specific vm */
+/* User space allocated BO in a VM */
 struct amdgpu_bo_va {
+	struct amdgpu_vm_bo_base base;
	/* protected by bo being reserved */
-	struct list_head bo_list;
	struct dma_fence *last_pt_update;
	unsigned ref_count;
-	/* protected by vm mutex and spinlock */
-	struct list_head vm_status;
	/* mappings for this bo_va */
	struct list_head invalids;
	struct list_head valids;
-	/* constant after initialization */
-	struct amdgpu_vm *vm;
-	struct amdgpu_bo *bo;
 };
 struct amdgpu_bo {
	/* Protected by tbo.reserved */
	u32 preferred_domains;
......
@@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
		struct dma_fence *f = e->fence;
		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+		if (dma_fence_is_signaled(f)) {
+			hash_del(&e->node);
+			dma_fence_put(f);
+			kmem_cache_free(amdgpu_sync_slab, e);
+			continue;
+		}
		if (ring && s_fence) {
			/* For fences from the same ring it is sufficient
			 * when they are scheduled.
@@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
			}
		}
-		if (dma_fence_is_signaled(f)) {
-			hash_del(&e->node);
-			dma_fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
		return f;
	}
......
@@ -14,6 +14,62 @@
 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
	job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
+TRACE_EVENT(amdgpu_ttm_tt_populate,
+	TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address),
+	TP_ARGS(adev, dma_address, phys_address),
+	TP_STRUCT__entry(
+		__field(uint16_t, domain)
+		__field(uint8_t, bus)
+		__field(uint8_t, slot)
+		__field(uint8_t, func)
+		__field(uint64_t, dma)
+		__field(uint64_t, phys)
+	),
+	TP_fast_assign(
+		__entry->domain = pci_domain_nr(adev->pdev->bus);
+		__entry->bus = adev->pdev->bus->number;
+		__entry->slot = PCI_SLOT(adev->pdev->devfn);
+		__entry->func = PCI_FUNC(adev->pdev->devfn);
+		__entry->dma = dma_address;
+		__entry->phys = phys_address;
+	),
+	TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+		(unsigned)__entry->domain,
+		(unsigned)__entry->bus,
+		(unsigned)__entry->slot,
+		(unsigned)__entry->func,
+		(unsigned long long)__entry->dma,
+		(unsigned long long)__entry->phys)
+);
+TRACE_EVENT(amdgpu_ttm_tt_unpopulate,
+	TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address),
+	TP_ARGS(adev, dma_address, phys_address),
+	TP_STRUCT__entry(
+		__field(uint16_t, domain)
+		__field(uint8_t, bus)
+		__field(uint8_t, slot)
+		__field(uint8_t, func)
+		__field(uint64_t, dma)
+		__field(uint64_t, phys)
+	),
+	TP_fast_assign(
+		__entry->domain = pci_domain_nr(adev->pdev->bus);
+		__entry->bus = adev->pdev->bus->number;
+		__entry->slot = PCI_SLOT(adev->pdev->devfn);
+		__entry->func = PCI_FUNC(adev->pdev->devfn);
+		__entry->dma = dma_address;
+		__entry->phys = phys_address;
+	),
+	TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+		(unsigned)__entry->domain,
+		(unsigned)__entry->bus,
+		(unsigned)__entry->slot,
+		(unsigned)__entry->func,
+		(unsigned long long)__entry->dma,
+		(unsigned long long)__entry->phys)
+);
 TRACE_EVENT(amdgpu_mm_rreg,
	TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
	TP_ARGS(did, reg, value),
@@ -228,7 +284,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
	),
	TP_fast_assign(
-		__entry->bo = bo_va ? bo_va->bo : NULL;
+		__entry->bo = bo_va ? bo_va->base.bo : NULL;
		__entry->start = mapping->start;
		__entry->last = mapping->last;
		__entry->offset = mapping->offset;
@@ -252,7 +308,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
	),
	TP_fast_assign(
-		__entry->bo = bo_va->bo;
+		__entry->bo = bo_va->base.bo;
		__entry->start = mapping->start;
		__entry->last = mapping->last;
		__entry->offset = mapping->offset;
......
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -662,6 +663,38 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 	return r;
 }
+static void amdgpu_trace_dma_map(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned i;
+	if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
+		for (i = 0; i < ttm->num_pages; i++) {
+			trace_amdgpu_ttm_tt_populate(
+				adev,
+				gtt->ttm.dma_address[i],
+				page_to_phys(ttm->pages[i]));
+		}
+	}
+}
+static void amdgpu_trace_dma_unmap(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned i;
+	if (unlikely(trace_amdgpu_ttm_tt_unpopulate_enabled())) {
+		for (i = 0; i < ttm->num_pages; i++) {
+			trace_amdgpu_ttm_tt_unpopulate(
+				adev,
+				gtt->ttm.dma_address[i],
+				page_to_phys(ttm->pages[i]));
+		}
+	}
+}
 /* prepare the sg table with the user pages */
 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
@@ -688,6 +721,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
					 gtt->ttm.dma_address, ttm->num_pages);
+	amdgpu_trace_dma_map(ttm);
	return 0;
 release_sg:
@@ -721,6 +756,8 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
		put_page(page);
	}
+	amdgpu_trace_dma_unmap(ttm);
	sg_free_table(ttm->sg);
 }
@@ -892,7 +929,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned i;
	int r;
@@ -915,14 +952,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
						 gtt->ttm.dma_address, ttm->num_pages);
		ttm->state = tt_unbound;
-		return 0;
+		r = 0;
+		goto trace_mappings;
	}
-	adev = amdgpu_ttm_adev(ttm->bdev);
 #ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
-		return ttm_dma_populate(&gtt->ttm, adev->dev);
+		r = ttm_dma_populate(&gtt->ttm, adev->dev);
+		goto trace_mappings;
	}
 #endif
@@ -945,7 +982,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
			return -EFAULT;
		}
	}
-	return 0;
+	r = 0;
+trace_mappings:
+	if (likely(!r))
+		amdgpu_trace_dma_map(ttm);
+	return r;
 }
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -966,6 +1008,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	adev = amdgpu_ttm_adev(ttm->bdev);
+	amdgpu_trace_dma_unmap(ttm);
 #ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
@@ -1597,32 +1641,16 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 #if defined(CONFIG_DEBUG_FS)
-extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager
-				 *man);
 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 {
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	unsigned ttm_pl = *(int *)node->info_ent->data;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
-	struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv;
-	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
	struct drm_printer p = drm_seq_file_printer(m);
-	spin_lock(&glob->lru_lock);
-	drm_mm_print(mm, &p);
-	spin_unlock(&glob->lru_lock);
-	switch (ttm_pl) {
-	case TTM_PL_VRAM:
-		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
-			   adev->mman.bdev.man[ttm_pl].size,
-			   (u64)atomic64_read(&adev->vram_usage) >> 20,
-			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
-		break;
-	case TTM_PL_TT:
-		amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
-		break;
-	}
+	man->func->debug(man, &p);
	return 0;
 }
......
@@ -66,6 +66,10 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
			 struct ttm_buffer_object *tbo,
			 const struct ttm_place *place,
			 struct ttm_mem_reg *mem);
+uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
+uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
+uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
		       uint64_t dst_offset, uint32_t byte_count,
......
@@ -588,6 +588,10 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
		}
		break;
+	case 8: /* MJPEG */
+		min_dpb_size = 0;
+		break;
	case 16: /* H265 */
		image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
		image_size = ALIGN(image_size, 256);
......
@@ -46,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
  * address within META_DATA init package to support SRIOV gfx preemption.
  */
-int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct amdgpu_bo_va **bo_va)
 {
-	int r;
-	struct amdgpu_bo_va *bo_va;
	struct ww_acquire_ctx ticket;
	struct list_head list;
	struct amdgpu_bo_list_entry pd;
	struct ttm_validate_buffer csa_tv;
+	int r;
	INIT_LIST_HEAD(&list);
	INIT_LIST_HEAD(&csa_tv.head);
@@ -69,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
		return r;
	}
-	bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
-	if (!bo_va) {
+	*bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
+	if (!*bo_va) {
		ttm_eu_backoff_reservation(&ticket, &list);
		DRM_ERROR("failed to create bo_va for static CSA\n");
		return -ENOMEM;
	}
-	r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR,
+	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
				AMDGPU_CSA_SIZE);
	if (r) {
		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, bo_va);
+		amdgpu_vm_bo_rmv(adev, *bo_va);
		ttm_eu_backoff_reservation(&ticket, &list);
		return r;
	}
-	r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE,
+	r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
			     AMDGPU_PTE_EXECUTABLE);
	if (r) {
		DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, bo_va);
+		amdgpu_vm_bo_rmv(adev, *bo_va);
		ttm_eu_backoff_reservation(&ticket, &list);
		return r;
	}
-	vm->csa_bo_va = bo_va;
	ttm_eu_backoff_reservation(&ticket, &list);
	return 0;
 }
......
@@ -90,7 +90,8 @@ static inline bool is_virtual_machine(void)
 struct amdgpu_vm;
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
-int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct amdgpu_bo_va **bo_va);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
......
@@ -159,11 +159,20 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
				    int (*validate)(void *, struct amdgpu_bo *),
-				    void *param, bool use_cpu_for_update)
+				    void *param, bool use_cpu_for_update,
+				    struct ttm_bo_global *glob)
 {
	unsigned i;
	int r;
+	if (parent->bo->shadow) {
+		struct amdgpu_bo *shadow = parent->bo->shadow;
+		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
+		if (r)
+			return r;
+	}
	if (use_cpu_for_update) {
		r = amdgpu_bo_kmap(parent->bo, NULL);
		if (r)
@@ -183,12 +192,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
		if (r)
			return r;
+		spin_lock(&glob->lru_lock);
+		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
+		if (entry->bo->shadow)
+			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
+		spin_unlock(&glob->lru_lock);
		/*
		 * Recurse into the sub directory. This is harmless because we
		 * have only a maximum of 5 layers.
		 */
		r = amdgpu_vm_validate_level(entry, validate, param,
-					     use_cpu_for_update);
+					     use_cpu_for_update, glob);
		if (r)
			return r;
	}
@@ -220,54 +235,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		return 0;
	return amdgpu_vm_validate_level(&vm->root, validate, param,
-					vm->use_cpu_for_update);
+					vm->use_cpu_for_update,
+					adev->mman.bdev.glob);
 }
 /**
- * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail
- *
- * @adev: amdgpu device instance
- * @vm: vm providing the BOs
- *
- * Move the PT BOs to the tail of the LRU.
- */
-static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent)
-{
-	unsigned i;
-	if (!parent->entries)
-		return;
-	for (i = 0; i <= parent->last_entry_used; ++i) {
-		struct amdgpu_vm_pt *entry = &parent->entries[i];
-		if (!entry->bo)
-			continue;
-		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
-		amdgpu_vm_move_level_in_lru(entry);
-	}
-}
-/**
- * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
- *
- * @adev: amdgpu device instance
- * @vm: vm providing the BOs
- *
- * Move the PT BOs to the tail of the LRU.
- */
-void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
-				  struct amdgpu_vm *vm)
-{
-	struct ttm_bo_global *glob = adev->mman.bdev.glob;
-	spin_lock(&glob->lru_lock);
-	amdgpu_vm_move_level_in_lru(&vm->root);
-	spin_unlock(&glob->lru_lock);
-}
-/**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
  * @adev: amdgpu_device pointer
@@ -359,7 +331,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
		entry->bo = pt;
		entry->addr = 0;
-		entry->huge_page = false;
	}
	if (level < adev->vm_manager.num_level) {
@@ -899,8 +870,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 {
	struct amdgpu_bo_va *bo_va;
-	list_for_each_entry(bo_va, &bo->va, bo_list) {
-		if (bo_va->vm == vm) {
+	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
+		if (bo_va->base.vm == vm) {
			return bo_va;
		}
	}
@@ -1074,11 +1045,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
		params.func = amdgpu_vm_cpu_set_ptes;
	} else {
-		if (shadow) {
-			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
-			if (r)
-				return r;
-		}
		ring = container_of(vm->entity.sched, struct amdgpu_ring,
				    sched);
@@ -1114,22 +1080,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
		if (bo == NULL)
			continue;
-		if (bo->shadow) {
-			struct amdgpu_bo *pt_shadow = bo->shadow;
-			r = amdgpu_ttm_bind(&pt_shadow->tbo,
-					    &pt_shadow->tbo.mem);
-			if (r)
-				return r;
-		}
		pt = amdgpu_bo_gpu_offset(bo);
		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		if (parent->entries[pt_idx].addr == pt ||
-		    parent->entries[pt_idx].huge_page)
+		/* Don't update huge pages here */
+		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
+		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
			continue;
-		parent->entries[pt_idx].addr = pt;
+		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
@@ -1307,15 +1265,14 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
  *
  * Check if we can update the PD with a huge page.
  */
-static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
					struct amdgpu_vm_pt *entry,
					struct amdgpu_vm_pt *parent,
					unsigned nptes, uint64_t dst,
					uint64_t flags)
 {
	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
	uint64_t pd_addr, pde;
-	int r;
	/* In the case of a mixed PT the PDE must point to it*/
	if (p->adev->asic_type < CHIP_VEGA10 ||
@@ -1327,21 +1284,17 @@ static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
		flags = AMDGPU_PTE_VALID;
	} else {
+		/* Set the huge page flag to stop scanning at this PDE */
		flags |= AMDGPU_PDE_PTE;
	}
-	if (entry->addr == dst &&
-	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
-		return 0;
+	if (entry->addr == (dst | flags))
+		return;
-	entry->addr = dst;
-	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+	entry->addr = (dst | flags);
	if (use_cpu_update) {
-		r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
-		if (r)
-			return r;
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
		pde = pd_addr + (entry - parent->entries) * 8;
		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
	} else {
@@ -1354,8 +1307,6 @@ static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
		pde = pd_addr + (entry - parent->entries) * 8;
		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
	}
-	return 0;
 }
 /**
@@ -1382,7 +1333,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
	struct amdgpu_bo *pt;
	unsigned nptes;
	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
-	int r;
	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; addr += nptes,
@@ -1398,12 +1348,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
		else
			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-		r = amdgpu_vm_handle_huge_pages(params, entry, parent,
-						nptes, dst, flags);
-		if (r)
-			return r;
-		if (entry->huge_page)
+		amdgpu_vm_handle_huge_pages(params, entry, parent,
					    nptes, dst, flags);
+		/* We don't need to update PTEs for huge pages */
+		if (entry->addr & AMDGPU_PDE_PTE)
			continue;
		pt = entry->bo;
@@ -1462,9 +1410,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */
-	/* SI and newer are optimized for 64KB */
-	unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
+	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
	uint64_t frag_align = 1 << pages_per_frag;
@@ -1778,7 +1724,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
			struct amdgpu_bo_va *bo_va,
			bool clear)
 {
-	struct amdgpu_vm *vm = bo_va->vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdgpu_bo_va_mapping *mapping;
	dma_addr_t *pages_addr = NULL;
	uint64_t gtt_flags, flags;
@@ -1787,27 +1734,27 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
	struct dma_fence *exclusive;
	int r;
-	if (clear || !bo_va->bo) {
+	if (clear || !bo_va->base.bo) {
		mem = NULL;
		nodes = NULL;
		exclusive = NULL;
	} else {
		struct ttm_dma_tt *ttm;
-		mem = &bo_va->bo->tbo.mem;
+		mem = &bo_va->base.bo->tbo.mem;
		nodes = mem->mm_node;
		if (mem->mem_type == TTM_PL_TT) {
-			ttm = container_of(bo_va->bo->tbo.ttm, struct
-					   ttm_dma_tt, ttm);
+			ttm = container_of(bo_va->base.bo->tbo.ttm,
					   struct ttm_dma_tt, ttm);
			pages_addr = ttm->dma_address;
		}
-		exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
+		exclusive = reservation_object_get_excl(bo->tbo.resv);
	}
-	if (bo_va->bo) {
-		flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
-		gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
-			     adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ?
+	if (bo) {
+		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
+		gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) &&
+			     adev == amdgpu_ttm_adev(bo->tbo.bdev)) ?
			flags : 0;
	} else {
		flags = 0x0;
@@ -1815,7 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
	}
	spin_lock(&vm->status_lock);
-	if (!list_empty(&bo_va->vm_status))
+	if (!list_empty(&bo_va->base.vm_status))
		list_splice_init(&bo_va->valids, &bo_va->invalids);
	spin_unlock(&vm->status_lock);
@@ -1838,9 +1785,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
	spin_lock(&vm->status_lock);
	list_splice_init(&bo_va->invalids, &bo_va->valids);
-	list_del_init(&bo_va->vm_status);
+	list_del_init(&bo_va->base.vm_status);
	if (clear)
-		list_add(&bo_va->vm_status, &vm->cleared);
+		list_add(&bo_va->base.vm_status, &vm->cleared);
	spin_unlock(&vm->status_lock);
	if (vm->use_cpu_for_update) {
@@ -2034,26 +1981,26 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 }
 /**
- * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
+ * amdgpu_vm_clear_moved - clear moved BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  *
- * Make sure all invalidated BOs are cleared in the PT.
+ * Make sure all moved BOs are cleared in the PT.
  * Returns 0 for success.
  *
  * PTs have to be reserved and mutex must be locked!
  */
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
-			     struct amdgpu_vm *vm, struct amdgpu_sync *sync)
+int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct amdgpu_sync *sync)
 {
	struct amdgpu_bo_va *bo_va = NULL;
	int r = 0;
	spin_lock(&vm->status_lock);
-	while (!list_empty(&vm->invalidated)) {
-		bo_va = list_first_entry(&vm->invalidated,
-			struct amdgpu_bo_va, vm_status);
+	while (!list_empty(&vm->moved)) {
+		bo_va = list_first_entry(&vm->moved,
+			struct amdgpu_bo_va, base.vm_status);
		spin_unlock(&vm->status_lock);
		r = amdgpu_vm_bo_update(adev, bo_va, true);
@@ -2093,16 +2040,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
	if (bo_va == NULL) {
		return NULL;
	}
-	bo_va->vm = vm;
-	bo_va->bo = bo;
+	bo_va->base.vm = vm;
+	bo_va->base.bo = bo;
+	INIT_LIST_HEAD(&bo_va->base.bo_list);
+	INIT_LIST_HEAD(&bo_va->base.vm_status);
	bo_va->ref_count = 1;
-	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->valids);
	INIT_LIST_HEAD(&bo_va->invalids);
-	INIT_LIST_HEAD(&bo_va->vm_status);
	if (bo)
-		list_add_tail(&bo_va->bo_list, &bo->va);
+		list_add_tail(&bo_va->base.bo_list, &bo->va);
	return bo_va;
 }
@@ -2127,7 +2075,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
		     uint64_t size, uint64_t flags)
 {
	struct amdgpu_bo_va_mapping *mapping, *tmp;
-	struct amdgpu_vm *vm = bo_va->vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+	struct amdgpu_vm *vm = bo_va->base.vm;
	uint64_t eaddr;
	/* validate the parameters */
@@ -2138,7 +2087,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
	if (saddr >= eaddr ||
-	    (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo)))
+	    (bo && offset + size > amdgpu_bo_size(bo)))
		return -EINVAL;
	saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2148,7 +2097,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
	if (tmp) {
		/* bo and tmp overlap, invalid addr */
		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
-			"0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr,
+			"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
			tmp->start, tmp->last + 1);
		return -EINVAL;
	}
@@ -2193,7 +2142,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
			     uint64_t size, uint64_t flags)
 {
	struct amdgpu_bo_va_mapping *mapping;
-	struct amdgpu_vm *vm = bo_va->vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+	struct amdgpu_vm *vm = bo_va->base.vm;
	uint64_t eaddr;
	int r;
@@ -2205,7 +2155,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
	if (saddr >= eaddr ||
-	    (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo)))
+	    (bo && offset + size > amdgpu_bo_size(bo)))
		return -EINVAL;
	/* Allocate all the needed memory */
@@ -2213,7 +2163,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
	if (!mapping)
		return -ENOMEM;
-	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size);
+	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
	if (r) {
		kfree(mapping);
		return r;
@@ -2253,7 +2203,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
		       uint64_t saddr)
 {
	struct amdgpu_bo_va_mapping *mapping;
-	struct amdgpu_vm *vm = bo_va->vm;
+	struct amdgpu_vm *vm = bo_va->base.vm;
	bool valid = true;
	saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2401,12 +2351,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va)
 {
	struct amdgpu_bo_va_mapping *mapping, *next;
-	struct amdgpu_vm *vm = bo_va->vm;
+	struct amdgpu_vm *vm = bo_va->base.vm;
-	list_del(&bo_va->bo_list);
+	list_del(&bo_va->base.bo_list);
	spin_lock(&vm->status_lock);
-	list_del(&bo_va->vm_status);
+	list_del(&bo_va->base.vm_status);
	spin_unlock(&vm->status_lock);
	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
@@ -2438,13 +2388,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
			     struct amdgpu_bo *bo)
{ {
struct amdgpu_bo_va *bo_va; struct amdgpu_vm_bo_base *bo_base;
list_for_each_entry(bo_va, &bo->va, bo_list) { list_for_each_entry(bo_base, &bo->va, bo_list) {
spin_lock(&bo_va->vm->status_lock); spin_lock(&bo_base->vm->status_lock);
if (list_empty(&bo_va->vm_status)) if (list_empty(&bo_base->vm_status))
list_add(&bo_va->vm_status, &bo_va->vm->invalidated); list_add(&bo_base->vm_status,
spin_unlock(&bo_va->vm->status_lock); &bo_base->vm->moved);
spin_unlock(&bo_base->vm->status_lock);
} }
} }
...@@ -2462,12 +2413,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) ...@@ -2462,12 +2413,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
} }
/** /**
* amdgpu_vm_adjust_size - adjust vm size and block size * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
*
* @adev: amdgpu_device pointer
 * @fragment_size_default: the default fragment size to use when the amdgpu_vm_fragment_size module parameter is set to auto (-1)
*/
void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
{
if (amdgpu_vm_fragment_size == -1)
adev->vm_manager.fragment_size = fragment_size_default;
else
adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
}
/**
* amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
 * @vm_size: the default vm size if it's set auto * @vm_size: the default vm size if it's set auto
 * @fragment_size_default: the default fragment size if the module parameter is set to auto (-1)
*/ */
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
{ {
/* adjust vm size firstly */ /* adjust vm size firstly */
if (amdgpu_vm_size == -1) if (amdgpu_vm_size == -1)
...@@ -2482,8 +2447,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) ...@@ -2482,8 +2447,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
else else
adev->vm_manager.block_size = amdgpu_vm_block_size; adev->vm_manager.block_size = amdgpu_vm_block_size;
DRM_INFO("vm size is %llu GB, block size is %u-bit\n", amdgpu_vm_set_fragment_size(adev, fragment_size_default);
adev->vm_manager.vm_size, adev->vm_manager.block_size);
DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
adev->vm_manager.vm_size, adev->vm_manager.block_size,
adev->vm_manager.fragment_size);
} }
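To make the interaction of the two helpers above concrete, here is a minimal standalone C sketch of the selection logic: the amdgpu_vm_fragment_size module parameter wins unless it is left at -1 (auto), in which case the per-ASIC default passed in by the GMC code (4 for gmc_v6/7/8, 9 for gmc_v9 further down) is used. The function and variable layout below is illustrative, not driver API.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the module parameter; -1 means "auto", i.e.
 * take the per-ASIC default that the GMC code passes in. */
static int amdgpu_vm_fragment_size = -1;

static uint32_t pick_fragment_size(uint32_t fragment_size_default)
{
	if (amdgpu_vm_fragment_size == -1)
		return fragment_size_default;
	return (uint32_t)amdgpu_vm_fragment_size;
}

int main(void)
{
	printf("auto     -> %u\n", pick_fragment_size(9));
	amdgpu_vm_fragment_size = 5;	/* user set the module parameter */
	printf("override -> %u\n", pick_fragment_size(9));
	return 0;
}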
/** /**
...@@ -2512,7 +2480,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -2512,7 +2480,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL; vm->reserved_vmid[i] = NULL;
spin_lock_init(&vm->status_lock); spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed); INIT_LIST_HEAD(&vm->freed);
......
...@@ -50,11 +50,6 @@ struct amdgpu_bo_list_entry; ...@@ -50,11 +50,6 @@ struct amdgpu_bo_list_entry;
/* PTBs (Page Table Blocks) need to be aligned to 32K */ /* PTBs (Page Table Blocks) need to be aligned to 32K */
#define AMDGPU_VM_PTB_ALIGN_SIZE 32768 #define AMDGPU_VM_PTB_ALIGN_SIZE 32768
/* LOG2 number of continuous pages for the fragment field */
#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
((adev)->asic_type < CHIP_VEGA10 ? 4 : \
(adev)->vm_manager.block_size)
#define AMDGPU_PTE_VALID (1ULL << 0) #define AMDGPU_PTE_VALID (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1) #define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_SNOOPED (1ULL << 2) #define AMDGPU_PTE_SNOOPED (1ULL << 2)
...@@ -99,11 +94,22 @@ struct amdgpu_bo_list_entry; ...@@ -99,11 +94,22 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
/* base structure for tracking BO usage in a VM */
struct amdgpu_vm_bo_base {
/* constant after initialization */
struct amdgpu_vm *vm;
struct amdgpu_bo *bo;
/* protected by bo being reserved */
struct list_head bo_list;
/* protected by spinlock */
struct list_head vm_status;
};
struct amdgpu_vm_pt { struct amdgpu_vm_pt {
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
uint64_t addr; uint64_t addr;
bool huge_page;
/* array of page tables, one for each directory entry */ /* array of page tables, one for each directory entry */
struct amdgpu_vm_pt *entries; struct amdgpu_vm_pt *entries;
...@@ -118,7 +124,7 @@ struct amdgpu_vm { ...@@ -118,7 +124,7 @@ struct amdgpu_vm {
spinlock_t status_lock; spinlock_t status_lock;
/* BOs moved, but not yet updated in the PT */ /* BOs moved, but not yet updated in the PT */
struct list_head invalidated; struct list_head moved;
/* BOs cleared in the PT because of a move */ /* BOs cleared in the PT because of a move */
struct list_head cleared; struct list_head cleared;
...@@ -141,8 +147,6 @@ struct amdgpu_vm { ...@@ -141,8 +147,6 @@ struct amdgpu_vm {
u64 client_id; u64 client_id;
/* dedicated to vm */ /* dedicated to vm */
struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* each VM will map on CSA */
struct amdgpu_bo_va *csa_bo_va;
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
bool use_cpu_for_update; bool use_cpu_for_update;
...@@ -191,6 +195,7 @@ struct amdgpu_vm_manager { ...@@ -191,6 +195,7 @@ struct amdgpu_vm_manager {
uint32_t num_level; uint32_t num_level;
uint64_t vm_size; uint64_t vm_size;
uint32_t block_size; uint32_t block_size;
uint32_t fragment_size;
/* vram base address for page table entry */ /* vram base address for page table entry */
u64 vram_base_offset; u64 vram_base_offset;
/* vm pte handling */ /* vm pte handling */
...@@ -223,8 +228,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, ...@@ -223,8 +228,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*callback)(void *p, struct amdgpu_bo *bo), int (*callback)(void *p, struct amdgpu_bo *bo),
void *param); void *param);
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_vm *vm,
uint64_t saddr, uint64_t size); uint64_t saddr, uint64_t size);
...@@ -240,8 +243,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, ...@@ -240,8 +243,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
int amdgpu_vm_clear_freed(struct amdgpu_device *adev, int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_vm *vm,
struct dma_fence **fence); struct dma_fence **fence);
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_sync *sync); struct amdgpu_sync *sync);
int amdgpu_vm_bo_update(struct amdgpu_device *adev, int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va, struct amdgpu_bo_va *bo_va,
bool clear); bool clear);
...@@ -268,7 +271,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, ...@@ -268,7 +271,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
uint64_t saddr, uint64_t size); uint64_t saddr, uint64_t size);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va); struct amdgpu_bo_va *bo_va);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
uint32_t fragment_size_default);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
uint32_t fragment_size_default);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job); struct amdgpu_job *job);
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
struct amdgpu_vram_mgr { struct amdgpu_vram_mgr {
struct drm_mm mm; struct drm_mm mm;
spinlock_t lock; spinlock_t lock;
atomic64_t usage;
atomic64_t vis_usage;
}; };
/** /**
...@@ -78,6 +80,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) ...@@ -78,6 +80,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
return 0; return 0;
} }
/**
* amdgpu_vram_mgr_vis_size - Calculate visible node size
*
* @adev: amdgpu device structure
* @node: MM node structure
*
* Calculate how many bytes of the MM node are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
struct drm_mm_node *node)
{
uint64_t start = node->start << PAGE_SHIFT;
uint64_t end = (node->size + node->start) << PAGE_SHIFT;
if (start >= adev->mc.visible_vram_size)
return 0;
return (end > adev->mc.visible_vram_size ?
adev->mc.visible_vram_size : end) - start;
}
/** /**
* amdgpu_vram_mgr_new - allocate new ranges * amdgpu_vram_mgr_new - allocate new ranges
* *
...@@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, ...@@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
const struct ttm_place *place, const struct ttm_place *place,
struct ttm_mem_reg *mem) struct ttm_mem_reg *mem)
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
struct amdgpu_vram_mgr *mgr = man->priv; struct amdgpu_vram_mgr *mgr = man->priv;
struct drm_mm *mm = &mgr->mm; struct drm_mm *mm = &mgr->mm;
struct drm_mm_node *nodes; struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode; enum drm_mm_insert_mode mode;
unsigned long lpfn, num_nodes, pages_per_node, pages_left; unsigned long lpfn, num_nodes, pages_per_node, pages_left;
uint64_t usage = 0, vis_usage = 0;
unsigned i; unsigned i;
int r; int r;
...@@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, ...@@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
if (unlikely(r)) if (unlikely(r))
goto error; goto error;
usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
/* Calculate a virtual BO start address to easily check if /* Calculate a virtual BO start address to easily check if
* everything is CPU accessible. * everything is CPU accessible.
*/ */
...@@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, ...@@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
} }
spin_unlock(&mgr->lock); spin_unlock(&mgr->lock);
atomic64_add(usage, &mgr->usage);
atomic64_add(vis_usage, &mgr->vis_usage);
mem->mm_node = nodes; mem->mm_node = nodes;
return 0; return 0;
...@@ -181,8 +212,10 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, ...@@ -181,8 +212,10 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
struct ttm_mem_reg *mem) struct ttm_mem_reg *mem)
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
struct amdgpu_vram_mgr *mgr = man->priv; struct amdgpu_vram_mgr *mgr = man->priv;
struct drm_mm_node *nodes = mem->mm_node; struct drm_mm_node *nodes = mem->mm_node;
uint64_t usage = 0, vis_usage = 0;
unsigned pages = mem->num_pages; unsigned pages = mem->num_pages;
if (!mem->mm_node) if (!mem->mm_node)
...@@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, ...@@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
while (pages) { while (pages) {
pages -= nodes->size; pages -= nodes->size;
drm_mm_remove_node(nodes); drm_mm_remove_node(nodes);
usage += nodes->size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
++nodes; ++nodes;
} }
spin_unlock(&mgr->lock); spin_unlock(&mgr->lock);
atomic64_sub(usage, &mgr->usage);
atomic64_sub(vis_usage, &mgr->vis_usage);
kfree(mem->mm_node); kfree(mem->mm_node);
mem->mm_node = NULL; mem->mm_node = NULL;
} }
/**
* amdgpu_vram_mgr_usage - how many bytes are used in this domain
*
* @man: TTM memory type manager
*
* Returns how many bytes are used in this domain.
*/
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man)
{
struct amdgpu_vram_mgr *mgr = man->priv;
return atomic64_read(&mgr->usage);
}
/**
* amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
*
* @man: TTM memory type manager
*
* Returns how many bytes are used in the visible part of VRAM
*/
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man)
{
struct amdgpu_vram_mgr *mgr = man->priv;
return atomic64_read(&mgr->vis_usage);
}
/** /**
* amdgpu_vram_mgr_debug - dump VRAM table * amdgpu_vram_mgr_debug - dump VRAM table
* *
* @man: TTM memory type manager * @man: TTM memory type manager
* @prefix: text prefix * @printer: DRM printer to use
* *
* Dump the table content using printk. * Dump the table content using printk.
*/ */
static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
const char *prefix) struct drm_printer *printer)
{ {
struct amdgpu_vram_mgr *mgr = man->priv; struct amdgpu_vram_mgr *mgr = man->priv;
struct drm_printer p = drm_debug_printer(prefix);
spin_lock(&mgr->lock); spin_lock(&mgr->lock);
drm_mm_print(&mgr->mm, &p); drm_mm_print(&mgr->mm, printer);
spin_unlock(&mgr->lock); spin_unlock(&mgr->lock);
drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
man->size, amdgpu_vram_mgr_usage(man) >> 20,
amdgpu_vram_mgr_vis_usage(man) >> 20);
} }
const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
......
...@@ -2157,7 +2157,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) ...@@ -2157,7 +2157,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
const struct cs_section_def *sect = NULL; const struct cs_section_def *sect = NULL;
const struct cs_extent_def *ext = NULL; const struct cs_extent_def *ext = NULL;
int r, i; int r, i, tmp;
/* init the CP */ /* init the CP */
WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
...@@ -2165,7 +2165,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) ...@@ -2165,7 +2165,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v9_0_cp_gfx_enable(adev, true); gfx_v9_0_cp_gfx_enable(adev, true);
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
if (r) { if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
return r; return r;
...@@ -2203,6 +2203,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) ...@@ -2203,6 +2203,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_write(ring, 0x8000); amdgpu_ring_write(ring, 0x8000);
amdgpu_ring_write(ring, 0x8000); amdgpu_ring_write(ring, 0x8000);
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, tmp);
amdgpu_ring_write(ring, 0);
amdgpu_ring_commit(ring); amdgpu_ring_commit(ring);
return 0; return 0;
......
...@@ -124,7 +124,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ...@@ -124,7 +124,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{ {
uint32_t tmp; uint32_t tmp, field;
/* Setup L2 cache */ /* Setup L2 cache */
tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL); tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
...@@ -143,9 +143,10 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) ...@@ -143,9 +143,10 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
field = adev->vm_manager.fragment_size;
tmp = mmVM_L2_CNTL3_DEFAULT; tmp = mmVM_L2_CNTL3_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT; tmp = mmVM_L2_CNTL4_DEFAULT;
......
...@@ -461,6 +461,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) ...@@ -461,6 +461,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
{ {
int r, i; int r, i;
u32 field;
if (adev->gart.robj == NULL) { if (adev->gart.robj == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
...@@ -488,10 +489,12 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) ...@@ -488,10 +489,12 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL2, WREG32(mmVM_L2_CNTL2,
VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK |
VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK);
field = adev->vm_manager.fragment_size;
WREG32(mmVM_L2_CNTL3, WREG32(mmVM_L2_CNTL3,
VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
(4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) | (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
(4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
/* setup context0 */ /* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
...@@ -811,7 +814,7 @@ static int gmc_v6_0_sw_init(void *handle) ...@@ -811,7 +814,7 @@ static int gmc_v6_0_sw_init(void *handle)
if (r) if (r)
return r; return r;
amdgpu_vm_adjust_size(adev, 64); amdgpu_vm_adjust_size(adev, 64, 4);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
adev->mc.mc_mask = 0xffffffffffULL; adev->mc.mc_mask = 0xffffffffffULL;
......
...@@ -562,7 +562,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) ...@@ -562,7 +562,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
{ {
int r, i; int r, i;
u32 tmp; u32 tmp, field;
if (adev->gart.robj == NULL) { if (adev->gart.robj == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
...@@ -592,10 +592,12 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) ...@@ -592,10 +592,12 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32(mmVM_L2_CNTL2, tmp); WREG32(mmVM_L2_CNTL2, tmp);
field = adev->vm_manager.fragment_size;
tmp = RREG32(mmVM_L2_CNTL3); tmp = RREG32(mmVM_L2_CNTL3);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
WREG32(mmVM_L2_CNTL3, tmp); WREG32(mmVM_L2_CNTL3, tmp);
/* setup context0 */ /* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
...@@ -948,7 +950,7 @@ static int gmc_v7_0_sw_init(void *handle) ...@@ -948,7 +950,7 @@ static int gmc_v7_0_sw_init(void *handle)
* Currently set to 4GB ((1 << 20) 4k pages). * Currently set to 4GB ((1 << 20) 4k pages).
* Max GPUVM size for cayman and SI is 40 bits. * Max GPUVM size for cayman and SI is 40 bits.
*/ */
amdgpu_vm_adjust_size(adev, 64); amdgpu_vm_adjust_size(adev, 64, 4);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
/* Set the internal MC address mask /* Set the internal MC address mask
......
...@@ -762,7 +762,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) ...@@ -762,7 +762,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
{ {
int r, i; int r, i;
u32 tmp; u32 tmp, field;
if (adev->gart.robj == NULL) { if (adev->gart.robj == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
...@@ -793,10 +793,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) ...@@ -793,10 +793,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32(mmVM_L2_CNTL2, tmp); WREG32(mmVM_L2_CNTL2, tmp);
field = adev->vm_manager.fragment_size;
tmp = RREG32(mmVM_L2_CNTL3); tmp = RREG32(mmVM_L2_CNTL3);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
WREG32(mmVM_L2_CNTL3, tmp); WREG32(mmVM_L2_CNTL3, tmp);
/* XXX: set to enable PTE/PDE in system memory */ /* XXX: set to enable PTE/PDE in system memory */
tmp = RREG32(mmVM_L2_CNTL4); tmp = RREG32(mmVM_L2_CNTL4);
...@@ -1046,7 +1048,7 @@ static int gmc_v8_0_sw_init(void *handle) ...@@ -1046,7 +1048,7 @@ static int gmc_v8_0_sw_init(void *handle)
* Currently set to 4GB ((1 << 20) 4k pages). * Currently set to 4GB ((1 << 20) 4k pages).
* Max GPUVM size for cayman and SI is 40 bits. * Max GPUVM size for cayman and SI is 40 bits.
*/ */
amdgpu_vm_adjust_size(adev, 64); amdgpu_vm_adjust_size(adev, 64, 4);
adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
/* Set the internal MC address mask /* Set the internal MC address mask
......
...@@ -541,9 +541,10 @@ static int gmc_v9_0_sw_init(void *handle) ...@@ -541,9 +541,10 @@ static int gmc_v9_0_sw_init(void *handle)
adev->vm_manager.vm_size = 1U << 18; adev->vm_manager.vm_size = 1U << 18;
adev->vm_manager.block_size = 9; adev->vm_manager.block_size = 9;
adev->vm_manager.num_level = 3; adev->vm_manager.num_level = 3;
amdgpu_vm_set_fragment_size(adev, 9);
} else { } else {
/* vm_size is 64GB for legacy 2-level page support*/ /* vm_size is 64GB for legacy 2-level page support */
amdgpu_vm_adjust_size(adev, 64); amdgpu_vm_adjust_size(adev, 64, 9);
adev->vm_manager.num_level = 1; adev->vm_manager.num_level = 1;
} }
break; break;
...@@ -558,14 +559,16 @@ static int gmc_v9_0_sw_init(void *handle) ...@@ -558,14 +559,16 @@ static int gmc_v9_0_sw_init(void *handle)
adev->vm_manager.vm_size = 1U << 18; adev->vm_manager.vm_size = 1U << 18;
adev->vm_manager.block_size = 9; adev->vm_manager.block_size = 9;
adev->vm_manager.num_level = 3; adev->vm_manager.num_level = 3;
amdgpu_vm_set_fragment_size(adev, 9);
break; break;
default: default:
break; break;
} }
DRM_INFO("vm size is %llu GB, block size is %u-bit\n", DRM_INFO("vm size is %llu GB, block size is %u-bit,fragment size is %u-bit\n",
adev->vm_manager.vm_size, adev->vm_manager.vm_size,
adev->vm_manager.block_size); adev->vm_manager.block_size,
adev->vm_manager.fragment_size);
/* This interrupt is VMC page fault.*/ /* This interrupt is VMC page fault.*/
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
......
...@@ -138,7 +138,7 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ...@@ -138,7 +138,7 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{ {
uint32_t tmp; uint32_t tmp, field;
/* Setup L2 cache */ /* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
...@@ -157,9 +157,10 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) ...@@ -157,9 +157,10 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
field = adev->vm_manager.fragment_size;
tmp = mmVM_L2_CNTL3_DEFAULT; tmp = mmVM_L2_CNTL3_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT; tmp = mmVM_L2_CNTL4_DEFAULT;
......
...@@ -250,6 +250,7 @@ ...@@ -250,6 +250,7 @@
#define PACKET3_SET_UCONFIG_REG 0x79 #define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
#define PACKET3_SCRATCH_RAM_WRITE 0x7D #define PACKET3_SCRATCH_RAM_WRITE 0x7D
#define PACKET3_SCRATCH_RAM_READ 0x7E #define PACKET3_SCRATCH_RAM_READ 0x7E
#define PACKET3_LOAD_CONST_RAM 0x80 #define PACKET3_LOAD_CONST_RAM 0x80
......
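The new INDEX_TYPE define above is ORed with a register offset relative to the UCONFIG base to form the payload that gfx_v9_0_cp_gfx_start() writes after the SET_UCONFIG_REG header. A standalone sketch of that composition; the concrete register offset is a hypothetical placeholder standing in for SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE):

#include <stdint.h>
#include <stdio.h>

/* Values taken from the defines above; the offset below is illustrative. */
#define PACKET3_SET_UCONFIG_REG_START		0x0000c000
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE	(2u << 28)

int main(void)
{
	uint32_t vgt_index_type = 0x0000c243;	/* hypothetical UCONFIG offset */
	uint32_t payload = PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
			   (vgt_index_type - PACKET3_SET_UCONFIG_REG_START);

	/* low bits: register index relative to the UCONFIG base;
	 * bits 31:28: the index-type select introduced by this patch.
	 * The ring write then follows with the register value (0 above). */
	printf("second dword of the packet: 0x%08x\n", payload);	/* 0x20000243 */
	return 0;
}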
...@@ -179,7 +179,8 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, ...@@ -179,7 +179,8 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
} }
static void static void
nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) nouveau_gart_manager_debug(struct ttm_mem_type_manager *man,
struct drm_printer *printer)
{ {
} }
...@@ -252,7 +253,8 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man, ...@@ -252,7 +253,8 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man,
} }
static void static void
nv04_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) nv04_gart_manager_debug(struct ttm_mem_type_manager *man,
struct drm_printer *printer)
{ {
} }
......
...@@ -1030,19 +1030,17 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma) ...@@ -1030,19 +1030,17 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma)
static int radeon_mm_dump_table(struct seq_file *m, void *data) static int radeon_mm_dump_table(struct seq_file *m, void *data)
{ {
struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_info_node *node = (struct drm_info_node *)m->private;
unsigned ttm_pl = *(int *)node->info_ent->data; unsigned ttm_pl = *(int*)node->info_ent->data;
struct drm_device *dev = node->minor->dev; struct drm_device *dev = node->minor->dev;
struct radeon_device *rdev = dev->dev_private; struct radeon_device *rdev = dev->dev_private;
struct drm_mm *mm = (struct drm_mm *)rdev->mman.bdev.man[ttm_pl].priv; struct ttm_mem_type_manager *man = &rdev->mman.bdev.man[ttm_pl];
struct ttm_bo_global *glob = rdev->mman.bdev.glob;
struct drm_printer p = drm_seq_file_printer(m); struct drm_printer p = drm_seq_file_printer(m);
spin_lock(&glob->lru_lock); man->func->debug(man, &p);
drm_mm_print(mm, &p);
spin_unlock(&glob->lru_lock);
return 0; return 0;
} }
static int ttm_pl_vram = TTM_PL_VRAM; static int ttm_pl_vram = TTM_PL_VRAM;
static int ttm_pl_tt = TTM_PL_TT; static int ttm_pl_tt = TTM_PL_TT;
......
...@@ -70,6 +70,7 @@ static inline int ttm_mem_type_from_place(const struct ttm_place *place, ...@@ -70,6 +70,7 @@ static inline int ttm_mem_type_from_place(const struct ttm_place *place,
static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type)
{ {
struct ttm_mem_type_manager *man = &bdev->man[mem_type]; struct ttm_mem_type_manager *man = &bdev->man[mem_type];
struct drm_printer p = drm_debug_printer(TTM_PFX);
pr_err(" has_type: %d\n", man->has_type); pr_err(" has_type: %d\n", man->has_type);
pr_err(" use_type: %d\n", man->use_type); pr_err(" use_type: %d\n", man->use_type);
...@@ -79,7 +80,7 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) ...@@ -79,7 +80,7 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type)
pr_err(" available_caching: 0x%08X\n", man->available_caching); pr_err(" available_caching: 0x%08X\n", man->available_caching);
pr_err(" default_caching: 0x%08X\n", man->default_caching); pr_err(" default_caching: 0x%08X\n", man->default_caching);
if (mem_type != TTM_PL_SYSTEM) if (mem_type != TTM_PL_SYSTEM)
(*man->func->debug)(man, TTM_PFX); (*man->func->debug)(man, &p);
} }
static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
...@@ -394,14 +395,33 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) ...@@ -394,14 +395,33 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
ww_mutex_unlock (&bo->resv->lock); ww_mutex_unlock (&bo->resv->lock);
} }
static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
{
int r;
if (bo->resv == &bo->ttm_resv)
return 0;
reservation_object_init(&bo->ttm_resv);
BUG_ON(!reservation_object_trylock(&bo->ttm_resv));
r = reservation_object_copy_fences(&bo->ttm_resv, bo->resv);
if (r) {
reservation_object_unlock(&bo->ttm_resv);
reservation_object_fini(&bo->ttm_resv);
}
return r;
}
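ttm_bo_individualize_resv() snapshots the fences of a possibly shared reservation object into the BO's private ttm_resv, so the delayed-destroy path only has to wait for work that existed at deletion time; fences added to the shared object afterwards no longer delay the free. A plain-C model of that idea (no kernel API, all names illustrative):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAX_FENCES 8

struct snap { int fences[MAX_FENCES]; int count; };

/* like copying the current fences into the BO's private ttm_resv */
static void snapshot(struct snap *dst, const struct snap *src)
{
	memcpy(dst, src, sizeof(*dst));
}

static bool all_signaled(const struct snap *s, int last_signaled_seqno)
{
	for (int i = 0; i < s->count; i++)
		if (s->fences[i] > last_signaled_seqno)
			return false;
	return true;
}

int main(void)
{
	struct snap shared = { .fences = { 10, 11 }, .count = 2 };
	struct snap private;

	snapshot(&private, &shared);
	shared.fences[shared.count++] = 12;	/* other users queue more work */

	/* only the snapshot has to signal before the BO can be freed: yes */
	printf("BO freeable after seqno 11: %s\n",
	       all_signaled(&private, 11) ? "yes" : "no");
	return 0;
}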
static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
{ {
struct reservation_object_list *fobj; struct reservation_object_list *fobj;
struct dma_fence *fence; struct dma_fence *fence;
int i; int i;
fobj = reservation_object_get_list(bo->resv); fobj = reservation_object_get_list(&bo->ttm_resv);
fence = reservation_object_get_excl(bo->resv); fence = reservation_object_get_excl(&bo->ttm_resv);
if (fence && !fence->ops->signaled) if (fence && !fence->ops->signaled)
dma_fence_enable_sw_signaling(fence); dma_fence_enable_sw_signaling(fence);
...@@ -430,8 +450,19 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) ...@@ -430,8 +450,19 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
ttm_bo_cleanup_memtype_use(bo); ttm_bo_cleanup_memtype_use(bo);
return; return;
} else }
ttm_bo_flush_all_fences(bo);
ret = ttm_bo_individualize_resv(bo);
if (ret) {
/* Last resort, if we fail to allocate memory for the
* fences block for the BO to become idle and free it.
*/
spin_unlock(&glob->lru_lock);
ttm_bo_wait(bo, true, true);
ttm_bo_cleanup_memtype_use(bo);
return;
}
ttm_bo_flush_all_fences(bo);
/* /*
* Make NO_EVICT bos immediately available to * Make NO_EVICT bos immediately available to
...@@ -443,6 +474,8 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) ...@@ -443,6 +474,8 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
ttm_bo_add_to_lru(bo); ttm_bo_add_to_lru(bo);
} }
if (bo->resv != &bo->ttm_resv)
reservation_object_unlock(&bo->ttm_resv);
__ttm_bo_unreserve(bo); __ttm_bo_unreserve(bo);
} }
...@@ -471,17 +504,25 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, ...@@ -471,17 +504,25 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
bool no_wait_gpu) bool no_wait_gpu)
{ {
struct ttm_bo_global *glob = bo->glob; struct ttm_bo_global *glob = bo->glob;
struct reservation_object *resv;
int ret; int ret;
ret = ttm_bo_wait(bo, false, true); if (unlikely(list_empty(&bo->ddestroy)))
resv = bo->resv;
else
resv = &bo->ttm_resv;
if (reservation_object_test_signaled_rcu(resv, true))
ret = 0;
else
ret = -EBUSY;
if (ret && !no_wait_gpu) { if (ret && !no_wait_gpu) {
long lret; long lret;
ww_mutex_unlock(&bo->resv->lock); ww_mutex_unlock(&bo->resv->lock);
spin_unlock(&glob->lru_lock); spin_unlock(&glob->lru_lock);
lret = reservation_object_wait_timeout_rcu(bo->resv, lret = reservation_object_wait_timeout_rcu(resv, true,
true,
interruptible, interruptible,
30 * HZ); 30 * HZ);
...@@ -505,13 +546,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, ...@@ -505,13 +546,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
spin_unlock(&glob->lru_lock); spin_unlock(&glob->lru_lock);
return 0; return 0;
} }
/*
* remove sync_obj with ttm_bo_wait, the wait should be
* finished, and no new wait object should have been added.
*/
ret = ttm_bo_wait(bo, false, true);
WARN_ON(ret);
} }
if (ret || unlikely(list_empty(&bo->ddestroy))) { if (ret || unlikely(list_empty(&bo->ddestroy))) {
......
...@@ -136,13 +136,12 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) ...@@ -136,13 +136,12 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man)
} }
static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, static void ttm_bo_man_debug(struct ttm_mem_type_manager *man,
const char *prefix) struct drm_printer *printer)
{ {
struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv; struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
struct drm_printer p = drm_debug_printer(prefix);
spin_lock(&rman->lock); spin_lock(&rman->lock);
drm_mm_print(&rman->mm, &p); drm_mm_print(&rman->mm, printer);
spin_unlock(&rman->lock); spin_unlock(&rman->lock);
} }
......
...@@ -615,7 +615,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, ...@@ -615,7 +615,7 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool,
} else { } else {
pr_err("Failed to fill pool (%p)\n", pool); pr_err("Failed to fill pool (%p)\n", pool);
/* If we have any pages left put them to the pool. */ /* If we have any pages left put them to the pool. */
list_for_each_entry(p, &pool->list, lru) { list_for_each_entry(p, &new_pages, lru) {
++cpages; ++cpages;
} }
list_splice(&new_pages, &pool->list); list_splice(&new_pages, &pool->list);
......
...@@ -192,7 +192,7 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) ...@@ -192,7 +192,7 @@ static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man)
} }
static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, static void ttm_bo_man_debug(struct ttm_mem_type_manager *man,
const char *prefix) struct drm_printer *printer)
{ {
} }
......
...@@ -157,9 +157,9 @@ static int vmw_gmrid_man_takedown(struct ttm_mem_type_manager *man) ...@@ -157,9 +157,9 @@ static int vmw_gmrid_man_takedown(struct ttm_mem_type_manager *man)
} }
static void vmw_gmrid_man_debug(struct ttm_mem_type_manager *man, static void vmw_gmrid_man_debug(struct ttm_mem_type_manager *man,
const char *prefix) struct drm_printer *printer)
{ {
pr_info("%s: No debug info available for the GMR id manager\n", prefix); drm_printf(printer, "No debug info available for the GMR id manager\n");
} }
const struct ttm_mem_type_manager_func vmw_gmrid_manager_func = { const struct ttm_mem_type_manager_func vmw_gmrid_manager_func = {
......
...@@ -229,13 +229,14 @@ struct ttm_mem_type_manager_func { ...@@ -229,13 +229,14 @@ struct ttm_mem_type_manager_func {
* struct ttm_mem_type_manager member debug * struct ttm_mem_type_manager member debug
* *
* @man: Pointer to a memory type manager. * @man: Pointer to a memory type manager.
* @prefix: Prefix to be used in printout to identify the caller. * @printer: Printer to use for the debug output.
* *
* This function is called to print out the state of the memory * This function is called to print out the state of the memory
* type manager to aid debugging of out-of-memory conditions. * type manager to aid debugging of out-of-memory conditions.
* It may not be called from within atomic context. * It may not be called from within atomic context.
*/ */
void (*debug)(struct ttm_mem_type_manager *man, const char *prefix); void (*debug)(struct ttm_mem_type_manager *man,
struct drm_printer *printer);
}; };
/** /**
......
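For drivers not touched by this series, adapting to the new debug callback signature amounts to printing through the drm_printer handed in by TTM instead of constructing a drm_debug_printer locally from a prefix string. A hedged sketch of such an implementation, with hypothetical driver names and only the debug member shown:

#include <drm/drm_print.h>
#include <drm/ttm/ttm_bo_driver.h>

/* Hypothetical driver-side memory manager debug hook using the new
 * signature: the caller supplies the drm_printer. */
static void mydrv_mm_debug(struct ttm_mem_type_manager *man,
			   struct drm_printer *printer)
{
	drm_printf(printer, "man size: %llu pages\n", man->size);
}

static const struct ttm_mem_type_manager_func mydrv_mm_func = {
	/* .init, .takedown, .get_node and .put_node omitted for brevity */
	.debug = mydrv_mm_debug,
};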