Commit 5263925c authored by Dave Airlie's avatar Dave Airlie

Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next

First radeon and amdgpu pull request for 4.6.  Highlights:
- ACP support for APUs with i2s audio
- CS ioctl optimizations
- GPU scheduler optimizations
- GPUVM optimizations
- Initial GPU reset support (not enabled yet)
- New powerplay sysfs interface for manually selecting clocks
- Powerplay fixes
- Virtualization fixes
- Removal of hw semaphore support
- Lots of other misc fixes and cleanups

* 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (118 commits)
  drm/amdgpu: Don't call interval_tree_remove in amdgpu_mn_destroy
  drm/amdgpu: Fix race condition in amdgpu_mn_unregister
  drm/amdgpu: cleanup gem init/finit
  drm/amdgpu: rework GEM info printing
  drm/amdgpu: print the GPU offset as well in gem_info
  drm/amdgpu: optionally print the pin count in gem_info as well
  drm/amdgpu: print the BO size only once in amdgpu_gem_info
  drm/amdgpu: print pid as integer
  drm/amdgpu: remove page flip work queue v3
  drm/amdgpu: stop blocking for page filp fences
  drm/amdgpu: stop calling amdgpu_gpu_reset from the flip code
  drm/amdgpu: remove fence reset detection leftovers
  drm/amdgpu: Fix race condition in MMU notifier release
  drm/radeon: Fix WARN_ON if DRM_DP_AUX_CHARDEV is enabled
  drm/amdgpu/vi: move uvd tiling config setup into uvd code
  drm/amdgpu/vi: move sdma tiling config setup into sdma code
  drm/amdgpu/cik: move uvd tiling config setup into uvd code
  drm/amdgpu/cik: move sdma tiling config setup into sdma code
  drm/amdgpu/gfx7: rework gpu_init()
  drm/amdgpu/gfx: clean up harvest configuration (v2)
  ...
parents 08244c00 390be282
......@@ -172,6 +172,8 @@ config DRM_AMDGPU
source "drivers/gpu/drm/amd/amdgpu/Kconfig"
source "drivers/gpu/drm/amd/powerplay/Kconfig"
source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/nouveau/Kconfig"
config DRM_I810
......
menu "ACP Configuration"
config DRM_AMD_ACP
bool "Enable ACP IP support"
default y
select MFD_CORE
select PM_GENERIC_DOMAINS if PM
help
Choose this option to enable ACP IP support for AMD SOCs.
endmenu
#
# Makefile for the ACP, which is a sub-component
# of AMDSOC/AMDGPU drm driver.
# It provides the HW control for ACP related functionalities.
subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include "acp_gfx_if.h"
#define ACP_MODE_I2S 0
#define ACP_MODE_AZ 1
#define mmACP_AZALIA_I2S_SELECT 0x51d4
int amd_acp_hw_init(void *cgs_device,
unsigned acp_version_major, unsigned acp_version_minor)
{
unsigned int acp_mode = ACP_MODE_I2S;
if ((acp_version_major == 2) && (acp_version_minor == 2))
acp_mode = cgs_read_register(cgs_device,
mmACP_AZALIA_I2S_SELECT);
if (acp_mode != ACP_MODE_I2S)
return -ENODEV;
return 0;
}
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _ACP_GFX_IF_H
#define _ACP_GFX_IF_H
#include <linux/types.h>
#include "cgs_linux.h"
#include "cgs_common.h"
int amd_acp_hw_init(void *cgs_device,
unsigned acp_version_major, unsigned acp_version_minor);
#endif /* _ACP_GFX_IF_H */
......@@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/include \
-I$(FULL_AMD_PATH)/amdgpu \
-I$(FULL_AMD_PATH)/scheduler \
-I$(FULL_AMD_PATH)/powerplay/inc
-I$(FULL_AMD_PATH)/powerplay/inc \
-I$(FULL_AMD_PATH)/acp/include
amdgpu-y := amdgpu_drv.o
......@@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
......@@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o
amdgpu-y += \
../scheduler/gpu_scheduler.o \
../scheduler/sched_fence.o \
amdgpu_sched.o
amdgpu_job.o
# ACP componet
ifneq ($(CONFIG_DRM_AMD_ACP),)
amdgpu-y += amdgpu_acp.o
AMDACPPATH := ../acp
include $(FULL_AMD_PATH)/acp/Makefile
amdgpu-y += $(AMD_ACP_FILES)
endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
......
This diff is collapsed.
This diff is collapsed.
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef __AMDGPU_ACP_H__
#define __AMDGPU_ACP_H__
#include <linux/mfd/core.h>
struct amdgpu_acp {
struct device *parent;
void *cgs_device;
struct amd_acp_private *private;
struct mfd_cell *acp_cell;
struct resource *acp_res;
struct acp_pm_domain *acp_genpd;
};
extern const struct amd_ip_funcs acp_ip_funcs;
#endif /* __AMDGPU_ACP_H__ */
......@@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
return -EINVAL;
}
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
{
int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);
u8 frev, crev;
u16 data_offset, size;
if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
&frev, &crev, &data_offset))
return true;
return false;
}
void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
{
uint32_t bios_6_scratch;
......
......@@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
u8 module_index,
struct atom_mc_reg_table *reg_table);
bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
......
......@@ -32,6 +32,9 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
struct amdgpu_bo_list **result,
int *id)
......@@ -90,6 +93,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
bool has_userptr = false;
unsigned i;
int r;
array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
if (!array)
......@@ -99,31 +103,34 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
for (i = 0; i < num_entries; ++i) {
struct amdgpu_bo_list_entry *entry = &array[i];
struct drm_gem_object *gobj;
struct mm_struct *usermm;
gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
if (!gobj)
if (!gobj) {
r = -ENOENT;
goto error_free;
}
entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
drm_gem_object_unreference_unlocked(gobj);
entry->priority = info[i].bo_priority;
entry->prefered_domains = entry->robj->initial_domain;
entry->allowed_domains = entry->prefered_domains;
if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) {
entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY);
usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm);
if (usermm) {
if (usermm != current->mm) {
r = -EPERM;
goto error_free;
}
has_userptr = true;
entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
}
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
gds_obj = entry->robj;
if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
gws_obj = entry->robj;
if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
oa_obj = entry->robj;
trace_amdgpu_bo_list_set(list, entry->robj);
......@@ -145,7 +152,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
error_free:
drm_free_large(array);
return -ENOENT;
return r;
}
struct amdgpu_bo_list *
......@@ -161,6 +168,36 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
return result;
}
void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
struct list_head *validated)
{
/* This is based on the bucket sort with O(n) time complexity.
* An item with priority "i" is added to bucket[i]. The lists are then
* concatenated in descending order.
*/
struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
unsigned i;
for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
INIT_LIST_HEAD(&bucket[i]);
/* Since buffers which appear sooner in the relocation list are
* likely to be used more often than buffers which appear later
* in the list, the sort mustn't change the ordering of buffers
* with the same priority, i.e. it must be stable.
*/
for (i = 0; i < list->num_entries; i++) {
unsigned priority = list->array[i].priority;
list_add_tail(&list->array[i].tv.head,
&bucket[priority]);
}
/* Connect the sorted buckets in the output list. */
for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
list_splice(&bucket[i], validated);
}
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
mutex_unlock(&list->lock);
......
This diff is collapsed.
......@@ -25,8 +25,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
struct amdgpu_ctx *ctx)
static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
{
unsigned i, j;
int r;
......@@ -35,27 +34,22 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs *
AMDGPU_MAX_RINGS, GFP_KERNEL);
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
sizeof(struct fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) *
amdgpu_sched_jobs * i;
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
}
if (amdgpu_enable_scheduler) {
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
struct amd_sched_rq *rq;
if (pri >= AMD_SCHED_MAX_PRIORITY) {
kfree(ctx->fences);
return -EINVAL;
}
rq = &adev->rings[i]->sched.sched_rq[pri];
r = amd_sched_entity_init(&adev->rings[i]->sched,
&ctx->rings[i].entity,
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
rq, amdgpu_sched_jobs);
if (r)
break;
......@@ -68,11 +62,10 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
kfree(ctx->fences);
return r;
}
}
return 0;
}
void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
{
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
......@@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
fence_put(ctx->rings[i].fences[j]);
kfree(ctx->fences);
if (amdgpu_enable_scheduler) {
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(&adev->rings[i]->sched,
&ctx->rings[i].entity);
}
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
......@@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
return r;
}
*id = (uint32_t)r;
r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx);
r = amdgpu_ctx_init(adev, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
......
......@@ -635,31 +635,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev)
}
/**
* amdgpu_boot_test_post_card - check and possibly initialize the hw
*
* @adev: amdgpu_device pointer
*
* Check if the asic is initialized and if not, attempt to initialize
* it (all asics).
* Returns true if initialized or false if not.
*/
bool amdgpu_boot_test_post_card(struct amdgpu_device *adev)
{
if (amdgpu_card_posted(adev))
return true;
if (adev->bios) {
DRM_INFO("GPU not posted. posting now...\n");
if (adev->is_atom_bios)
amdgpu_atom_asic_init(adev->mode_info.atom_context);
return true;
} else {
dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
return false;
}
}
/**
* amdgpu_dummy_page_init - init dummy page used by the driver
*
......@@ -959,12 +934,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
amdgpu_sched_jobs);
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
}
/* vramlimit must be a power of two */
if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) {
dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n",
amdgpu_vram_limit);
amdgpu_vram_limit = 0;
}
if (amdgpu_gart_size != -1) {
/* gtt size must be power of two and greater or equal to 32M */
......@@ -1434,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
adev->vm_manager.vm_pte_funcs_ring = NULL;
adev->vm_manager.vm_pte_num_rings = 0;
adev->gart.gart_funcs = NULL;
adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
......@@ -1455,9 +1424,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* mutex initialization are all done here so we
* can recall function without having locking issues */
mutex_init(&adev->ring_lock);
mutex_init(&adev->vm_manager.lock);
atomic_set(&adev->irq.ih.lock, 0);
mutex_init(&adev->gem.mutex);
mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
......@@ -1531,8 +1499,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
/* See if the asic supports SR-IOV */
adev->virtualization.supports_sr_iov =
amdgpu_atombios_has_gpu_virtualization_table(adev);
/* Post card if necessary */
if (!amdgpu_card_posted(adev)) {
if (!amdgpu_card_posted(adev) ||
adev->virtualization.supports_sr_iov) {
if (!adev->bios) {
dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
return -EINVAL;
......@@ -1577,11 +1550,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx);
if (r) {
dev_err(adev->dev, "failed to create kernel context (%d).\n", r);
return r;
}
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
......@@ -1645,7 +1613,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->shutdown = true;
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
amdgpu_ctx_fini(&adev->kernel_ctx);
amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev);
......@@ -1889,6 +1856,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
retry:
r = amdgpu_asic_reset(adev);
/* post card */
amdgpu_atom_asic_init(adev->mode_info.atom_context);
if (!r) {
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_resume(adev);
......
......@@ -35,32 +35,30 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
static void amdgpu_flip_wait_fence(struct amdgpu_device *adev,
static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
{
struct amdgpu_flip_work *work =
container_of(cb, struct amdgpu_flip_work, cb);
fence_put(f);
schedule_work(&work->flip_work);
}
static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
struct fence **f)
{
struct amdgpu_fence *fence;
long r;
struct fence *fence= *f;
if (*f == NULL)
return;
if (fence == NULL)
return false;
fence = to_amdgpu_fence(*f);
if (fence) {
r = fence_wait(&fence->base, false);
if (r == -EDEADLK)
r = amdgpu_gpu_reset(adev);
} else
r = fence_wait(*f, false);
*f = NULL;
if (r)
DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r);
if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
return true;
/* We continue with the page flip even if we failed to wait on
* the fence, otherwise the DRM core and userspace will be
* confused about which BO the CRTC is scanning out
*/
fence_put(*f);
*f = NULL;
return false;
}
static void amdgpu_flip_work_func(struct work_struct *__work)
......@@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
int vpos, hpos, stat, min_udelay;
struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
amdgpu_flip_wait_fence(adev, &work->excl);
if (amdgpu_flip_handle_fence(work, &work->excl))
return;
for (i = 0; i < work->shared_count; ++i)
amdgpu_flip_wait_fence(adev, &work->shared[i]);
if (amdgpu_flip_handle_fence(work, &work->shared[i]))
return;
/* We borrow the event spin lock for protecting flip_status */
spin_lock_irqsave(&crtc->dev->event_lock, flags);
......@@ -118,12 +119,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
spin_lock_irqsave(&crtc->dev->event_lock, flags);
};
/* do the flip (mmio) */
adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
/* set the flip status */
amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
/* Do the flip (mmio) */
adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
}
/*
......@@ -242,7 +243,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
/* update crtc fb */
crtc->primary->fb = fb;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
queue_work(amdgpu_crtc->pflip_queue, &work->flip_work);
amdgpu_flip_work_func(&work->flip_work);
return 0;
vblank_cleanup:
......
......@@ -69,7 +69,6 @@ int amdgpu_dpm = -1;
int amdgpu_smc_load_fw = 1;
int amdgpu_aspm = -1;
int amdgpu_runtime_pm = -1;
int amdgpu_hard_reset = 0;
unsigned amdgpu_ip_block_mask = 0xffffffff;
int amdgpu_bapm = -1;
int amdgpu_deep_color = 0;
......@@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop = 0;
int amdgpu_vm_debug = 0;
int amdgpu_exp_hw_support = 0;
int amdgpu_enable_scheduler = 1;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
int amdgpu_enable_semaphores = 0;
int amdgpu_powerplay = -1;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
......@@ -126,9 +123,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
module_param_named(hard_reset, amdgpu_hard_reset, int, 0444);
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
......@@ -153,18 +147,12 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)");
module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))");
module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
#ifdef CONFIG_DRM_AMD_POWERPLAY
MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(powerplay, amdgpu_powerplay, int, 0444);
......
......@@ -107,7 +107,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
if ((*fence) == NULL) {
return -ENOMEM;
}
(*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
(*fence)->seq = ++ring->fence_drv.sync_seq;
(*fence)->ring = ring;
(*fence)->owner = owner;
fence_init(&(*fence)->base, &amdgpu_fence_ops,
......@@ -171,7 +171,7 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
*/
last_seq = atomic64_read(&ring->fence_drv.last_seq);
do {
last_emitted = ring->fence_drv.sync_seq[ring->idx];
last_emitted = ring->fence_drv.sync_seq;
seq = amdgpu_fence_read(ring);
seq |= last_seq & 0xffffffff00000000LL;
if (seq < last_seq) {
......@@ -260,34 +260,28 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
}
/*
* amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
* amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
* @ring: ring to wait on for the seq number
* @seq: seq number wait for
*
* return value:
* 0: seq signaled, and gpu not hang
* -EDEADL: GPU hang detected
* -EINVAL: some paramter is not valid
*/
static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
{
bool signaled = false;
BUG_ON(!ring);
if (seq > ring->fence_drv.sync_seq[ring->idx])
if (seq > ring->fence_drv.sync_seq)
return -EINVAL;
if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
return 0;
amdgpu_fence_schedule_fallback(ring);
wait_event(ring->fence_drv.fence_queue, (
(signaled = amdgpu_fence_seq_signaled(ring, seq))));
wait_event(ring->fence_drv.fence_queue,
amdgpu_fence_seq_signaled(ring, seq));
if (signaled)
return 0;
else
return -EDEADLK;
}
/**
......@@ -304,7 +298,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
{
uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
if (seq >= ring->fence_drv.sync_seq[ring->idx])
if (seq >= ring->fence_drv.sync_seq)
return -ENOENT;
return amdgpu_fence_ring_wait_seq(ring, seq);
......@@ -322,7 +316,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
*/
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
uint64_t seq = ring->fence_drv.sync_seq[ring->idx];
uint64_t seq = ring->fence_drv.sync_seq;
if (!seq)
return 0;
......@@ -347,7 +341,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
* but it's ok to report slightly wrong fence count here.
*/
amdgpu_fence_process(ring);
emitted = ring->fence_drv.sync_seq[ring->idx]
emitted = ring->fence_drv.sync_seq
- atomic64_read(&ring->fence_drv.last_seq);
/* to avoid 32bits warp around */
if (emitted > 0x10000000)
......@@ -356,68 +350,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
return (unsigned)emitted;
}
/**
* amdgpu_fence_need_sync - do we need a semaphore
*
* @fence: amdgpu fence object
* @dst_ring: which ring to check against
*
* Check if the fence needs to be synced against another ring
* (all asics). If so, we need to emit a semaphore.
* Returns true if we need to sync with another ring, false if
* not.
*/
bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
struct amdgpu_ring *dst_ring)
{
struct amdgpu_fence_driver *fdrv;
if (!fence)
return false;
if (fence->ring == dst_ring)
return false;
/* we are protected by the ring mutex */
fdrv = &dst_ring->fence_drv;
if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
return false;
return true;
}
/**
* amdgpu_fence_note_sync - record the sync point
*
* @fence: amdgpu fence object
* @dst_ring: which ring to check against
*
* Note the sequence number at which point the fence will
* be synced with the requested ring (all asics).
*/
void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
struct amdgpu_ring *dst_ring)
{
struct amdgpu_fence_driver *dst, *src;
unsigned i;
if (!fence)
return;
if (fence->ring == dst_ring)
return;
/* we are protected by the ring mutex */
src = &fence->ring->fence_drv;
dst = &dst_ring->fence_drv;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
if (i == dst_ring->idx)
continue;
dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
}
}
/**
* amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring.
......@@ -471,13 +403,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
*/
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
int i, r;
long timeout;
int r;
ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
ring->fence_drv.sync_seq[i] = 0;
ring->fence_drv.sync_seq = 0;
atomic64_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
......@@ -486,8 +417,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
init_waitqueue_head(&ring->fence_drv.fence_queue);
if (amdgpu_enable_scheduler) {
long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
if (timeout == 0) {
/*
* FIXME:
......@@ -506,7 +436,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
ring->name);
return r;
}
}
return 0;
}
......@@ -552,7 +481,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
kmem_cache_destroy(amdgpu_fence_slab);
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
......@@ -570,7 +498,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
del_timer_sync(&ring->fence_drv.fallback_timer);
ring->fence_drv.initialized = false;
}
mutex_unlock(&adev->ring_lock);
}
/**
......@@ -585,7 +512,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
{
int i, r;
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized)
......@@ -602,7 +528,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
mutex_unlock(&adev->ring_lock);
}
/**
......@@ -621,7 +546,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
{
int i;
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized)
......@@ -631,7 +555,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
mutex_unlock(&adev->ring_lock);
}
/**
......@@ -651,7 +574,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
}
}
......@@ -781,7 +704,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
int i, j;
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
......@@ -794,28 +717,38 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
seq_printf(m, "Last signaled fence 0x%016llx\n",
(unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
seq_printf(m, "Last emitted 0x%016llx\n",
ring->fence_drv.sync_seq[i]);
for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
struct amdgpu_ring *other = adev->rings[j];
if (i != j && other && other->fence_drv.initialized &&
ring->fence_drv.sync_seq[j])
seq_printf(m, "Last sync to ring %d 0x%016llx\n",
j, ring->fence_drv.sync_seq[j]);
}
ring->fence_drv.sync_seq);
}
return 0;
}
/**
* amdgpu_debugfs_gpu_reset - manually trigger a gpu reset
*
* Manually trigger a gpu reset at the next fence wait.
*/
static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
seq_printf(m, "gpu reset\n");
amdgpu_gpu_reset(adev);
return 0;
}
static struct drm_info_list amdgpu_debugfs_fence_list[] = {
{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
};
#endif
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
#else
return 0;
#endif
......
......@@ -83,24 +83,32 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
return r;
}
*obj = &robj->gem_base;
robj->pid = task_pid_nr(current);
mutex_lock(&adev->gem.mutex);
list_add_tail(&robj->list, &adev->gem.objects);
mutex_unlock(&adev->gem.mutex);
return 0;
}
int amdgpu_gem_init(struct amdgpu_device *adev)
void amdgpu_gem_force_release(struct amdgpu_device *adev)
{
INIT_LIST_HEAD(&adev->gem.objects);
return 0;
}
struct drm_device *ddev = adev->ddev;
struct drm_file *file;
void amdgpu_gem_fini(struct amdgpu_device *adev)
{
amdgpu_bo_force_delete(adev);
mutex_lock(&ddev->struct_mutex);
list_for_each_entry(file, &ddev->filelist, lhead) {
struct drm_gem_object *gobj;
int handle;
WARN_ONCE(1, "Still active user space clients!\n");
spin_lock(&file->table_lock);
idr_for_each_entry(&file->object_idr, gobj, handle) {
WARN_ONCE(1, "And also active allocations!\n");
drm_gem_object_unreference(gobj);
}
idr_destroy(&file->object_idr);
spin_unlock(&file->table_lock);
}
mutex_unlock(&ddev->struct_mutex);
}
/*
......@@ -252,6 +260,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
goto handle_lockup;
bo = gem_to_amdgpu_bo(gobj);
bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
if (r)
goto release_object;
......@@ -308,7 +318,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
return -ENOENT;
}
robj = gem_to_amdgpu_bo(gobj);
if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) ||
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
(robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
drm_gem_object_unreference_unlocked(gobj);
return -EPERM;
......@@ -628,7 +638,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
info.bo_size = robj->gem_base.size;
info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
info.domains = robj->initial_domain;
info.domains = robj->prefered_domains;
info.domain_flags = robj->flags;
amdgpu_bo_unreserve(robj);
if (copy_to_user(out, &info, sizeof(info)))
......@@ -636,14 +646,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
r = -EPERM;
amdgpu_bo_unreserve(robj);
break;
}
robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU);
robj->allowed_domains = robj->prefered_domains;
if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
amdgpu_bo_unreserve(robj);
break;
default:
......@@ -688,20 +702,17 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
}
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *rbo;
unsigned i = 0;
struct drm_gem_object *gobj = ptr;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct seq_file *m = data;
mutex_lock(&adev->gem.mutex);
list_for_each_entry(rbo, &adev->gem.objects, list) {
unsigned domain;
const char *placement;
unsigned pin_count;
domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type);
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
placement = "VRAM";
......@@ -714,12 +725,50 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
placement = " CPU";
break;
}
seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n",
i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20,
placement, (unsigned long)rbo->pid);
i++;
seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx",
id, amdgpu_bo_size(bo), placement,
amdgpu_bo_gpu_offset(bo));
pin_count = ACCESS_ONCE(bo->pin_count);
if (pin_count)
seq_printf(m, " pin count %d", pin_count);
seq_printf(m, "\n");
return 0;
}
static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct drm_file *file;
int r;
r = mutex_lock_interruptible(&dev->struct_mutex);
if (r)
return r;
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
/*
* Although we have a valid reference on file->pid, that does
* not guarantee that the task_struct who called get_pid() is
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
task ? task->comm : "<unknown>");
rcu_read_unlock();
spin_lock(&file->table_lock);
idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m);
spin_unlock(&file->table_lock);
}
mutex_unlock(&adev->gem.mutex);
mutex_unlock(&dev->struct_mutex);
return 0;
}
......
......@@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
* suballocator.
* Returns 0 on success, error on failure.
*/
int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib)
{
struct amdgpu_device *adev = ring->adev;
int r;
if (size) {
......@@ -75,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
}
amdgpu_sync_create(&ib->sync);
ib->ring = ring;
ib->vm = vm;
return 0;
......@@ -93,7 +89,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
*/
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
{
amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
if (ib->fence)
fence_put(&ib->fence->base);
......@@ -106,6 +101,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* @num_ibs: number of IBs to schedule
* @ibs: IB objects to schedule
* @owner: owner for creating the fences
* @f: fence created during this submission
*
* Schedule an IB on the associated ring (all asics).
* Returns 0 on success, error on failure.
......@@ -120,11 +116,13 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* a CONST_IB), it will be put on the ring prior to the DE IB. Prior
* to SI there was just a DE IB.
*/
int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_ib *ibs, void *owner)
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ibs, void *owner,
struct fence *last_vm_update,
struct fence **f)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm;
unsigned i;
......@@ -133,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
if (num_ibs == 0)
return -EINVAL;
ring = ibs->ring;
ctx = ibs->ctx;
vm = ibs->vm;
......@@ -141,36 +138,21 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
dev_err(adev->dev, "couldn't schedule ib\n");
return -EINVAL;
}
r = amdgpu_sync_wait(&ibs->sync);
if (r) {
dev_err(adev->dev, "IB sync failed (%d).\n", r);
return r;
}
r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
}
if (vm) {
/* grab a vm id if necessary */
r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
if (r) {
amdgpu_ring_unlock_undo(ring);
return r;
}
if (vm && !ibs->grabbed_vmid) {
dev_err(adev->dev, "VM IB without ID\n");
return -EINVAL;
}
r = amdgpu_sync_rings(&ibs->sync, ring);
r = amdgpu_ring_alloc(ring, 256 * num_ibs);
if (r) {
amdgpu_ring_unlock_undo(ring);
dev_err(adev->dev, "failed to sync rings (%d)\n", r);
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
}
if (vm) {
/* do context switch */
amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update);
amdgpu_vm_flush(ring, vm, last_vm_update);
if (ring->funcs->emit_gds_switch)
amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
......@@ -186,9 +168,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) {
if (ib->ctx != ctx || ib->vm != vm) {
ring->current_ctx = old_ctx;
amdgpu_ring_unlock_undo(ring);
amdgpu_ring_undo(ring);
return -EINVAL;
}
amdgpu_ring_emit_ib(ring, ib);
......@@ -199,14 +181,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
ring->current_ctx = old_ctx;
amdgpu_ring_unlock_undo(ring);
amdgpu_ring_undo(ring);
return r;
}
if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
&ib->fence->base);
/* wrap the last IB with fence */
if (ib->user) {
uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
......@@ -215,10 +193,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
AMDGPU_FENCE_FLAG_64BIT);
}
if (ib->vm)
amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
if (f)
*f = fence_get(&ib->fence->base);
amdgpu_ring_unlock_commit(ring);
amdgpu_ring_commit(ring);
return 0;
}
......
......@@ -28,81 +28,132 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job)
{
struct amdgpu_job *job = to_amdgpu_job(sched_job);
return amdgpu_sync_get_fence(&job->ibs->sync);
size_t size = sizeof(struct amdgpu_job);
if (num_ibs == 0)
return -EINVAL;
size += sizeof(struct amdgpu_ib) * num_ibs;
*job = kzalloc(size, GFP_KERNEL);
if (!*job)
return -ENOMEM;
(*job)->adev = adev;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
amdgpu_sync_create(&(*job)->sync);
return 0;
}
static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
struct amdgpu_job **job)
{
struct amdgpu_fence *fence = NULL;
struct amdgpu_job *job;
int r;
if (!sched_job) {
DRM_ERROR("job is null\n");
return NULL;
}
job = to_amdgpu_job(sched_job);
trace_amdgpu_sched_run_job(job);
r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err;
}
r = amdgpu_job_alloc(adev, 1, job);
if (r)
return r;
fence = job->ibs[job->num_ibs - 1].fence;
fence_get(&fence->base);
r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
if (r)
kfree(*job);
err:
if (job->free_job)
job->free_job(job);
return r;
}
void amdgpu_job_free(struct amdgpu_job *job)
{
unsigned i;
for (i = 0; i < job->num_ibs; ++i)
amdgpu_ib_free(job->adev, &job->ibs[i]);
amdgpu_bo_unref(&job->uf.bo);
amdgpu_sync_free(&job->sync);
kfree(job);
return fence ? &fence->base : NULL;
}
struct amd_sched_backend_ops amdgpu_sched_ops = {
.dependency = amdgpu_sched_dependency,
.run_job = amdgpu_sched_run_job,
};
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ibs,
unsigned num_ibs,
int (*free_job)(struct amdgpu_job *),
void *owner,
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct amd_sched_entity *entity, void *owner,
struct fence **f)
{
int r = 0;
if (amdgpu_enable_scheduler) {
struct amdgpu_job *job =
kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
if (!job)
return -ENOMEM;
job->ring = ring;
job->base.sched = &ring->sched;
job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
job->base.s_entity = entity;
job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
if (!job->base.s_fence) {
kfree(job);
if (!job->base.s_fence)
return -ENOMEM;
}
*f = fence_get(&job->base.s_fence->base);
job->adev = adev;
job->ibs = ibs;
job->num_ibs = num_ibs;
job->owner = owner;
job->free_job = free_job;
amd_sched_entity_push_job(&job->base);
} else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
return 0;
}
static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
{
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->ibs->vm;
struct fence *fence = amdgpu_sync_get_fence(&job->sync);
if (fence == NULL && vm && !job->ibs->grabbed_vmid) {
struct amdgpu_ring *ring = job->ring;
int r;
r = amdgpu_vm_grab_id(vm, ring, &job->sync,
&job->base.s_fence->base);
if (r)
return r;
*f = fence_get(&ibs[num_ibs - 1].fence->base);
DRM_ERROR("Error getting VM ID (%d)\n", r);
else
job->ibs->grabbed_vmid = true;
fence = amdgpu_sync_get_fence(&job->sync);
}
return 0;
return fence;
}
static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
{
struct fence *fence = NULL;
struct amdgpu_job *job;
int r;
if (!sched_job) {
DRM_ERROR("job is null\n");
return NULL;
}
job = to_amdgpu_job(sched_job);
r = amdgpu_sync_wait(&job->sync);
if (r) {
DRM_ERROR("failed to sync wait (%d)\n", r);
return NULL;
}
trace_amdgpu_sched_run_job(job);
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner,
job->sync.last_vm_update, &fence);
if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err;
}
err:
amdgpu_job_free(job);
return fence;
}
struct amd_sched_backend_ops amdgpu_sched_ops = {
.dependency = amdgpu_job_dependency,
.run_job = amdgpu_job_run,
};
......@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.max_memory_clock = adev->pm.default_mclk * 10;
}
dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
adev->gfx.config.max_shader_engines;
dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
dev_info._pad = 0;
dev_info.ids_flags = 0;
......@@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
/* Get associated drm_crtc: */
crtc = &adev->mode_info.crtcs[pipe]->base;
if (!crtc) {
/* This can occur on driver load if some component fails to
* initialize completely and driver is unloaded */
DRM_ERROR("Uninitialized crtc %d\n", pipe);
return -EINVAL;
}
/* Helper routine in DRM core does all the work: */
return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
......
......@@ -48,8 +48,7 @@ struct amdgpu_mn {
/* protected by adev->mn_lock */
struct hlist_node node;
/* objects protected by lock */
struct mutex lock;
/* objects protected by mm->mmap_sem */
struct rb_root objects;
};
......@@ -73,21 +72,19 @@ static void amdgpu_mn_destroy(struct work_struct *work)
struct amdgpu_bo *bo, *next_bo;
mutex_lock(&adev->mn_lock);
mutex_lock(&rmn->lock);
down_write(&rmn->mm->mmap_sem);
hash_del(&rmn->node);
rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
it.rb) {
interval_tree_remove(&node->it, &rmn->objects);
list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
bo->mn = NULL;
list_del_init(&bo->mn_list);
}
kfree(node);
}
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
mmu_notifier_unregister(&rmn->mn, rmn->mm);
mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
kfree(rmn);
}
......@@ -129,8 +126,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
/* notification is exclusive, but interval is inclusive */
end -= 1;
mutex_lock(&rmn->lock);
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
......@@ -142,7 +137,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
list_for_each_entry(bo, &node->bos, mn_list) {
if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
end))
continue;
r = amdgpu_bo_reserve(bo, true);
......@@ -164,8 +160,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
amdgpu_bo_unreserve(bo);
}
}
mutex_unlock(&rmn->lock);
}
static const struct mmu_notifier_ops amdgpu_mn_ops = {
......@@ -186,8 +180,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
struct amdgpu_mn *rmn;
int r;
down_write(&mm->mmap_sem);
mutex_lock(&adev->mn_lock);
down_write(&mm->mmap_sem);
hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
if (rmn->mm == mm)
......@@ -202,7 +196,6 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
rmn->mn.ops = &amdgpu_mn_ops;
mutex_init(&rmn->lock);
rmn->objects = RB_ROOT;
r = __mmu_notifier_register(&rmn->mn, mm);
......@@ -212,14 +205,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
release_locks:
mutex_unlock(&adev->mn_lock);
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
return rmn;
free_rmn:
mutex_unlock(&adev->mn_lock);
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
kfree(rmn);
return ERR_PTR(r);
......@@ -249,7 +242,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
INIT_LIST_HEAD(&bos);
mutex_lock(&rmn->lock);
down_write(&rmn->mm->mmap_sem);
while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
kfree(node);
......@@ -263,7 +256,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
if (!node) {
node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
if (!node) {
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
return -ENOMEM;
}
}
......@@ -278,7 +271,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
interval_tree_insert(&node->it, &rmn->objects);
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
return 0;
}
......@@ -297,13 +290,15 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
struct list_head *head;
mutex_lock(&adev->mn_lock);
rmn = bo->mn;
if (rmn == NULL) {
mutex_unlock(&adev->mn_lock);
return;
}
mutex_lock(&rmn->lock);
down_write(&rmn->mm->mmap_sem);
/* save the next list entry for later */
head = bo->mn_list.next;
......@@ -317,6 +312,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
kfree(node);
}
mutex_unlock(&rmn->lock);
up_write(&rmn->mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
}
......@@ -390,7 +390,6 @@ struct amdgpu_crtc {
struct drm_display_mode native_mode;
u32 pll_id;
/* page flipping */
struct workqueue_struct *pflip_queue;
struct amdgpu_flip_work *pflip_works;
enum amdgpu_flip_status pflip_status;
int deferred_flip_completion;
......
......@@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);
mutex_lock(&bo->adev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&bo->adev->gem.mutex);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
kfree(bo->metadata);
......@@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
bo->adev = adev;
INIT_LIST_HEAD(&bo->list);
INIT_LIST_HEAD(&bo->va);
bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM |
bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU |
AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA);
bo->allowed_domains = bo->prefered_domains;
if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
bo->flags = flags;
......@@ -367,7 +367,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
int r, i;
unsigned fpfn, lpfn;
if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
if (WARN_ON_ONCE(min_offset > max_offset))
......@@ -470,26 +470,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}
void amdgpu_bo_force_delete(struct amdgpu_device *adev)
{
struct amdgpu_bo *bo, *n;
if (list_empty(&adev->gem.objects)) {
return;
}
dev_err(adev->dev, "Userspace still has active objects !\n");
list_for_each_entry_safe(bo, n, &adev->gem.objects, list) {
dev_err(adev->dev, "%p %p %lu %lu force free\n",
&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
*((unsigned long *)&bo->gem_base.refcount));
mutex_lock(&bo->adev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&bo->adev->gem.mutex);
/* this should unref the ttm bo */
drm_gem_object_unreference_unlocked(&bo->gem_base);
}
}
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* Add an MTRR for the VRAM */
......
......@@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 *gpu_addr);
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
void amdgpu_bo_force_delete(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
......
This diff is collapsed.
......@@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
return ERR_PTR(ret);
mutex_lock(&adev->gem.mutex);
list_add_tail(&bo->list, &adev->gem.objects);
mutex_unlock(&adev->gem.mutex);
return &bo->gem_base;
}
......@@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return ERR_PTR(-EPERM);
return drm_gem_prime_export(dev, gobj, flags);
......
......@@ -48,28 +48,6 @@
*/
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
/**
* amdgpu_ring_free_size - update the free size
*
* @adev: amdgpu_device pointer
* @ring: amdgpu_ring structure holding ring information
*
* Update the free dw slots in the ring buffer (all asics).
*/
void amdgpu_ring_free_size(struct amdgpu_ring *ring)
{
uint32_t rptr = amdgpu_ring_get_rptr(ring);
/* This works because ring_size is a power of 2 */
ring->ring_free_dw = rptr + (ring->ring_size / 4);
ring->ring_free_dw -= ring->wptr;
ring->ring_free_dw &= ring->ptr_mask;
if (!ring->ring_free_dw) {
/* this is an empty ring */
ring->ring_free_dw = ring->ring_size / 4;
}
}
/**
* amdgpu_ring_alloc - allocate space on the ring buffer
*
......@@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring)
*/
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
int r;
/* make sure we aren't trying to allocate more space than there is on the ring */
if (ndw > (ring->ring_size / 4))
return -ENOMEM;
/* Align requested size with padding so unlock_commit can
* pad safely */
amdgpu_ring_free_size(ring);
ndw = (ndw + ring->align_mask) & ~ring->align_mask;
while (ndw > (ring->ring_free_dw - 1)) {
amdgpu_ring_free_size(ring);
if (ndw < ring->ring_free_dw) {
break;
}
r = amdgpu_fence_wait_next(ring);
if (r)
return r;
}
ring->count_dw = ndw;
ring->wptr_old = ring->wptr;
return 0;
}
/**
* amdgpu_ring_lock - lock the ring and allocate space on it
*
* @adev: amdgpu_device pointer
* @ring: amdgpu_ring structure holding ring information
* @ndw: number of dwords to allocate in the ring buffer
*
* Lock the ring and allocate @ndw dwords in the ring buffer
* (all asics).
* Returns 0 on success, error on failure.
/* Make sure we aren't trying to allocate more space
* than the maximum for one submission
*/
int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
{
int r;
if (WARN_ON_ONCE(ndw > ring->max_dw))
return -ENOMEM;
mutex_lock(ring->ring_lock);
r = amdgpu_ring_alloc(ring, ndw);
if (r) {
mutex_unlock(ring->ring_lock);
return r;
}
ring->count_dw = ndw;
ring->wptr_old = ring->wptr;
return 0;
}
......@@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
amdgpu_ring_write(ring, ring->nop);
}
/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
*
* @ring: amdgpu_ring structure holding ring information
* @ib: IB to add NOP packets to
*
* This is the generic pad_ib function for rings except SDMA
*/
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
while (ib->length_dw & ring->align_mask)
ib->ptr[ib->length_dw++] = ring->nop;
}
/**
* amdgpu_ring_commit - tell the GPU to execute the new
* commands on the ring buffer
......@@ -167,20 +126,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
amdgpu_ring_set_wptr(ring);
}
/**
* amdgpu_ring_unlock_commit - tell the GPU to execute the new
* commands on the ring buffer and unlock it
*
* @ring: amdgpu_ring structure holding ring information
*
* Call amdgpu_ring_commit() then unlock the ring (all asics).
*/
void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
{
amdgpu_ring_commit(ring);
mutex_unlock(ring->ring_lock);
}
/**
* amdgpu_ring_undo - reset the wptr
*
......@@ -193,19 +138,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->wptr = ring->wptr_old;
}
/**
* amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
*
* @ring: amdgpu_ring structure holding ring information
*
* Call amdgpu_ring_undo() then unlock the ring (all asics).
*/
void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
{
amdgpu_ring_undo(ring);
mutex_unlock(ring->ring_lock);
}
/**
* amdgpu_ring_backup - Back up the content of a ring
*
......@@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
{
unsigned size, ptr, i;
/* just in case lock the ring */
mutex_lock(ring->ring_lock);
*data = NULL;
if (ring->ring_obj == NULL) {
mutex_unlock(ring->ring_lock);
if (ring->ring_obj == NULL)
return 0;
}
/* it doesn't make sense to save anything if all fences are signaled */
if (!amdgpu_fence_count_emitted(ring)) {
mutex_unlock(ring->ring_lock);
if (!amdgpu_fence_count_emitted(ring))
return 0;
}
ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
size = ring->wptr + (ring->ring_size / 4);
size -= ptr;
size &= ring->ptr_mask;
if (size == 0) {
mutex_unlock(ring->ring_lock);
if (size == 0)
return 0;
}
/* and then save the content of the ring */
*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
if (!*data) {
mutex_unlock(ring->ring_lock);
if (!*data)
return 0;
}
for (i = 0; i < size; ++i) {
(*data)[i] = ring->ring[ptr++];
ptr &= ring->ptr_mask;
}
mutex_unlock(ring->ring_lock);
return size;
}
......@@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
return 0;
/* restore the saved ring content */
r = amdgpu_ring_lock(ring, size);
r = amdgpu_ring_alloc(ring, size);
if (r)
return r;
......@@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, data[i]);
}
amdgpu_ring_unlock_commit(ring);
amdgpu_ring_commit(ring);
kfree(data);
return 0;
}
......@@ -352,7 +273,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
ring->ring_lock = &adev->ring_lock;
/* Align ring size */
rb_bufsz = order_base_2(ring_size / 8);
ring_size = (1 << (rb_bufsz + 1)) * 4;
......@@ -389,7 +309,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
}
}
ring->ptr_mask = (ring->ring_size / 4) - 1;
ring->ring_free_dw = ring->ring_size / 4;
ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4,
amdgpu_sched_hw_submission);
if (amdgpu_debugfs_ring_init(adev, ring)) {
DRM_ERROR("Failed to register debugfs file for rings !\n");
......@@ -410,15 +331,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
int r;
struct amdgpu_bo *ring_obj;
if (ring->ring_lock == NULL)
return;
mutex_lock(ring->ring_lock);
ring_obj = ring->ring_obj;
ring->ready = false;
ring->ring = NULL;
ring->ring_obj = NULL;
mutex_unlock(ring->ring_lock);
amdgpu_wb_free(ring->adev, ring->fence_offs);
amdgpu_wb_free(ring->adev, ring->rptr_offs);
......@@ -474,29 +390,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
uint32_t rptr, wptr, rptr_next;
unsigned count, i, j;
amdgpu_ring_free_size(ring);
count = (ring->ring_size / 4) - ring->ring_free_dw;
unsigned i;
wptr = amdgpu_ring_get_wptr(ring);
seq_printf(m, "wptr: 0x%08x [%5d]\n",
wptr, wptr);
seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
rptr = amdgpu_ring_get_rptr(ring);
seq_printf(m, "rptr: 0x%08x [%5d]\n",
rptr, rptr);
rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
ring->wptr, ring->wptr);
seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
ring->last_semaphore_signal_addr);
seq_printf(m, "last semaphore wait addr : 0x%016llx\n",
ring->last_semaphore_wait_addr);
seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
seq_printf(m, "%u dwords in ring\n", count);
if (!ring->ready)
return 0;
......@@ -505,11 +410,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
* packet that is the root issue
*/
i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
for (j = 0; j <= (count + 32); j++) {
while (i != rptr) {
seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
if (i == rptr)
seq_puts(m, " *");
if (i == rptr_next)
seq_puts(m, " #");
seq_puts(m, "\n");
i = (i + 1) & ring->ptr_mask;
}
while (i != wptr) {
seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
if (rptr == i)
if (i == rptr)
seq_puts(m, " *");
if (rptr_next == i)
if (i == rptr_next)
seq_puts(m, " #");
seq_puts(m, "\n");
i = (i + 1) & ring->ptr_mask;
......
......@@ -321,8 +321,11 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
int i, r;
signed long t;
BUG_ON(align > sa_manager->align);
BUG_ON(size > sa_manager->size);
if (WARN_ON_ONCE(align > sa_manager->align))
return -EINVAL;
if (WARN_ON_ONCE(size > sa_manager->size))
return -EINVAL;
*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
if ((*sa_bo) == NULL) {
......
/*
* Copyright 2011 Christian König.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
/*
* Authors:
* Christian König <deathsimple@vodafone.de>
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
int amdgpu_semaphore_create(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore)
{
int r;
*semaphore = kmalloc(sizeof(struct amdgpu_semaphore), GFP_KERNEL);
if (*semaphore == NULL) {
return -ENOMEM;
}
r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8);
if (r) {
kfree(*semaphore);
*semaphore = NULL;
return r;
}
(*semaphore)->waiters = 0;
(*semaphore)->gpu_addr = amdgpu_sa_bo_gpu_addr((*semaphore)->sa_bo);
*((uint64_t *)amdgpu_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
return 0;
}
bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore)
{
trace_amdgpu_semaphore_signale(ring->idx, semaphore);
if (amdgpu_ring_emit_semaphore(ring, semaphore, false)) {
--semaphore->waiters;
/* for debugging lockup only, used by sysfs debug files */
ring->last_semaphore_signal_addr = semaphore->gpu_addr;
return true;
}
return false;
}
bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore)
{
trace_amdgpu_semaphore_wait(ring->idx, semaphore);
if (amdgpu_ring_emit_semaphore(ring, semaphore, true)) {
++semaphore->waiters;
/* for debugging lockup only, used by sysfs debug files */
ring->last_semaphore_wait_addr = semaphore->gpu_addr;
return true;
}
return false;
}
void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore,
struct fence *fence)
{
if (semaphore == NULL || *semaphore == NULL) {
return;
}
if ((*semaphore)->waiters > 0) {
dev_err(adev->dev, "semaphore %p has more waiters than signalers,"
" hardware lockup imminent!\n", *semaphore);
}
amdgpu_sa_bo_free(adev, &(*semaphore)->sa_bo, fence);
kfree(*semaphore);
*semaphore = NULL;
}
......@@ -46,14 +46,6 @@ struct amdgpu_sync_entry {
*/
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
unsigned i;
for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
sync->semaphores[i] = NULL;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
sync->sync_to[i] = NULL;
hash_init(sync->fences);
sync->last_vm_update = NULL;
}
......@@ -107,7 +99,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f)
{
struct amdgpu_sync_entry *e;
struct amdgpu_fence *fence;
if (!f)
return 0;
......@@ -116,8 +107,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
amdgpu_sync_keep_later(&sync->last_vm_update, f);
fence = to_amdgpu_fence(f);
if (!fence || fence->ring->adev != adev) {
hash_for_each_possible(sync->fences, e, node, f->context) {
if (unlikely(e->fence->context != f->context))
continue;
......@@ -132,11 +121,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
hash_add(sync->fences, &e->node, f->context);
e->fence = fence_get(f);
return 0;
}
amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
return 0;
}
......@@ -153,13 +137,13 @@ static void *amdgpu_sync_get_owner(struct fence *f)
}
/**
* amdgpu_sync_resv - use the semaphores to sync to a reservation object
* amdgpu_sync_resv - sync to a reservation object
*
* @sync: sync object to add fences from reservation object to
* @resv: reservation object with embedded fence
* @shared: true if we should only sync to the exclusive fence
*
* Sync to the fence using the semaphore objects
* Sync to the fence
*/
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
......@@ -250,123 +234,17 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
kfree(e);
}
if (amdgpu_enable_semaphores)
return 0;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct fence *fence = sync->sync_to[i];
if (!fence)
continue;
r = fence_wait(fence, false);
if (r)
return r;
}
return 0;
}
/**
* amdgpu_sync_rings - sync ring to all registered fences
*
* @sync: sync object to use
* @ring: ring that needs sync
*
* Ensure that all registered fences are signaled before letting
* the ring continue. The caller must hold the ring lock.
*/
int amdgpu_sync_rings(struct amdgpu_sync *sync,
struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
unsigned count = 0;
int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *other = adev->rings[i];
struct amdgpu_semaphore *semaphore;
struct amdgpu_fence *fence;
if (!sync->sync_to[i])
continue;
fence = to_amdgpu_fence(sync->sync_to[i]);
/* check if we really need to sync */
if (!amdgpu_enable_scheduler &&
!amdgpu_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!other->ready) {
dev_err(adev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
r = fence_wait(sync->sync_to[i], true);
if (r)
return r;
continue;
}
if (count >= AMDGPU_NUM_SYNCS) {
/* not enough room, wait manually */
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
}
r = amdgpu_semaphore_create(adev, &semaphore);
if (r)
return r;
sync->semaphores[count++] = semaphore;
/* allocate enough space for sync command */
r = amdgpu_ring_alloc(other, 16);
if (r)
return r;
/* emit the signal semaphore */
if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
/* signaling wasn't successful wait manually */
amdgpu_ring_undo(other);
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
/* waiting wasn't successful wait manually */
amdgpu_ring_undo(other);
r = fence_wait(&fence->base, false);
if (r)
return r;
continue;
}
amdgpu_ring_commit(other);
amdgpu_fence_note_sync(fence, ring);
}
return 0;
}
/**
* amdgpu_sync_free - free the sync object
*
* @adev: amdgpu_device pointer
* @sync: sync object to use
* @fence: fence to use for the free
*
* Free the sync object by freeing all semaphores in it.
* Free the sync object.
*/
void amdgpu_sync_free(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct fence *fence)
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
......@@ -378,11 +256,5 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
kfree(e);
}
for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
fence_put(sync->sync_to[i]);
fence_put(sync->last_vm_update);
}
......@@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
amdgpu_do_test_moves(adev);
}
static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct fence **fence)
{
uint32_t handle = ring->idx ^ 0xdeafbeef;
int r;
if (ring == &adev->uvd.ring) {
r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
if (r) {
DRM_ERROR("Failed to get dummy create msg\n");
return r;
}
r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
if (r) {
DRM_ERROR("Failed to get dummy destroy msg\n");
return r;
}
} else if (ring == &adev->vce.ring[0] ||
ring == &adev->vce.ring[1]) {
r = amdgpu_vce_get_create_msg(ring, handle, NULL);
if (r) {
DRM_ERROR("Failed to get dummy create msg\n");
return r;
}
r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
if (r) {
DRM_ERROR("Failed to get dummy destroy msg\n");
return r;
}
} else {
struct amdgpu_fence *a_fence = NULL;
r = amdgpu_ring_lock(ring, 64);
if (r) {
DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
return r;
}
amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
amdgpu_ring_unlock_commit(ring);
*fence = &a_fence->base;
}
return 0;
}
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
struct amdgpu_ring *ringA,
struct amdgpu_ring *ringB)
{
struct fence *fence1 = NULL, *fence2 = NULL;
struct amdgpu_semaphore *semaphore = NULL;
int r;
r = amdgpu_semaphore_create(adev, &semaphore);
if (r) {
DRM_ERROR("Failed to create semaphore\n");
goto out_cleanup;
}
r = amdgpu_ring_lock(ringA, 64);
if (r) {
DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
goto out_cleanup;
}
amdgpu_semaphore_emit_wait(ringA, semaphore);
amdgpu_ring_unlock_commit(ringA);
r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
if (r)
goto out_cleanup;
r = amdgpu_ring_lock(ringA, 64);
if (r) {
DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
goto out_cleanup;
}
amdgpu_semaphore_emit_wait(ringA, semaphore);
amdgpu_ring_unlock_commit(ringA);
r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
if (r)
goto out_cleanup;
mdelay(1000);
if (fence_is_signaled(fence1)) {
DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
goto out_cleanup;
}
r = amdgpu_ring_lock(ringB, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %p\n", ringB);
goto out_cleanup;
}
amdgpu_semaphore_emit_signal(ringB, semaphore);
amdgpu_ring_unlock_commit(ringB);
r = fence_wait(fence1, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence 1\n");
goto out_cleanup;
}
mdelay(1000);
if (fence_is_signaled(fence2)) {
DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
goto out_cleanup;
}
r = amdgpu_ring_lock(ringB, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %p\n", ringB);
goto out_cleanup;
}
amdgpu_semaphore_emit_signal(ringB, semaphore);
amdgpu_ring_unlock_commit(ringB);
r = fence_wait(fence2, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence 1\n");
goto out_cleanup;
}
out_cleanup:
amdgpu_semaphore_free(adev, &semaphore, NULL);
if (fence1)
fence_put(fence1);
if (fence2)
fence_put(fence2);
if (r)
printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
......@@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
struct amdgpu_ring *ringB,
struct amdgpu_ring *ringC)
{
struct fence *fenceA = NULL, *fenceB = NULL;
struct amdgpu_semaphore *semaphore = NULL;
bool sigA, sigB;
int i, r;
r = amdgpu_semaphore_create(adev, &semaphore);
if (r) {
DRM_ERROR("Failed to create semaphore\n");
goto out_cleanup;
}
r = amdgpu_ring_lock(ringA, 64);
if (r) {
DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
goto out_cleanup;
}
amdgpu_semaphore_emit_wait(ringA, semaphore);
amdgpu_ring_unlock_commit(ringA);
r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
if (r)
goto out_cleanup;
r = amdgpu_ring_lock(ringB, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
goto out_cleanup;
}
amdgpu_semaphore_emit_wait(ringB, semaphore);
amdgpu_ring_unlock_commit(ringB);
r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
if (r)
goto out_cleanup;
mdelay(1000);
if (fence_is_signaled(fenceA)) {
DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
goto out_cleanup;
}
if (fence_is_signaled(fenceB)) {
DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
goto out_cleanup;
}
r = amdgpu_ring_lock(ringC, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %p\n", ringC);
goto out_cleanup;
}
amdgpu_semaphore_emit_signal(ringC, semaphore);
amdgpu_ring_unlock_commit(ringC);
for (i = 0; i < 30; ++i) {
mdelay(100);
sigA = fence_is_signaled(fenceA);
sigB = fence_is_signaled(fenceB);
if (sigA || sigB)
break;
}
if (!sigA && !sigB) {
DRM_ERROR("Neither fence A nor B has been signaled\n");
goto out_cleanup;
} else if (sigA && sigB) {
DRM_ERROR("Both fence A and B has been signaled\n");
goto out_cleanup;
}
DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');
r = amdgpu_ring_lock(ringC, 64);
if (r) {
DRM_ERROR("Failed to lock ring B %p\n", ringC);
goto out_cleanup;
}
amdgpu_semaphore_emit_signal(ringC, semaphore);
amdgpu_ring_unlock_commit(ringC);
mdelay(1000);
r = fence_wait(fenceA, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence A\n");
goto out_cleanup;
}
r = fence_wait(fenceB, false);
if (r) {
DRM_ERROR("Failed to wait for sync fence B\n");
goto out_cleanup;
}
out_cleanup:
amdgpu_semaphore_free(adev, &semaphore, NULL);
if (fenceA)
fence_put(fenceA);
if (fenceB)
fence_put(fenceB);
if (r)
printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}
static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
......
......@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
__entry->ring = p->ibs[i].ring->idx;
__entry->dw = p->ibs[i].length_dw;
__entry->ring = p->job->ring->idx;
__entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted(
p->ibs[i].ring);
p->job->ring);
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
......@@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
__entry->ring_name = job->ibs[0].ring->name;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
......@@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
__entry->ring_name = job->ibs[0].ring->name;
__entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
......@@ -100,18 +100,21 @@ TRACE_EVENT(amdgpu_sched_run_job,
TRACE_EVENT(amdgpu_vm_grab_id,
TP_PROTO(unsigned vmid, int ring),
TP_ARGS(vmid, ring),
TP_PROTO(struct amdgpu_vm *vm, unsigned vmid, int ring),
TP_ARGS(vm, vmid, ring),
TP_STRUCT__entry(
__field(struct amdgpu_vm *, vm)
__field(u32, vmid)
__field(u32, ring)
),
TP_fast_assign(
__entry->vm = vm;
__entry->vmid = vmid;
__entry->ring = ring;
),
TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
TP_printk("vm=%p, id=%u, ring=%u", __entry->vm, __entry->vmid,
__entry->ring)
);
TRACE_EVENT(amdgpu_vm_bo_map,
......@@ -247,42 +250,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
);
DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
TP_ARGS(ring, sem),
TP_STRUCT__entry(
__field(int, ring)
__field(signed, waiters)
__field(uint64_t, gpu_addr)
),
TP_fast_assign(
__entry->ring = ring;
__entry->waiters = sem->waiters;
__entry->gpu_addr = sem->gpu_addr;
),
TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
__entry->waiters, __entry->gpu_addr)
);
DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
TP_ARGS(ring, sem)
);
DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
TP_ARGS(ring, sem)
);
#endif
/* This part must be outside protection */
......
......@@ -77,6 +77,8 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
struct amdgpu_ring *ring;
struct amd_sched_rq *rq;
int r;
adev->mman.mem_global_referenced = false;
......@@ -106,13 +108,27 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
return r;
}
ring = adev->mman.buffer_funcs_ring;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
rq, amdgpu_sched_jobs);
if (r != 0) {
DRM_ERROR("Failed setting up TTM BO move run queue.\n");
drm_global_item_unref(&adev->mman.mem_global_ref);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
return r;
}
adev->mman.mem_global_referenced = true;
return 0;
}
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
if (adev->mman.mem_global_referenced) {
amd_sched_entity_fini(adev->mman.entity.sched,
&adev->mman.entity);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
drm_global_item_unref(&adev->mman.mem_global_ref);
adev->mman.mem_global_referenced = false;
......@@ -499,9 +515,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
if (current->mm != gtt->usermm)
return -EPERM;
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/* check that we only pin down anonymous memory
to prevent problems with writeback */
......@@ -773,14 +786,33 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return 0;
}
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm)
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt == NULL)
return NULL;
return gtt->usermm;
}
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned long size;
if (gtt == NULL)
return false;
return !!gtt->userptr;
if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
return false;
size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
return true;
}
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
......@@ -996,9 +1028,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
struct fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
uint32_t max_bytes;
unsigned num_loops, num_dw;
struct amdgpu_ib *ib;
unsigned i;
int r;
......@@ -1010,20 +1043,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
while (num_dw & 0x7)
num_dw++;
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
return -ENOMEM;
r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
if (r) {
kfree(ib);
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
if (r)
return r;
}
ib->length_dw = 0;
if (resv) {
r = amdgpu_sync_resv(adev, &ib->sync, resv,
r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
......@@ -1034,31 +1059,25 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
cur_size_in_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
dst_offset, cur_size_in_bytes);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
}
amdgpu_vm_pad_ib(adev, ib);
WARN_ON(ib->length_dw > num_dw);
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
&amdgpu_vm_free_job,
AMDGPU_FENCE_OWNER_UNDEFINED,
fence);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
r = amdgpu_job_submit(job, ring, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
goto error_free;
if (!amdgpu_enable_scheduler) {
amdgpu_ib_free(adev, ib);
kfree(ib);
}
return 0;
error_free:
amdgpu_ib_free(adev, ib);
kfree(ib);
amdgpu_job_free(job);
return r;
}
......
This diff is collapsed.
......@@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
bool direct, struct fence **fence);
void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
struct drm_file *filp);
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
......
This diff is collapsed.
......@@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev);
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
bool direct, struct fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore,
bool emit_wait);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -2670,7 +2670,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
......@@ -2890,7 +2889,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = 128;
......@@ -3366,7 +3364,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
schedule_work(&works->unpin_work);
return 0;
}
......
......@@ -2661,7 +2661,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
......@@ -2881,7 +2880,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = 128;
......@@ -3361,7 +3359,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
schedule_work(&works->unpin_work);
return 0;
}
......
......@@ -2582,7 +2582,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
......@@ -2809,7 +2808,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
......@@ -3375,7 +3373,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
schedule_work(&works->unpin_work);
return 0;
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment