Commit eaf01ee5 authored by Sarah Walker, committed by Maxime Ripard

drm/imagination: Implement job submission and scheduling

Implement job submission ioctl. Job scheduling is implemented using
drm_sched.

Jobs are submitted in a stream format. This is intended to allow the UAPI
data format to be independent of the actual FWIF structures in use, which
vary depending on the GPU in use.

The stream formats are documented at:
https://gitlab.freedesktop.org/mesa/mesa/-/blob/f8d2b42ae65c2f16f36a43e0ae39d288431e4263/src/imagination/csbgen/rogue_kmd_stream.xml
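
Purely as an illustration of the idea (the names below are hypothetical, not the driver's actual UAPI or FWIF definitions): the stream is a self-describing blob, e.g. a small header followed by optional blocks, which the kernel walks and translates into whatever FWIF layout the detected GPU expects.

/* Hypothetical sketch only -- see rogue_kmd_stream.xml for the real format. */
struct example_stream_header {
	u32 stream_len;   /* total size of the stream, in bytes */
	u32 ext_mask;     /* bitmask of optional blocks that follow */
};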

Changes since v8:
- Updated for upstreamed DRM scheduler changes
- Removed workaround code for the pending_list previously being updated
  after run_job() returned
- Fixed null deref in pvr_queue_cleanup_fw_context() for bad stream ptr
  given to create_context ioctl
- Corrected license identifiers

Changes since v7:
- Updated for v8 "DRM scheduler changes for XE" patchset

Changes since v6:
- Fix fence handling in pvr_sync_signal_array_add()
- Add handling for SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE flag
- Fix missing dma_resv locking in job submit path

Changes since v5:
- Fix leak in job creation error path

Changes since v4:
- Use a regular workqueue for job scheduling

Changes since v3:
- Support partial render jobs
- Add job timeout handler
- Split sync handling out of job code
- Use drm_dev_{enter,exit}

Changes since v2:
- Use drm_sched for job scheduling

Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Link: https://lore.kernel.org/r/c98dab7a5f5fb891fbed7e4990d19b5d13964365.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
parent d2d79d29
@@ -6,6 +6,7 @@ config DRM_POWERVR
depends on ARM64
depends on DRM
depends on PM
select DRM_EXEC
select DRM_GEM_SHMEM_HELPER
select DRM_SCHED
select DRM_GPUVM
......
@@ -18,10 +18,13 @@ powervr-y := \
pvr_fw_trace.o \
pvr_gem.o \
pvr_hwrt.o \
pvr_job.o \
pvr_mmu.o \
pvr_power.o \
pvr_queue.o \
pvr_stream.o \
pvr_stream_defs.o \
pvr_sync.o \
pvr_vm.o \
pvr_vm_mips.o
......
@@ -6,10 +6,12 @@
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_gem.h"
#include "pvr_job.h"
#include "pvr_power.h"
#include "pvr_rogue_fwif.h"
#include "pvr_rogue_fwif_common.h"
#include "pvr_rogue_fwif_resetframework.h"
#include "pvr_stream.h"
#include "pvr_stream_defs.h"
#include "pvr_vm.h"
@@ -164,6 +166,116 @@ ctx_fw_data_init(void *cpu_ptr, void *priv)
memcpy(cpu_ptr, ctx->data, ctx->data_size);
}
/**
* pvr_context_destroy_queues() - Destroy all queues attached to a context.
* @ctx: Context to destroy queues on.
*
* Should be called when the last reference to a context object is dropped.
* It releases all resources attached to the queues bound to this context.
*/
static void pvr_context_destroy_queues(struct pvr_context *ctx)
{
switch (ctx->type) {
case DRM_PVR_CTX_TYPE_RENDER:
pvr_queue_destroy(ctx->queues.fragment);
pvr_queue_destroy(ctx->queues.geometry);
break;
case DRM_PVR_CTX_TYPE_COMPUTE:
pvr_queue_destroy(ctx->queues.compute);
break;
case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
pvr_queue_destroy(ctx->queues.transfer);
break;
}
}
/**
* pvr_context_create_queues() - Create all queues attached to a context.
* @ctx: Context to create queues on.
* @args: Context creation arguments passed by userspace.
* @fw_ctx_map: CPU mapping of the FW context object.
*
* Return:
* * 0 on success, or
* * A negative error code otherwise.
*/
static int pvr_context_create_queues(struct pvr_context *ctx,
struct drm_pvr_ioctl_create_context_args *args,
void *fw_ctx_map)
{
int err;
switch (ctx->type) {
case DRM_PVR_CTX_TYPE_RENDER:
ctx->queues.geometry = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_GEOMETRY,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.geometry)) {
err = PTR_ERR(ctx->queues.geometry);
ctx->queues.geometry = NULL;
goto err_destroy_queues;
}
ctx->queues.fragment = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_FRAGMENT,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.fragment)) {
err = PTR_ERR(ctx->queues.fragment);
ctx->queues.fragment = NULL;
goto err_destroy_queues;
}
return 0;
case DRM_PVR_CTX_TYPE_COMPUTE:
ctx->queues.compute = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_COMPUTE,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.compute)) {
err = PTR_ERR(ctx->queues.compute);
ctx->queues.compute = NULL;
goto err_destroy_queues;
}
return 0;
case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
ctx->queues.transfer = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_TRANSFER_FRAG,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.transfer)) {
err = PTR_ERR(ctx->queues.transfer);
ctx->queues.transfer = NULL;
goto err_destroy_queues;
}
return 0;
}
return -EINVAL;
err_destroy_queues:
pvr_context_destroy_queues(ctx);
return err;
}
/**
* pvr_context_kill_queues() - Kill queues attached to context.
* @ctx: Context to kill queues on.
*
* Killing the queues implies making them unusable for future jobs, while still
* giving the currently submitted jobs a chance to finish. Queue resources will
* stay around until pvr_context_destroy_queues() is called.
*/
static void pvr_context_kill_queues(struct pvr_context *ctx)
{
switch (ctx->type) {
case DRM_PVR_CTX_TYPE_RENDER:
pvr_queue_kill(ctx->queues.fragment);
pvr_queue_kill(ctx->queues.geometry);
break;
case DRM_PVR_CTX_TYPE_COMPUTE:
pvr_queue_kill(ctx->queues.compute);
break;
case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
pvr_queue_kill(ctx->queues.transfer);
break;
}
}
/**
* pvr_context_create() - Create a context.
* @pvr_file: File to attach the created context to.
@@ -214,10 +326,14 @@ int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_co
goto err_put_vm;
}
err = init_fw_objs(ctx, args, ctx->data); err = pvr_context_create_queues(ctx, args, ctx->data);
if (err)
goto err_free_ctx_data;
err = init_fw_objs(ctx, args, ctx->data);
if (err)
goto err_destroy_queues;
err = pvr_fw_object_create(pvr_dev, ctx_size, PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
ctx_fw_data_init, ctx, &ctx->fw_obj);
if (err)
@@ -243,6 +359,9 @@ int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_co
err_destroy_fw_obj:
pvr_fw_object_destroy(ctx->fw_obj);
err_destroy_queues:
pvr_context_destroy_queues(ctx);
err_free_ctx_data:
kfree(ctx->data);
@@ -262,6 +381,7 @@ pvr_context_release(struct kref *ref_count)
struct pvr_device *pvr_dev = ctx->pvr_dev;
xa_erase(&pvr_dev->ctx_ids, ctx->ctx_id);
pvr_context_destroy_queues(ctx);
pvr_fw_object_destroy(ctx->fw_obj);
kfree(ctx->data);
pvr_vm_context_put(ctx->vm_ctx);
@@ -299,6 +419,9 @@ pvr_context_destroy(struct pvr_file *pvr_file, u32 handle)
if (!ctx)
return -EINVAL;
/* Make sure nothing can be queued to the queues after that point. */
pvr_context_kill_queues(ctx);
/* Release the reference held by the handle set. */
pvr_context_put(ctx);
......
@@ -15,6 +15,7 @@
#include "pvr_cccb.h"
#include "pvr_device.h"
#include "pvr_queue.h"
/* Forward declaration from pvr_gem.h. */
struct pvr_fw_object;
@@ -58,8 +59,51 @@ struct pvr_context {
/** @ctx_id: FW context ID. */
u32 ctx_id;
/**
* @faulty: Set to 1 when the context queues had an unfinished job when
* a GPU reset happened.
*
* In that case, the context is in an inconsistent state and can't be
* used anymore.
*/
atomic_t faulty;
/** @queues: Union containing all kind of queues. */
union {
struct {
/** @geometry: Geometry queue. */
struct pvr_queue *geometry;
/** @fragment: Fragment queue. */
struct pvr_queue *fragment;
};
/** @compute: Compute queue. */
struct pvr_queue *compute;
/** @transfer: Transfer queue. */
struct pvr_queue *transfer;
} queues;
};
static __always_inline struct pvr_queue *
pvr_context_get_queue_for_job(struct pvr_context *ctx, enum drm_pvr_job_type type)
{
switch (type) {
case DRM_PVR_JOB_TYPE_GEOMETRY:
return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.geometry : NULL;
case DRM_PVR_JOB_TYPE_FRAGMENT:
return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.fragment : NULL;
case DRM_PVR_JOB_TYPE_COMPUTE:
return ctx->type == DRM_PVR_CTX_TYPE_COMPUTE ? ctx->queues.compute : NULL;
case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
return ctx->type == DRM_PVR_CTX_TYPE_TRANSFER_FRAG ? ctx->queues.transfer : NULL;
}
return NULL;
}
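
A brief usage sketch (the caller shape is assumed, not taken from this patch): the submit path can use this helper to reject a job whose type does not match the context it targets.

/* Illustrative caller only. */
struct pvr_queue *queue = pvr_context_get_queue_for_job(ctx, job_type);

if (!queue)
	return -EINVAL; /* e.g. a compute job submitted to a render context */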
/**
* pvr_context_get() - Take additional reference on context.
* @ctx: Context pointer.
......
@@ -6,7 +6,9 @@
#include "pvr_fw.h"
#include "pvr_power.h"
#include "pvr_queue.h"
#include "pvr_rogue_cr_defs.h"
#include "pvr_stream.h"
#include "pvr_vm.h"
#include <drm/drm_print.h>
@@ -117,6 +119,32 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev)
return 0;
}
/**
* pvr_device_process_active_queues() - Process all queue related events.
* @pvr_dev: PowerVR device to check
*
* This is called any time we receive a FW event. It iterates over all
* active queues and calls pvr_queue_process() on them.
*/
void pvr_device_process_active_queues(struct pvr_device *pvr_dev)
{
struct pvr_queue *queue, *tmp_queue;
LIST_HEAD(active_queues);
mutex_lock(&pvr_dev->queues.lock);
/* Move all active queues to a temporary list. Queues that remain
* active after we're done processing them are re-inserted to
* the queues.active list by pvr_queue_process().
*/
list_splice_init(&pvr_dev->queues.active, &active_queues);
list_for_each_entry_safe(queue, tmp_queue, &active_queues, node)
pvr_queue_process(queue);
mutex_unlock(&pvr_dev->queues.lock);
}
static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data)
{
struct pvr_device *pvr_dev = data;
@@ -132,6 +160,7 @@ static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data)
if (pvr_dev->fw_dev.booted) {
pvr_fwccb_process(pvr_dev);
pvr_kccb_wake_up_waiters(pvr_dev);
pvr_device_process_active_queues(pvr_dev);
}
pm_runtime_mark_last_busy(from_pvr_device(pvr_dev)->dev);
@@ -398,6 +427,8 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev)
else
return -EINVAL;
pvr_stream_create_musthave_masks(pvr_dev);
err = pvr_set_dma_info(pvr_dev);
if (err)
return err;
......
@@ -173,6 +173,26 @@ struct pvr_device {
*/
struct xarray free_list_ids;
/**
* @job_ids: Array of jobs belonging to this device. Array members
* are of type "struct pvr_job *".
*/
struct xarray job_ids;
/**
* @queues: Queue-related fields.
*/
struct {
/** @active: Active queue list. */
struct list_head active;
/** @idle: Idle queue list. */
struct list_head idle;
/** @lock: Lock protecting access to the active/idle lists. */
struct mutex lock;
} queues;
struct {
/** @work: Work item for watchdog callback. */
struct delayed_work work;
@@ -442,6 +462,7 @@ packed_bvnc_to_pvr_gpu_id(u64 bvnc, struct pvr_gpu_id *gpu_id)
int pvr_device_init(struct pvr_device *pvr_dev);
void pvr_device_fini(struct pvr_device *pvr_dev);
void pvr_device_reset(struct pvr_device *pvr_dev);
bool
pvr_device_has_uapi_quirk(struct pvr_device *pvr_dev, u32 quirk);
......
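
For context, a hedged sketch of how the queue bookkeeping added to struct pvr_device above is presumably initialised; pvr_queue.c is not shown on this page, so the body below (in particular the scheduler workqueue field) is an assumption, not the actual implementation.

/* Hedged sketch of pvr_queue_device_init() -- assumed, not the real body. */
int pvr_queue_device_init(struct pvr_device *pvr_dev)
{
	int err;

	INIT_LIST_HEAD(&pvr_dev->queues.active);
	INIT_LIST_HEAD(&pvr_dev->queues.idle);
	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
	if (err)
		return err;

	/* Assumed field: a workqueue handed to drm_sched for job processing. */
	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
	if (!pvr_dev->sched_wq)
		return -ENOMEM;

	return 0;
}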
@@ -7,6 +7,7 @@
#include "pvr_free_list.h"
#include "pvr_gem.h"
#include "pvr_hwrt.h"
#include "pvr_job.h"
#include "pvr_mmu.h"
#include "pvr_power.h"
#include "pvr_rogue_defs.h"
@@ -32,6 +33,8 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/xarray.h>
/**
* DOC: PowerVR (Series 6 and later) and IMG Graphics Driver
@@ -397,7 +400,8 @@ pvr_dev_query_runtime_info_get(struct pvr_device *pvr_dev,
return 0;
}
runtime_info.free_list_min_pages = 0; /* FIXME */ runtime_info.free_list_min_pages =
pvr_get_free_list_min_pages(pvr_dev);
runtime_info.free_list_max_pages =
ROGUE_PM_MAX_FREELIST_SIZE / ROGUE_PM_PAGE_SIZE;
runtime_info.common_store_alloc_region_size =
@@ -1137,7 +1141,20 @@ static int
pvr_ioctl_submit_jobs(struct drm_device *drm_dev, void *raw_args,
struct drm_file *file)
{
return -ENOTTY; struct drm_pvr_ioctl_submit_jobs_args *args = raw_args;
struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
struct pvr_file *pvr_file = to_pvr_file(file);
int idx;
int err;
if (!drm_dev_enter(drm_dev, &idx))
return -EIO;
err = pvr_submit_jobs(pvr_dev, pvr_file, args);
drm_dev_exit(idx);
return err;
}
int
@@ -1353,7 +1370,8 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops);
static struct drm_driver pvr_drm_driver = {
.driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER, .driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER |
DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
.open = pvr_drm_driver_open,
.postclose = pvr_drm_driver_postclose,
.ioctls = pvr_drm_driver_ioctls,
@@ -1386,8 +1404,15 @@ pvr_probe(struct platform_device *plat_dev)
drm_dev = &pvr_dev->base;
platform_set_drvdata(plat_dev, drm_dev);
init_rwsem(&pvr_dev->reset_sem);
pvr_context_device_init(pvr_dev);
err = pvr_queue_device_init(pvr_dev);
if (err)
goto err_context_fini;
devm_pm_runtime_enable(&plat_dev->dev);
pm_runtime_mark_last_busy(&plat_dev->dev);
@@ -1404,6 +1429,7 @@ pvr_probe(struct platform_device *plat_dev)
goto err_device_fini;
xa_init_flags(&pvr_dev->free_list_ids, XA_FLAGS_ALLOC1);
xa_init_flags(&pvr_dev->job_ids, XA_FLAGS_ALLOC1);
return 0;
@@ -1413,6 +1439,11 @@ pvr_probe(struct platform_device *plat_dev)
err_watchdog_fini:
pvr_watchdog_fini(pvr_dev);
pvr_queue_device_fini(pvr_dev);
err_context_fini:
pvr_context_device_fini(pvr_dev);
return err;
}
@@ -1422,14 +1453,17 @@ pvr_remove(struct platform_device *plat_dev)
struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
WARN_ON(!xa_empty(&pvr_dev->job_ids));
WARN_ON(!xa_empty(&pvr_dev->free_list_ids));
xa_destroy(&pvr_dev->job_ids);
xa_destroy(&pvr_dev->free_list_ids);
pm_runtime_suspend(drm_dev->dev);
pvr_device_fini(pvr_dev);
drm_dev_unplug(drm_dev);
pvr_watchdog_fini(pvr_dev);
pvr_queue_device_fini(pvr_dev);
pvr_context_device_fini(pvr_dev);
return 0;
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#ifndef PVR_JOB_H
#define PVR_JOB_H
#include <uapi/drm/pvr_drm.h>
#include <linux/kref.h>
#include <linux/types.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>
#include "pvr_power.h"
/* Forward declaration from "pvr_context.h". */
struct pvr_context;
/* Forward declarations from "pvr_device.h". */
struct pvr_device;
struct pvr_file;
/* Forward declarations from "pvr_hwrt.h". */
struct pvr_hwrt_data;
/* Forward declaration from "pvr_queue.h". */
struct pvr_queue;
struct pvr_job {
/** @base: drm_sched_job object. */
struct drm_sched_job base;
/** @ref_count: Refcount for job. */
struct kref ref_count;
/** @type: Type of job. */
enum drm_pvr_job_type type;
/** @id: Job ID number. */
u32 id;
/**
* @paired_job: Job paired to this job.
*
* This field is only meaningful for geometry and fragment jobs.
*
* Paired jobs are executed on the same context, and need to be submitted
* atomically to the FW, to make sure the partial render logic has a
* fragment job to execute when the Parameter Manager runs out of memory.
*
* The geometry job should point to the fragment job it's paired with,
* and the fragment job should point to the geometry job it's paired with.
*/
struct pvr_job *paired_job;
/** @cccb_fence: Fence used to wait for CCCB space. */
struct dma_fence *cccb_fence;
/** @kccb_fence: Fence used to wait for KCCB space. */
struct dma_fence *kccb_fence;
/** @done_fence: Fence to signal when the job is done. */
struct dma_fence *done_fence;
/** @pvr_dev: Device pointer. */
struct pvr_device *pvr_dev;
/** @ctx: Pointer to owning context. */
struct pvr_context *ctx;
/** @cmd: Command data. Format depends on @type. */
void *cmd;
/** @cmd_len: Length of command data, in bytes. */
u32 cmd_len;
/**
* @fw_ccb_cmd_type: Firmware CCB command type. Must be one of %ROGUE_FWIF_CCB_CMD_TYPE_*.
*/
u32 fw_ccb_cmd_type;
/** @hwrt: HWRT object. Will be NULL for compute and transfer jobs. */
struct pvr_hwrt_data *hwrt;
/**
* @has_pm_ref: True if the job has a power ref, thus forcing the GPU to stay on until
* the job is done.
*/
bool has_pm_ref;
};
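
To illustrate the @paired_job field documented above, a minimal sketch of how a submit path might tie a geometry job to its fragment job; the actual pairing code lives in pvr_job.c (not shown on this page) and also handles reference counting and error paths.

/* Illustration only: link the two halves of a combined geom+frag submission. */
geom_job->paired_job = frag_job;
frag_job->paired_job = geom_job;
/* Both jobs must then be pushed back to back so the FW sees them atomically. */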
/**
* pvr_job_get() - Take additional reference on job.
* @job: Job pointer.
*
* Call pvr_job_put() to release.
*
* Returns:
* * The requested job on success, or
* * %NULL if no job pointer passed.
*/
static __always_inline struct pvr_job *
pvr_job_get(struct pvr_job *job)
{
if (job)
kref_get(&job->ref_count);
return job;
}
void pvr_job_put(struct pvr_job *job);
/**
* pvr_job_release_pm_ref() - Release the PM ref if the job acquired it.
* @job: The job to release the PM ref on.
*/
static __always_inline void
pvr_job_release_pm_ref(struct pvr_job *job)
{
if (job->has_pm_ref) {
pvr_power_put(job->pvr_dev);
job->has_pm_ref = false;
}
}
/**
* pvr_job_get_pm_ref() - Get a PM ref and attach it to the job.
* @job: The job to attach the PM ref to.
*
* Return:
* * 0 on success, or
* * Any error returned by pvr_power_get() otherwise.
*/
static __always_inline int
pvr_job_get_pm_ref(struct pvr_job *job)
{
int err;
if (job->has_pm_ref)
return 0;
err = pvr_power_get(job->pvr_dev);
if (!err)
job->has_pm_ref = true;
return err;
}
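
A short usage sketch of the two helpers above (call sites assumed, not shown in this patch): the PM reference is taken before the job's commands reach the firmware and dropped once the job's done fence signals.

/* Before handing the job's commands to the FW: */
err = pvr_job_get_pm_ref(job);
if (err)
	return err;

/* ...and once the job has completed (done_fence signalled): */
pvr_job_release_pm_ref(job);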
int pvr_job_wait_first_non_signaled_native_dep(struct pvr_job *job);
bool pvr_job_non_native_deps_done(struct pvr_job *job);
int pvr_job_fits_in_cccb(struct pvr_job *job, unsigned long native_dep_count);
void pvr_job_submit(struct pvr_job *job);
int pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
struct drm_pvr_ioctl_submit_jobs_args *args);
#endif /* PVR_JOB_H */
@@ -5,6 +5,7 @@
#include "pvr_fw.h"
#include "pvr_fw_startstop.h"
#include "pvr_power.h"
#include "pvr_queue.h"
#include "pvr_rogue_fwif.h"
#include <drm/drm_drv.h>
@@ -155,6 +156,21 @@ pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
pvr_dev->watchdog.kccb_stall_count = 0;
return true;
}
} else if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed) {
bool has_active_contexts;
mutex_lock(&pvr_dev->queues.lock);
has_active_contexts = list_empty(&pvr_dev->queues.active);
mutex_unlock(&pvr_dev->queues.lock);
if (has_active_contexts) {
/* Send a HEALTH_CHECK command so we can verify FW is still alive. */
struct rogue_fwif_kccb_cmd health_check_cmd;
health_check_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_HEALTH_CHECK;
pvr_kccb_send_cmd_powered(pvr_dev, &health_check_cmd, NULL);
}
} else {
pvr_dev->watchdog.old_kccb_cmds_executed = kccb_cmds_executed;
pvr_dev->watchdog.kccb_stall_count = 0;
@@ -318,6 +334,7 @@ pvr_power_device_idle(struct device *dev)
int
pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
{
bool queues_disabled = false;
int err;
/*
@@ -337,6 +354,11 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
disable_irq(pvr_dev->irq);
do {
if (hard_reset) {
pvr_queue_device_pre_reset(pvr_dev);
queues_disabled = true;
}
err = pvr_power_fw_disable(pvr_dev, hard_reset);
if (!err) {
if (hard_reset) {
@@ -372,6 +394,9 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
}
} while (err);
if (queues_disabled)
pvr_queue_device_post_reset(pvr_dev);
enable_irq(pvr_dev->irq);
up_write(&pvr_dev->reset_sem);
@@ -386,6 +411,9 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
/* Leave IRQs disabled if the device is lost. */
if (queues_disabled)
pvr_queue_device_post_reset(pvr_dev);
err_up_write:
up_write(&pvr_dev->reset_sem);
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#ifndef PVR_QUEUE_H
#define PVR_QUEUE_H
#include <drm/gpu_scheduler.h>
#include "pvr_cccb.h"
#include "pvr_device.h"
struct pvr_context;
struct pvr_queue;
/**
* struct pvr_queue_fence_ctx - Queue fence context
*
* Used to implement dma_fence_ops for pvr_job::{done,cccb}_fence.
*/
struct pvr_queue_fence_ctx {
/** @id: Fence context ID allocated with dma_fence_context_alloc(). */
u64 id;
/** @seqno: Sequence number incremented each time a fence is created. */
atomic_t seqno;
/** @lock: Lock used to synchronize access to fences allocated by this context. */
spinlock_t lock;
};
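
For context, a hedged sketch of how such a fence context is typically consumed when a new queue fence is initialised; the fence ops symbol here is hypothetical, the real one being defined in pvr_queue.c (not shown on this page).

/* Sketch: back a pvr_queue_fence with this context's ID, lock and seqno. */
dma_fence_init(&fence->base, &example_queue_fence_ops, &fence_ctx->lock,
	       fence_ctx->id, atomic_inc_return(&fence_ctx->seqno));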
/**
* struct pvr_queue_cccb_fence_ctx - CCCB fence context
*
* Context used to manage fences controlling access to the CCCB. No fences are
* issued if there's enough space in the CCCB to push job commands.
*/
struct pvr_queue_cccb_fence_ctx {
/** @base: Base queue fence context. */
struct pvr_queue_fence_ctx base;
/**
* @job: Job waiting for CCCB space.
*
* Thanks to the serialization done at the drm_sched_entity level,
* there's no more than one job waiting for CCCB at a given time.
*
* This field is NULL if no jobs are currently waiting for CCCB space.
*
* Must be accessed with @job_lock held.
*/
struct pvr_job *job;
/** @lock: Lock protecting access to the job object. */
struct mutex job_lock;
};
/**
* struct pvr_queue_fence - Queue fence object
*/
struct pvr_queue_fence {
/** @base: Base dma_fence. */
struct dma_fence base;
/** @queue: Queue that created this fence. */
struct pvr_queue *queue;
};
/**
* struct pvr_queue - Job queue
*
* Used to queue and track execution of pvr_job objects.
*/
struct pvr_queue {
/** @scheduler: Single entity scheduler used to push jobs to this queue. */
struct drm_gpu_scheduler scheduler;
/** @entity: Scheduling entity backing this queue. */
struct drm_sched_entity entity;
/** @type: Type of jobs queued to this queue. */
enum drm_pvr_job_type type;
/** @ctx: Context object this queue is bound to. */
struct pvr_context *ctx;
/** @node: Used to add the queue to the active/idle queue list. */
struct list_head node;
/**
* @in_flight_job_count: Number of jobs submitted to the CCCB that
* have not been processed yet.
*/
atomic_t in_flight_job_count;
/**
* @cccb_fence_ctx: CCCB fence context.
*
* Used to control access when the CCCB is full, such that we don't
* end up trying to push commands to the CCCB if there's not enough
* space to receive all commands needed for a job to complete.
*/
struct pvr_queue_cccb_fence_ctx cccb_fence_ctx;
/** @job_fence_ctx: Job fence context object. */
struct pvr_queue_fence_ctx job_fence_ctx;
/** @timeline_ufo: Timeline UFO for the context queue. */
struct {
/** @fw_obj: FW object representing the UFO value. */
struct pvr_fw_object *fw_obj;
/** @value: CPU mapping of the UFO value. */
u32 *value;
} timeline_ufo;
/**
* @last_queued_job_scheduled_fence: The scheduled fence of the last
* job queued to this queue.
*
* We use it to insert frag -> geom dependencies when issuing combined
* geom+frag jobs, to guarantee that the fragment job that's part of
* the combined operation comes after all fragment jobs that were queued
* before it.
*/
struct dma_fence *last_queued_job_scheduled_fence;
/** @cccb: Client Circular Command Buffer. */
struct pvr_cccb cccb;
/** @reg_state_obj: FW object representing the register state of this queue. */
struct pvr_fw_object *reg_state_obj;
/** @ctx_offset: Offset of the queue context in the FW context object. */
u32 ctx_offset;
/** @callstack_addr: Initial call stack address for register state object. */
u64 callstack_addr;
};
bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f);
int pvr_queue_job_init(struct pvr_job *job);
void pvr_queue_job_cleanup(struct pvr_job *job);
void pvr_queue_job_push(struct pvr_job *job);
struct dma_fence *pvr_queue_job_arm(struct pvr_job *job);
struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
enum drm_pvr_job_type type,
struct drm_pvr_ioctl_create_context_args *args,
void *fw_ctx_map);
void pvr_queue_kill(struct pvr_queue *queue);
void pvr_queue_destroy(struct pvr_queue *queue);
void pvr_queue_process(struct pvr_queue *queue);
void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev);
void pvr_queue_device_post_reset(struct pvr_device *pvr_dev);
int pvr_queue_device_init(struct pvr_device *pvr_dev);
void pvr_queue_device_fini(struct pvr_device *pvr_dev);
#endif /* PVR_QUEUE_H */
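
Taken together, the entry points above suggest the following per-job call order; this is a hedged reconstruction, the real flow living in pvr_job.c/pvr_queue.c (not shown on this page).

/* Hedged sketch of a job's path through the queue API. */
err = pvr_queue_job_init(job);        /* set up scheduler/CCCB resources */
if (err)
	return err;

done_fence = pvr_queue_job_arm(job);  /* arm the drm_sched job, get a fence */
/* ...attach done_fence to the syncobjs that should signal completion... */
pvr_queue_job_push(job);              /* hand the job over to the scheduler */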
// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#include <uapi/drm/pvr_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/gpu_scheduler.h>
#include <linux/xarray.h>
#include <linux/dma-fence-unwrap.h>
#include "pvr_device.h"
#include "pvr_queue.h"
#include "pvr_sync.h"
static int
pvr_check_sync_op(const struct drm_pvr_sync_op *sync_op)
{
u8 handle_type;
if (sync_op->flags & ~DRM_PVR_SYNC_OP_FLAGS_MASK)
return -EINVAL;
handle_type = sync_op->flags & DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK;
if (handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ &&
handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ)
return -EINVAL;
if (handle_type == DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ &&
sync_op->value != 0)
return -EINVAL;
return 0;
}
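
For reference, a hedged example of a sync operation that passes the checks above: a plain (non-timeline) syncobj used as a wait, so @value must be 0; a signal operation would additionally set DRM_PVR_SYNC_OP_FLAG_SIGNAL. The handle value is a placeholder.

/* Example wait op on a binary syncobj (placeholder handle). */
struct drm_pvr_sync_op wait_op = {
	.handle = in_syncobj_handle,
	.flags = DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ,
	.value = 0,
};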
static void
pvr_sync_signal_free(struct pvr_sync_signal *sig_sync)
{
if (!sig_sync)
return;
drm_syncobj_put(sig_sync->syncobj);
dma_fence_chain_free(sig_sync->chain);
dma_fence_put(sig_sync->fence);
kfree(sig_sync);
}
void
pvr_sync_signal_array_cleanup(struct xarray *array)
{
struct pvr_sync_signal *sig_sync;
unsigned long i;
xa_for_each(array, i, sig_sync)
pvr_sync_signal_free(sig_sync);
xa_destroy(array);
}
static struct pvr_sync_signal *
pvr_sync_signal_array_add(struct xarray *array, struct drm_file *file, u32 handle, u64 point)
{
struct pvr_sync_signal *sig_sync;
struct dma_fence *cur_fence;
int err;
u32 id;
sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL);
if (!sig_sync)
return ERR_PTR(-ENOMEM);
sig_sync->handle = handle;
sig_sync->point = point;
if (point > 0) {
sig_sync->chain = dma_fence_chain_alloc();
if (!sig_sync->chain) {
err = -ENOMEM;
goto err_free_sig_sync;
}
}
sig_sync->syncobj = drm_syncobj_find(file, handle);
if (!sig_sync->syncobj) {
err = -EINVAL;
goto err_free_sig_sync;
}
/* Retrieve the current fence attached to that point. It's
* perfectly fine to get a NULL fence here, it just means there's
* no fence attached to that point yet.
*/
if (!drm_syncobj_find_fence(file, handle, point, 0, &cur_fence))
sig_sync->fence = cur_fence;
err = xa_alloc(array, &id, sig_sync, xa_limit_32b, GFP_KERNEL);
if (err)
goto err_free_sig_sync;
return sig_sync;
err_free_sig_sync:
pvr_sync_signal_free(sig_sync);
return ERR_PTR(err);
}
static struct pvr_sync_signal *
pvr_sync_signal_array_search(struct xarray *array, u32 handle, u64 point)
{
struct pvr_sync_signal *sig_sync;
unsigned long i;
xa_for_each(array, i, sig_sync) {
if (handle == sig_sync->handle && point == sig_sync->point)
return sig_sync;
}
return NULL;
}
static struct pvr_sync_signal *
pvr_sync_signal_array_get(struct xarray *array, struct drm_file *file, u32 handle, u64 point)
{
struct pvr_sync_signal *sig_sync;
sig_sync = pvr_sync_signal_array_search(array, handle, point);
if (sig_sync)
return sig_sync;
return pvr_sync_signal_array_add(array, file, handle, point);
}
int
pvr_sync_signal_array_collect_ops(struct xarray *array,
struct drm_file *file,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops)
{
for (u32 i = 0; i < sync_op_count; i++) {
struct pvr_sync_signal *sig_sync;
int ret;
if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL))
continue;
ret = pvr_check_sync_op(&sync_ops[i]);
if (ret)
return ret;
sig_sync = pvr_sync_signal_array_get(array, file,
sync_ops[i].handle,
sync_ops[i].value);
if (IS_ERR(sig_sync))
return PTR_ERR(sig_sync);
}
return 0;
}
int
pvr_sync_signal_array_update_fences(struct xarray *array,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct dma_fence *done_fence)
{
for (u32 i = 0; i < sync_op_count; i++) {
struct dma_fence *old_fence;
struct pvr_sync_signal *sig_sync;
if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL))
continue;
sig_sync = pvr_sync_signal_array_search(array, sync_ops[i].handle,
sync_ops[i].value);
if (WARN_ON(!sig_sync))
return -EINVAL;
old_fence = sig_sync->fence;
sig_sync->fence = dma_fence_get(done_fence);
dma_fence_put(old_fence);
if (WARN_ON(!sig_sync->fence))
return -EINVAL;
}
return 0;
}
void
pvr_sync_signal_array_push_fences(struct xarray *array)
{
struct pvr_sync_signal *sig_sync;
unsigned long i;
xa_for_each(array, i, sig_sync) {
if (sig_sync->chain) {
drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain,
sig_sync->fence, sig_sync->point);
sig_sync->chain = NULL;
} else {
drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence);
}
}
}
static int
pvr_sync_add_dep_to_job(struct drm_sched_job *job, struct dma_fence *f)
{
struct dma_fence_unwrap iter;
u32 native_fence_count = 0;
struct dma_fence *uf;
int err = 0;
dma_fence_unwrap_for_each(uf, &iter, f) {
if (pvr_queue_fence_is_ufo_backed(uf))
native_fence_count++;
}
/* No need to unwrap the fence if it's fully non-native. */
if (!native_fence_count)
return drm_sched_job_add_dependency(job, f);
dma_fence_unwrap_for_each(uf, &iter, f) {
/* There's no dma_fence_unwrap_stop() helper cleaning up the refs
* owned by dma_fence_unwrap(), so let's just iterate over all
* entries without doing anything when something failed.
*/
if (err)
continue;
if (pvr_queue_fence_is_ufo_backed(uf)) {
struct drm_sched_fence *s_fence = to_drm_sched_fence(uf);
/* If this is a native dependency, we wait for the scheduled fence,
* and we will let pvr_queue_run_job() issue FW waits.
*/
err = drm_sched_job_add_dependency(job,
dma_fence_get(&s_fence->scheduled));
} else {
err = drm_sched_job_add_dependency(job, dma_fence_get(uf));
}
}
dma_fence_put(f);
return err;
}
int
pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct xarray *signal_array)
{
int err = 0;
if (!sync_op_count)
return 0;
for (u32 i = 0; i < sync_op_count; i++) {
struct pvr_sync_signal *sig_sync;
struct dma_fence *fence;
if (sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL)
continue;
err = pvr_check_sync_op(&sync_ops[i]);
if (err)
return err;
sig_sync = pvr_sync_signal_array_search(signal_array, sync_ops[i].handle,
sync_ops[i].value);
if (sig_sync) {
if (WARN_ON(!sig_sync->fence))
return -EINVAL;
fence = dma_fence_get(sig_sync->fence);
} else {
err = drm_syncobj_find_fence(from_pvr_file(pvr_file), sync_ops[i].handle,
sync_ops[i].value, 0, &fence);
if (err)
return err;
}
err = pvr_sync_add_dep_to_job(job, fence);
if (err)
return err;
}
return 0;
}
/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#ifndef PVR_SYNC_H
#define PVR_SYNC_H
#include <uapi/drm/pvr_drm.h>
/* Forward declaration from <linux/xarray.h>. */
struct xarray;
/* Forward declaration from <drm/drm_file.h>. */
struct drm_file;
/* Forward declaration from <drm/gpu_scheduler.h>. */
struct drm_sched_job;
/* Forward declaration from "pvr_device.h". */
struct pvr_file;
/**
* struct pvr_sync_signal - Object encoding a syncobj signal operation
*
* The job submission logic collects all signal operations in an array of
* pvr_sync_signal objects. This array also serves as a cache to get the
* latest dma_fence when multiple jobs are submitted at once, and one job
* signals a syncobj point that's later waited on by a subsequent job.
*/
struct pvr_sync_signal {
/** @handle: Handle of the syncobj to signal. */
u32 handle;
/**
* @point: Point to signal in the syncobj.
*
* Only relevant for timeline syncobjs.
*/
u64 point;
/** @syncobj: Syncobj retrieved from the handle. */
struct drm_syncobj *syncobj;
/**
* @chain: Chain object used to link the new fence with the
* existing timeline syncobj.
*
* Should be zero when manipulating a regular syncobj.
*/
struct dma_fence_chain *chain;
/**
* @fence: New fence object to attach to the syncobj.
*
* This pointer starts with the current fence bound to
* the <handle,point> pair.
*/
struct dma_fence *fence;
};
void
pvr_sync_signal_array_cleanup(struct xarray *array);
int
pvr_sync_signal_array_collect_ops(struct xarray *array,
struct drm_file *file,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops);
int
pvr_sync_signal_array_update_fences(struct xarray *array,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct dma_fence *done_fence);
void
pvr_sync_signal_array_push_fences(struct xarray *array);
int
pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct xarray *signal_array);
#endif /* PVR_SYNC_H */
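
As a closing sketch, the call order the submission path is expected to follow with these helpers, reconstructed (and hedged) from the declarations above; error handling is omitted and the real sequence lives in pvr_job.c.

/* Hedged sketch of sync handling for a batch of jobs. */
struct xarray signal_array;

xa_init_flags(&signal_array, XA_FLAGS_ALLOC);

/* 1. Collect all signal ops so later jobs in the batch can wait on them. */
pvr_sync_signal_array_collect_ops(&signal_array, file, sync_op_count, sync_ops);

/* 2. Turn wait ops into drm_sched dependencies of each job. */
pvr_sync_add_deps_to_job(pvr_file, &job->base, sync_op_count, sync_ops,
			 &signal_array);

/* 3. Once a job is armed, record its done fence for the signal ops. */
pvr_sync_signal_array_update_fences(&signal_array, sync_op_count, sync_ops,
				    done_fence);

/* 4. After every job has been pushed, publish the fences and clean up. */
pvr_sync_signal_array_push_fences(&signal_array);
pvr_sync_signal_array_cleanup(&signal_array);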