Commit 463873d5 authored by Eric Anholt

drm/vc4: Add an API for creating GPU shaders in GEM BOs.

Since we have no MMU, the kernel needs to validate that the submitted
shader code won't make any accesses to memory that the user doesn't
control, which involves banning some operations (general purpose DMA
writes), and tracking where we need to write out pointers for other
operations (texture sampling).  Once it's validated, we return a GEM
BO containing the shader, which doesn't allow mapping for write or
exporting to other subsystems.

v2: Use __u32-style types.
Signed-off-by: Eric Anholt <eric@anholt.net>
parent d5bc60f6
......@@ -10,7 +10,8 @@ vc4-y := \
vc4_kms.o \
vc4_hdmi.o \
vc4_hvs.o \
vc4_plane.o
vc4_plane.o \
vc4_validate_shaders.o
vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
......
......@@ -79,6 +79,12 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
struct drm_gem_object *obj = &bo->base.base;
struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
if (bo->validated_shader) {
kfree(bo->validated_shader->texture_samples);
kfree(bo->validated_shader);
bo->validated_shader = NULL;
}
vc4->bo_stats.num_allocated--;
vc4->bo_stats.size_allocated -= obj->size;
drm_gem_cma_free_object(obj);
......@@ -315,6 +321,12 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
goto out;
}
if (bo->validated_shader) {
kfree(bo->validated_shader->texture_samples);
kfree(bo->validated_shader);
bo->validated_shader = NULL;
}
bo->free_time = jiffies;
list_add(&bo->size_head, cache_list);
list_add(&bo->unref_head, &vc4->bo_cache.time_list);
......@@ -347,6 +359,78 @@ static void vc4_bo_cache_time_timer(unsigned long data)
schedule_work(&vc4->bo_cache.time_work);
}
/**
 * vc4_prime_export - PRIME export hook that refuses validated shader BOs.
 * @dev: DRM device owning the object
 * @obj: GEM object the caller wants to export
 * @flags: export flags, forwarded unchanged to drm_gem_prime_export()
 *
 * Return: the result of drm_gem_prime_export() for ordinary BOs, or
 * ERR_PTR(-EINVAL) when @obj carries validated shader state.
 */
struct dma_buf *
vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
{
	struct vc4_bo *candidate = to_vc4_bo(obj);

	/* Ordinary BOs take the stock DRM export path. */
	if (!candidate->validated_shader)
		return drm_gem_prime_export(dev, obj, flags);

	DRM_ERROR("Attempting to export shader BO\n");
	return ERR_PTR(-EINVAL);
}
/**
 * vc4_mmap - mmap handler installed in the vc4 file_operations.
 * @filp: DRM file being mapped
 * @vma: VMA describing the requested mapping
 *
 * Lets drm_gem_mmap() look up and attach the GEM object, rejects writable
 * mappings of validated shader BOs, and then maps the whole buffer with
 * dma_mmap_writecombine().
 *
 * Return: 0 on success; the error from drm_gem_mmap() or
 * dma_mmap_writecombine(); -EINVAL for a writable mapping of a shader BO.
 */
int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;
	int ret;

	ret = drm_gem_mmap(filp, vma);
	if (ret)
		return ret;

	gem_obj = vma->vm_private_data;
	bo = to_vc4_bo(gem_obj);

	/*
	 * NOTE(review): only VM_WRITE is checked here; whether a read-only
	 * mapping could later be made writable (VM_MAYWRITE) is not
	 * addressed in this function -- confirm that is acceptable.
	 */
	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
		ret = -EINVAL;
		/*
		 * drm_gem_mmap() already succeeded, so it has to be undone
		 * the same way as for a dma_mmap_writecombine() failure;
		 * returning directly would leak what it set up for the VMA.
		 */
		goto err_vm_close;
	}

	/*
	 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
	 * the whole buffer.
	 */
	vma->vm_flags &= ~VM_PFNMAP;
	vma->vm_pgoff = 0;

	ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
				    bo->base.vaddr, bo->base.paddr,
				    vma->vm_end - vma->vm_start);
	if (ret)
		goto err_vm_close;

	return 0;

err_vm_close:
	drm_gem_vm_close(vma);
	return ret;
}
/**
 * vc4_prime_mmap - PRIME mmap hook that blocks writable shader mappings.
 * @obj: GEM object being mapped
 * @vma: VMA describing the requested mapping
 *
 * Return: the result of drm_gem_cma_prime_mmap(), or -EINVAL when a
 * validated shader BO is being mapped with VM_WRITE set.
 */
int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
	struct vc4_bo *target = to_vc4_bo(obj);

	/* Anything that is not "shader BO + writable" uses the CMA helper. */
	if (!target->validated_shader || !(vma->vm_flags & VM_WRITE))
		return drm_gem_cma_prime_mmap(obj, vma);

	DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
	return -EINVAL;
}
/**
 * vc4_prime_vmap - PRIME vmap hook that refuses validated shader BOs.
 * @obj: GEM object for which a kernel mapping is requested
 *
 * Return: the result of drm_gem_cma_prime_vmap() for ordinary BOs, or
 * ERR_PTR(-EINVAL) when @obj carries validated shader state.
 */
void *vc4_prime_vmap(struct drm_gem_object *obj)
{
	struct vc4_bo *target = to_vc4_bo(obj);

	/* Ordinary BOs are handed to the CMA helper untouched. */
	if (!target->validated_shader)
		return drm_gem_cma_prime_vmap(obj);

	DRM_ERROR("mmaping of shader BOs not allowed.\n");
	return ERR_PTR(-EINVAL);
}
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
......@@ -387,6 +471,62 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
return 0;
}
int
vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct drm_vc4_create_shader_bo *args = data;
struct vc4_bo *bo = NULL;
int ret;
if (args->size == 0)
return -EINVAL;
if (args->size % sizeof(u64) != 0)
return -EINVAL;
if (args->flags != 0) {
DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
return -EINVAL;
}
if (args->pad != 0) {
DRM_INFO("Pad set: 0x%08x\n", args->pad);
return -EINVAL;
}
bo = vc4_bo_create(dev, args->size, true);
if (!bo)
return -ENOMEM;
ret = copy_from_user(bo->base.vaddr,
(void __user *)(uintptr_t)args->data,
args->size);
if (ret != 0)
goto fail;
/* Clear the rest of the memory from allocating from the BO
* cache.
*/
memset(bo->base.vaddr + args->size, 0,
bo->base.base.size - args->size);
bo->validated_shader = vc4_validate_shader(&bo->base);
if (!bo->validated_shader) {
ret = -EINVAL;
goto fail;
}
/* We have to create the handle after validation, to avoid
* races for users to do doing things like mmap the shader BO.
*/
ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
fail:
drm_gem_object_unreference_unlocked(&bo->base.base);
return ret;
}
void vc4_bo_cache_init(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
......
......@@ -64,7 +64,7 @@ static const struct file_operations vc4_drm_fops = {
.open = drm_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
.mmap = drm_gem_cma_mmap,
.mmap = vc4_mmap,
.poll = drm_poll,
.read = drm_read,
#ifdef CONFIG_COMPAT
......@@ -76,6 +76,7 @@ static const struct file_operations vc4_drm_fops = {
/*
 * Table of vc4 driver ioctls: each DRM_IOCTL_DEF_DRV() entry pairs an ioctl
 * name with the handler function that services it. The third argument is
 * passed as 0 for every entry.
 */
static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
/* Creates a BO from user-supplied shader code after validating it. */
DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
};
static struct drm_driver vc4_drm_driver = {
......@@ -102,12 +103,12 @@ static struct drm_driver vc4_drm_driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = drm_gem_prime_import,
.gem_prime_export = drm_gem_prime_export,
.gem_prime_export = vc4_prime_export,
.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
.gem_prime_vmap = drm_gem_cma_prime_vmap,
.gem_prime_vmap = vc4_prime_vmap,
.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
.gem_prime_mmap = drm_gem_cma_prime_mmap,
.gem_prime_mmap = vc4_prime_mmap,
.dumb_create = vc4_dumb_create,
.dumb_map_offset = drm_gem_cma_dumb_map_offset,
......
......@@ -69,6 +69,11 @@ struct vc4_bo {
/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
struct list_head size_head;
/* Struct for shader validation state, if created by
* DRM_IOCTL_VC4_CREATE_SHADER_BO.
*/
struct vc4_validated_shader_info *validated_shader;
};
static inline struct vc4_bo *
......@@ -117,6 +122,42 @@ to_vc4_encoder(struct drm_encoder *encoder)
/*
 * HVS register accessors. Both expand to an access relative to
 * vc4->hvs->regs, so a variable named "vc4" must be in scope at the call
 * site. The arguments are parenthesized so that compound expressions
 * (e.g. "cond ? a : b" or "base | idx") are applied as a single offset
 * rather than being split by operator precedence.
 */
#define HVS_READ(offset) readl(vc4->hvs->regs + (offset))
#define HVS_WRITE(offset, val) writel((val), vc4->hvs->regs + (offset))
/**
 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
 * setup parameters.
 *
 * This will be used at draw time to relocate the reference to the texture
 * contents in p0, and validate that the offset combined with
 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
 * Note that the hardware treats unprovided config parameters as 0, so not all
 * of them need to be set up for every texture sample, and we'll store ~0 as
 * the offset to mark the unused ones.
 *
 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
 * Setup") for definitions of the texture parameters.
 */
struct vc4_texture_sample_info {
/* NOTE(review): presumably marks a direct memory lookup as opposed to a
 * regular texture sample; the code that sets it is not visible here --
 * confirm against the shader validator.
 */
bool is_direct;
/* UBO offsets of setup parameters p0..p3; ~0 marks an unused one. */
uint32_t p_offset[4];
};
/**
 * struct vc4_validated_shader_info - information about validated shaders that
 * needs to be used from command list validation.
 *
 * For a given shader, each time a shader state record references it, we need
 * to verify that the shader doesn't read more uniforms than the shader state
 * record's uniform BO pointer can provide, and we need to apply relocations
 * and validate the shader state record's uniforms that define the texture
 * samples.
 *
 * An instance hangs off struct vc4_bo::validated_shader and is released with
 * kfree(), together with @texture_samples, when the BO is destroyed or
 * returned to the BO cache.
 */
struct vc4_validated_shader_info {
/* NOTE(review): presumably the amount of uniform data the shader reads;
 * units (bytes vs. words) are not visible here -- confirm.
 */
uint32_t uniforms_size;
/* NOTE(review): presumably the size of the user-supplied uniform source
 * data, as opposed to the relocated stream -- confirm.
 */
uint32_t uniforms_src_size;
/* NOTE(review): presumably the number of entries in texture_samples. */
uint32_t num_texture_samples;
/* Separately allocated array of per-sample info; freed with kfree(). */
struct vc4_texture_sample_info *texture_samples;
};
/**
* _wait_for - magic (register) wait macro
*
......@@ -157,8 +198,13 @@ struct dma_buf *vc4_prime_export(struct drm_device *dev,
struct drm_gem_object *obj, int flags);
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
void *vc4_prime_vmap(struct drm_gem_object *obj);
void vc4_bo_cache_init(struct drm_device *dev);
void vc4_bo_cache_destroy(struct drm_device *dev);
int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
......@@ -194,3 +240,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
enum drm_plane_type type);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(struct drm_plane_state *state);
/* vc4_validate_shaders.c */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef VC4_QPU_DEFINES_H
#define VC4_QPU_DEFINES_H
/*
 * Values of the QPU_OP_ADD instruction field. Every enumerator is spelled
 * out explicitly; the numbering has two gaps (9-11 and 25-29) that carry no
 * name in this header.
 */
enum qpu_op_add {
	QPU_A_NOP     = 0,
	QPU_A_FADD    = 1,
	QPU_A_FSUB    = 2,
	QPU_A_FMIN    = 3,
	QPU_A_FMAX    = 4,
	QPU_A_FMINABS = 5,
	QPU_A_FMAXABS = 6,
	QPU_A_FTOI    = 7,
	QPU_A_ITOF    = 8,

	QPU_A_ADD     = 12,
	QPU_A_SUB     = 13,
	QPU_A_SHR     = 14,
	QPU_A_ASR     = 15,
	QPU_A_ROR     = 16,
	QPU_A_SHL     = 17,
	QPU_A_MIN     = 18,
	QPU_A_MAX     = 19,
	QPU_A_AND     = 20,
	QPU_A_OR      = 21,
	QPU_A_XOR     = 22,
	QPU_A_NOT     = 23,
	QPU_A_CLZ     = 24,

	QPU_A_V8ADDS  = 30,
	QPU_A_V8SUBS  = 31,
};
/* Values of the QPU_OP_MUL instruction field, numbered 0 through 7. */
enum qpu_op_mul {
	QPU_M_NOP    = 0,
	QPU_M_FMUL   = 1,
	QPU_M_MUL24  = 2,
	QPU_M_V8MULD = 3,
	QPU_M_V8MIN  = 4,
	QPU_M_V8MAX  = 5,
	QPU_M_V8ADDS = 6,
	QPU_M_V8SUBS = 7,
};
/*
 * Read addresses for the QPU_RADDR_A / QPU_RADDR_B instruction fields.
 * Values 0-31 are the plain regfile a or b registers; the named entries
 * below are special addresses.
 */
enum qpu_raddr {
	QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
	/* 0-31 are the plain regfile a or b fields */
	QPU_R_UNIF = 32,
	QPU_R_VARY = 35,
	QPU_R_ELEM_QPU = 38,
	QPU_R_NOP,
	QPU_R_XY_PIXEL_COORD = 41,
	/*
	 * The MS/REV flags live at address 42, the same number used for
	 * QPU_W_MS_FLAGS / QPU_W_REV_FLAG on the write side. Using 41 here
	 * made this name alias QPU_R_XY_PIXEL_COORD.
	 */
	QPU_R_MS_REV_FLAGS = 42,
	QPU_R_VPM = 48,
	QPU_R_VPM_LD_BUSY,
	QPU_R_VPM_LD_WAIT,
	QPU_R_MUTEX_ACQUIRE,
};
/*
 * Write addresses for the QPU_WADDR_ADD / QPU_WADDR_MUL instruction fields.
 * Values 0-31 are the plain regfile a or b registers; 32-63 are the special
 * destinations listed here, each with its number written out.
 */
enum qpu_waddr {
	QPU_W_ACC0              = 32, /* aka r0 */
	QPU_W_ACC1              = 33,
	QPU_W_ACC2              = 34,
	QPU_W_ACC3              = 35,
	QPU_W_TMU_NOSWAP        = 36,
	QPU_W_ACC5              = 37,
	QPU_W_HOST_INT          = 38,
	QPU_W_NOP               = 39,
	QPU_W_UNIFORMS_ADDRESS  = 40,
	QPU_W_QUAD_XY           = 41, /* X for regfile a, Y for regfile b */
	/* Two names share address 42. */
	QPU_W_MS_FLAGS          = 42,
	QPU_W_REV_FLAG          = 42,
	QPU_W_TLB_STENCIL_SETUP = 43,
	QPU_W_TLB_Z             = 44,
	QPU_W_TLB_COLOR_MS      = 45,
	QPU_W_TLB_COLOR_ALL     = 46,
	QPU_W_TLB_ALPHA_MASK    = 47,
	QPU_W_VPM               = 48,
	QPU_W_VPMVCD_SETUP      = 49, /* LD for regfile a, ST for regfile b */
	QPU_W_VPM_ADDR          = 50, /* LD for regfile a, ST for regfile b */
	QPU_W_MUTEX_RELEASE     = 51,
	QPU_W_SFU_RECIP         = 52,
	QPU_W_SFU_RECIPSQRT     = 53,
	QPU_W_SFU_EXP           = 54,
	QPU_W_SFU_LOG           = 55,
	QPU_W_TMU0_S            = 56,
	QPU_W_TMU0_T            = 57,
	QPU_W_TMU0_R            = 58,
	QPU_W_TMU0_B            = 59,
	QPU_W_TMU1_S            = 60,
	QPU_W_TMU1_T            = 61,
	QPU_W_TMU1_R            = 62,
	QPU_W_TMU1_B            = 63,
};
/* Values of the QPU_SIG instruction field, numbered 0 through 15. */
enum qpu_sig_bits {
	QPU_SIG_SW_BREAKPOINT       = 0,
	QPU_SIG_NONE                = 1,
	QPU_SIG_THREAD_SWITCH       = 2,
	QPU_SIG_PROG_END            = 3,
	QPU_SIG_WAIT_FOR_SCOREBOARD = 4,
	QPU_SIG_SCOREBOARD_UNLOCK   = 5,
	QPU_SIG_LAST_THREAD_SWITCH  = 6,
	QPU_SIG_COVERAGE_LOAD       = 7,
	QPU_SIG_COLOR_LOAD          = 8,
	QPU_SIG_COLOR_LOAD_END      = 9,
	QPU_SIG_LOAD_TMU0           = 10,
	QPU_SIG_LOAD_TMU1           = 11,
	QPU_SIG_ALPHA_MASK_LOAD     = 12,
	QPU_SIG_SMALL_IMM           = 13,
	QPU_SIG_LOAD_IMM            = 14,
	QPU_SIG_BRANCH              = 15
};
/*
 * ALU input mux selectors. 0-7 are the values the hardware understands;
 * QPU_MUX_IMM (8) is a non-hardware value.
 */
enum qpu_mux {
	/* hardware mux values */
	QPU_MUX_R0  = 0,
	QPU_MUX_R1  = 1,
	QPU_MUX_R2  = 2,
	QPU_MUX_R3  = 3,
	QPU_MUX_R4  = 4,
	QPU_MUX_R5  = 5,
	QPU_MUX_A   = 6,
	QPU_MUX_B   = 7,

	/* non-hardware mux values */
	QPU_MUX_IMM = 8,
};
/* Values of the QPU_COND_ADD / QPU_COND_MUL instruction fields (0-7). */
enum qpu_cond {
	QPU_COND_NEVER  = 0,
	QPU_COND_ALWAYS = 1,
	QPU_COND_ZS     = 2,
	QPU_COND_ZC     = 3,
	QPU_COND_NS     = 4,
	QPU_COND_NC     = 5,
	QPU_COND_CS     = 6,
	QPU_COND_CC     = 7,
};
/*
 * MUL-side pack modes. Values 1 and 2 have no name in this header, so the
 * numbering jumps from 0 straight to 3.
 */
enum qpu_pack_mul {
	QPU_PACK_MUL_NOP  = 0,

	/* replicated to each 8 bits of the 32-bit dst. */
	QPU_PACK_MUL_8888 = 3,
	QPU_PACK_MUL_8A   = 4,
	QPU_PACK_MUL_8B   = 5,
	QPU_PACK_MUL_8C   = 6,
	QPU_PACK_MUL_8D   = 7,
};
/*
 * Regfile-a pack modes, numbered 0 through 15. The upper half (8-15) holds
 * the saturating variants.
 */
enum qpu_pack_a {
	QPU_PACK_A_NOP      = 0,

	/* convert to 16 bit float if float input, or to int16. */
	QPU_PACK_A_16A      = 1,
	QPU_PACK_A_16B      = 2,

	/* replicated to each 8 bits of the 32-bit dst. */
	QPU_PACK_A_8888     = 3,

	/* Convert to 8-bit unsigned int. */
	QPU_PACK_A_8A       = 4,
	QPU_PACK_A_8B       = 5,
	QPU_PACK_A_8C       = 6,
	QPU_PACK_A_8D       = 7,

	/* Saturating variants of the previous instructions. */
	QPU_PACK_A_32_SAT   = 8,  /* int-only */
	QPU_PACK_A_16A_SAT  = 9,  /* int or float */
	QPU_PACK_A_16B_SAT  = 10,
	QPU_PACK_A_8888_SAT = 11,
	QPU_PACK_A_8A_SAT   = 12,
	QPU_PACK_A_8B_SAT   = 13,
	QPU_PACK_A_8C_SAT   = 14,
	QPU_PACK_A_8D_SAT   = 15,
};
/* R4 unpack modes, numbered 0 through 7. */
enum qpu_unpack_r4 {
	QPU_UNPACK_R4_NOP         = 0,
	QPU_UNPACK_R4_F16A_TO_F32 = 1,
	QPU_UNPACK_R4_F16B_TO_F32 = 2,
	QPU_UNPACK_R4_8D_REP      = 3,
	QPU_UNPACK_R4_8A          = 4,
	QPU_UNPACK_R4_8B          = 5,
	QPU_UNPACK_R4_8C          = 6,
	QPU_UNPACK_R4_8D          = 7,
};
/* Build a 64-bit mask covering instruction bits low..high, inclusive. */
#define QPU_MASK(high, low) \
	((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))

/*
 * Extract a named field from a 64-bit instruction word. "field" is a
 * prefix for which both <field>_MASK and <field>_SHIFT must be defined.
 */
#define QPU_GET_FIELD(word, field) \
	((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

/*
 * Instruction fields, listed from the most significant bit downwards.
 */

/* bits 63:60 */
#define QPU_SIG_MASK		QPU_MASK(63, 60)
#define QPU_SIG_SHIFT		60

/* bits 59:57 */
#define QPU_UNPACK_MASK		QPU_MASK(59, 57)
#define QPU_UNPACK_SHIFT	57

/**
 * If set, the pack field means PACK_MUL or R4 packing, instead of normal
 * regfile a packing.
 */
#define QPU_PM			((uint64_t)1 << 56)

/* bits 55:52 */
#define QPU_PACK_MASK		QPU_MASK(55, 52)
#define QPU_PACK_SHIFT		52

/* bits 51:49 */
#define QPU_COND_ADD_MASK	QPU_MASK(51, 49)
#define QPU_COND_ADD_SHIFT	49

/* bits 48:46 */
#define QPU_COND_MUL_MASK	QPU_MASK(48, 46)
#define QPU_COND_MUL_SHIFT	46

/* Single-bit flags at bits 45 and 44. */
#define QPU_SF			((uint64_t)1 << 45)
#define QPU_WS			((uint64_t)1 << 44)

/* bits 43:38 */
#define QPU_WADDR_ADD_MASK	QPU_MASK(43, 38)
#define QPU_WADDR_ADD_SHIFT	38

/* bits 37:32 */
#define QPU_WADDR_MUL_MASK	QPU_MASK(37, 32)
#define QPU_WADDR_MUL_SHIFT	32

/* bits 31:29 */
#define QPU_OP_MUL_MASK		QPU_MASK(31, 29)
#define QPU_OP_MUL_SHIFT	29

/* bits 28:24 */
#define QPU_OP_ADD_MASK		QPU_MASK(28, 24)
#define QPU_OP_ADD_SHIFT	24

/* bits 23:18 */
#define QPU_RADDR_A_MASK	QPU_MASK(23, 18)
#define QPU_RADDR_A_SHIFT	18

/* bits 17:12 -- RADDR_B and SMALL_IMM name the same bit range */
#define QPU_RADDR_B_MASK	QPU_MASK(17, 12)
#define QPU_RADDR_B_SHIFT	12
#define QPU_SMALL_IMM_MASK	QPU_MASK(17, 12)
#define QPU_SMALL_IMM_SHIFT	12

/* bits 11:9 */
#define QPU_ADD_A_MASK		QPU_MASK(11, 9)
#define QPU_ADD_A_SHIFT		9

/* bits 8:6 */
#define QPU_ADD_B_MASK		QPU_MASK(8, 6)
#define QPU_ADD_B_SHIFT		6

/* bits 5:3 */
#define QPU_MUL_A_MASK		QPU_MASK(5, 3)
#define QPU_MUL_A_SHIFT		3

/* bits 2:0 */
#define QPU_MUL_B_MASK		QPU_MASK(2, 0)
#define QPU_MUL_B_SHIFT		0
#endif /* VC4_QPU_DEFINES_H */
This diff is collapsed.
......@@ -28,9 +28,11 @@
#define DRM_VC4_CREATE_BO 0x03
#define DRM_VC4_MMAP_BO 0x04
#define DRM_VC4_CREATE_SHADER_BO 0x05
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
/**
* struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
......@@ -65,4 +67,27 @@ struct drm_vc4_mmap_bo {
__u64 offset;
};
/**
 * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
 * shader BOs.
 *
 * Since allowing a shader to be overwritten while it's also being
 * executed from would allow privilege escalation, shaders must be
 * created using this ioctl, and they can't be mmapped for writing later.
 */
struct drm_vc4_create_shader_bo {
/* Size of the data argument, in bytes. Must be non-zero and a multiple
 * of 8 (the size of a u64), otherwise the ioctl fails with -EINVAL.
 */
__u32 size;
/* Flags, currently must be 0. */
__u32 flags;
/* User-space pointer to the shader data, carried as a 64-bit integer. */
__u64 data;
/* Returned GEM handle for the BO; written by the kernel on success. */
__u32 handle;
/* Pad, must be 0. */
__u32 pad;
};
#endif /* _UAPI_VC4_DRM_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment