Commit af3847a7 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-intel-gt-next-2022-05-05' of...

Merge tag 'drm-intel-gt-next-2022-05-05' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- Add kerneldoc for engine class enum (Matt Roper)
- Add compute engine ABI (Matt Roper)

Driver Changes:

- Define GuC firmware version for DG2 (John Harrison)
- Clear SET_PREDICATE_RESULT prior to executing the ring (Chris Wilson)
- Fix race in __i915_vma_remove_closed (Karol Herbst)

- Add register for compute engine's MMIO-based TLB invalidation (Matt Roper)
- Xe_HP SDV and DG2 have up to 4 CCS engines (Daniele Ceraolo Spurio)
- Add initial Ponte Vecchio definitions (Stuart Summers)
- Document the eviction of the Flat-CCS objects (Ramalingam C)

- Use existing uncore helper to read gpm_timestamp (Umesh Nerlige Ramappa)
- Fix issue with LRI relative addressing (Akeem G Abodunrin)
- Skip poisoning SET_PREDICATE_RESULT on dg2 (Chris Wilson)
- Optimize the ccs_sz calculation per chunk (Ramalingam C)
- Remove superfluous string helper include (Jani Nikula)
- Fix assert in i915_ggtt_pin (Tvrtko Ursulin)
- Use IOMEM_ERR_PTR() directly (Kefeng Wang)
Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YnNxCm1pyflu3taj@tursulin-mobl2
parents 97ab5308 1df1c79c
......@@ -5,6 +5,7 @@
#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_lrc.h"
#include "intel_ring.h"
......@@ -385,6 +386,59 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
return 0;
}
static int __gen125_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags,
u32 arb)
{
struct intel_context *ce = rq->context;
u32 wa_offset = lrc_indirect_bb(ce);
u32 *cs;
cs = intel_ring_begin(rq, 12);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_ARB_ON_OFF | arb;
*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT |
MI_LRI_LRM_CS_MMIO;
*cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
*cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
*cs++ = 0;
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
/* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
*cs++ = MI_BATCH_BUFFER_START_GEN8;
*cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
*cs++ = 0;
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
intel_ring_advance(rq, cs);
return 0;
}
int gen125_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}
int gen125_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}
int gen8_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
......
......@@ -31,6 +31,13 @@ int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
int gen125_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
int gen125_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
......
......@@ -148,6 +148,7 @@
(REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) /* gen12+ */
#define MI_PREDICATE_RESULT_2(base) _MMIO((base) + 0x3bc)
#define LOWER_SLICE_ENABLED (1 << 0)
#define LOWER_SLICE_DISABLED (0 << 0)
......@@ -193,6 +194,7 @@
#define RING_TIMESTAMP_UDW(base) _MMIO((base) + 0x358 + 4)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */
#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8)
#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4)
#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2)
#define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */
......
......@@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
/* TODO: Add COMPUTE_CLASS mapping once ABI is available */
[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
};
static int engine_cmp(void *priv, const struct list_head *A,
......
......@@ -3433,10 +3433,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
}
}
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
if (intel_engine_has_preemption(engine))
engine->emit_bb_start = gen125_emit_bb_start;
else
engine->emit_bb_start = gen125_emit_bb_start_noarb;
} else {
if (intel_engine_has_preemption(engine))
engine->emit_bb_start = gen8_emit_bb_start;
else
engine->emit_bb_start = gen8_emit_bb_start_noarb;
}
engine->busyness = execlists_engine_busyness;
}
......
......@@ -39,6 +39,8 @@
#define MI_GLOBAL_GTT (1<<22)
#define MI_NOOP MI_INSTR(0, 0)
#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
#define MI_SET_PREDICATE_DISABLE (0 << 0)
#define MI_USER_INTERRUPT MI_INSTR(0x02, 0)
#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0)
#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16)
......
......@@ -1175,6 +1175,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
[VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
[VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
[COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
};
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
......
......@@ -1007,6 +1007,7 @@
#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04)
#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28)
#define RENDER_MOD_CTRL _MMIO(0xcf2c)
......
......@@ -904,6 +904,24 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
engine->name);
}
static u32 context_wa_bb_offset(const struct intel_context *ce)
{
return PAGE_SIZE * ce->wa_bb_page;
}
static u32 *context_indirect_bb(const struct intel_context *ce)
{
void *ptr;
GEM_BUG_ON(!ce->wa_bb_page);
ptr = ce->lrc_reg_state;
ptr -= LRC_STATE_OFFSET; /* back to start of context image */
ptr += context_wa_bb_offset(ce);
return ptr;
}
void lrc_init_state(struct intel_context *ce,
struct intel_engine_cs *engine,
void *state)
......@@ -922,6 +940,10 @@ void lrc_init_state(struct intel_context *ce,
/* Clear the ppHWSP (inc. per-context counters) */
memset(state, 0, PAGE_SIZE);
/* Clear the indirect wa and storage */
if (ce->wa_bb_page)
memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);
/*
* The second page of the context object contains some registers which
* must be set up prior to the first execution.
......@@ -929,6 +951,35 @@ void lrc_init_state(struct intel_context *ce,
__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}
u32 lrc_indirect_bb(const struct intel_context *ce)
{
return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}
static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
/* If predication is active, this will be noop'ed */
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
*cs++ = 0;
*cs++ = 0; /* No predication */
/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
*cs++ = MI_BATCH_BUFFER_END | BIT(15);
*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;
/* Instructions are no longer predicated (disabled), we can proceed */
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
*cs++ = 0;
*cs++ = 1; /* enable predication before the next BB */
*cs++ = MI_BATCH_BUFFER_END;
GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);
return cs;
}
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
......@@ -1240,24 +1291,6 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
return cs;
}
static u32 context_wa_bb_offset(const struct intel_context *ce)
{
return PAGE_SIZE * ce->wa_bb_page;
}
static u32 *context_indirect_bb(const struct intel_context *ce)
{
void *ptr;
GEM_BUG_ON(!ce->wa_bb_page);
ptr = ce->lrc_reg_state;
ptr -= LRC_STATE_OFFSET; /* back to start of context image */
ptr += context_wa_bb_offset(ce);
return ptr;
}
static void
setup_indirect_ctx_bb(const struct intel_context *ce,
const struct intel_engine_cs *engine,
......@@ -1271,9 +1304,11 @@ setup_indirect_ctx_bb(const struct intel_context *ce,
while ((unsigned long)cs % CACHELINE_BYTES)
*cs++ = MI_NOOP;
GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));
lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
i915_ggtt_offset(ce->state) +
context_wa_bb_offset(ce),
lrc_indirect_bb(ce),
(cs - start) * sizeof(*cs));
}
......
......@@ -145,4 +145,9 @@ static inline void lrc_runtime_stop(struct intel_context *ce)
WRITE_ONCE(stats->active, 0);
}
#define DG2_PREDICATE_RESULT_WA (PAGE_SIZE - sizeof(u64))
#define DG2_PREDICATE_RESULT_BB (2048)
u32 lrc_indirect_bb(const struct intel_context *ce);
#endif /* __INTEL_LRC_H__ */
......@@ -485,16 +485,21 @@ static bool wa_1209644611_applies(int ver, u32 size)
* And CCS data can be copied in and out of CCS region through
* XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
*
* When we exhaust the lmem, if the object's placements support smem, then we can
* directly decompress the compressed lmem object into smem and start using it
* from smem itself.
* I915 supports Flat-CCS on lmem only objects. When an objects has smem in
* its preference list, on memory pressure, i915 needs to migrate the lmem
* content into smem. If the lmem object is Flat-CCS compressed by userspace,
* then i915 needs to decompress it. But I915 lack the required information
* for such decompression. Hence I915 supports Flat-CCS only on lmem only objects.
*
* But when we need to swapout the compressed lmem object into a smem region
* though objects' placement doesn't support smem, then we copy the lmem content
* as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT).
* When the object is referred, lmem content will be swaped in along with
* restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding
* location.
* When we exhaust the lmem, Flat-CCS capable objects' lmem backing memory can
* be temporarily evicted to smem, along with the auxiliary CCS state, where
* it can be potentially swapped-out at a later point, if required.
* If userspace later touches the evicted pages, then we always move
* the backing memory back to lmem, which includes restoring the saved CCS state,
* and potentially performing any required swap-in.
*
* For the migration of the lmem objects with smem in placement list, such as
* {lmem, smem}, objects are treated as non Flat-CCS capable objects.
*/
static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
......@@ -647,17 +652,9 @@ static int scatter_list_length(struct scatterlist *sg)
static void
calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
int *src_sz, int *ccs_sz, u32 bytes_to_cpy,
u32 ccs_bytes_to_cpy)
int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy)
{
if (ccs_bytes_to_cpy) {
/*
* We can only copy the ccs data corresponding to
* the CHUNK_SZ of lmem which is
* GET_CCS_BYTES(i915, CHUNK_SZ))
*/
*ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ));
if (!src_is_lmem)
/*
* When CHUNK_SZ is passed all the pages upto CHUNK_SZ
......@@ -707,10 +704,10 @@ intel_context_migrate_copy(struct intel_context *ce,
struct drm_i915_private *i915 = ce->engine->i915;
u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
enum i915_cache_level ccs_cache_level;
int src_sz, dst_sz, ccs_sz;
u32 src_offset, dst_offset;
u8 src_access, dst_access;
struct i915_request *rq;
int src_sz, dst_sz;
bool ccs_is_src;
int err;
......@@ -791,7 +788,7 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz,
calculate_chunk_sz(i915, src_is_lmem, &src_sz,
bytes_to_cpy, ccs_bytes_to_cpy);
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
......@@ -825,37 +822,35 @@ intel_context_migrate_copy(struct intel_context *ce,
bytes_to_cpy -= len;
if (ccs_bytes_to_cpy) {
int ccs_sz;
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
goto out_rq;
ccs_sz = GET_CCS_BYTES(i915, len);
err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
ccs_is_src ? src_offset : dst_offset,
ccs_sz);
if (err < 0)
goto out_rq;
if (err < ccs_sz) {
err = -EINVAL;
goto out_rq;
}
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
goto out_rq;
/*
* Using max of src_sz and dst_sz, as we need to
* pass the lmem size corresponding to the ccs
* blocks we need to handle.
*/
ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz,
ccs_is_src ? dst_sz : ccs_sz);
err = emit_copy_ccs(rq, dst_offset, dst_access,
src_offset, src_access, ccs_sz);
src_offset, src_access, len);
if (err)
goto out_rq;
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
goto out_rq;
/* Converting back to ccs bytes */
ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz);
ccs_bytes_to_cpy -= ccs_sz;
}
......
......@@ -10,8 +10,6 @@
#include "intel_gt_regs.h"
#include "intel_sseu.h"
#include "linux/string_helpers.h"
void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
u8 max_subslices, u8 max_eus_per_subslice)
{
......
......@@ -128,6 +128,27 @@ static int context_flush(struct intel_context *ce, long timeout)
return err;
}
static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
{
if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
return ~0u;
if (GRAPHICS_VER(engine->i915) < 12)
return 0xfff;
switch (engine->class) {
default:
case RENDER_CLASS:
case COMPUTE_CLASS:
return 0x07ff;
case COPY_ENGINE_CLASS:
return 0x0fff;
case VIDEO_DECODE_CLASS:
case VIDEO_ENHANCEMENT_CLASS:
return 0x3fff;
}
}
static int live_lrc_layout(void *arg)
{
struct intel_gt *gt = arg;
......@@ -167,6 +188,7 @@ static int live_lrc_layout(void *arg)
dw = 0;
do {
u32 lri = READ_ONCE(hw[dw]);
u32 lri_mask;
if (lri == 0) {
dw++;
......@@ -194,6 +216,18 @@ static int live_lrc_layout(void *arg)
break;
}
/*
* When bit 19 of MI_LOAD_REGISTER_IMM instruction
* opcode is set on Gen12+ devices, HW does not
* care about certain register address offsets, and
* instead check the following for valid address
* ranges on specific engines:
* RCS && CCS: BITS(0 - 10)
* BCS: BITS(0 - 11)
* VECS && VCS: BITS(0 - 13)
*/
lri_mask = get_lri_mask(engine, lri);
lri &= 0x7f;
lri++;
dw++;
......@@ -201,7 +235,7 @@ static int live_lrc_layout(void *arg)
while (lri) {
u32 offset = READ_ONCE(hw[dw]);
if (offset != lrc[dw]) {
if ((offset ^ lrc[dw]) & lri_mask) {
pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
engine->name, dw, offset, lrc[dw]);
err = -EINVAL;
......@@ -911,6 +945,19 @@ create_user_vma(struct i915_address_space *vm, unsigned long size)
return vma;
}
static u32 safe_poison(u32 offset, u32 poison)
{
/*
* Do not enable predication as it will nop all subsequent commands,
* not only disabling the tests (by preventing all the other SRM) but
* also preventing the arbitration events at the end of the request.
*/
if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
poison &= ~REG_BIT(0);
return poison;
}
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
......@@ -1120,7 +1167,9 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
*cs++ = MI_LOAD_REGISTER_IMM(len);
while (len--) {
*cs++ = hw[dw];
*cs++ = poison;
*cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
MI_LRI_LRM_CS_MMIO),
poison);
dw += 2;
}
} while (dw < PAGE_SIZE / sizeof(u32) &&
......
......@@ -1200,20 +1200,6 @@ static u32 gpm_timestamp_shift(struct intel_gt *gt)
return 3 - shift;
}
static u64 gpm_timestamp(struct intel_gt *gt)
{
u32 lo, hi, old_hi, loop = 0;
hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
do {
lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
old_hi = hi;
hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
} while (old_hi != hi && loop++ < 2);
return ((u64)hi << 32) | lo;
}
static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
struct intel_gt *gt = guc_to_gt(guc);
......@@ -1223,7 +1209,8 @@ static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
lockdep_assert_held(&guc->timestamp.lock);
gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
MISC_STATUS1) >> guc->timestamp.shift;
gt_stamp_lo = lower_32_bits(gpm_ts);
*now = ktime_get();
......@@ -3910,6 +3897,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
*/
engine->emit_bb_start = gen8_emit_bb_start;
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
engine->emit_bb_start = gen125_emit_bb_start;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
......
......@@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* firmware as TGL.
*/
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
fw_def(DG2, 0, guc_def(dg2, 70, 1, 2)) \
fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \
fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \
......
......@@ -81,6 +81,7 @@ static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_COPY] = "copy",
[I915_ENGINE_CLASS_VIDEO] = "video",
[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
[I915_ENGINE_CLASS_COMPUTE] = "compute",
};
static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
......
......@@ -13,7 +13,7 @@
#include "gt/intel_engine_types.h"
#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
struct drm_i915_private;
......
......@@ -1059,6 +1059,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P)
#define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV)
#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2)
#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO)
#define IS_DG2_G10(dev_priv) \
IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10)
#define IS_DG2_G11(dev_priv) \
......
......@@ -1037,7 +1037,8 @@ static const struct intel_device_info xehpsdv_info = {
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) |
BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7),
BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7) |
BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
.require_force_probe = 1,
};
......@@ -1056,7 +1057,8 @@ static const struct intel_device_info xehpsdv_info = {
.platform_engine_mask = \
BIT(RCS0) | BIT(BCS0) | \
BIT(VECS0) | BIT(VECS1) | \
BIT(VCS0) | BIT(VCS2)
BIT(VCS0) | BIT(VCS2) | \
BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3)
__maybe_unused
static const struct intel_device_info dg2_info = {
......@@ -1074,6 +1076,27 @@ static const struct intel_device_info ats_m_info = {
.require_force_probe = 1,
};
#define XE_HPC_FEATURES \
XE_HP_FEATURES, \
.dma_mask_size = 52
__maybe_unused
static const struct intel_device_info pvc_info = {
XE_HPC_FEATURES,
XE_HPM_FEATURES,
DGFX_FEATURES,
.graphics.rel = 60,
.media.rel = 60,
PLATFORM(INTEL_PONTEVECCHIO),
.display = { 0 },
.has_flat_ccs = 0,
.platform_engine_mask =
BIT(BCS0) |
BIT(VCS0) |
BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
.require_force_probe = 1,
};
#undef PLATFORM
/*
......
......@@ -548,7 +548,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
int err;
if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY))
return IO_ERR_PTR(-EINVAL);
return IOMEM_ERR_PTR(-EINVAL);
if (!i915_gem_object_is_lmem(vma->obj)) {
if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
......@@ -601,7 +601,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
err_unpin:
__i915_vma_unpin(vma);
err:
return IO_ERR_PTR(err);
return IOMEM_ERR_PTR(err);
}
void i915_vma_flush_writes(struct i915_vma *vma)
......@@ -1565,9 +1565,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
if (ww)
return __i915_ggtt_pin(vma, ww, align, flags);
#ifdef CONFIG_LOCKDEP
WARN_ON(dma_resv_held(vma->obj->base.resv));
#endif
lockdep_assert_not_held(&vma->obj->base.resv->lock.base);
for_i915_gem_ww(&_ww, err, true) {
err = i915_gem_object_lock(vma->obj, &_ww);
......@@ -1615,17 +1613,17 @@ void i915_vma_close(struct i915_vma *vma)
static void __i915_vma_remove_closed(struct i915_vma *vma)
{
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock);
list_del_init(&vma->closed_link);
spin_unlock_irq(&gt->closed_lock);
}
void i915_vma_reopen(struct i915_vma *vma)
{
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock);
if (i915_vma_is_closed(vma))
__i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock);
}
static void force_unbind(struct i915_vma *vma)
......@@ -1641,6 +1639,7 @@ static void force_unbind(struct i915_vma *vma)
static void release_references(struct i915_vma *vma, bool vm_ddestroy)
{
struct drm_i915_gem_object *obj = vma->obj;
struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
......@@ -1651,7 +1650,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy)
spin_unlock(&obj->vma.lock);
spin_lock_irq(&gt->closed_lock);
__i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock);
if (vm_ddestroy)
i915_vm_resv_put(vma->vm);
......
......@@ -317,7 +317,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node,
* Returns a valid iomapped pointer or ERR_PTR.
*/
void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
/**
* i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
......
......@@ -72,6 +72,7 @@ static const char * const platform_names[] = {
PLATFORM_NAME(ALDERLAKE_P),
PLATFORM_NAME(XEHPSDV),
PLATFORM_NAME(DG2),
PLATFORM_NAME(PONTEVECCHIO),
};
#undef PLATFORM_NAME
......
......@@ -88,6 +88,7 @@ enum intel_platform {
INTEL_ALDERLAKE_P,
INTEL_XEHPSDV,
INTEL_DG2,
INTEL_PONTEVECCHIO,
INTEL_MAX_PLATFORMS
};
......
......@@ -154,21 +154,71 @@ enum i915_mocs_table_index {
I915_MOCS_CACHED,
};
/*
/**
* enum drm_i915_gem_engine_class - uapi engine type enumeration
*
* Different engines serve different roles, and there may be more than one
* engine serving each role. enum drm_i915_gem_engine_class provides a
* classification of the role of the engine, which may be used when requesting
* operations to be performed on a certain subset of engines, or for providing
* information about that group.
* engine serving each role. This enum provides a classification of the role
* of the engine, which may be used when requesting operations to be performed
* on a certain subset of engines, or for providing information about that
* group.
*/
enum drm_i915_gem_engine_class {
/**
* @I915_ENGINE_CLASS_RENDER:
*
* Render engines support instructions used for 3D, Compute (GPGPU),
* and programmable media workloads. These instructions fetch data and
* dispatch individual work items to threads that operate in parallel.
* The threads run small programs (called "kernels" or "shaders") on
* the GPU's execution units (EUs).
*/
I915_ENGINE_CLASS_RENDER = 0,
/**
* @I915_ENGINE_CLASS_COPY:
*
* Copy engines (also referred to as "blitters") support instructions
* that move blocks of data from one location in memory to another,
* or that fill a specified location of memory with fixed data.
* Copy engines can perform pre-defined logical or bitwise operations
* on the source, destination, or pattern data.
*/
I915_ENGINE_CLASS_COPY = 1,
/**
* @I915_ENGINE_CLASS_VIDEO:
*
* Video engines (also referred to as "bit stream decode" (BSD) or
* "vdbox") support instructions that perform fixed-function media
* decode and encode.
*/
I915_ENGINE_CLASS_VIDEO = 2,
/**
* @I915_ENGINE_CLASS_VIDEO_ENHANCE:
*
* Video enhancement engines (also referred to as "vebox") support
* instructions related to image enhancement.
*/
I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
/* should be kept compact */
/**
* @I915_ENGINE_CLASS_COMPUTE:
*
* Compute engines support a subset of the instructions available
* on render engines: compute engines support Compute (GPGPU) and
* programmable media workloads, but do not support the 3D pipeline.
*/
I915_ENGINE_CLASS_COMPUTE = 4,
/* Values in this enum should be kept compact. */
/**
* @I915_ENGINE_CLASS_INVALID:
*
* Placeholder value to represent an invalid engine class assignment.
*/
I915_ENGINE_CLASS_INVALID = -1
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment