summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/xe
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r-- drivers/gpu/drm/xe/Kconfig.debug | 4
-rw-r--r-- drivers/gpu/drm/xe/Makefile | 6
-rw-r--r-- drivers/gpu/drm/xe/abi/guc_actions_abi.h | 20
-rw-r--r-- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 38
-rw-r--r-- drivers/gpu/drm/xe/abi/guc_capture_abi.h | 2
-rw-r--r-- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 20
-rw-r--r-- drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h | 16
-rw-r--r-- drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h (renamed from drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h) | 0
-rw-r--r-- drivers/gpu/drm/xe/display/ext/i915_irq.c | 13
-rw-r--r-- drivers/gpu/drm/xe/display/intel_bo.c | 25
-rw-r--r-- drivers/gpu/drm/xe/display/xe_display.c | 116
-rw-r--r-- drivers/gpu/drm/xe/display/xe_display.h | 2
-rw-r--r-- drivers/gpu/drm/xe/display/xe_fb_pin.c | 12
-rw-r--r-- drivers/gpu/drm/xe/display/xe_plane_initial.c | 8
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 3
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 9
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_pmt.h | 19
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 2
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_regs.h | 4
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_bo.c | 30
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_live_test_mod.c | 2
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_migrate.c | 13
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_mocs.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_assert.h | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_bb.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.c | 190
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.h | 33
-rw-r--r-- drivers/gpu/drm/xe/xe_bo_doc.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_bo_evict.c | 14
-rw-r--r-- drivers/gpu/drm/xe/xe_bo_types.h | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_devcoredump.c | 119
-rw-r--r-- drivers/gpu/drm/xe/xe_devcoredump.h | 7
-rw-r--r-- drivers/gpu/drm/xe/xe_devcoredump_types.h | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_device.c | 19
-rw-r--r-- drivers/gpu/drm/xe/xe_device.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_device_types.h | 65
-rw-r--r-- drivers/gpu/drm/xe/xe_drm_client.c | 83
-rw-r--r-- drivers/gpu/drm/xe/xe_drv.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_exec.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_exec_queue.c | 24
-rw-r--r-- drivers/gpu/drm/xe/xe_exec_queue_types.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_execlist.c | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_ggtt.c | 37
-rw-r--r-- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_gsc_proxy.c | 47
-rw-r--r-- drivers/gpu/drm/xe/xe_gt.c | 12
-rw-r--r-- drivers/gpu/drm/xe/xe_gt.h | 27
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_debugfs.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_freq.c | 15
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_idle.c | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_mcr.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_mcr.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_pagefault.c | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_printk.h | 31
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 80
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c | 25
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 63
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_stats.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_stats.h | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_stats_types.h | 15
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_throttle.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 29
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_types.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_guc.c | 350
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_ads.c | 26
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_capture.c | 35
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_capture.h | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_capture_types.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_ct.c | 36
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_fwif.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_klv_helpers.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_pc.c | 79
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_pc.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_pc_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_submit.c | 153
-rw-r--r-- drivers/gpu/drm/xe/xe_guc_types.h | 10
-rw-r--r-- drivers/gpu/drm/xe/xe_heci_gsc.c | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_hmm.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine.c | 22
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_engine_types.h | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_hw_fence_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_irq.c | 350
-rw-r--r-- drivers/gpu/drm/xe/xe_irq.h | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_lrc.c | 53
-rw-r--r-- drivers/gpu/drm/xe/xe_lrc.h | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_macros.h | 12
-rw-r--r-- drivers/gpu/drm/xe/xe_memirq.c | 26
-rw-r--r-- drivers/gpu/drm/xe/xe_migrate.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_module.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_oa.c | 174
-rw-r--r-- drivers/gpu/drm/xe/xe_oa_types.h | 8
-rw-r--r-- drivers/gpu/drm/xe/xe_pci.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_pcode.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_pm.c | 13
-rw-r--r-- drivers/gpu/drm/xe/xe_pt.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_query.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_reg_sr.c | 53
-rw-r--r-- drivers/gpu/drm/xe/xe_reg_whitelist.c | 37
-rw-r--r-- drivers/gpu/drm/xe/xe_rtp.c | 5
-rw-r--r-- drivers/gpu/drm/xe/xe_rtp.h | 15
-rw-r--r-- drivers/gpu/drm/xe/xe_sriov.c | 4
-rw-r--r-- drivers/gpu/drm/xe/xe_sriov_pf_helpers.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_sriov_types.h | 17
-rw-r--r-- drivers/gpu/drm/xe/xe_sriov_vf.c | 263
-rw-r--r-- drivers/gpu/drm/xe/xe_sriov_vf.h | 14
-rw-r--r-- drivers/gpu/drm/xe/xe_trace.h | 11
-rw-r--r-- drivers/gpu/drm/xe/xe_trace_bo.h | 17
-rw-r--r-- drivers/gpu/drm/xe/xe_trace_lrc.c | 9
-rw-r--r-- drivers/gpu/drm/xe/xe_trace_lrc.h | 52
-rw-r--r-- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 61
-rw-r--r-- drivers/gpu/drm/xe/xe_uc_fw_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_vm.c | 37
-rw-r--r-- drivers/gpu/drm/xe/xe_vm.h | 1
-rw-r--r-- drivers/gpu/drm/xe/xe_vm_doc.h | 22
-rw-r--r-- drivers/gpu/drm/xe/xe_vsec.c | 233
-rw-r--r-- drivers/gpu/drm/xe/xe_vsec.h | 11
-rw-r--r-- drivers/gpu/drm/xe/xe_wa.c | 6
-rw-r--r-- drivers/gpu/drm/xe/xe_wa_oob.rules | 3
128 files changed, 2864 insertions, 859 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 2de0de41b8dd..0d749ed44878 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -66,7 +66,7 @@ config DRM_XE_DEBUG_MEM
bool "Enable passing SYS/VRAM addresses to user space"
default n
help
- Pass object location trough uapi. Intended for extended
+ Pass object location through uapi. Intended for extended
testing and development only.
Recommended for driver developers only.
@@ -104,5 +104,5 @@ config DRM_XE_USERPTR_INVAL_INJECT
Choose this option when debugging error paths that
are hit during checks for userptr invalidations.
- Recomended for driver developers only.
+ Recommended for driver developers only.
If in doubt, say "N".
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index bc7a04ce69fd..5c97ad6ed738 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -101,6 +101,7 @@ xe-y += xe_bb.o \
xe_trace.o \
xe_trace_bo.o \
xe_trace_guc.o \
+ xe_trace_lrc.o \
xe_ttm_sys_mgr.o \
xe_ttm_stolen_mgr.o \
xe_ttm_vram_mgr.o \
@@ -110,6 +111,7 @@ xe-y += xe_bb.o \
xe_vm.o \
xe_vram.o \
xe_vram_freq.o \
+ xe_vsec.o \
xe_wait_user_fence.o \
xe_wa.o \
xe_wopcm.o
@@ -124,7 +126,8 @@ xe-y += \
xe_gt_sriov_vf.o \
xe_guc_relay.o \
xe_memirq.o \
- xe_sriov.o
+ xe_sriov.o \
+ xe_sriov_vf.o
xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
@@ -206,6 +209,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_ddi.o \
i915-display/intel_ddi_buf_trans.o \
i915-display/intel_display.o \
+ i915-display/intel_display_conversion.o \
i915-display/intel_display_device.o \
i915-display/intel_display_driver.o \
i915-display/intel_display_irq.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index b54fe40fc5a9..fee385532fb0 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -134,6 +134,8 @@ enum xe_guc_action {
XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+ XE_GUC_ACTION_REGISTER_G2G = 0x4507,
+ XE_GUC_ACTION_DEREGISTER_G2G = 0x4508,
XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
@@ -218,4 +220,22 @@ enum xe_guc_tlb_inval_mode {
XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
};
+/*
+ * GuC to GuC communication (de-)registration fields:
+ */
+enum xe_guc_g2g_type {
+ XE_G2G_TYPE_IN = 0x0,
+ XE_G2G_TYPE_OUT,
+ XE_G2G_TYPE_LIMIT,
+};
+
+#define XE_G2G_REGISTER_DEVICE REG_GENMASK(16, 16)
+#define XE_G2G_REGISTER_TILE REG_GENMASK(15, 12)
+#define XE_G2G_REGISTER_TYPE REG_GENMASK(11, 8)
+#define XE_G2G_REGISTER_SIZE REG_GENMASK(7, 0)
+
+#define XE_G2G_DEREGISTER_DEVICE REG_GENMASK(16, 16)
+#define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12)
+#define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8)
+
#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
index b6a1852749dd..0b28659d94e9 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -502,6 +502,44 @@
#define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
/**
+ * DOC: VF2GUC_NOTIFY_RESFIX_DONE
+ *
+ * This action is used by VF to notify the GuC that the VF KMD has completed
+ * post-migration recovery steps.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u
+
+#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN
+#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+
+#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
* DOC: VF2GUC_QUERY_SINGLE_KLV
*
* This action is used by VF to query value of the single KLV data.
diff --git a/drivers/gpu/drm/xe/abi/guc_capture_abi.h b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
index e7898edc6236..dd4117553739 100644
--- a/drivers/gpu/drm/xe/abi/guc_capture_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_capture_abi.h
@@ -25,7 +25,7 @@ enum guc_state_capture_type {
#define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1)
-/* Class indecies for capture_class and capture_instance arrays */
+/* Class indices for capture_class and capture_instance arrays */
enum guc_capture_list_class_type {
GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 37606cf8cc5e..d633f1c739e4 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -132,7 +132,7 @@ enum {
* _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
* This config sets whether strict scheduling is enabled whereby any VF
* that doesn’t have work to submit is still allocated a fixed execution
- * time-slice to ensure active VFs execution is always consitent even
+ * time-slice to ensure active VFs execution is always consistent even
* during other VF reprovisiong / rebooting events. Changing this KLV
* impacts all VFs and takes effect on the next VF-Switch event.
*
@@ -207,7 +207,7 @@ enum {
* of and this will never be perfectly-exact (accumulated nano-second
* granularity) since the GPUs clock time runs off a different crystal
* from the CPUs clock. Changing this KLV on a VF that is currently
- * running a context wont take effect until a new context is scheduled in.
+ * running a context won't take effect until a new context is scheduled in.
* That said, when the PF is changing this value from 0x0 to
* a non-zero value, it might never take effect if the VF is running an
* infinitely long compute or shader kernel. In such a scenario, the
@@ -227,7 +227,7 @@ enum {
* HW is capable and this will never be perfectly-exact (accumulated
* nano-second granularity) since the GPUs clock time runs off a
* different crystal from the CPUs clock. Changing this KLV on a VF
- * that is currently running a context wont take effect until a new
+ * that is currently running a context won't take effect until a new
* context is scheduled in.
* That said, when the PF is changing this value from 0x0 to
* a non-zero value, it might never take effect if the VF is running an
@@ -291,6 +291,14 @@ enum {
*
* :0: (default)
* :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_SCHED_PRIORITY` : 0x8A0C
+ * This config controls VF’s scheduling priority.
+ *
+ * :0: LOW = schedule VF only if it has active work (default)
+ * :1: NORMAL = schedule VF always, irrespective of whether it has work or not
+ * :2: HIGH = schedule VF in the next time-slice after current active
+ * time-slice completes if it has active work
*/
#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001
@@ -343,6 +351,12 @@ enum {
#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b
#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u
+#define GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY 0x8a0c
+#define GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN 1u
+#define GUC_SCHED_PRIORITY_LOW 0u
+#define GUC_SCHED_PRIORITY_NORMAL 1u
+#define GUC_SCHED_PRIORITY_HIGH 2u
+
/*
* Workaround keys:
*/
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
index 0382beb4035b..4fc3e535de91 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -10,6 +10,11 @@
#include "xe_device_types.h"
#include "xe_mmio.h"
+static inline struct intel_uncore *to_intel_uncore(struct drm_device *drm)
+{
+ return &to_xe_device(drm)->uncore;
+}
+
static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncore)
{
struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
@@ -117,10 +122,19 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
unsigned int slow_timeout_ms, u32 *out_value)
{
struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+ bool atomic;
+
+ /*
+ * Replicate the behavior from i915 here, in which sleep is not
+ * performed if slow_timeout_ms == 0. This is necessary because
+ * of some paths in display code where waits are done in atomic
+ * context.
+ */
+ atomic = !slow_timeout_ms && fast_timeout_us > 0;
return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
fast_timeout_us + 1000 * slow_timeout_ms,
- out_value, false);
+ out_value, atomic);
}
static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h
index d429d421ac70..d429d421ac70 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
index a7dbc6554d69..ac4cda2d81c7 100644
--- a/drivers/gpu/drm/xe/display/ext/i915_irq.c
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -53,18 +53,7 @@ void gen2_irq_init(struct intel_uncore *uncore, struct i915_irq_regs regs,
bool intel_irqs_enabled(struct xe_device *xe)
{
- /*
- * XXX: i915 has a racy handling of the irq.enabled, since it doesn't
- * lock its transitions. Because of that, the irq.enabled sometimes
- * is not read with the irq.lock in place.
- * However, the most critical cases like vblank and page flips are
- * properly using the locks.
- * We cannot take the lock in here or run any kind of assert because
- * of i915 inconsistency.
- * But at this point the xe irq is better protected against races,
- * although the full solution would be protecting the i915 side.
- */
- return xe->irq.enabled;
+ return atomic_read(&xe->irq.enabled);
}
void intel_synchronize_irq(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c
index 9f54fad0f1c0..b463f5bd4eed 100644
--- a/drivers/gpu/drm/xe/display/intel_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_bo.c
@@ -40,31 +40,8 @@ int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size)
{
struct xe_bo *bo = gem_to_xe_bo(obj);
- struct ttm_bo_kmap_obj map;
- void *src;
- bool is_iomem;
- int ret;
- ret = xe_bo_lock(bo, true);
- if (ret)
- return ret;
-
- ret = ttm_bo_kmap(&bo->ttm, offset >> PAGE_SHIFT, 1, &map);
- if (ret)
- goto out_unlock;
-
- offset &= ~PAGE_MASK;
- src = ttm_kmap_obj_virtual(&map, &is_iomem);
- src += offset;
- if (is_iomem)
- memcpy_fromio(dst, (void __iomem *)src, size);
- else
- memcpy(dst, src, size);
-
- ttm_bo_kunmap(&map);
-out_unlock:
- xe_bo_unlock(bo);
- return ret;
+ return xe_bo_read(bo, offset, dst, size);
}
struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj)
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index b5502f335f53..b3921dbc52ff 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -22,6 +22,7 @@
#include "intel_display_irq.h"
#include "intel_display_types.h"
#include "intel_dmc.h"
+#include "intel_dmc_wl.h"
#include "intel_dp.h"
#include "intel_encoder.h"
#include "intel_fbdev.h"
@@ -103,11 +104,12 @@ int xe_display_create(struct xe_device *xe)
static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
{
struct xe_device *xe = to_xe_device(dev);
+ struct intel_display *display = &xe->display;
if (!xe->info.probe_display)
return;
- intel_power_domains_cleanup(xe);
+ intel_power_domains_cleanup(display);
}
int xe_display_init_nommio(struct xe_device *xe)
@@ -132,7 +134,7 @@ static void xe_display_fini_noirq(void *arg)
if (!xe->info.probe_display)
return;
- intel_display_driver_remove_noirq(xe);
+ intel_display_driver_remove_noirq(display);
intel_opregion_cleanup(display);
}
@@ -144,7 +146,7 @@ int xe_display_init_noirq(struct xe_device *xe)
if (!xe->info.probe_display)
return 0;
- intel_display_driver_early_probe(xe);
+ intel_display_driver_early_probe(display);
/* Early display init.. */
intel_opregion_setup(display);
@@ -157,9 +159,9 @@ int xe_display_init_noirq(struct xe_device *xe)
intel_bw_init_hw(xe);
- intel_display_device_info_runtime_init(xe);
+ intel_display_device_info_runtime_init(display);
- err = intel_display_driver_probe_noirq(xe);
+ err = intel_display_driver_probe_noirq(display);
if (err) {
intel_opregion_cleanup(display);
return err;
@@ -171,21 +173,23 @@ int xe_display_init_noirq(struct xe_device *xe)
static void xe_display_fini_noaccel(void *arg)
{
struct xe_device *xe = arg;
+ struct intel_display *display = &xe->display;
if (!xe->info.probe_display)
return;
- intel_display_driver_remove_nogem(xe);
+ intel_display_driver_remove_nogem(display);
}
int xe_display_init_noaccel(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
int err;
if (!xe->info.probe_display)
return 0;
- err = intel_display_driver_probe_nogem(xe);
+ err = intel_display_driver_probe_nogem(display);
if (err)
return err;
@@ -194,10 +198,12 @@ int xe_display_init_noaccel(struct xe_device *xe)
int xe_display_init(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return 0;
- return intel_display_driver_probe(xe);
+ return intel_display_driver_probe(display);
}
void xe_display_fini(struct xe_device *xe)
@@ -215,30 +221,36 @@ void xe_display_fini(struct xe_device *xe)
void xe_display_register(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
- intel_display_driver_register(xe);
+ intel_display_driver_register(display);
+ intel_power_domains_enable(display);
intel_register_dsm_handler();
- intel_power_domains_enable(xe);
}
void xe_display_unregister(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
intel_unregister_dsm_handler();
- intel_power_domains_disable(xe);
- intel_display_driver_unregister(xe);
+ intel_power_domains_disable(display);
+ intel_display_driver_unregister(display);
}
void xe_display_driver_remove(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
- intel_display_driver_remove(xe);
+ intel_display_driver_remove(display);
}
/* IRQ-related functions */
@@ -322,25 +334,22 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
* We do a lot of poking in a lot of registers, make sure they work
* properly.
*/
- intel_power_domains_disable(xe);
+ intel_power_domains_disable(display);
if (!runtime)
intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
if (!runtime && has_display(xe)) {
drm_kms_helper_poll_disable(&xe->drm);
- intel_display_driver_disable_user_access(xe);
- intel_display_driver_suspend(xe);
+ intel_display_driver_disable_user_access(display);
+ intel_display_driver_suspend(display);
}
xe_display_flush_cleanup_work(xe);
- if (!runtime)
- intel_dp_mst_suspend(xe);
-
intel_hpd_cancel_work(xe);
if (!runtime && has_display(xe)) {
- intel_display_driver_suspend_access(xe);
+ intel_display_driver_suspend_access(display);
intel_encoder_suspend_all(&xe->display);
}
@@ -364,20 +373,20 @@ void xe_display_pm_shutdown(struct xe_device *xe)
if (!xe->info.probe_display)
return;
- intel_power_domains_disable(xe);
+ intel_power_domains_disable(display);
intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
if (has_display(xe)) {
drm_kms_helper_poll_disable(&xe->drm);
- intel_display_driver_disable_user_access(xe);
- intel_display_driver_suspend(xe);
+ intel_display_driver_disable_user_access(display);
+ intel_display_driver_suspend(display);
}
xe_display_flush_cleanup_work(xe);
- intel_dp_mst_suspend(xe);
+ intel_dp_mst_suspend(display);
intel_hpd_cancel_work(xe);
if (has_display(xe))
- intel_display_driver_suspend_access(xe);
+ intel_display_driver_suspend_access(display);
intel_encoder_suspend_all(display);
intel_encoder_shutdown_all(display);
@@ -402,17 +411,37 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe)
void xe_display_pm_suspend_late(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
bool s2idle = suspend_to_idle();
+
+ if (!xe->info.probe_display)
+ return;
+
+ intel_display_power_suspend_late(display, s2idle);
+}
+
+void xe_display_pm_runtime_suspend_late(struct xe_device *xe)
+{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
- intel_power_domains_suspend(xe, s2idle);
+ if (xe->d3cold.allowed)
+ xe_display_pm_suspend_late(xe);
- intel_display_power_suspend_late(xe);
+ /*
+ * If xe_display_pm_suspend_late() is not called, it is likely
+ * that we will be on dynamic DC states with DMC wakelock enabled. We
+ * need to flush the release work in that case.
+ */
+ intel_dmc_wl_flush_release_work(display);
}
void xe_display_pm_shutdown_late(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
@@ -421,17 +450,17 @@ void xe_display_pm_shutdown_late(struct xe_device *xe)
* for now leaving all display power wells in the INIT power domain
* enabled.
*/
- intel_power_domains_driver_remove(xe);
+ intel_power_domains_driver_remove(display);
}
void xe_display_pm_resume_early(struct xe_device *xe)
{
+ struct intel_display *display = &xe->display;
+
if (!xe->info.probe_display)
return;
- intel_display_power_resume_early(xe);
-
- intel_power_domains_resume(xe);
+ intel_display_power_resume_early(display);
}
static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
@@ -446,20 +475,17 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
if (has_display(xe))
drm_mode_config_reset(&xe->drm);
- intel_display_driver_init_hw(xe);
- intel_hpd_init(xe);
+ intel_display_driver_init_hw(display);
if (!runtime && has_display(xe))
- intel_display_driver_resume_access(xe);
+ intel_display_driver_resume_access(display);
- /* MST sideband requires HPD interrupts enabled */
- if (!runtime)
- intel_dp_mst_resume(xe);
+ intel_hpd_init(xe);
if (!runtime && has_display(xe)) {
- intel_display_driver_resume(xe);
+ intel_display_driver_resume(display);
drm_kms_helper_poll_enable(&xe->drm);
- intel_display_driver_enable_user_access(xe);
+ intel_display_driver_enable_user_access(display);
}
if (has_display(xe))
@@ -470,7 +496,7 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
if (!runtime)
intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
- intel_power_domains_enable(xe);
+ intel_power_domains_enable(display);
}
void xe_display_pm_resume(struct xe_device *xe)
@@ -495,21 +521,23 @@ void xe_display_pm_runtime_resume(struct xe_device *xe)
static void display_device_remove(struct drm_device *dev, void *arg)
{
- struct xe_device *xe = arg;
+ struct intel_display *display = arg;
- intel_display_device_remove(xe);
+ intel_display_device_remove(display);
}
int xe_display_probe(struct xe_device *xe)
{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct intel_display *display;
int err;
if (!xe->info.probe_display)
goto no_display;
- intel_display_device_probe(xe);
+ display = intel_display_device_probe(pdev);
- err = drmm_add_action_or_reset(&xe->drm, display_device_remove, xe);
+ err = drmm_add_action_or_reset(&xe->drm, display_device_remove, display);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 17afa537aee5..233f81a26c25 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -41,6 +41,7 @@ void xe_display_pm_shutdown_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
void xe_display_pm_resume(struct xe_device *xe);
void xe_display_pm_runtime_suspend(struct xe_device *xe);
+void xe_display_pm_runtime_suspend_late(struct xe_device *xe);
void xe_display_pm_runtime_resume(struct xe_device *xe);
#else
@@ -74,6 +75,7 @@ static inline void xe_display_pm_shutdown_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
static inline void xe_display_pm_resume(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_runtime_suspend_late(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
#endif /* CONFIG_DRM_XE_DISPLAY */
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 761510ae0690..9fa51b84737c 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -161,7 +161,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
}
vma->dpt = dpt;
- vma->node = dpt->ggtt_node;
+ vma->node = dpt->ggtt_node[tile0->id];
return 0;
}
@@ -213,8 +213,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
align = max_t(u32, align, SZ_64K);
- if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) {
- vma->node = bo->ggtt_node;
+ if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) {
+ vma->node = bo->ggtt_node[ggtt->tile->id];
} else if (view->type == I915_GTT_VIEW_NORMAL) {
u32 x, size = bo->ttm.base.size;
@@ -345,10 +345,12 @@ err:
static void __xe_unpin_fb_vma(struct i915_vma *vma)
{
+ u8 tile_id = vma->node->ggtt->tile->id;
+
if (vma->dpt)
xe_bo_unpin_map_no_vm(vma->dpt);
- else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) ||
- vma->bo->ggtt_node->base.start != vma->node->base.start)
+ else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) ||
+ vma->bo->ggtt_node[tile_id]->base.start != vma->node->base.start)
xe_ggtt_node_remove(vma->node, false);
ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index 8c113463a3d5..2eb9633f163a 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -275,12 +275,12 @@ static void plane_config_fini(struct intel_initial_plane_config *plane_config)
}
}
-void intel_initial_plane_config(struct drm_i915_private *i915)
+void intel_initial_plane_config(struct intel_display *display)
{
struct intel_initial_plane_config plane_configs[I915_MAX_PIPES] = {};
struct intel_crtc *crtc;
- for_each_intel_crtc(&i915->drm, crtc) {
+ for_each_intel_crtc(display->drm, crtc) {
struct intel_initial_plane_config *plane_config =
&plane_configs[crtc->pipe];
@@ -294,7 +294,7 @@ void intel_initial_plane_config(struct drm_i915_private *i915)
* can even allow for smooth boot transitions if the BIOS
* fb is large enough for the active pipe configuration.
*/
- i915->display.funcs.display->get_initial_plane_config(crtc, plane_config);
+ display->funcs.display->get_initial_plane_config(crtc, plane_config);
/*
* If the fb is shared between multiple heads, we'll
@@ -302,7 +302,7 @@ void intel_initial_plane_config(struct drm_i915_private *i915)
*/
intel_find_initial_plane_obj(crtc, plane_configs);
- if (i915->display.funcs.display->fixup_initial_plane_config(crtc, plane_config))
+ if (display->funcs.display->fixup_initial_plane_config(crtc, plane_config))
intel_crtc_wait_for_next_vblank(crtc);
plane_config_fini(plane_config);
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 7c78496e6213..d86219dedde2 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -83,6 +83,8 @@
#define RING_IMR(base) XE_REG((base) + 0xa8)
#define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac)
+#define CS_INT_VEC(base) XE_REG((base) + 0x1b8)
+
#define RING_EIR(base) XE_REG((base) + 0xb0)
#define RING_EMR(base) XE_REG((base) + 0xb4)
#define RING_ESR(base) XE_REG((base) + 0xb8)
@@ -138,6 +140,7 @@
#define RING_MODE(base) XE_REG((base) + 0x29c)
#define GFX_DISABLE_LEGACY_MODE REG_BIT(3)
+#define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13)
#define RING_TIMESTAMP(base) XE_REG((base) + 0x358)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 0c9e4b2fafab..162f18e975da 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -445,6 +445,8 @@
#define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
#define ENABLE_SMALLPL REG_BIT(15)
+#define SMP_WAIT_FETCH_MERGING_COUNTER REG_GENMASK(11, 10)
+#define SMP_FORCE_128B_OVERFETCH REG_FIELD_PREP(SMP_WAIT_FETCH_MERGING_COUNTER, 1)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
#define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
#define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 045dfd09db99..57944f90bbf6 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -25,6 +25,9 @@
#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3)
#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4)
+#define CTX_CS_INT_VEC_REG 0x5a
+#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1)
+
#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
#define INDIRECT_CTX_RING_START (0x06 + 1)
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
index a9b0091cb7ee..a49561e9f3c3 100644
--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -41,14 +41,6 @@
#define OAG_OABUFFER XE_REG(0xdb08)
#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3)
-#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0)
-#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1)
-#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2)
-#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3)
-#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4)
-#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5)
-#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6)
-#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7)
#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
#define OAG_OACONTROL XE_REG(0xdaf4)
@@ -63,6 +55,7 @@
#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED)
#define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14)
#define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13)
+#define OAG_OA_DEBUG_BUF_SIZE_SELECT REG_BIT(12)
#define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8)
#define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7)
#define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h
new file mode 100644
index 000000000000..f45abcd96ba8
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_pmt.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_PMT_H_
+#define _XE_PMT_H_
+
+#define SOC_BASE 0x280000
+
+#define BMG_PMT_BASE_OFFSET 0xDB000
+#define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET)
+
+#define BMG_TELEMETRY_BASE_OFFSET 0xE0000
+#define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET)
+
+#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08)
+#define SG_REMAP_BITS REG_GENMASK(31, 24)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index 51fd40ffafcb..0eedd6c26b1b 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -13,7 +13,7 @@
/**
* struct xe_reg - Register definition
*
- * Register defintion to be used by the individual register. Although the same
+ * Register definition to be used by the individual register. Although the same
* definition is used for xe_reg and xe_reg_mcr, they use different internal
* APIs for accesses.
*/
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 3293172b0128..6cf282618836 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -44,12 +44,16 @@
#define MTL_RP_STATE_CAP XE_REG(0x138000)
+#define MTL_GT_RPA_FREQUENCY XE_REG(0x138008)
#define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c)
#define MTL_MEDIAP_STATE_CAP XE_REG(0x138020)
#define MTL_RPN_CAP_MASK REG_GENMASK(24, 16)
#define MTL_RP0_CAP_MASK REG_GENMASK(8, 0)
+#define MTL_MPA_FREQUENCY XE_REG(0x138028)
+#define MTL_RPA_MASK REG_GENMASK(8, 0)
+
#define MTL_MPE_FREQUENCY XE_REG(0x13802c)
#define MTL_RPE_MASK REG_GENMASK(8, 0)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 3e0ae40ebbd2..6795d1d916e4 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -49,6 +49,13 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
KUNIT_FAIL(test, "Failed to submit bo clear.\n");
return PTR_ERR(fence);
}
+
+ if (dma_fence_wait_timeout(fence, false, 5 * HZ) <= 0) {
+ dma_fence_put(fence);
+ KUNIT_FAIL(test, "Timeout while clearing bo.\n");
+ return -ETIME;
+ }
+
dma_fence_put(fence);
}
@@ -257,10 +264,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
* however seems quite fragile not to also restart the GT. Try
* to do that here by triggering a GT reset.
*/
- for_each_gt(__gt, xe, id) {
- xe_gt_reset_async(__gt);
- flush_work(&__gt->reset.worker);
- }
+ for_each_gt(__gt, xe, id)
+ xe_gt_reset(__gt);
+
if (err) {
KUNIT_FAIL(test, "restore kernel err=%pe\n",
ERR_PTR(err));
@@ -599,8 +605,6 @@ static void xe_bo_shrink_kunit(struct kunit *test)
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
- KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
- {.speed = KUNIT_SPEED_SLOW}),
{}
};
@@ -611,3 +615,17 @@ struct kunit_suite xe_bo_test_suite = {
.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
+
+static struct kunit_case xe_bo_shrink_test[] = {
+ KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
+ {.speed = KUNIT_SPEED_SLOW}),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_bo_shrink_test_suite = {
+ .name = "xe_bo_shrink",
+ .test_cases = xe_bo_shrink_test,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
index 0d36ab864ec0..81277c77016d 100644
--- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -6,11 +6,13 @@
#include <kunit/test.h>
extern struct kunit_suite xe_bo_test_suite;
+extern struct kunit_suite xe_bo_shrink_test_suite;
extern struct kunit_suite xe_dma_buf_test_suite;
extern struct kunit_suite xe_migrate_test_suite;
extern struct kunit_suite xe_mocs_test_suite;
kunit_test_suite(xe_bo_test_suite);
+kunit_test_suite(xe_bo_shrink_test_suite);
kunit_test_suite(xe_dma_buf_test_suite);
kunit_test_suite(xe_migrate_test_suite);
kunit_test_suite(xe_mocs_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 3bbdb362d6f0..d5fe0ea889ad 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -83,7 +83,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
bo->size,
ttm_bo_type_kernel,
region |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(remote)) {
KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
str, remote);
@@ -642,7 +643,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(sys_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -666,7 +669,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(ccs_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -690,7 +694,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED);
if (IS_ERR(vram_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(vram_bo));
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 6f9b7a266b41..ef1e5256c56a 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -58,7 +58,7 @@ static void read_l3cc_table(struct xe_gt *gt,
mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
} else {
- /* Just re-use value read on previous iteration */
+ /* Just reuse value read on previous iteration */
reg_val >>= 16;
}
@@ -162,8 +162,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
if (flags & HAS_LNCF_MOCS)
read_l3cc_table(gt, &mocs.table);
- xe_gt_reset_async(gt);
- flush_work(&gt->reset.worker);
+ xe_gt_reset(gt);
kunit_info(test, "mocs_reset_test after reset\n");
if (flags & HAS_GLOBAL_MOCS)
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
index 04d6b95c6d87..68fe70ce2be3 100644
--- a/drivers/gpu/drm/xe/xe_assert.h
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -14,7 +14,7 @@
#include "xe_step.h"
/**
- * DOC: Xe ASSERTs
+ * DOC: Xe Asserts
*
* While Xe driver aims to be simpler than legacy i915 driver it is still
* complex enough that some changes introduced while adding new functionality
@@ -103,7 +103,7 @@
* (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
* or as a condition.
*
- * See `Xe ASSERTs`_ for general usage guidelines.
+ * See `Xe Asserts`_ for general usage guidelines.
*/
#define xe_assert(xe, condition) xe_assert_msg((xe), condition, "")
#define xe_assert_msg(xe, condition, msg, arg...) ({ \
@@ -138,7 +138,7 @@
* (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
* or as a condition.
*
- * See `Xe ASSERTs`_ for general usage guidelines.
+ * See `Xe Asserts`_ for general usage guidelines.
*/
#define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "")
#define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \
@@ -162,7 +162,7 @@
* (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
* or as a condition.
*
- * See `Xe ASSERTs`_ for general usage guidelines.
+ * See `Xe Asserts`_ for general usage guidelines.
*/
#define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "")
#define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index ef777dbdf4ec..9570672fce33 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -41,7 +41,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
/*
* We need to allocate space for the requested number of dwords,
* one additional MI_BATCH_BUFFER_END dword, and additional buffer
- * space to accomodate the platform-specific hardware prefetch
+ * space to accommodate the platform-specific hardware prefetch
* requirements.
*/
bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index f61a8ef38094..3f5391d416d4 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -162,6 +162,15 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
}
}
+static bool force_contiguous(u32 bo_flags)
+{
+ /*
+ * For eviction / restore on suspend / resume objects pinned in VRAM
+ * must be contiguous, also only contiguous BOs support xe_bo_vmap.
+ */
+ return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+}
+
static void add_vram(struct xe_device *xe, struct xe_bo *bo,
struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
{
@@ -175,12 +184,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
xe_assert(xe, vram && vram->usable_size);
io_size = vram->io_size;
- /*
- * For eviction / restore on suspend / resume objects
- * pinned in VRAM must be contiguous
- */
- if (bo_flags & (XE_BO_FLAG_PINNED |
- XE_BO_FLAG_GGTT))
+ if (force_contiguous(bo_flags))
place.flags |= TTM_PL_FLAG_CONTIGUOUS;
if (io_size < vram->usable_size) {
@@ -212,8 +216,7 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_STOLEN,
- .flags = bo_flags & (XE_BO_FLAG_PINNED |
- XE_BO_FLAG_GGTT) ?
+ .flags = force_contiguous(bo_flags) ?
TTM_PL_FLAG_CONTIGUOUS : 0,
};
*c += 1;
@@ -442,6 +445,14 @@ static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
kfree(tt);
}
+static bool xe_ttm_resource_visible(struct ttm_resource *mem)
+{
+ struct xe_ttm_vram_mgr_resource *vres =
+ to_xe_ttm_vram_mgr_resource(mem);
+
+ return vres->used_visible_size == mem->size;
+}
+
static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
@@ -453,11 +464,9 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
return 0;
case XE_PL_VRAM0:
case XE_PL_VRAM1: {
- struct xe_ttm_vram_mgr_resource *vres =
- to_xe_ttm_vram_mgr_resource(mem);
struct xe_mem_region *vram = res_to_mem_region(mem);
- if (vres->used_visible_size < mem->size)
+ if (!xe_ttm_resource_visible(mem))
return -EINVAL;
mem->bus.offset = mem->start << PAGE_SHIFT;
@@ -777,7 +786,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
* / resume, some of the pinned memory is required for the
* device to resume / use the GPU to move other evicted memory
* (user memory) around. This likely could be optimized a bit
- * futher where we find the minimum set of pinned memory
+ * further where we find the minimum set of pinned memory
* required for resume but for simplity doing a memcpy for all
* pinned memory.
*/
@@ -866,7 +875,7 @@ out:
* xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
* @bo: The buffer object to move.
*
- * On successful completion, the object memory will be moved to sytem memory.
+ * On successful completion, the object memory will be moved to system memory.
*
* This is needed to for special handling of pinned VRAM object during
* suspend-resume.
@@ -884,6 +893,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
};
struct ttm_operation_ctx ctx = {
.interruptible = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_resource *new_mem;
int ret;
@@ -945,6 +955,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
{
struct ttm_operation_ctx ctx = {
.interruptible = false,
+ .gfp_retry_mayfail = false,
};
struct ttm_resource *new_mem;
struct ttm_place *place = &bo->placements[0];
@@ -1114,7 +1125,8 @@ static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operati
static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
{
struct ttm_operation_ctx ctx = {
- .interruptible = false
+ .interruptible = false,
+ .gfp_retry_mayfail = false,
};
if (ttm_bo->ttm) {
@@ -1126,6 +1138,52 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
}
}
+static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
+ unsigned long offset, void *buf, int len,
+ int write)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct iosys_map vmap;
+ struct xe_res_cursor cursor;
+ struct xe_mem_region *vram;
+ int bytes_left = len;
+
+ xe_bo_assert_held(bo);
+ xe_device_assert_mem_access(xe);
+
+ if (!mem_type_is_vram(ttm_bo->resource->mem_type))
+ return -EIO;
+
+ /* FIXME: Use GPU for non-visible VRAM */
+ if (!xe_ttm_resource_visible(ttm_bo->resource))
+ return -EIO;
+
+ vram = res_to_mem_region(ttm_bo->resource);
+ xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
+ bo->size - (offset & PAGE_MASK), &cursor);
+
+ do {
+ unsigned long page_offset = (offset & ~PAGE_MASK);
+ int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
+
+ iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
+ cursor.start);
+ if (write)
+ xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
+ else
+ xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
+
+ buf += byte_count;
+ offset += byte_count;
+ bytes_left -= byte_count;
+ if (bytes_left)
+ xe_res_next(&cursor, PAGE_SIZE);
+ } while (bytes_left);
+
+ return len;
+}
+
const struct ttm_device_funcs xe_ttm_funcs = {
.ttm_tt_create = xe_ttm_tt_create,
.ttm_tt_populate = xe_ttm_tt_populate,
@@ -1135,6 +1193,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
.move = xe_bo_move,
.io_mem_reserve = xe_ttm_io_mem_reserve,
.io_mem_pfn = xe_ttm_io_mem_pfn,
+ .access_memory = xe_ttm_access_memory,
.release_notify = xe_ttm_bo_release_notify,
.eviction_valuable = ttm_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
@@ -1145,6 +1204,8 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
{
struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct xe_tile *tile;
+ u8 id;
if (bo->ttm.base.import_attach)
drm_prime_gem_destroy(&bo->ttm.base, NULL);
@@ -1152,8 +1213,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
- if (bo->ggtt_node && bo->ggtt_node->base.size)
- xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
+ for_each_tile(tile, xe, id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
+ xe_ggtt_remove_bo(tile->mem.ggtt, bo);
#ifdef CONFIG_PROC_FS
if (bo->client)
@@ -1251,11 +1313,50 @@ out:
return ret;
}
+static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = xe_bo_device(bo);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = ttm_bo_vm_access(vma, addr, buf, len, write);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+/**
+ * xe_bo_read() - Read from an xe_bo
+ * @bo: The buffer object to read from.
+ * @offset: The byte offset to start reading from.
+ * @dst: Location to store the read.
+ * @size: Size in bytes for the read.
+ *
+ * Read @size bytes from the @bo, starting from @offset, storing into @dst.
+ *
+ * Return: Zero on success, or negative error.
+ */
+int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
+{
+ int ret;
+
+ ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
+ if (ret >= 0 && ret != size)
+ ret = -EIO;
+ else if (ret == size)
+ ret = 0;
+
+ return ret;
+}
+
static const struct vm_operations_struct xe_gem_vm_ops = {
.fault = xe_gem_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
- .access = ttm_bo_vm_access
+ .access = xe_bo_vm_access,
};
static const struct drm_gem_object_funcs xe_gem_object_funcs = {
@@ -1269,7 +1370,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = {
/**
* xe_bo_alloc - Allocate storage for a struct xe_bo
*
- * This funcition is intended to allocate storage to be used for input
+ * This function is intended to allocate storage to be used for input
* to __xe_bo_create_locked(), in the case a pointer to the bo to be
* created is needed before the call to __xe_bo_create_locked().
* If __xe_bo_create_locked ends up never to be called, then the
@@ -1309,6 +1410,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement *placement;
uint32_t alignment;
@@ -1323,6 +1425,10 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
return ERR_PTR(-EINVAL);
}
+ /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
+ if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+ return ERR_PTR(-EINVAL);
+
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
@@ -1513,19 +1619,29 @@ __xe_bo_create_locked(struct xe_device *xe,
bo->vm = vm;
if (bo->flags & XE_BO_FLAG_GGTT) {
- if (!tile && flags & XE_BO_FLAG_STOLEN)
- tile = xe_device_get_root_tile(xe);
+ struct xe_tile *t;
+ u8 id;
- xe_assert(xe, tile);
+ if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
+ if (!tile && flags & XE_BO_FLAG_STOLEN)
+ tile = xe_device_get_root_tile(xe);
- if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
- err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
- start + bo->size, U64_MAX);
- } else {
- err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
+ xe_assert(xe, tile);
+ }
+
+ for_each_tile(t, xe, id) {
+ if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
+ continue;
+
+ if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
+ err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
+ start + bo->size, U64_MAX);
+ } else {
+ err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
+ }
+ if (err)
+ goto err_unlock_put_bo;
}
- if (err)
- goto err_unlock_put_bo;
}
return bo;
@@ -1908,6 +2024,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
if (vm) {
@@ -1918,6 +2035,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
ctx.resv = xe_vm_resv(vm);
}
+ trace_xe_bo_validate(bo);
return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
}
@@ -1969,13 +2087,15 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
int xe_bo_vmap(struct xe_bo *bo)
{
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
void *virtual;
bool is_iomem;
int ret;
xe_bo_assert_held(bo);
- if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
+ if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
+ !force_contiguous(bo->flags)))
return -EINVAL;
if (!iosys_map_is_null(&bo->vmap))
@@ -2251,6 +2371,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement placement;
struct ttm_place requested;
@@ -2291,7 +2412,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
* @force_alloc: Set force_alloc in ttm_operation_ctx
*
* On successful completion, the object memory will be moved to evict
- * placement. Ths function blocks until the object has been fully moved.
+ * placement. This function blocks until the object has been fully moved.
*
* Return: 0 on success. Negative error code on failure.
*/
@@ -2301,6 +2422,7 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
.interruptible = false,
.no_wait_gpu = false,
.force_alloc = force_alloc,
+ .gfp_retry_mayfail = true,
};
struct ttm_placement placement;
int ret;
@@ -2380,14 +2502,18 @@ void xe_bo_put_commit(struct llist_head *deferred)
void xe_bo_put(struct xe_bo *bo)
{
+ struct xe_tile *tile;
+ u8 id;
+
might_sleep();
if (bo) {
#ifdef CONFIG_PROC_FS
if (bo->client)
might_lock(&bo->client->bos_lock);
#endif
- if (bo->ggtt_node && bo->ggtt_node->ggtt)
- might_lock(&bo->ggtt_node->ggtt->lock);
+ for_each_tile(tile, xe_bo_device(bo), id)
+ if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
+ might_lock(&bo->ggtt_node[id]->ggtt->lock);
drm_gem_object_put(&bo->ttm.base);
}
}
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 7fa44a0138b0..d9386ab03140 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -39,10 +39,22 @@
#define XE_BO_FLAG_NEEDS_64K BIT(15)
#define XE_BO_FLAG_NEEDS_2M BIT(16)
#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17)
+#define XE_BO_FLAG_GGTT0 BIT(18)
+#define XE_BO_FLAG_GGTT1 BIT(19)
+#define XE_BO_FLAG_GGTT2 BIT(20)
+#define XE_BO_FLAG_GGTT3 BIT(21)
+#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \
+ XE_BO_FLAG_GGTT1 | \
+ XE_BO_FLAG_GGTT2 | \
+ XE_BO_FLAG_GGTT3)
+
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
#define XE_BO_FLAG_INTERNAL_64K BIT(31)
+#define XE_BO_FLAG_GGTTx(tile) \
+ (XE_BO_FLAG_GGTT0 << (tile)->id)
+
#define XE_PTE_SHIFT 12
#define XE_PAGE_SIZE (1 << XE_PTE_SHIFT)
#define XE_PTE_MASK (XE_PAGE_SIZE - 1)
@@ -194,18 +206,29 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
}
static inline u32
-xe_bo_ggtt_addr(struct xe_bo *bo)
+__xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id)
{
- if (XE_WARN_ON(!bo->ggtt_node))
+ struct xe_ggtt_node *ggtt_node = bo->ggtt_node[tile_id];
+
+ if (XE_WARN_ON(!ggtt_node))
return 0;
- XE_WARN_ON(bo->ggtt_node->base.size > bo->size);
- XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32));
- return bo->ggtt_node->base.start;
+ XE_WARN_ON(ggtt_node->base.size > bo->size);
+ XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32));
+ return ggtt_node->base.start;
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+ xe_assert(xe_bo_device(bo), bo->tile);
+
+ return __xe_bo_ggtt_addr(bo, bo->tile->id);
}
int xe_bo_vmap(struct xe_bo *bo);
void xe_bo_vunmap(struct xe_bo *bo);
+int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size);
bool mem_type_is_vram(u32 mem_type);
bool xe_bo_is_vram(struct xe_bo *bo);
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
index f57d440cc95a..25a884c64bf1 100644
--- a/drivers/gpu/drm/xe/xe_bo_doc.h
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -41,7 +41,7 @@
* created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
* access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
* user BOs are evictable and user BOs are never pinned by XE. The allocation of
- * the backing store can be defered from creation time until first use which is
+ * the backing store can be deferred from creation time until first use which is
* either mmap, bind, or pagefault.
*
* Private BOs
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 8fb2be061003..6a40eedd9db1 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -152,11 +152,17 @@ int xe_bo_restore_kernel(struct xe_device *xe)
}
if (bo->flags & XE_BO_FLAG_GGTT) {
- struct xe_tile *tile = bo->tile;
+ struct xe_tile *tile;
+ u8 id;
- mutex_lock(&tile->mem.ggtt->lock);
- xe_ggtt_map_bo(tile->mem.ggtt, bo);
- mutex_unlock(&tile->mem.ggtt->lock);
+ for_each_tile(tile, xe, id) {
+ if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
+ continue;
+
+ mutex_lock(&tile->mem.ggtt->lock);
+ xe_ggtt_map_bo(tile->mem.ggtt, bo);
+ mutex_unlock(&tile->mem.ggtt->lock);
+ }
}
/*
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 13c6d8a69e91..46dc9e4e3e46 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -10,9 +10,9 @@
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_device.h>
-#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_placement.h>
+#include "xe_device_types.h"
#include "xe_ggtt_types.h"
struct xe_device;
@@ -39,8 +39,8 @@ struct xe_bo {
struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
/** @placement: current placement for this BO */
struct ttm_placement placement;
- /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
- struct xe_ggtt_node *ggtt_node;
+ /** @ggtt_node: Array of GGTT nodes if this BO is mapped in the GGTTs */
+ struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE];
/** @vmap: iosys map of this buffer */
struct iosys_map vmap;
/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 21a50d539426..81dc7795c065 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -30,30 +30,39 @@
/**
* DOC: Xe device coredump
*
- * Devices overview:
* Xe uses dev_coredump infrastructure for exposing the crash errors in a
- * standardized way.
- * devcoredump exposes a temporary device under /sys/class/devcoredump/
- * which is linked with our card device directly.
- * The core dump can be accessed either from
- * /sys/class/drm/card<n>/device/devcoredump/ or from
- * /sys/class/devcoredump/devcd<m> where
- * /sys/class/devcoredump/devcd<m>/failing_device is a link to
- * /sys/class/drm/card<n>/device/.
+ * standardized way. Once a crash occurs, devcoredump exposes a temporary
+ * node under ``/sys/class/devcoredump/devcd<m>/``. The same node is also
+ * accessible in ``/sys/class/drm/card<n>/device/devcoredump/``. The
+ * ``failing_device`` symlink points to the device that crashed and created the
+ * coredump.
*
- * Snapshot at hang:
- * The 'data' file is printed with a drm_printer pointer at devcoredump read
- * time. For this reason, we need to take snapshots from when the hang has
- * happened, and not only when the user is reading the file. Otherwise the
- * information is outdated since the resets might have happened in between.
+ * The following characteristics are observed by xe when creating a device
+ * coredump:
*
- * 'First' failure snapshot:
- * In general, the first hang is the most critical one since the following hangs
- * can be a consequence of the initial hang. For this reason we only take the
- * snapshot of the 'first' failure and ignore subsequent calls of this function,
- * at least while the coredump device is alive. Dev_coredump has a delayed work
- * queue that will eventually delete the device and free all the dump
- * information.
+ * **Snapshot at hang**:
+ * The 'data' file contains a snapshot of the HW and driver states at the time
+ * the hang happened. Due to the driver recovering from resets/crashes, it may
+ * not correspond to the state of the system when the file is read by
+ * userspace.
+ *
+ * **Coredump release**:
+ * After a coredump is generated, it stays in kernel memory until released by
+ * userspace by writing anything to it, or after an internal timer expires. The
+ * exact timeout may vary and should not be relied upon. Example to release
+ * a coredump:
+ *
+ * .. code-block:: shell
+ *
+ * $ > /sys/class/drm/card0/device/devcoredump/data
+ *
+ * **First failure only**:
+ * In general, the first hang is the most critical one since the following
+ * hangs can be a consequence of the initial hang. For this reason a snapshot
+ * is taken only for the first failure. Until the devcoredump is released by
+ * userspace or kernel, all subsequent hangs do not override the snapshot nor
+ * create new ones. Devcoredump has a delayed work queue that will eventually
+ * delete the file node and free all the dump information.
*/
#ifdef CONFIG_DEV_COREDUMP
@@ -91,6 +100,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
p = drm_coredump_printer(&iter);
drm_puts(&p, "**** Xe Device Coredump ****\n");
+ drm_printf(&p, "Reason: %s\n", ss->reason);
drm_puts(&p, "kernel: " UTS_RELEASE "\n");
drm_puts(&p, "module: " KBUILD_MODNAME "\n");
@@ -98,7 +108,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
ts = ktime_to_timespec64(ss->boot_time);
drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
- drm_printf(&p, "Process: %s\n", ss->process_name);
+ drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid);
xe_device_snapshot_print(xe, &p);
drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id);
@@ -134,6 +144,9 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
{
int i;
+ kfree(ss->reason);
+ ss->reason = NULL;
+
xe_guc_log_snapshot_free(ss->guc.log);
ss->guc.log = NULL;
@@ -174,16 +187,24 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
- if (!ss->read.buffer)
+ mutex_lock(&coredump->lock);
+
+ if (!ss->read.buffer) {
+ mutex_unlock(&coredump->lock);
return -ENODEV;
+ }
- if (offset >= ss->read.size)
+ if (offset >= ss->read.size) {
+ mutex_unlock(&coredump->lock);
return 0;
+ }
byte_copied = count < ss->read.size - offset ? count :
ss->read.size - offset;
memcpy(buffer, ss->read.buffer + offset, byte_copied);
+ mutex_unlock(&coredump->lock);
+
return byte_copied;
}
@@ -197,15 +218,18 @@ static void xe_devcoredump_free(void *data)
cancel_work_sync(&coredump->snapshot.work);
+ mutex_lock(&coredump->lock);
+
xe_devcoredump_snapshot_free(&coredump->snapshot);
kvfree(coredump->snapshot.read.buffer);
/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
coredump->captured = false;
- coredump->job = NULL;
drm_info(&coredump_to_xe(coredump)->drm,
"Xe device coredump has been deleted.\n");
+
+ mutex_unlock(&coredump->lock);
}
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
@@ -248,10 +272,10 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+ struct xe_exec_queue *q,
struct xe_sched_job *job)
{
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
- struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
u32 adj_logical_mask = q->logical_mask;
u32 width_mask = (0x1 << q->width) - 1;
@@ -264,12 +288,14 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
- if (q->vm && q->vm->xef)
+ if (q->vm && q->vm->xef) {
process_name = q->vm->xef->process_name;
+ ss->pid = q->vm->xef->pid;
+ }
+
strscpy(ss->process_name, process_name);
ss->gt = q->gt;
- coredump->job = job;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
cookie = dma_fence_begin_signalling();
@@ -288,10 +314,11 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
ss->ge = xe_guc_exec_queue_snapshot_capture(q);
- ss->job = xe_sched_job_snapshot_capture(job);
+ if (job)
+ ss->job = xe_sched_job_snapshot_capture(job);
ss->vm = xe_vm_snapshot_capture(q->vm);
- xe_engine_snapshot_capture_for_job(job);
+ xe_engine_snapshot_capture_for_queue(q);
queue_work(system_unbound_wq, &ss->work);
@@ -301,28 +328,42 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
/**
* xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
* @job: The faulty xe_sched_job, where the issue was detected.
+ * @fmt: Printf format + args to describe the reason for the core dump
*
* This function should be called at the crash time within the serialized
* gt_reset. It is skipped if we still have the core dump device available
* with the information of the 'first' snapshot.
*/
-void xe_devcoredump(struct xe_sched_job *job)
+__printf(3, 4)
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...)
{
- struct xe_device *xe = gt_to_xe(job->q->gt);
+ struct xe_device *xe = gt_to_xe(q->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
+ va_list varg;
+
+ mutex_lock(&coredump->lock);
if (coredump->captured) {
drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+ mutex_unlock(&coredump->lock);
return;
}
coredump->captured = true;
- devcoredump_snapshot(coredump, job);
+
+ va_start(varg, fmt);
+ coredump->snapshot.reason = kvasprintf(GFP_ATOMIC, fmt, varg);
+ va_end(varg);
+
+ devcoredump_snapshot(coredump, q, job);
drm_info(&xe->drm, "Xe device coredump has been created\n");
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
xe->drm.primary->index);
+
+ mutex_unlock(&coredump->lock);
}
static void xe_driver_devcoredump_fini(void *arg)
@@ -334,6 +375,18 @@ static void xe_driver_devcoredump_fini(void *arg)
int xe_devcoredump_init(struct xe_device *xe)
{
+ int err;
+
+ err = drmm_mutex_init(&xe->drm, &xe->devcoredump.lock);
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&xe->devcoredump.lock);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+
return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index a4eebc285fc8..6a17e6d60102 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -10,13 +10,16 @@
struct drm_printer;
struct xe_device;
+struct xe_exec_queue;
struct xe_sched_job;
#ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_sched_job *job);
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...);
int xe_devcoredump_init(struct xe_device *xe);
#else
-static inline void xe_devcoredump(struct xe_sched_job *job)
+static inline void xe_devcoredump(struct xe_exec_queue *q,
+ struct xe_sched_job *job,
+ const char *fmt, ...)
{
}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 3703ddea1252..1a1d16a96b2d 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -28,6 +28,10 @@ struct xe_devcoredump_snapshot {
ktime_t boot_time;
/** @process_name: Name of process that triggered this gpu hang */
char process_name[TASK_COMM_LEN];
+ /** @pid: Process id of process that triggered this gpu hang */
+ pid_t pid;
+ /** @reason: The reason the coredump was triggered */
+ char *reason;
/** @gt: Affected GT, used by forcewake for delayed capture */
struct xe_gt *gt;
@@ -76,12 +80,12 @@ struct xe_devcoredump_snapshot {
* for reading the information.
*/
struct xe_devcoredump {
- /** @captured: The snapshot of the first hang has already been taken. */
+ /** @lock: protects access to entire structure */
+ struct mutex lock;
+ /** @captured: The snapshot of the first hang has already been taken */
bool captured;
/** @snapshot: Snapshot is captured at time of the first crash */
struct xe_devcoredump_snapshot snapshot;
- /** @job: Point to the faulting job */
- struct xe_sched_job *job;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 06d6db8b50f9..4e1839b483a0 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -44,6 +44,7 @@
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
+#include "xe_oa.h"
#include "xe_observation.h"
#include "xe_pat.h"
#include "xe_pcode.h"
@@ -55,6 +56,7 @@
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vram.h"
+#include "xe_vsec.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"
@@ -269,7 +271,6 @@ static struct drm_driver driver = {
.fops = &xe_driver_fops,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = DRIVER_MAJOR,
.minor = DRIVER_MINOR,
.patchlevel = DRIVER_PATCHLEVEL,
@@ -324,7 +325,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->info.revid = pdev->revision;
xe->info.force_execlist = xe_modparam.force_execlist;
- spin_lock_init(&xe->irq.lock);
+ err = xe_irq_init(xe);
+ if (err)
+ goto err;
init_waitqueue_head(&xe->ufence_wq);
@@ -366,6 +369,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
goto err;
}
+ err = drmm_mutex_init(&xe->drm, &xe->pmt.lock);
+ if (err)
+ goto err;
+
err = xe_display_create(xe);
if (WARN_ON(err))
goto err;
@@ -514,7 +521,7 @@ static int wait_for_lmem_ready(struct xe_device *xe)
drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
start = jiffies;
- timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+ timeout = start + secs_to_jiffies(60); /* 60 sec! */
do {
if (signal_pending(current))
@@ -599,7 +606,7 @@ static int probe_has_flat_ccs(struct xe_device *xe)
u32 reg;
/* Always enabled/disabled, no runtime check to do */
- if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe))
return 0;
gt = xe_root_mmio_gt(xe);
@@ -760,6 +767,8 @@ int xe_device_probe(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_sanitize_freq(gt);
+ xe_vsec_init(xe);
+
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
err_fini_display:
@@ -990,7 +999,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
* xe_device_declare_wedged - Declare device wedged
* @xe: xe device instance
*
- * This is a final state that can only be cleared with a mudule
+ * This is a final state that can only be cleared with a module
* re-probe (unbind + bind).
* In this state every IOCTL will be blocked so the GT cannot be used.
* In general it will be called upon any critical error such as gt reset
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index f1fbfe916867..fc3c2af3fb7f 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -157,8 +157,7 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
static inline bool xe_device_has_msix(struct xe_device *xe)
{
- /* TODO: change this when MSI-X support is fully integrated */
- return false;
+ return xe->irq.msix.nvec > 0;
}
static inline bool xe_device_has_memirq(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index b9ea455d6f59..8a7b15972413 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -16,7 +16,7 @@
#include "xe_heci_gsc.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
-#include "xe_oa.h"
+#include "xe_oa_types.h"
#include "xe_platform_types.h"
#include "xe_pt_types.h"
#include "xe_sriov_types.h"
@@ -42,8 +42,6 @@ struct xe_pat_ops;
#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
#define IS_DGFX(xe) ((xe)->info.is_dgfx)
-#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
-#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi)
#define XE_VRAM_FLAGS_NEED64K BIT(0)
@@ -296,14 +294,24 @@ struct xe_device {
/** @info.va_bits: Maximum bits of a virtual address */
u8 va_bits;
- /** @info.is_dgfx: is discrete device */
- u8 is_dgfx:1;
- /** @info.has_asid: Has address space ID */
- u8 has_asid:1;
+ /*
+ * Keep all flags below alphabetically sorted
+ */
+
/** @info.force_execlist: Forced execlist submission */
u8 force_execlist:1;
+ /** @info.has_asid: Has address space ID */
+ u8 has_asid:1;
+ /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
+ u8 has_atomic_enable_pte_bit:1;
+ /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
+ u8 has_device_atomics_on_smem:1;
/** @info.has_flat_ccs: Whether flat CCS metadata is used */
u8 has_flat_ccs:1;
+ /** @info.has_heci_cscfi: device has heci cscfi */
+ u8 has_heci_cscfi:1;
+ /** @info.has_heci_gscfi: device has heci gscfi */
+ u8 has_heci_gscfi:1;
/** @info.has_llc: Device has a shared CPU+GPU last level cache */
u8 has_llc:1;
/** @info.has_mmio_ext: Device has extra MMIO address range */
@@ -314,6 +322,8 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
+ /** @info.is_dgfx: is discrete device */
+ u8 is_dgfx:1;
/**
* @info.probe_display: Probe display hardware. If set to
* false, the driver will behave as if there is no display
@@ -323,20 +333,12 @@ struct xe_device {
* state the firmware or bootloader left it in.
*/
u8 probe_display:1;
+ /** @info.skip_guc_pc: Skip GuC based PM feature init */
+ u8 skip_guc_pc:1;
/** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
u8 skip_mtcfg:1;
/** @info.skip_pcode: skip access to PCODE uC */
u8 skip_pcode:1;
- /** @info.has_heci_gscfi: device has heci gscfi */
- u8 has_heci_gscfi:1;
- /** @info.has_heci_cscfi: device has heci cscfi */
- u8 has_heci_cscfi:1;
- /** @info.skip_guc_pc: Skip GuC based PM feature init */
- u8 skip_guc_pc:1;
- /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
- u8 has_atomic_enable_pte_bit:1;
- /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
- u8 has_device_atomics_on_smem:1;
} info;
/** @irq: device interrupt state */
@@ -345,7 +347,15 @@ struct xe_device {
spinlock_t lock;
/** @irq.enabled: interrupts enabled on this device */
- bool enabled;
+ atomic_t enabled;
+
+ /** @irq.msix: irq info for platforms that support MSI-X */
+ struct {
+ /** @irq.msix.nvec: number of MSI-X interrupts */
+ u16 nvec;
+ /** @irq.msix.indexes: used to allocate MSI-X indexes */
+ struct xarray indexes;
+ } msix;
} irq;
/** @ttm: ttm device */
@@ -374,6 +384,8 @@ struct xe_device {
/** @sriov.pf: PF specific data */
struct xe_device_pf pf;
+ /** @sriov.vf: VF specific data */
+ struct xe_device_vf vf;
/** @sriov.wq: workqueue used by the virtualization workers */
struct workqueue_struct *wq;
@@ -481,6 +493,12 @@ struct xe_device {
struct mutex lock;
} d3cold;
+ /** @pmt: Support the PMT driver callback interface */
+ struct {
+ /** @pmt.lock: protect access for telemetry data */
+ struct mutex lock;
+ } pmt;
+
/**
* @pm_callback_task: Track the active task that is running in either
* the runtime_suspend or runtime_resume callbacks.
@@ -588,7 +606,7 @@ struct xe_file {
/** @vm.xe: xarray to store VMs */
struct xarray xa;
/**
- * @vm.lock: Protects VM lookup + reference and removal a from
+ * @vm.lock: Protects VM lookup + reference and removal from
* file xarray. Not an intended to be an outer lock which does
* thing while being held.
*/
@@ -601,10 +619,15 @@ struct xe_file {
struct xarray xa;
/**
* @exec_queue.lock: Protects exec queue lookup + reference and
- * removal a frommfile xarray. Not an intended to be an outer
- * lock which does thing while being held.
+ * removal from file xarray. Not intended to be an outer lock
+ * which does things while being held.
*/
struct mutex lock;
+ /**
+ * @exec_queue.pending_removal: count of queues pending removal,
+ * used to synchronize GPU state updates with an ongoing query.
+ */
+ atomic_t pending_removal;
} exec_queue;
/** @run_ticks: hw engine class run time in ticks for this drm client */
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 22f0f1a6dfd5..63f30b6df70b 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -261,6 +261,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
if (man) {
drm_print_memory_stats(p,
&stats[mem_type],
+ DRM_GEM_OBJECT_ACTIVE |
DRM_GEM_OBJECT_RESIDENT |
(mem_type != XE_PL_SYSTEM ? 0 :
DRM_GEM_OBJECT_PURGEABLE),
@@ -269,6 +270,49 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
}
}
+static struct xe_hw_engine *any_engine(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned long gt_id;
+
+ for_each_gt(gt, xe, gt_id) {
+ struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt);
+
+ if (hwe)
+ return hwe;
+ }
+
+ return NULL;
+}
+
+static bool force_wake_get_any_engine(struct xe_device *xe,
+ struct xe_hw_engine **phwe,
+ unsigned int *pfw_ref)
+{
+ enum xe_force_wake_domains domain;
+ unsigned int fw_ref;
+ struct xe_hw_engine *hwe;
+ struct xe_force_wake *fw;
+
+ hwe = any_engine(xe);
+ if (!hwe)
+ return false;
+
+ domain = xe_hw_engine_to_fw_domain(hwe);
+ fw = gt_to_fw(hwe->gt);
+
+ fw_ref = xe_force_wake_get(fw, domain);
+ if (!xe_force_wake_ref_has_domain(fw_ref, domain)) {
+ xe_force_wake_put(fw, fw_ref);
+ return false;
+ }
+
+ *phwe = hwe;
+ *pfw_ref = fw_ref;
+
+ return true;
+}
+
static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
{
unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { };
@@ -280,7 +324,18 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
u64 gpu_timestamp;
unsigned int fw_ref;
+ /*
+ * Wait for any exec queue going away: their cycles will get updated on
+ * context switch out, so wait for that to happen
+ */
+ wait_var_event(&xef->exec_queue.pending_removal,
+ !atomic_read(&xef->exec_queue.pending_removal));
+
xe_pm_runtime_get(xe);
+ if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) {
+ xe_pm_runtime_put(xe);
+ return;
+ }
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
@@ -295,33 +350,11 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
}
mutex_unlock(&xef->exec_queue.lock);
- /* Get the total GPU cycles */
- for_each_gt(gt, xe, gt_id) {
- enum xe_force_wake_domains fw;
-
- hwe = xe_gt_any_hw_engine(gt);
- if (!hwe)
- continue;
-
- fw = xe_hw_engine_to_fw_domain(hwe);
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), fw);
- if (!xe_force_wake_ref_has_domain(fw_ref, fw)) {
- hwe = NULL;
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- break;
- }
-
- gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- break;
- }
+ gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+ xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
xe_pm_runtime_put(xe);
- if (unlikely(!hwe))
- return;
-
for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) {
const char *class_name;
@@ -352,7 +385,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
* @p: The drm_printer ptr
* @file: The drm_file ptr
*
- * This is callabck for drm fdinfo interface. Register this callback
+ * This is the callback for the drm fdinfo interface. Register this callback
* in drm driver ops for show_fdinfo.
*
* Return: void
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
index d45b71426cc8..d61650d4aa0b 100644
--- a/drivers/gpu/drm/xe/xe_drv.h
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -10,7 +10,6 @@
#define DRIVER_NAME "xe"
#define DRIVER_DESC "Intel Xe Graphics"
-#define DRIVER_DATE "20201103"
/* Interface history:
*
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 31cca938956f..df8ce550deb4 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -33,7 +33,7 @@
*
* In XE we avoid all of this complication by not allowing a BO list to be
* passed into an exec, using the dma-buf implicit sync uAPI, have binds as
- * seperate operations, and using the DRM scheduler to flow control the ring.
+ * separate operations, and using the DRM scheduler to flow control the ring.
* Let's deep dive on each of these.
*
* We can get away from a BO list by forcing the user to use in / out fences on
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 268cd3123be9..7e1abbbfba12 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -17,6 +17,7 @@
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
+#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
@@ -69,6 +70,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
q->gt = gt;
q->class = hwe->class;
q->width = width;
+ q->msix_vec = XE_IRQ_DEFAULT_MSIX;
q->logical_mask = logical_mask;
q->fence_irq = &gt->fence_irq[hwe->class];
q->ring_ops = gt->ring_ops[hwe->class];
@@ -118,7 +120,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
}
for (i = 0; i < q->width; ++i) {
- q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
+ q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec);
if (IS_ERR(q->lrc[i])) {
err = PTR_ERR(q->lrc[i]);
goto err_unlock;
@@ -241,6 +243,7 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
return q;
}
+ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
void xe_exec_queue_destroy(struct kref *ref)
{
@@ -263,8 +266,11 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
/*
* Before releasing our ref to lrc and xef, accumulate our run ticks
+ * and wakeup any waiters.
*/
xe_exec_queue_update_run_ticks(q);
+ if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
+ wake_up_var(&q->xef->exec_queue.pending_removal);
for (i = 0; i < q->width; ++i)
xe_lrc_put(q->lrc[i]);
@@ -764,25 +770,20 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
struct xe_device *xe = gt_to_xe(q->gt);
- struct xe_file *xef;
struct xe_lrc *lrc;
u32 old_ts, new_ts;
int idx;
/*
- * Jobs that are run during driver load may use an exec_queue, but are
- * not associated with a user xe file, so avoid accumulating busyness
- * for kernel specific work.
+ * Jobs that are executed by the kernel don't have a corresponding
+ * xe_file and thus are not accounted.
*/
- if (!q->vm || !q->vm->xef)
+ if (!q->xef)
return;
/* Synchronize with unbind while holding the xe file open */
if (!drm_dev_enter(&xe->drm, &idx))
return;
-
- xef = q->vm->xef;
-
/*
* Only sample the first LRC. For parallel submission, all of them are
* scheduled together and we compensate that below by multiplying by
@@ -793,7 +794,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
*/
lrc = q->lrc[0];
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
- xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
+ q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
drm_dev_exit(idx);
}
@@ -835,7 +836,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
mutex_lock(&xef->exec_queue.lock);
q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+ if (q)
+ atomic_inc(&xef->exec_queue.pending_removal);
mutex_unlock(&xef->exec_queue.lock);
+
if (XE_IOCTL_DBG(xe, !q))
return -ENOENT;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 1158b6062a6c..5af5419cec7a 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -41,7 +41,7 @@ struct xe_exec_queue {
/** @xef: Back pointer to xe file if this is user created exec queue */
struct xe_file *xef;
- /** @gt: graphics tile this exec queue can submit to */
+ /** @gt: GT structure this exec queue can submit to */
struct xe_gt *gt;
/**
* @hwe: A hardware of the same class. May (physical engine) or may not
@@ -63,6 +63,8 @@ struct xe_exec_queue {
char name[MAX_FENCE_NAME_LEN];
/** @width: width (number BB submitted per exec) of this exec queue */
u16 width;
+ /** @msix_vec: MSI-X vector (for platforms that support it) */
+ u16 msix_vec;
/** @fence_irq: fence IRQ used to signal job completion */
struct xe_hw_fence_irq *fence_irq;
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index a8c416a48812..5ef96deaa881 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -17,6 +17,7 @@
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
+#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
@@ -47,6 +48,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
struct xe_mmio *mmio = &gt->mmio;
struct xe_device *xe = gt_to_xe(gt);
u64 lrc_desc;
+ u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
lrc_desc = xe_lrc_descriptor(lrc);
@@ -80,8 +82,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
xe_bo_ggtt_addr(hwe->hwsp));
xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
- xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
- _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+
+ if (xe_device_has_msix(gt_to_xe(hwe->gt)))
+ ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
+ xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);
xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
lower_32_bits(lrc_desc));
@@ -265,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
port->hwe = hwe;
- port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
+ port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX);
if (IS_ERR(port->lrc)) {
err = PTR_ERR(port->lrc);
goto err;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 558fac8bb6fb..5fcb2b4c2c13 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -362,7 +362,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
/*
* So we don't need to worry about 64K GGTT layout when dealing with
- * scratch entires, rather keep the scratch page in system memory on
+ * scratch entries, rather keep the scratch page in system memory on
* platforms where 64K pages are needed for VRAM.
*/
flags = XE_BO_FLAG_PINNED;
@@ -598,10 +598,10 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
u64 start;
u64 offset, pte;
- if (XE_WARN_ON(!bo->ggtt_node))
+ if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id]))
return;
- start = bo->ggtt_node->base.start;
+ start = bo->ggtt_node[ggtt->tile->id]->base.start;
for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
@@ -612,15 +612,16 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
u64 start, u64 end)
{
- int err;
u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
+ u8 tile_id = ggtt->tile->id;
+ int err;
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
alignment = SZ_64K;
- if (XE_WARN_ON(bo->ggtt_node)) {
+ if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
/* Someone's already inserted this BO in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
return 0;
}
@@ -630,19 +631,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
- bo->ggtt_node = xe_ggtt_node_init(ggtt);
- if (IS_ERR(bo->ggtt_node)) {
- err = PTR_ERR(bo->ggtt_node);
- bo->ggtt_node = NULL;
+ bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(bo->ggtt_node[tile_id])) {
+ err = PTR_ERR(bo->ggtt_node[tile_id]);
+ bo->ggtt_node[tile_id] = NULL;
goto out;
}
mutex_lock(&ggtt->lock);
- err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size,
- alignment, 0, start, end, 0);
+ err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
+ bo->size, alignment, 0, start, end, 0);
if (err) {
- xe_ggtt_node_fini(bo->ggtt_node);
- bo->ggtt_node = NULL;
+ xe_ggtt_node_fini(bo->ggtt_node[tile_id]);
+ bo->ggtt_node[tile_id] = NULL;
} else {
xe_ggtt_map_bo(ggtt, bo);
}
@@ -691,13 +692,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
*/
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
- if (XE_WARN_ON(!bo->ggtt_node))
+ u8 tile_id = ggtt->tile->id;
+
+ if (XE_WARN_ON(!bo->ggtt_node[tile_id]))
return;
/* This BO is not currently in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
- xe_ggtt_node_remove(bo->ggtt_node,
+ xe_ggtt_node_remove(bo->ggtt_node[tile_id],
bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
}
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index 64b2ae6839db..c250ea773491 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -71,8 +71,14 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
static inline
struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
{
- return list_first_entry_or_null(&sched->base.pending_list,
- struct xe_sched_job, drm.list);
+ struct xe_sched_job *job;
+
+ spin_lock(&sched->base.job_list_lock);
+ job = list_first_entry_or_null(&sched->base.pending_list,
+ struct xe_sched_job, drm.list);
+ spin_unlock(&sched->base.job_list_lock);
+
+ return job;
}
static inline int
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index fc64b45d324b..24cc6a4f9a96 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -139,17 +139,29 @@ static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size)
return 0;
}
-static int validate_proxy_header(struct xe_gsc_proxy_header *header,
+static int validate_proxy_header(struct xe_gt *gt,
+ struct xe_gsc_proxy_header *header,
u32 source, u32 dest, u32 max_size)
{
u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr);
u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr);
+ int ret = 0;
- if (header->destination != dest || header->source != source)
- return -ENOEXEC;
+ if (header->destination != dest || header->source != source) {
+ ret = -ENOEXEC;
+ goto out;
+ }
- if (length + PROXY_HDR_SIZE > max_size)
- return -E2BIG;
+ if (length + PROXY_HDR_SIZE > max_size) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ /* We only care about the status if this is a message for the driver */
+ if (dest == GSC_PROXY_ADDRESSING_KMD && header->status != 0) {
+ ret = -EIO;
+ goto out;
+ }
switch (type) {
case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD:
@@ -157,12 +169,20 @@ static int validate_proxy_header(struct xe_gsc_proxy_header *header,
break;
fallthrough;
case GSC_PROXY_MSG_TYPE_PROXY_INVALID:
- return -EIO;
+ ret = -EIO;
+ break;
default:
break;
}
- return 0;
+out:
+ if (ret)
+ xe_gt_err(gt,
+ "GSC proxy error: s=0x%x[0x%x], d=0x%x[0x%x], t=%u, l=0x%x, st=0x%x\n",
+ header->source, source, header->destination, dest,
+ type, length, header->status);
+
+ return ret;
}
#define proxy_header_wr(xe_, map_, offset_, field_, val_) \
@@ -228,12 +248,17 @@ static int proxy_query(struct xe_gsc *gsc)
xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc,
reply_offset, PROXY_HDR_SIZE);
- /* stop if this was the last message */
- if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END)
+ /* Check the status and stop if this was the last message */
+ if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) {
+ ret = validate_proxy_header(gt, to_csme_hdr,
+ GSC_PROXY_ADDRESSING_GSC,
+ GSC_PROXY_ADDRESSING_KMD,
+ GSC_PROXY_BUFFER_SIZE - reply_offset);
break;
+ }
/* make sure the GSC-to-CSME proxy header is sane */
- ret = validate_proxy_header(to_csme_hdr,
+ ret = validate_proxy_header(gt, to_csme_hdr,
GSC_PROXY_ADDRESSING_GSC,
GSC_PROXY_ADDRESSING_CSME,
GSC_PROXY_BUFFER_SIZE - reply_offset);
@@ -262,7 +287,7 @@ static int proxy_query(struct xe_gsc *gsc)
}
/* make sure the CSME-to-GSC proxy header is sane */
- ret = validate_proxy_header(gsc->proxy.from_csme,
+ ret = validate_proxy_header(gt, gsc->proxy.from_csme,
GSC_PROXY_ADDRESSING_CSME,
GSC_PROXY_ADDRESSING_GSC,
GSC_PROXY_BUFFER_SIZE - reply_offset);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d6744be01a68..26e64530ada2 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -387,6 +387,10 @@ int xe_gt_init_early(struct xe_gt *gt)
xe_force_wake_init_gt(gt, gt_to_fw(gt));
spin_lock_init(&gt->global_invl_lock);
+ err = xe_gt_tlb_invalidation_init_early(gt);
+ if (err)
+ return err;
+
return 0;
}
@@ -588,10 +592,6 @@ int xe_gt_init(struct xe_gt *gt)
xe_hw_fence_irq_init(&gt->fence_irq[i]);
}
- err = xe_gt_tlb_invalidation_init(gt);
- if (err)
- return err;
-
err = xe_gt_pagefault_init(gt);
if (err)
return err;
@@ -748,10 +748,8 @@ static int do_gt_restart(struct xe_gt *gt)
if (err)
return err;
- for_each_hw_engine(hwe, gt, id) {
+ for_each_hw_engine(hwe, gt, id)
xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
- xe_reg_sr_apply_whitelist(hwe);
- }
/* Get CCS mode in sync between sw/hw */
xe_gt_apply_ccs_mode(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 82b9b7f82fca..e504cc33ade4 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -37,7 +37,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt);
/**
* xe_gt_record_user_engines - save data related to engines available to
- * usersapce
+ * userspace
* @gt: GT structure
*
* Walk the available HW engines from gt->info.engine_mask and calculate data
@@ -57,6 +57,31 @@ int xe_gt_sanitize_freq(struct xe_gt *gt);
void xe_gt_remove(struct xe_gt *gt);
/**
+ * xe_gt_wait_for_reset - wait for gt's async reset to finalize.
+ * @gt: GT structure
+ * Return:
+ * %true if it waited for the work to finish execution,
+ * %false if there was no scheduled reset or it was done.
+ */
+static inline bool xe_gt_wait_for_reset(struct xe_gt *gt)
+{
+ return flush_work(&gt->reset.worker);
+}
+
+/**
+ * xe_gt_reset - perform synchronous reset
+ * @gt: GT structure
+ * Return:
+ * %true if it waited for the reset to finish,
+ * %false if there was no scheduled reset.
+ */
+static inline bool xe_gt_reset(struct xe_gt *gt)
+{
+ xe_gt_reset_async(gt);
+ return xe_gt_wait_for_reset(gt);
+}
+
+/**
* xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
* first that matches the same reset domain as @class
* @gt: GT structure
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index b6adfb9f2030..50fffc9ebf62 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -150,7 +150,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
xe_gt_info(gt, "Setting compute mode to %d\n", num_engines);
gt->ccs_mode = num_engines;
xe_gt_record_user_engines(gt);
- xe_gt_reset_async(gt);
+ xe_gt_reset(gt);
}
mutex_unlock(&xe->drm.filelist_mutex);
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 3e8c351a0eab..e7792858b1e4 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -132,11 +132,9 @@ static int force_reset(struct xe_gt *gt, struct drm_printer *p)
static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p)
{
xe_pm_runtime_get(gt_to_xe(gt));
- xe_gt_reset_async(gt);
+ xe_gt_reset(gt);
xe_pm_runtime_put(gt_to_xe(gt));
- flush_work(&gt->reset.worker);
-
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 6bd39b2c5003..604bdc7c8173 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -115,6 +115,20 @@ static ssize_t rpe_freq_show(struct device *dev,
}
static DEVICE_ATTR_RO(rpe_freq);
+static ssize_t rpa_freq_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ u32 freq;
+
+ xe_pm_runtime_get(dev_to_xe(dev));
+ freq = xe_guc_pc_get_rpa_freq(pc);
+ xe_pm_runtime_put(dev_to_xe(dev));
+
+ return sysfs_emit(buf, "%d\n", freq);
+}
+static DEVICE_ATTR_RO(rpa_freq);
+
static ssize_t rpn_freq_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -202,6 +216,7 @@ static const struct attribute *freq_attrs[] = {
&dev_attr_act_freq.attr,
&dev_attr_cur_freq.attr,
&dev_attr_rp0_freq.attr,
+ &dev_attr_rpa_freq.attr,
&dev_attr_rpe_freq.attr,
&dev_attr_rpn_freq.attr,
&dev_attr_min_freq.attr,
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index fd80afeef56a..ffd3ba7f6656 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -122,10 +122,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
if (!xe_gt_is_media_type(gt))
gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
- for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
- if ((gt->info.engine_mask & BIT(i)))
- gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
- VDN_MFXVDENC_POWERGATE_ENABLE(j));
+ if (xe->info.platform != XE_DG1) {
+ for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+ if ((gt->info.engine_mask & BIT(i)))
+ gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
+ VDN_MFXVDENC_POWERGATE_ENABLE(j));
+ }
}
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 5013d674e17d..a1676b787fdc 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -371,7 +371,7 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
* @group: steering group ID
* @instance: steering instance ID
*
- * Return: the coverted DSS id.
+ * Return: the converted DSS id.
*/
u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance)
{
@@ -550,9 +550,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
* Returns true if the caller should steer to the @group/@instance values
* returned. Returns false if the caller need not perform any steering
*/
-static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
- struct xe_reg_mcr reg_mcr,
- u8 *group, u8 *instance)
+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+ struct xe_reg_mcr reg_mcr,
+ u8 *group, u8 *instance)
{
const struct xe_reg reg = to_xe_reg(reg_mcr);
const struct xe_mmio_range *implicit_ranges;
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index c0cd36021c24..bc06520befab 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -26,6 +26,10 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
u32 value);
+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+ struct xe_reg_mcr reg_mcr,
+ u8 *group, u8 *instance);
+
void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 79c426dc2505..2606cd396df5 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -10,7 +10,6 @@
#include <drm/drm_exec.h>
#include <drm/drm_managed.h>
-#include <drm/ttm/ttm_execbuf_util.h>
#include "abi/guc_actions_abi.h"
#include "xe_bo.h"
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 5dc71394372d..11da0228cea7 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -60,6 +60,21 @@ static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format
xe_gt_info(gt, "%pV", vaf);
}
+static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_gt *gt = p->arg;
+ struct drm_printer dbg;
+
+ /*
+ * The original xe_gt_dbg() callsite annotations are useless here,
+ * redirect to the tweaked drm_dbg_printer() instead.
+ */
+ dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL);
+ dbg.origin = p->origin;
+
+ drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf);
+}
+
/**
* xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err()
* @gt: the &xe_gt pointer to use in xe_gt_err()
@@ -90,4 +105,20 @@ static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt)
return p;
}
+/**
+ * xe_gt_dbg_printer - Construct a &drm_printer that outputs like xe_gt_dbg()
+ * @gt: the &xe_gt pointer to use in xe_gt_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_gt_dbg_printer(struct xe_gt *gt)
+{
+ struct drm_printer p = {
+ .printfn = __xe_gt_printfn_dbg,
+ .arg = gt,
+ .origin = (const void *)_THIS_IP_,
+ };
+ return p;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index ca49860168f6..878e96281c03 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -207,6 +207,11 @@ static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u
return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout);
}
+static int pf_push_vf_cfg_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+ return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY, priority);
+}
+
static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
{
return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size);
@@ -1540,8 +1545,6 @@ static u64 pf_query_max_lmem(struct xe_gt *gt)
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
#define MAX_FAIR_LMEM SZ_128M /* XXX: make it small for the driver bringup */
-#else
-#define MAX_FAIR_LMEM SZ_2G /* XXX: known issue with allocating BO over 2GiB */
#endif
static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs)
@@ -1767,6 +1770,77 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi
return preempt_timeout;
}
+static const char *sched_priority_unit(u32 priority)
+{
+ return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" :
+ priority == GUC_SCHED_PRIORITY_NORMAL ? "(normal)" :
+ priority == GUC_SCHED_PRIORITY_HIGH ? "(high)" :
+ "(?)";
+}
+
+static int pf_provision_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int err;
+
+ err = pf_push_vf_cfg_sched_priority(gt, vfid, priority);
+ if (unlikely(err))
+ return err;
+
+ config->sched_priority = priority;
+ return 0;
+}
+
+static int pf_get_sched_priority(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ return config->sched_priority;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_sched_priority() - Configure scheduling priority.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @priority: requested scheduling priority
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+ int err;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_provision_sched_priority(gt, vfid, priority);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_config_set_u32_done(gt, vfid, priority,
+ xe_gt_sriov_pf_config_get_sched_priority(gt, vfid),
+ "scheduling priority", sched_priority_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_sched_priority - Get VF's scheduling priority.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's) scheduling priority.
+ */
+u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid)
+{
+ u32 priority;
+
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ priority = pf_get_sched_priority(gt, vfid);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ return priority;
+}
+
static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config)
{
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
@@ -2087,7 +2161,7 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid)
*
* This function can only be called on PF.
*
- * Return: mininum size of the buffer or the number of bytes saved,
+ * Return: minimum size of the buffer or the number of bytes saved,
* or a negative error code on failure.
*/
ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index 0c55aa40a1a7..f894e9d4abba 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -44,6 +44,9 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi
int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
u32 preempt_timeout);
+u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority);
+
u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid,
enum xe_guc_klv_threshold_index index);
int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
index 2d3b73d78f14..686c7b3b6d7a 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
@@ -33,6 +33,8 @@ struct xe_gt_sriov_config {
u32 exec_quantum;
/** @preempt_timeout: preemption timeout in microseconds. */
u32 preempt_timeout;
+ /** @sched_priority: scheduling priority. */
+ u32 sched_priority;
/** @thresholds: GuC thresholds for adverse events notifications. */
u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS];
};
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 05df4ab3514b..b2521dd6ec42 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -164,6 +164,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent)
* │   │   ├── contexts_spare
* │   │   ├── exec_quantum_ms
* │   │   ├── preempt_timeout_us
+ * │   │   ├── sched_priority
* │   ├── vf1
* │   │   ├── ggtt_quota
* │   │   ├── lmem_quota
@@ -171,6 +172,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent)
* │   │   ├── contexts_quota
* │   │   ├── exec_quantum_ms
* │   │   ├── preempt_timeout_us
+ * │   │   ├── sched_priority
*/
#define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \
@@ -209,6 +211,7 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n");
+DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n");
/*
* /sys/kernel/debug/dri/0/
@@ -295,6 +298,8 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
&exec_quantum_fops);
debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent,
&preempt_timeout_fops);
+ debugfs_create_file_unsafe("sched_priority", 0644, parent, parent,
+ &sched_priority_fops);
/* register all threshold attributes */
#define register_threshold_attribute(TAG, NAME, ...) \
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
index 0bf12d89ceb2..6af219d93c3b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
@@ -18,7 +18,7 @@
* is within a range of supported VF numbers (up to maximum number of VFs that
* driver can support, including VF0 that represents the PF itself).
*
- * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ * Note: Effective only on debug builds. See `Xe Asserts`_ for more information.
*/
#define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid))
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
index fae5be5a2a11..c00fb354705f 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
@@ -135,14 +135,33 @@ static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 valu
return 0;
}
+static void pf_bulk_reset_sched_priority(struct xe_gt *gt, u32 priority)
+{
+ unsigned int total_vfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt);
+ unsigned int n;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 0; n < total_vfs; n++)
+ gt->sriov.pf.vfs[n].config.sched_priority = priority;
+}
+
static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable)
{
+ int err;
+
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
- return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY,
- &gt->sriov.pf.policy.guc.sched_if_idle,
- enable);
+ err = pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY,
+ &gt->sriov.pf.policy.guc.sched_if_idle,
+ enable);
+
+ if (!err)
+ pf_bulk_reset_sched_priority(gt, enable ? GUC_SCHED_PRIORITY_NORMAL :
+ GUC_SCHED_PRIORITY_LOW);
+ return err;
}
static int pf_reprovision_sched_if_idle(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index d3baba50f085..cca5d5732802 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -27,6 +27,7 @@
#include "xe_guc_relay.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_sriov_vf.h"
#include "xe_uc_fw.h"
#include "xe_wopcm.h"
@@ -223,6 +224,44 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt)
return 0;
}
+static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
+{
+ u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE),
+ };
+ int ret;
+
+ ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+/**
+ * xe_gt_sriov_vf_notify_resfix_done - Notify GuC that resource fixups have been applied.
+ * @gt: the &xe_gt struct instance linked to target GuC
+ *
+ * Return: 0 if the operation completed successfully, or a negative error
+ * code otherwise.
+ */
+int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt)
+{
+ struct xe_guc *guc = &gt->uc.guc;
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ err = guc_action_vf_notify_resfix_done(guc);
+ if (unlikely(err))
+ xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n",
+ ERR_PTR(err));
+ else
+ xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n");
+
+ return err;
+}
+
static int guc_action_query_single_klv(struct xe_guc *guc, u32 key,
u32 *value, u32 value_len)
{
@@ -692,6 +731,30 @@ failed:
return err;
}
+/**
+ * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery,
+ * or just mark that a GuC is ready for it.
+ * @gt: the &xe_gt struct instance linked to target GuC
+ *
+ * This function shall be called only by VF.
+ */
+void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
+
+ set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
+ /*
+ * We need to be certain that if all flags were set, at least one
+ * thread will notice that and schedule the recovery.
+ */
+ smp_mb__after_atomic();
+
+ xe_gt_sriov_info(gt, "ready for recovery after migration\n");
+ xe_sriov_vf_start_migration_recovery(xe);
+}
+
static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
{
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
index e541ce57bec2..912d20814261 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -17,6 +17,8 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt);
int xe_gt_sriov_vf_connect(struct xe_gt *gt);
int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt);
int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt);
+int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt);
+void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt);
u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index c7364a5aef8f..7a6c1d808e41 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -12,7 +12,7 @@
/**
* xe_gt_stats_incr - Increments the specified stats counter
- * @gt: graphics tile
+ * @gt: GT structure
* @id: xe_gt_stats_id type id that needs to be incremented
* @incr: value to be incremented with
*
@@ -32,7 +32,7 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
/**
* xe_gt_stats_print_info - Print the GT stats
- * @gt: graphics tile
+ * @gt: GT structure
* @p: drm_printer where it will be printed out.
*
* This prints out all the available GT stats.
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index 91d944f6c4e4..38325ef53617 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -6,15 +6,11 @@
#ifndef _XE_GT_STATS_H_
#define _XE_GT_STATS_H_
+#include "xe_gt_stats_types.h"
+
struct xe_gt;
struct drm_printer;
-enum xe_gt_stats_id {
- XE_GT_STATS_ID_TLB_INVAL,
- /* must be the last entry */
- __XE_GT_STATS_NUM_IDS,
-};
-
#ifdef CONFIG_DEBUG_FS
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
new file mode 100644
index 000000000000..2fc055e39f27
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_STATS_TYPES_H_
+#define _XE_GT_STATS_TYPES_H_
+
+enum xe_gt_stats_id {
+ XE_GT_STATS_ID_TLB_INVAL,
+ /* must be the last entry */
+ __XE_GT_STATS_NUM_IDS,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 03b225364101..8db78d616b6f 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -8,6 +8,7 @@
#include <regs/xe_gt_regs.h>
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
#include "xe_mmio.h"
@@ -53,6 +54,7 @@ static u32 read_status(struct xe_gt *gt)
{
u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
+ xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status);
return status;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 6146d1776bda..0a93831c0a02 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -106,15 +106,15 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
}
/**
- * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
- * @gt: graphics tile
+ * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state
+ * @gt: GT structure
*
* Initialize GT TLB invalidation state, purely software initialization, should
* be called once during driver load.
*
* Return: 0 on success, negative error code on error.
*/
-int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
+int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt)
{
gt->tlb_invalidation.seqno = 1;
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
@@ -128,7 +128,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
/**
* xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
- * @gt: graphics tile
+ * @gt: GT structure
*
* Signal any pending invalidation fences, should be called during a GT reset
*/
@@ -244,7 +244,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
/**
* xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
- * @gt: graphics tile
+ * @gt: GT structure
* @fence: invalidation fence which will be signal on TLB invalidation
* completion
*
@@ -261,14 +261,23 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
0, /* seqno, replaced in send_tlb_invalidation */
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
};
+ int ret;
+
+ ret = send_tlb_invalidation(&gt->uc.guc, fence, action,
+ ARRAY_SIZE(action));
+ /*
+ * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches
+ * should be nuked on a GT reset so this error can be ignored.
+ */
+ if (ret == -ECANCELED)
+ return 0;
- return send_tlb_invalidation(&gt->uc.guc, fence, action,
- ARRAY_SIZE(action));
+ return ret;
}
/**
* xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT
- * @gt: graphics tile
+ * @gt: GT structure
*
* Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
* synchronous.
@@ -317,7 +326,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
* xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
* address range
*
- * @gt: graphics tile
+ * @gt: GT structure
* @fence: invalidation fence which will be signal on TLB invalidation
* completion
* @start: start address
@@ -403,7 +412,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
/**
* xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
- * @gt: graphics tile
+ * @gt: GT structure
* @fence: invalidation fence which will be signal on TLB invalidation
* completion, can be NULL
* @vma: VMA to invalidate
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index 00b1c6c01e8d..672acfcdf0d7 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -14,7 +14,8 @@ struct xe_gt;
struct xe_guc;
struct xe_vma;
-int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt);
+
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index a287b98ee70b..6e66bf0e8b3f 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -11,10 +11,10 @@
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
-#include "xe_gt_stats.h"
+#include "xe_gt_stats_types.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
-#include "xe_oa.h"
+#include "xe_oa_types.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 7f704346a8f4..408365dfe4ee 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -44,7 +44,15 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
struct xe_bo *bo)
{
struct xe_device *xe = guc_to_xe(guc);
- u32 addr = xe_bo_ggtt_addr(bo);
+ u32 addr;
+
+ /*
+ * For most BOs, the address on the allocating tile is fine. However, for
+ * some, e.g. G2G CTB, the address on a specific tile is required as it
+ * might be different for each tile. So, just always ask for the address
+ * on the target GuC.
+ */
+ addr = __xe_bo_ggtt_addr(bo, gt_to_tile(guc_to_gt(guc))->id);
/* GuC addresses above GUC_GGTT_TOP don't map through the GTT */
xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc)));
@@ -139,6 +147,34 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc)
return flags;
}
+static bool needs_wa_dual_queue(struct xe_gt *gt)
+{
+ /*
+ * The DUAL_QUEUE_WA tells the GuC to not allow concurrent submissions
+ * on RCS and CCSes with different address spaces, which on DG2 is
+ * required as a WA for an HW bug.
+ */
+ if (XE_WA(gt, 22011391025))
+ return true;
+
+ /*
+ * On newer platforms, the HW has been updated to not allow parallel
+ * execution of different address spaces, so the RCS/CCS will stall the
+ * context switch if one of the other RCS/CCSes is busy with a different
+ * address space. While functionally correct, having a submission
+ * stalled on the HW limits the GuC ability to shuffle things around and
+ * can cause complications if the non-stalled submission runs for a long
+ * time, because the GuC doesn't know that the stalled submission isn't
+ * actually running and might declare it as hung. Therefore, we enable
+ * the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines
+ * to move management back to the GuC.
+ */
+ if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+ return true;
+
+ return false;
+}
+
static u32 guc_ctl_wa_flags(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
@@ -151,7 +187,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
if (XE_WA(gt, 14014475959))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
- if (XE_WA(gt, 22011391025))
+ if (needs_wa_dual_queue(gt))
flags |= GUC_WA_DUAL_QUEUE;
/*
@@ -244,6 +280,293 @@ static void guc_write_params(struct xe_guc *guc)
xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(1 + i), guc->params[i]);
}
+/*
+ * Send XE_GUC_ACTION_REGISTER_G2G to @guc over its CT channel and wait for
+ * the reply.
+ *
+ * @type must be XE_G2G_TYPE_IN or XE_G2G_TYPE_OUT, @dst_tile/@dst_dev name
+ * the GuC at the other end, @desc_addr/@buff_addr are the addresses passed
+ * to the GuC for the descriptor and the buffer, and @size is the buffer size
+ * in bytes (a multiple of 4K).
+ *
+ * Returns the result of xe_guc_ct_send_block().
+ */
+static int guc_action_register_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev,
+					  u32 desc_addr, u32 buff_addr, u32 size)
+{
+	struct xe_device *xe = gt_to_xe(guc_to_gt(guc));
+	/* The size field is encoded as the number of 4K pages minus one */
+	u32 params = FIELD_PREP(XE_G2G_REGISTER_SIZE, size / SZ_4K - 1) |
+		     FIELD_PREP(XE_G2G_REGISTER_TYPE, type) |
+		     FIELD_PREP(XE_G2G_REGISTER_TILE, dst_tile) |
+		     FIELD_PREP(XE_G2G_REGISTER_DEVICE, dst_dev);
+	u32 action[] = {
+		XE_GUC_ACTION_REGISTER_G2G,
+		params,
+		desc_addr,
+		buff_addr,
+	};
+
+	xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT));
+	xe_assert(xe, !(size % SZ_4K));
+
+	return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+/*
+ * Send XE_GUC_ACTION_DEREGISTER_G2G to @guc over its CT channel and wait for
+ * the reply.
+ *
+ * @type must be XE_G2G_TYPE_IN or XE_G2G_TYPE_OUT; @dst_tile/@dst_dev name
+ * the GuC at the other end of the channel.
+ *
+ * Returns the result of xe_guc_ct_send_block().
+ */
+static int guc_action_deregister_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev)
+{
+	struct xe_device *xe = gt_to_xe(guc_to_gt(guc));
+	u32 params = FIELD_PREP(XE_G2G_DEREGISTER_TYPE, type) |
+		     FIELD_PREP(XE_G2G_DEREGISTER_TILE, dst_tile) |
+		     FIELD_PREP(XE_G2G_DEREGISTER_DEVICE, dst_dev);
+	u32 action[] = {
+		XE_GUC_ACTION_DEREGISTER_G2G,
+		params,
+	};
+
+	xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT));
+
+	return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+#define G2G_DEV(gt) (((gt)->info.type == XE_GT_TYPE_MAIN) ? 0 : 1)
+
+#define G2G_BUFFER_SIZE (SZ_4K)
+#define G2G_DESC_SIZE (64)
+#define G2G_DESC_AREA_SIZE (SZ_4K)
+
+/*
+ * Generate a unique id for each bi-directional CTB for each pair of
+ * near and far tiles/devices. The id can then be used as an index into
+ * a single allocation that is sub-divided into multiple CTBs.
+ *
+ * For example, with two devices per tile and two tiles, the table should
+ * look like:
+ *           Far <tile>.<dev>
+ *         0.0   0.1   1.0   1.1
+ * N 0.0  --/-- 00/01 02/03 04/05
+ * e 0.1  01/00 --/-- 06/07 08/09
+ * a 1.0  03/02 07/06 --/-- 10/11
+ * r 1.1  05/04 09/08 11/10 --/--
+ *
+ * Where each entry is Rx/Tx channel id.
+ *
+ * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would
+ * be reading from channel #11 and writing to channel #10. Whereas,
+ * GuC #2 talking to GuC #3 would be read on #10 and write to #11.
+ *
+ * @near_tile/@near_dev and @far_tile/@far_dev identify the two GuCs, @type
+ * selects which entry of the pair is wanted, @max_inst is the number of
+ * rows/columns in the table and @have_dev says whether the device number is
+ * part of the instance numbering (it is ignored otherwise).
+ *
+ * Returns the channel id, or -1 if near and far are the same GuC. The return
+ * type is signed so that the -1 sentinel is representable; the caller stores
+ * the result in an int and checks for a negative value.
+ */
+static int g2g_slot(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev,
+		    u32 type, u32 max_inst, bool have_dev)
+{
+	u32 near = near_tile, far = far_tile;
+	u32 idx = 0, x, y, direction;
+	u32 row;
+
+	/* Fold the device number into the low bit of the instance number */
+	if (have_dev) {
+		near = (near << 1) | near_dev;
+		far = (far << 1) | far_dev;
+	}
+
+	/* No need to send to one's self */
+	if (far == near)
+		return -1;
+
+	if (far > near) {
+		/* Top right table half */
+		x = far;
+		y = near;
+
+		/* T/R is 'forwards' direction */
+		direction = type;
+	} else {
+		/* Bottom left table half */
+		x = near;
+		y = far;
+
+		/* B/L is 'backwards' direction */
+		direction = (1 - type);
+	}
+
+	/*
+	 * Count the rows prior to the target: row r of the upper triangle
+	 * holds (max_inst - 1 - r) entries, which this loop accumulates for
+	 * r = 0 .. y - 1.
+	 */
+	for (row = y; row > 0; row--)
+		idx += max_inst - row;
+
+	/* Count this row up to the target */
+	idx += (x - 1 - y);
+
+	/* Slots are in Rx/Tx pairs */
+	idx *= 2;
+
+	/* Pick Rx/Tx direction */
+	idx += direction;
+
+	return idx;
+}
+
+/*
+ * Register one direction (@type) of the G2G channel between @near_guc and
+ * the GuC of @far_gt.
+ *
+ * The channel's slot index is looked up with g2g_slot(). The descriptor sits
+ * at slot * G2G_DESC_SIZE from the start of the G2G buffer object, and the
+ * data buffer at slot * G2G_BUFFER_SIZE past the G2G_DESC_AREA_SIZE bytes
+ * reserved for descriptors. Both addresses are GGTT addresses as seen by
+ * @near_guc.
+ *
+ * Returns the result of the register action.
+ */
+static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 type, bool have_dev)
+{
+	struct xe_gt *near_gt = guc_to_gt(near_guc);
+	struct xe_device *xe = gt_to_xe(near_gt);
+	struct xe_bo *g2g_bo;
+	u32 near_tile = gt_to_tile(near_gt)->id;
+	u32 near_dev = G2G_DEV(near_gt);
+	u32 far_tile = gt_to_tile(far_gt)->id;
+	u32 far_dev = G2G_DEV(far_gt);
+	u32 base, desc_offset, buf_offset;
+	int slot;
+
+	/* G2G is not allowed between different cards */
+	xe_assert(xe, xe == gt_to_xe(far_gt));
+
+	g2g_bo = near_guc->g2g.bo;
+	xe_assert(xe, g2g_bo);
+
+	slot = g2g_slot(near_tile, near_dev, far_tile, far_dev, type,
+			xe->info.gt_count, have_dev);
+	xe_assert(xe, slot >= 0);
+
+	desc_offset = slot * G2G_DESC_SIZE;
+	buf_offset = G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE;
+
+	/* Both the descriptor and the buffer must lie inside the allocation */
+	xe_assert(xe, (desc_offset + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
+	xe_assert(xe, (buf_offset + G2G_BUFFER_SIZE) <= g2g_bo->size);
+
+	base = guc_bo_ggtt_addr(near_guc, g2g_bo);
+
+	return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev,
+					      base + desc_offset, base + buf_offset,
+					      G2G_BUFFER_SIZE);
+}
+
+static void guc_g2g_deregister(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type)
+{
+ guc_action_deregister_g2g_buffer(guc, type, far_tile, far_dev); /* return value is not checked */
+}
+
+/*
+ * Compute the size in bytes of the single allocation that backs every G2G
+ * channel of the device: one G2G_DESC_AREA_SIZE descriptor area followed by
+ * one G2G_BUFFER_SIZE buffer per slot.
+ *
+ * There is one channel per unordered pair of GTs, i.e. n * (n - 1) / 2 for
+ * n GTs, and XE_G2G_TYPE_LIMIT slots per channel.
+ */
+static u32 guc_g2g_size(struct xe_guc *guc)
+{
+	struct xe_device *xe = gt_to_xe(guc_to_gt(guc));
+	unsigned int gt_count = xe->info.gt_count;
+	u32 num_channels = (gt_count * (gt_count - 1)) / 2;
+	u32 num_slots = num_channels * XE_G2G_TYPE_LIMIT;
+
+	/* All descriptors must fit in the fixed size descriptor area */
+	xe_assert(xe, num_slots * G2G_DESC_SIZE <= G2G_DESC_AREA_SIZE);
+
+	return num_slots * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+}
+
+static bool xe_guc_g2g_wanted(struct xe_device *xe)
+{
+ /* Can't do GuC to GuC communication if there is only one GuC (gt_count <= 1) */
+ if (xe->info.gt_count <= 1)
+ return false;
+
+ /* No current user, so this currently returns false for every device */
+ return false;
+}
+
+/*
+ * Make sure @guc has a G2G buffer object to work with.
+ *
+ * Nothing is done if guc->g2g.bo is already set. The GuC of GT 0 creates,
+ * pins, maps and zeroes the buffer and is recorded as its owner. The GuC of
+ * any other GT takes a reference on the buffer held by GT 0's GuC and is
+ * recorded as a non-owner.
+ *
+ * Returns 0 on success, -ENODEV if GT 0's GuC has no G2G buffer to share,
+ * or the error from the buffer creation.
+ */
+static int guc_g2g_alloc(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+	u32 g2g_size;
+
+	if (guc->g2g.bo)
+		return 0;
+
+	if (gt->info.id != 0) {
+		struct xe_gt *root_gt = xe_device_get_gt(xe, 0);
+		struct xe_guc *root_guc = &root_gt->uc.guc;
+
+		/* Share the buffer held by GT 0's GuC rather than allocating another */
+		bo = xe_bo_get(root_guc->g2g.bo);
+		if (!bo)
+			return -ENODEV;
+
+		guc->g2g.bo = bo;
+		guc->g2g.owned = false;
+		return 0;
+	}
+
+	g2g_size = guc_g2g_size(guc);
+	bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size,
+					  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_ALL |
+					  XE_BO_FLAG_GGTT_INVALIDATE);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	/* Start from an all-zero descriptor area and buffers */
+	xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+	guc->g2g.bo = bo;
+	guc->g2g.owned = true;
+
+	return 0;
+}
+
+/*
+ * Forget @guc's G2G buffer object, dropping the reference if this GuC only
+ * borrowed it. Does nothing if no buffer was set up.
+ */
+static void guc_g2g_fini(struct xe_guc *guc)
+{
+	struct xe_bo *bo = guc->g2g.bo;
+
+	if (!bo)
+		return;
+
+	/* Unpinning the owned object is handled by generic shutdown */
+	if (!guc->g2g.owned)
+		xe_bo_put(bo);
+
+	guc->g2g.bo = NULL;
+}
+
+/*
+ * Register the G2G channels between @guc and the GuC of every other GT on
+ * the device, one registration per channel type.
+ *
+ * The G2G buffer object is set up first if @guc does not have one yet. If
+ * any registration fails, everything registered so far by this call is
+ * deregistered again before the error is returned.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int guc_g2g_start(struct xe_guc *guc)
+{
+ struct xe_gt *far_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int i, j;
+ int t, err;
+ bool have_dev;
+
+ if (!guc->g2g.bo) {
+ int ret;
+
+ ret = guc_g2g_alloc(guc);
+ if (ret)
+ return ret;
+ }
+
+ /* GuC interface will need extending if more GT device types are ever created. */
+ xe_gt_assert(gt, (gt->info.type == XE_GT_TYPE_MAIN) || (gt->info.type == XE_GT_TYPE_MEDIA));
+
+ /* Channel numbering depends on whether there are multiple GTs per tile */
+ have_dev = xe->info.gt_count > xe->info.tile_count;
+
+ for_each_gt(far_gt, xe, i) {
+ u32 far_tile, far_dev;
+
+ if (far_gt->info.id == gt->info.id) /* no channel to ourselves */
+ continue;
+
+ far_tile = gt_to_tile(far_gt)->id;
+ far_dev = G2G_DEV(far_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) {
+ err = guc_g2g_register(guc, far_gt, t, have_dev);
+ if (err) {
+ while (--t >= 0) /* undo the types already registered for this GT */
+ guc_g2g_deregister(guc, far_tile, far_dev, t);
+ goto err_deregister;
+ }
+ }
+ }
+
+ return 0;
+
+err_deregister:
+ for_each_gt(far_gt, xe, j) {
+ u32 tile, dev;
+
+ if (far_gt->info.id == gt->info.id)
+ continue;
+
+ if (j >= i) /* only GTs visited before the failing one were fully registered */
+ break;
+
+ tile = gt_to_tile(far_gt)->id;
+ dev = G2G_DEV(far_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++)
+ guc_g2g_deregister(guc, tile, dev, t);
+ }
+
+ return err;
+}
+
static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
@@ -253,6 +576,8 @@ static void guc_fini_hw(void *arg)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_fini_hw(&guc_to_gt(guc)->uc);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ guc_g2g_fini(guc);
}
/**
@@ -423,7 +748,16 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
int xe_guc_post_load_init(struct xe_guc *guc)
{
+ int ret;
+
xe_guc_ads_populate_post_load(&guc->ads);
+
+ if (xe_guc_g2g_wanted(guc_to_xe(guc))) {
+ ret = guc_g2g_start(guc);
+ if (ret)
+ return ret;
+ }
+
guc->submission_state.enabled = true;
return 0;
@@ -945,7 +1279,6 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
- xe_assert(xe, !xe_guc_ct_enabled(&guc->ct));
xe_assert(xe, len);
xe_assert(xe, len <= VF_SW_FLAG_COUNT);
xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
@@ -1099,10 +1432,21 @@ int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
return guc_self_cfg(guc, key, 2, val);
}
+/*
+ * Handle the GuC software interrupt 0. Only a SR-IOV VF reacts to it, by
+ * running the VF migrated event handler for this GuC's GT; otherwise the
+ * interrupt is ignored.
+ */
+static void xe_guc_sw_0_irq_handler(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+
+	if (!IS_SRIOV_VF(gt_to_xe(gt)))
+		return;
+
+	xe_gt_sriov_vf_migrated_event_handler(gt);
+}
+
void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
{
if (iir & GUC_INTR_GUC2HOST)
xe_guc_ct_irq_handler(&guc->ct);
+
+ if (iir & GUC_INTR_SW_INT_0)
+ xe_guc_sw_0_irq_handler(guc);
}
void xe_guc_sanitize(struct xe_guc *guc)
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 4e746ae98888..fab259adc380 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -29,6 +29,7 @@
#include "xe_platform_types.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
+#include "xe_gt_mcr.h"
/* Slack of a few additional entries per engine */
#define ADS_REGSET_EXTRA_MAX 8
@@ -231,11 +232,6 @@ static size_t guc_ads_size(struct xe_guc_ads *ads)
guc_ads_private_data_size(ads);
}
-static bool needs_wa_1607983814(struct xe_device *xe)
-{
- return GRAPHICS_VERx100(xe) < 1250;
-}
-
static size_t calculate_regset_size(struct xe_gt *gt)
{
struct xe_reg_sr_entry *sr_entry;
@@ -250,7 +246,7 @@ static size_t calculate_regset_size(struct xe_gt *gt)
count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
- if (needs_wa_1607983814(gt_to_xe(gt)))
+ if (XE_WA(gt, 1607983814))
count += LNCFCMOCS_REG_COUNT;
return count * sizeof(struct guc_mmio_reg);
@@ -701,6 +697,20 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
.flags = reg.masked ? GUC_REGSET_MASKED : 0,
};
+ if (reg.mcr) {
+ struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
+ u8 group, instance;
+
+ bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
+ &group, &instance);
+
+ if (steer) {
+ entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
+ entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
+ entry.flags |= GUC_REGSET_STEERING_NEEDED;
+ }
+ }
+
xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
&entry, sizeof(entry));
}
@@ -709,7 +719,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
struct iosys_map *regset_map,
struct xe_hw_engine *hwe)
{
- struct xe_device *xe = ads_to_xe(ads);
struct xe_hw_engine *hwe_rcs_reset_domain =
xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
struct xe_reg_sr_entry *entry;
@@ -740,8 +749,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
}
- /* Wa_1607983814 */
- if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
+ if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
guc_mmio_regset_write_one(ads, regset_map,
XELP_LNCFCMOCS(i), count++);
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index d63912d28246..f6d523e4c5fe 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -1806,7 +1806,6 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
if (!devcore_snapshot->matched_node)
return;
- xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC);
xe_gt_assert(gt, snapshot->hwe);
capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
@@ -1815,7 +1814,8 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
snapshot->name ? snapshot->name : "",
snapshot->logical_instance);
drm_printf(p, "\tCapture_source: %s\n",
- snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual");
+ devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ?
+ "GuC" : "Manual");
drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
snapshot->forcewake.domain, snapshot->forcewake.ref);
@@ -1840,29 +1840,24 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
}
/**
- * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job.
- * @job: The job object.
+ * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue.
+ * @q: The exec queue object
*
- * Search within the capture outlist for the job, could be used for check if
- * GuC capture is ready for the job.
+ * Search within the capture outlist for the queue, could be used for check if
+ * GuC capture is ready for the queue.
* If found, the locked boolean of the node will be flagged.
*
* Returns: found guc-capture node ptr else NULL
*/
struct __guc_capture_parsed_output *
-xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
+xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- struct xe_exec_queue *q;
struct xe_device *xe;
u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
struct xe_devcoredump_snapshot *ss;
- if (!job)
- return NULL;
-
- q = job->q;
if (!q || !q->gt)
return NULL;
@@ -1874,7 +1869,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
return ss->matched_node;
- /* Find hwe for the job */
+ /* Find hwe for the queue */
for_each_hw_engine(hwe, q->gt, id) {
if (hwe != q->hwe)
continue;
@@ -1906,17 +1901,16 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
}
/**
- * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine
- * @job: The job object
+ * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine
+ * @q: The exec queue object
*
* Take snapshot of associated HW Engine
*
* Returns: None.
*/
void
-xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
+xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
{
- struct xe_exec_queue *q = job->q;
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
struct xe_hw_engine *hwe;
@@ -1934,11 +1928,12 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
}
if (!coredump->snapshot.hwe[id]) {
- coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job);
+ coredump->snapshot.hwe[id] =
+ xe_hw_engine_snapshot_capture(hwe, q);
} else {
struct __guc_capture_parsed_output *new;
- new = xe_guc_capture_get_matching_and_lock(job);
+ new = xe_guc_capture_get_matching_and_lock(q);
if (new) {
struct xe_guc *guc = &q->gt->uc.guc;
@@ -1960,7 +1955,7 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
}
/*
- * xe_guc_capture_put_matched_nodes - Cleanup macthed nodes
+ * xe_guc_capture_put_matched_nodes - Cleanup matched nodes
* @guc: The GuC object
*
* Free matched node and all nodes with the equal guc_id from
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h
index 97a795d13dd1..20a078dc4b85 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture.h
@@ -11,10 +11,10 @@
#include "xe_guc.h"
#include "xe_guc_fwif.h"
+struct xe_exec_queue;
struct xe_guc;
struct xe_hw_engine;
struct xe_hw_engine_snapshot;
-struct xe_sched_job;
static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class)
{
@@ -50,10 +50,10 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc);
const struct __guc_mmio_reg_descr_group *
xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
enum guc_capture_list_class_type capture_class, bool is_ext);
-struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job);
+struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q);
void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot);
void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p);
-void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job);
+void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q);
void xe_guc_capture_steered_list_init(struct xe_guc *guc);
void xe_guc_capture_put_matched_nodes(struct xe_guc *guc);
int xe_guc_capture_init(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h
index 2057125b1bfa..ca2d390ccbee 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h
@@ -22,7 +22,7 @@ enum capture_register_data_type {
* struct __guc_mmio_reg_descr - GuC mmio register descriptor
*
* xe_guc_capture module uses these structures to define a register
- * (offsets, names, flags,...) that are used at the ADS regisration
+ * (offsets, names, flags,...) that are used at the ADS registration
* time as well as during runtime processing and reporting of error-
* capture states generated by GuC just prior to engine reset events.
*/
@@ -48,7 +48,7 @@ struct __guc_mmio_reg_descr {
*
* xe_guc_capture module uses these structures to maintain static
* tables (per unique platform) that consists of lists of registers
- * (offsets, names, flags,...) that are used at the ADS regisration
+ * (offsets, names, flags,...) that are used at the ADS registration
* time as well as during runtime processing and reporting of error-
* capture states generated by GuC just prior to engine reset events.
*/
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 8aeb1789805c..8b65c5e959cc 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -54,6 +54,7 @@ enum {
CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */
CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */
CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */
+ CT_DEAD_CRASH, /* 0x8000 */
};
static void ct_dead_worker_func(struct work_struct *w);
@@ -469,8 +470,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
* after any existing dead state has been dumped.
*/
spin_lock_irq(&ct->dead.lock);
- if (ct->dead.reason)
+ if (ct->dead.reason) {
ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
+ queue_work(system_unbound_wq, &ct->dead.worker);
+ }
spin_unlock_irq(&ct->dead.lock);
#endif
@@ -707,7 +710,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
--len;
++action;
- /* Write H2G ensuring visable before descriptor update */
+ /* Write H2G ensuring visible before descriptor update */
xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
xe_device_wmb(xe);
@@ -1017,7 +1020,6 @@ retry_same_fence:
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
-
if (!ret) {
LNL_FLUSH_WORK(&ct->g2h_worker);
if (g2h_fence.done) {
@@ -1121,6 +1123,24 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
return 0;
}
+/*
+ * Handle a crash dump / exception notification received from the GuC: log
+ * which notification arrived, flag the CT as dead with the CRASH reason and
+ * kick a reset.
+ *
+ * Always returns 0.
+ */
+static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action)
+{
+	struct xe_gt *gt = ct_to_gt(ct);
+
+	switch (action) {
+	case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
+		xe_gt_err(gt, "GuC Crash dump notification\n");
+		break;
+	case XE_GUC_ACTION_NOTIFY_EXCEPTION:
+		xe_gt_err(gt, "GuC Exception notification\n");
+		break;
+	default:
+		xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action);
+		break;
+	}
+
+	CT_DEAD(ct, NULL, CRASH);
+
+	kick_reset(ct);
+
+	return 0;
+}
+
static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_gt *gt = ct_to_gt(ct);
@@ -1295,13 +1315,17 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len);
break;
+ case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
+ case XE_GUC_ACTION_NOTIFY_EXCEPTION:
+ ret = guc_crash_process_msg(ct, action);
+ break;
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
if (ret) {
- xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
- action, ERR_PTR(ret));
+ xe_gt_err(gt, "G2H action %#04x failed (%pe) len %u msg %*ph\n",
+ action, ERR_PTR(ret), hxg_len, (int)sizeof(u32) * hxg_len, hxg);
CT_DEAD(ct, NULL, PROCESS_FAILED);
}
@@ -1359,7 +1383,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
* this function and nowhere else. Hence, they cannot be different
* unless two g2h_read calls are running concurrently. Which is not
* possible because it is guarded by ct->fast_lock. And yet, some
- * discrete platforms are reguarly hitting this error :(.
+ * discrete platforms are regularly hitting this error :(.
*
* desc_head rolling backwards shouldn't cause any noticeable
* problems - just a delay in GuC being allowed to proceed past that
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 08ffe59f22fa..057153f89b30 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -17,6 +17,7 @@
#define G2H_LEN_DW_TLB_INVALIDATE 3
#define GUC_ID_MAX 65535
+#define GUC_ID_UNKNOWN 0xffffffff
#define GUC_CONTEXT_DISABLE 0
#define GUC_CONTEXT_ENABLE 1
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
index 9d99fe266d97..146a6eda9e06 100644
--- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
+++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
@@ -49,6 +49,8 @@ const char *xe_guc_klv_key_to_string(u16 key)
return "begin_db_id";
case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY:
return "begin_ctx_id";
+ case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY:
+ return "sched_priority";
/* VF CFG threshold keys */
#define define_threshold_key_to_string_case(TAG, NAME, ...) \
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index e8b9faeaef64..df7f130fb663 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -38,6 +38,7 @@
#define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
#define RPE_MASK REG_GENMASK(15, 8)
+#define RPA_MASK REG_GENMASK(31, 16)
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define CAGF_MASK REG_GENMASK(19, 11)
@@ -328,6 +329,19 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
freq);
}
+/*
+ * Refresh pc->rpa_freq from HW. A media GT reads MTL_MPA_FREQUENCY, any
+ * other GT reads MTL_GT_RPA_FREQUENCY; the MTL_RPA_MASK field of the value
+ * read is then converted with decode_freq().
+ */
+static void mtl_update_rpa_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 raw;
+
+	if (xe_gt_is_media_type(gt))
+		raw = xe_mmio_read32(&gt->mmio, MTL_MPA_FREQUENCY);
+	else
+		raw = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY);
+
+	pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, raw));
+}
+
static void mtl_update_rpe_value(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
@@ -341,6 +355,25 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
}
+static void tgl_update_rpa_value(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 reg;
+
+ /*
+ * Refresh pc->rpa_freq (RPa): PVC reads PVC_RP_STATE_CAP, every other
+ * platform reads FREQ_INFO_REC. In both cases the RPA_MASK field of
+ * the value read is scaled by GT_FREQUENCY_MULTIPLIER.
+ *
+ * NOTE(review): RPA_MASK is defined next to FREQ_INFO_REC; confirm
+ * that the same field is meaningful in PVC_RP_STATE_CAP.
+ */
+ if (xe->info.platform == XE_PVC)
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
+ else
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
+
+ pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
static void tgl_update_rpe_value(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
@@ -365,10 +398,13 @@ static void pc_update_rp_values(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
struct xe_device *xe = gt_to_xe(gt);
- if (GRAPHICS_VERx100(xe) >= 1270)
+ if (GRAPHICS_VERx100(xe) >= 1270) {
+ mtl_update_rpa_value(pc);
mtl_update_rpe_value(pc);
- else
+ } else {
+ tgl_update_rpa_value(pc);
tgl_update_rpe_value(pc);
+ }
/*
* RPe is decided at runtime by PCODE. In the rare case where that's
@@ -421,8 +457,8 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -ETIMEDOUT;
}
@@ -448,6 +484,19 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
}
/**
+ * xe_guc_pc_get_rpa_freq - Get the RPa freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPa freq.
+ */
+u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc)
+{
+ pc_update_rp_values(pc); /* refresh the cached RP values, rpa_freq included, before reporting */
+
+ return pc->rpa_freq;
+}
+
+/**
* xe_guc_pc_get_rpe_freq - Get the RPe freq
* @pc: The GuC PC
*
@@ -481,10 +530,10 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
*/
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
- struct xe_gt *gt = pc_to_gt(pc);
- unsigned int fw_ref;
int ret;
+ xe_device_assert_mem_access(pc_to_xe(pc));
+
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -492,24 +541,12 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
goto out;
}
- /*
- * GuC SLPC plays with min freq request when GuCRC is enabled
- * Block RC6 for a more reliable read.
- */
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- ret = -ETIMEDOUT;
- goto fw;
- }
-
ret = pc_action_query_task_state(pc);
if (ret)
- goto fw;
+ goto out;
*freq = pc_get_min_freq(pc);
-fw:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
out:
mutex_unlock(&pc->freq_lock);
return ret;
@@ -969,8 +1006,8 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -ETIMEDOUT;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index efda432fadfc..619f59cd633c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -21,6 +21,7 @@ int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);
u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc);
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 13810be015db..2978ac9a249b 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -17,6 +17,8 @@ struct xe_guc_pc {
struct xe_bo *bo;
/** @rp0_freq: HW RP0 frequency - The Maximum one */
u32 rp0_freq;
+ /** @rpa_freq: HW RPa frequency - The Achievable one */
+ u32 rpa_freq;
/** @rpe_freq: HW RPe frequency - The Efficient one */
u32 rpe_freq;
/** @rpn_freq: HW RPN frequency - The Minimum one */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 6f4a9812b4f4..913c74d6e2ae 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -412,12 +412,11 @@ static const int xe_exec_queue_prio_to_guc[] = {
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
struct exec_queue_policy policy;
- struct xe_device *xe = guc_to_xe(guc);
enum xe_exec_queue_priority prio = q->sched_props.priority;
u32 timeslice_us = q->sched_props.timeslice_us;
u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
- xe_assert(xe, exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
@@ -451,12 +450,11 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc,
struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
- struct xe_device *xe = guc_to_xe(guc);
u32 action[MAX_MLRC_REG_SIZE];
int len = 0;
int i;
- xe_assert(xe, xe_exec_queue_is_parallel(q));
+ xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));
action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
action[len++] = info->flags;
@@ -479,7 +477,7 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc,
action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
}
- xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
+ xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE
xe_guc_ct_send(&guc->ct, action, len, 0, 0);
@@ -513,7 +511,7 @@ static void register_exec_queue(struct xe_exec_queue *q)
struct xe_lrc *lrc = q->lrc[0];
struct guc_ctxt_registration_info info;
- xe_assert(xe, !exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
memset(&info, 0, sizeof(info));
info.context_idx = q->guc->id;
@@ -603,7 +601,7 @@ static int wq_noop_append(struct xe_exec_queue *q)
if (wq_wait_for_space(q, wq_space_until_wrap(q)))
return -ENODEV;
- xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
+ xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));
parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
@@ -643,13 +641,13 @@ static void wq_item_append(struct xe_exec_queue *q)
wqi[i++] = lrc->ring.tail / sizeof(u64);
}
- xe_assert(xe, i == wqi_size / sizeof(u32));
+ xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));
iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
wq[q->guc->wqi_tail / sizeof(u32)]));
xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
q->guc->wqi_tail += wqi_size;
- xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
+ xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);
xe_device_wmb(xe);
@@ -661,7 +659,6 @@ static void wq_item_append(struct xe_exec_queue *q)
static void submit_exec_queue(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_lrc *lrc = q->lrc[0];
u32 action[3];
u32 g2h_len = 0;
@@ -669,7 +666,7 @@ static void submit_exec_queue(struct xe_exec_queue *q)
int len = 0;
bool extra_submit = false;
- xe_assert(xe, exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
if (xe_exec_queue_is_parallel(q))
wq_item_append(q);
@@ -716,12 +713,11 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
struct xe_sched_job *job = to_xe_sched_job(drm_job);
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct dma_fence *fence = NULL;
bool lr = xe_exec_queue_is_lr(q);
- xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
- exec_queue_banned(q) || exec_queue_suspended(q));
+ xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
+ exec_queue_banned(q) || exec_queue_suspended(q));
trace_xe_sched_job_run(job);
@@ -823,7 +819,7 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
*/
void xe_guc_submit_wedge(struct xe_guc *guc)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
unsigned long index;
int err;
@@ -833,7 +829,8 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
- drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n");
+ xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
+ "Although device is wedged.\n");
return;
}
@@ -865,11 +862,10 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
container_of(w, struct xe_guc_exec_queue, lr_tdr);
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_gpu_scheduler *sched = &ge->sched;
bool wedged;
- xe_assert(xe, xe_exec_queue_is_lr(q));
+ xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
trace_xe_exec_queue_lr_cleanup(q);
wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
@@ -903,13 +899,19 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
!exec_queue_pending_disable(q) ||
xe_guc_read_stopped(guc), HZ * 5);
if (!ret) {
- drm_warn(&xe->drm, "Schedule disable failed to respond");
+ xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
+ q->guc->id);
+ xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
+ q->guc->id);
xe_sched_submission_start(sched);
xe_gt_reset_async(q->gt);
return;
}
}
+ if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
+ xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);
+
xe_sched_submission_start(sched);
}
@@ -1068,13 +1070,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* do manual capture first and decide later if we need to use it
*/
if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
- !xe_guc_capture_get_matching_and_lock(job)) {
+ !xe_guc_capture_get_matching_and_lock(q)) {
/* take force wake before engine register manual capture */
fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
- xe_engine_snapshot_capture_for_job(job);
+ xe_engine_snapshot_capture_for_queue(q);
xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
}
@@ -1132,7 +1134,12 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
if (!ret)
- xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
+ xe_gt_warn(guc_to_gt(guc),
+ "Schedule disable failed to respond, guc_id=%d",
+ q->guc->id);
+ xe_devcoredump(q, job,
+ "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
+ q->guc->id, ret, xe_guc_read_stopped(guc));
set_exec_queue_extra_ref(q);
xe_exec_queue_get(q); /* GT reset owns this */
set_exec_queue_banned(q);
@@ -1162,7 +1169,10 @@ trigger_reset:
trace_xe_sched_job_timedout(job);
if (!exec_queue_killed(q))
- xe_devcoredump(job);
+ xe_devcoredump(q, job,
+ "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
+ xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+ q->guc->id, q->flags);
/*
* Kernel jobs should never fail, nor should VM jobs if they do
@@ -1216,7 +1226,7 @@ sched_enable:
enable_scheduling(q);
rearm:
/*
- * XXX: Ideally want to adjust timeout based on current exection time
+ * XXX: Ideally want to adjust timeout based on current execution time
* but there is not currently an easy way to do in DRM scheduler. With
* some thought, do this in a follow up.
*/
@@ -1277,9 +1287,8 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
struct xe_exec_queue *q = msg->private_data;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
+ xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
trace_xe_exec_queue_cleanup_entity(q);
if (exec_queue_registered(q))
@@ -1315,11 +1324,10 @@ static void __suspend_fence_signal(struct xe_exec_queue *q)
static void suspend_fence_signal(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
- xe_guc_read_stopped(guc));
- xe_assert(xe, q->guc->suspend_pending);
+ xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
+ xe_guc_read_stopped(guc));
+ xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);
__suspend_fence_signal(q);
}
@@ -1415,12 +1423,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_guc_exec_queue *ge;
long timeout;
int err, i;
- xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
+ xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));
ge = kzalloc(sizeof(*ge), GFP_KERNEL);
if (!ge)
@@ -1633,9 +1640,8 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q)
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, !q->guc->suspend_pending);
+ xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
xe_sched_msg_lock(sched);
guc_exec_queue_try_add_msg(q, msg, RESUME);
@@ -1708,7 +1714,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
ban = true;
}
} else if (xe_exec_queue_is_lr(q) &&
- (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
+ !xe_lrc_ring_is_idle(q->lrc[0])) {
ban = true;
}
@@ -1747,9 +1753,8 @@ void xe_guc_submit_stop(struct xe_guc *guc)
{
struct xe_exec_queue *q;
unsigned long index;
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, xe_guc_read_stopped(guc) == 1);
+ xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
@@ -1791,9 +1796,8 @@ int xe_guc_submit_start(struct xe_guc *guc)
{
struct xe_exec_queue *q;
unsigned long index;
- struct xe_device *xe = guc_to_xe(guc);
- xe_assert(xe, xe_guc_read_stopped(guc) == 1);
+ xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
mutex_lock(&guc->submission_state.lock);
atomic_dec(&guc->submission_state.stopped);
@@ -1814,22 +1818,22 @@ int xe_guc_submit_start(struct xe_guc *guc)
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
if (unlikely(guc_id >= GUC_ID_MAX)) {
- drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+ xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
return NULL;
}
q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
if (unlikely(!q)) {
- drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+ xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id);
return NULL;
}
- xe_assert(xe, guc_id >= q->guc->id);
- xe_assert(xe, guc_id < (q->guc->id + q->width));
+ xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
+ xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
return q;
}
@@ -1898,15 +1902,14 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
- u32 runnable_state = msg[1];
+ u32 guc_id, runnable_state;
- if (unlikely(len < 2)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 2))
return -EPROTO;
- }
+
+ guc_id = msg[0];
+ runnable_state = msg[1];
q = g2h_exec_queue_lookup(guc, guc_id);
if (unlikely(!q))
@@ -1940,14 +1943,13 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
+ u32 guc_id;
- if (unlikely(len < 1)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 1))
return -EPROTO;
- }
+
+ guc_id = msg[0];
q = g2h_exec_queue_lookup(guc, guc_id);
if (unlikely(!q))
@@ -1969,14 +1971,13 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
+ u32 guc_id;
- if (unlikely(len < 1)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 1))
return -EPROTO;
- }
+
+ guc_id = msg[0];
q = g2h_exec_queue_lookup(guc, guc_id);
if (unlikely(!q))
@@ -2016,10 +2017,8 @@ int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
u32 status;
- if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) {
- xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len);
+ if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
return -EPROTO;
- }
status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
@@ -2034,13 +2033,21 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len)
{
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = guc_to_xe(guc);
struct xe_exec_queue *q;
- u32 guc_id = msg[0];
+ u32 guc_id;
- if (unlikely(len < 1)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len < 1))
return -EPROTO;
+
+ guc_id = msg[0];
+
+ if (guc_id == GUC_ID_UNKNOWN) {
+ /*
+ * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF
+ * context. In such case only PF will be notified about that fault.
+ */
+ xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
+ return 0;
}
q = g2h_exec_queue_lookup(guc, guc_id);
@@ -2062,24 +2069,22 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
- if (unlikely(len != 3)) {
- drm_err(&xe->drm, "Invalid length %u", len);
+ if (unlikely(len != 3))
return -EPROTO;
- }
guc_class = msg[0];
instance = msg[1];
reason = msg[2];
/* Unexpected failure of a hardware feature, log an actual error */
- drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
- guc_class, instance, reason);
+ xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
+ guc_class, instance, reason);
- xe_gt_reset_async(guc_to_gt(guc));
+ xe_gt_reset_async(gt);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index fa75f57bf5da..83a41ebcdc91 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -64,6 +64,15 @@ struct xe_guc {
struct xe_guc_pc pc;
/** @dbm: GuC Doorbell Manager */
struct xe_guc_db_mgr dbm;
+
+ /** @g2g: GuC to GuC communication state */
+ struct {
+ /** @g2g.bo: Storage for GuC to GuC communication channels */
+ struct xe_bo *bo;
+ /** @g2g.owned: Is the BO owned by this GT or just mapped in */
+ bool owned;
+ } g2g;
+
/** @submission_state: GuC submission state */
struct {
/** @submission_state.idm: GuC context ID Manager */
@@ -79,6 +88,7 @@ struct xe_guc {
/** @submission_state.fini_wq: submit fini wait queue */
wait_queue_head_t fini_wq;
} submission_state;
+
/** @hwconfig: Hardware config state */
struct {
/** @hwconfig.bo: buffer object of the hardware config */
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 65b2e147c4b9..d765bfd3636b 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -92,7 +92,7 @@ void xe_heci_gsc_fini(struct xe_device *xe)
{
struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
- if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
+ if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi)
return;
if (heci_gsc->adev) {
@@ -177,7 +177,7 @@ void xe_heci_gsc_init(struct xe_device *xe)
const struct heci_gsc_def *def;
int ret;
- if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
+ if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi)
return;
heci_gsc->irq = -1;
@@ -222,7 +222,7 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
if ((iir & GSC_IRQ_INTF(1)) == 0)
return;
- if (!HAS_HECI_GSCFI(xe)) {
+ if (!xe->info.has_heci_gscfi) {
drm_warn_once(&xe->drm, "GSC irq: not supported");
return;
}
@@ -242,7 +242,7 @@ void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir)
if ((iir & CSC_IRQ_INTF(1)) == 0)
return;
- if (!HAS_HECI_CSCFI(xe)) {
+ if (!xe->info.has_heci_cscfi) {
drm_warn_once(&xe->drm, "CSC irq: not supported");
return;
}
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index 2c32dc46f7d4..089834467880 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -159,7 +159,7 @@ void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
* This function allocates the storage of the userptr sg table.
* It is caller's responsibility to free it calling sg_free_table.
*
- * returns: 0 for succuss; negative error no on failure
+ * returns: 0 for success; negative error no on failure
*/
int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
bool is_mm_mmap_locked)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 1557acee3523..fc447751fe78 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -324,6 +324,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
u32 ccs_mask =
xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
+ u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
@@ -332,8 +333,10 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
xe_bo_ggtt_addr(hwe->hwsp));
- xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
- _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+
+ if (xe_device_has_msix(gt_to_xe(hwe->gt)))
+ ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
+ xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode);
xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
_MASKED_BIT_DISABLE(STOP_RING));
xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
@@ -419,7 +422,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
* Bspec: 72161
*/
const u8 mocs_write_idx = gt->mocs.uc_index;
- const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+ const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
(GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
gt->mocs.wb_index : gt->mocs.uc_index;
u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
@@ -574,7 +577,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
- xe_reg_sr_apply_whitelist(hwe);
hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
@@ -773,7 +775,7 @@ static void check_gsc_availability(struct xe_gt *gt)
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
- drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
+ drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n");
}
}
@@ -829,7 +831,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
/**
* xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
* @hwe: Xe HW Engine.
- * @job: The job object.
+ * @q: The exec queue object.
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -838,7 +840,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
* caller, using `xe_hw_engine_snapshot_free`.
*/
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job)
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q)
{
struct xe_hw_engine_snapshot *snapshot;
struct __guc_capture_parsed_output *node;
@@ -864,15 +866,14 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job
if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
return snapshot;
- if (job) {
+ if (q) {
/* If got guc capture, set source to GuC */
- node = xe_guc_capture_get_matching_and_lock(job);
+ node = xe_guc_capture_get_matching_and_lock(q);
if (node) {
struct xe_device *xe = gt_to_xe(hwe->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
coredump->snapshot.matched_node = node;
- snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
return snapshot;
}
@@ -880,7 +881,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job
/* otherwise, do manual capture */
xe_engine_manual_capture(hwe, snapshot);
- snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");
return snapshot;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
index da0a6922a26f..6b5f9fa2a594 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -11,7 +11,7 @@
struct drm_printer;
struct drm_xe_engine_class_instance;
struct xe_device;
-struct xe_sched_job;
+struct xe_exec_queue;
#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
@@ -56,7 +56,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
enum xe_engine_class engine_class);
struct xe_hw_engine_snapshot *
-xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job);
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q);
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot);
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 719f27ef00a5..e4191a7a2c31 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -106,7 +106,7 @@ struct xe_hw_engine_class_intf {
* Contains all the hardware engine state for physical instances.
*/
struct xe_hw_engine {
- /** @gt: graphics tile this hw engine belongs to */
+ /** @gt: GT structure this hw engine belongs to */
struct xe_gt *gt;
/** @name: name of this hw engine */
const char *name;
@@ -165,8 +165,6 @@ enum xe_hw_engine_snapshot_source_id {
struct xe_hw_engine_snapshot {
/** @name: name of the hw engine */
char *name;
- /** @source: Data source, either manual or GuC */
- enum xe_hw_engine_snapshot_source_id source;
/** @hwe: hw engine */
struct xe_hw_engine *hwe;
/** @logical_instance: logical instance of this hw engine */
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
index 364a61f4bfda..58a8d09afe5c 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -41,7 +41,7 @@ struct xe_hw_fence_irq {
* to a xe_hw_fence_irq, maintains serial seqno.
*/
struct xe_hw_fence_ctx {
- /** @gt: graphics tile of hardware fence context */
+ /** @gt: GT structure of hardware fence context */
struct xe_gt *gt;
/** @irq: fence irq handler */
struct xe_hw_fence_irq *irq;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index b7995ebd54ab..32f5a67a917b 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -10,6 +10,7 @@
#include <drm/drm_managed.h>
#include "display/xe_display.h"
+#include "regs/xe_guc_regs.h"
#include "regs/xe_irq_regs.h"
#include "xe_device.h"
#include "xe_drv.h"
@@ -29,6 +30,11 @@
#define IIR(offset) XE_REG(offset + 0x8)
#define IER(offset) XE_REG(offset + 0xc)
+static int xe_irq_msix_init(struct xe_device *xe);
+static void xe_irq_msix_free(struct xe_device *xe);
+static int xe_irq_msix_request_irqs(struct xe_device *xe);
+static void xe_irq_msix_synchronize_irq(struct xe_device *xe);
+
static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg)
{
u32 val = xe_mmio_read32(mmio, reg);
@@ -192,7 +198,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
gsc_mask = irqs | GSC_ER_COMPLETE;
heci_mask = GSC_IRQ_INTF(1);
- } else if (HAS_HECI_GSCFI(xe)) {
+ } else if (xe->info.has_heci_gscfi) {
gsc_mask = GSC_IRQ_INTF(1);
}
@@ -325,7 +331,7 @@ static void gt_irq_handler(struct xe_tile *tile,
if (class == XE_ENGINE_CLASS_OTHER) {
/* HECI GSCFI interrupts come from outside of GT */
- if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE)
+ if (xe->info.has_heci_gscfi && instance == OTHER_GSC_INSTANCE)
xe_heci_gsc_irq_handler(xe, intr_vec);
else
gt_other_irq_handler(engine_gt, instance, intr_vec);
@@ -348,12 +354,8 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
unsigned long intr_dw[2];
u32 identity[32];
- spin_lock(&xe->irq.lock);
- if (!xe->irq.enabled) {
- spin_unlock(&xe->irq.lock);
+ if (!atomic_read(&xe->irq.enabled))
return IRQ_NONE;
- }
- spin_unlock(&xe->irq.lock);
master_ctl = xelp_intr_disable(xe);
if (!master_ctl) {
@@ -417,12 +419,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
/* TODO: This really shouldn't be copied+pasted */
- spin_lock(&xe->irq.lock);
- if (!xe->irq.enabled) {
- spin_unlock(&xe->irq.lock);
+ if (!atomic_read(&xe->irq.enabled))
return IRQ_NONE;
- }
- spin_unlock(&xe->irq.lock);
master_tile_ctl = dg1_intr_disable(xe);
if (!master_tile_ctl) {
@@ -459,7 +457,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
* the primary tile.
*/
if (id == 0) {
- if (HAS_HECI_CSCFI(xe))
+ if (xe->info.has_heci_cscfi)
xe_heci_csc_irq_handler(xe, master_ctl);
xe_display_irq_handler(xe, master_ctl);
gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
@@ -508,7 +506,7 @@ static void gt_irq_reset(struct xe_tile *tile)
if ((tile->media_gt &&
xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
- HAS_HECI_GSCFI(tile_to_xe(tile))) {
+ tile_to_xe(tile)->info.has_heci_gscfi) {
xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0);
@@ -580,6 +578,11 @@ static void xe_irq_reset(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return vf_irq_reset(xe);
+ if (xe_device_uses_memirq(xe)) {
+ for_each_tile(tile, xe, id)
+ xe_memirq_reset(&tile->memirq);
+ }
+
for_each_tile(tile, xe, id) {
if (GRAPHICS_VERx100(xe) >= 1210)
dg1_irq_reset(tile);
@@ -622,6 +625,14 @@ static void xe_irq_postinstall(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return vf_irq_postinstall(xe);
+ if (xe_device_uses_memirq(xe)) {
+ struct xe_tile *tile;
+ unsigned int id;
+
+ for_each_tile(tile, xe, id)
+ xe_memirq_postinstall(&tile->memirq);
+ }
+
xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
/*
@@ -644,12 +655,8 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg)
struct xe_tile *tile;
unsigned int id;
- spin_lock(&xe->irq.lock);
- if (!xe->irq.enabled) {
- spin_unlock(&xe->irq.lock);
+ if (!atomic_read(&xe->irq.enabled))
return IRQ_NONE;
- }
- spin_unlock(&xe->irq.lock);
for_each_tile(tile, xe, id)
xe_memirq_handler(&tile->memirq);
@@ -668,63 +675,85 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe)
return xelp_irq_handler;
}
-static void irq_uninstall(void *arg)
+/*
+ * Request the single interrupt used on the non-MSI-X path and route it to
+ * the handler selected by xe_irq_handler().
+ *
+ * Return: 0 on success, -EINVAL when no handler is available, or the
+ * negative error code returned by request_irq().
+ */
+static int xe_irq_msi_request_irqs(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	irq_handler_t irq_handler;
+	int irq, err;
+
+	irq_handler = xe_irq_handler(xe);
+	if (!irq_handler) {
+		drm_err(&xe->drm, "No supported interrupt handler\n");
+		return -EINVAL;
+	}
+
+	/* Only vector 0 is requested on this path. */
+	irq = pci_irq_vector(pdev, 0);
+	err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
+	if (err < 0) {
+		drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void xe_irq_msi_free(struct xe_device *xe)
{
- struct xe_device *xe = arg;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
int irq;
- if (!xe->irq.enabled)
+ irq = pci_irq_vector(pdev, 0);
+ free_irq(irq, xe);
+}
+
+static void irq_uninstall(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ if (!atomic_xchg(&xe->irq.enabled, 0))
return;
- xe->irq.enabled = false;
xe_irq_reset(xe);
- irq = pci_irq_vector(pdev, 0);
- free_irq(irq, xe);
+ if (xe_device_has_msix(xe))
+ xe_irq_msix_free(xe);
+ else
+ xe_irq_msi_free(xe);
+}
+
+/**
+ * xe_irq_init - Prepare the interrupt bookkeeping of a device
+ * @xe: the xe device
+ *
+ * Initializes the irq spinlock and then runs the MSI-X specific setup.
+ *
+ * Return: the result of xe_irq_msix_init().
+ */
+int xe_irq_init(struct xe_device *xe)
+{
+	int err;
+
+	spin_lock_init(&xe->irq.lock);
+
+	err = xe_irq_msix_init(xe);
+
+	return err;
}
int xe_irq_install(struct xe_device *xe)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- unsigned int irq_flags = PCI_IRQ_MSIX;
- irq_handler_t irq_handler;
- int err, irq, nvec;
-
- irq_handler = xe_irq_handler(xe);
- if (!irq_handler) {
- drm_err(&xe->drm, "No supported interrupt handler");
- return -EINVAL;
- }
+ unsigned int irq_flags = PCI_IRQ_MSI;
+ int nvec = 1;
+ int err;
xe_irq_reset(xe);
- nvec = pci_msix_vec_count(pdev);
- if (nvec <= 0) {
- if (nvec == -EINVAL) {
- /* MSIX capability is not supported in the device, using MSI */
- irq_flags = PCI_IRQ_MSI;
- nvec = 1;
- } else {
- drm_err(&xe->drm, "MSIX: Failed getting count\n");
- return nvec;
- }
+ if (xe_device_has_msix(xe)) {
+ nvec = xe->irq.msix.nvec;
+ irq_flags = PCI_IRQ_MSIX;
}
err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags);
if (err < 0) {
- drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
+ drm_err(&xe->drm, "Failed to allocate IRQ vectors: %d\n", err);
return err;
}
- irq = pci_irq_vector(pdev, 0);
- err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
- if (err < 0) {
- drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err);
+ err = xe_device_has_msix(xe) ? xe_irq_msix_request_irqs(xe) :
+ xe_irq_msi_request_irqs(xe);
+ if (err)
return err;
- }
- xe->irq.enabled = true;
+ atomic_set(&xe->irq.enabled, 1);
xe_irq_postinstall(xe);
@@ -735,20 +764,28 @@ int xe_irq_install(struct xe_device *xe)
return 0;
free_irq_handler:
- free_irq(irq, xe);
+ if (xe_device_has_msix(xe))
+ xe_irq_msix_free(xe);
+ else
+ xe_irq_msi_free(xe);
return err;
}
-void xe_irq_suspend(struct xe_device *xe)
+static void xe_irq_msi_synchronize_irq(struct xe_device *xe)
{
- int irq = to_pci_dev(xe->drm.dev)->irq;
+ synchronize_irq(to_pci_dev(xe->drm.dev)->irq);
+}
- spin_lock_irq(&xe->irq.lock);
- xe->irq.enabled = false; /* no new irqs */
- spin_unlock_irq(&xe->irq.lock);
+void xe_irq_suspend(struct xe_device *xe)
+{
+ atomic_set(&xe->irq.enabled, 0); /* no new irqs */
- synchronize_irq(irq); /* flush irqs */
+ /* flush irqs */
+ if (xe_device_has_msix(xe))
+ xe_irq_msix_synchronize_irq(xe);
+ else
+ xe_irq_msi_synchronize_irq(xe);
xe_irq_reset(xe); /* turn irqs off */
}
@@ -762,10 +799,205 @@ void xe_irq_resume(struct xe_device *xe)
* 1. no irq will arrive before the postinstall
* 2. display is not yet resumed
*/
- xe->irq.enabled = true;
+ atomic_set(&xe->irq.enabled, 1);
xe_irq_reset(xe);
xe_irq_postinstall(xe); /* turn irqs on */
for_each_gt(gt, xe, id)
xe_irq_enable_hwe(gt);
}
+
+/* MSI-X related definitions and functions below. */
+
+enum xe_irq_msix_static {
+ GUC2HOST_MSIX = 0, /* index requested for guc2host_irq_handler() */
+ DEFAULT_MSIX = XE_IRQ_DEFAULT_MSIX, /* index requested for xe_irq_msix_default_hwe_handler() */
+ /* Must be last; also the lowest index considered for dynamic allocation */
+ NUM_OF_STATIC_MSIX,
+};
+
+/*
+ * Record how many MSI-X vectors the PCI device reports and set up the
+ * xarray that tracks which MSI-X indexes are in use.
+ *
+ * -EINVAL from pci_msix_vec_count() is not treated as a failure: nothing is
+ * recorded and 0 is returned, leaving the device on the MSI path.
+ *
+ * Return: 0 on success (including the MSI case), negative error code
+ * otherwise.
+ */
+static int xe_irq_msix_init(struct xe_device *xe)
+{
+	int count = pci_msix_vec_count(to_pci_dev(xe->drm.dev));
+
+	if (count < 0) {
+		/* MSI */
+		if (count == -EINVAL)
+			return 0;
+
+		drm_err(&xe->drm, "Failed getting MSI-X vectors count: %d\n", count);
+		return count;
+	}
+
+	xe->irq.msix.nvec = count;
+	xa_init_flags(&xe->irq.msix.indexes, XA_FLAGS_ALLOC);
+
+	return 0;
+}
+
+/*
+ * Interrupt handler registered for the GUC2HOST_MSIX vector.
+ *
+ * Forwards a GUC_INTR_GUC2HOST event to the GuC of the primary GT of every
+ * tile. Returns IRQ_NONE without doing anything while interrupts are marked
+ * disabled in xe->irq.enabled.
+ */
+static irqreturn_t guc2host_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_tile *tile;
+	u8 tile_id;
+
+	if (atomic_read(&xe->irq.enabled) == 0)
+		return IRQ_NONE;
+
+	for_each_tile(tile, xe, tile_id) {
+		xe_guc_irq_handler(&tile->primary_gt->uc.guc,
+				   GUC_INTR_GUC2HOST);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Interrupt handler registered for the DEFAULT_MSIX vector.
+ *
+ * For every tile that has a memirq buffer object, runs
+ * xe_memirq_hwe_handler() on each hardware engine of each GT that belongs
+ * to that tile. Returns IRQ_NONE without doing anything while interrupts
+ * are marked disabled in xe->irq.enabled.
+ */
+static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	unsigned int tile_id, gt_id;
+	enum xe_hw_engine_id hwe_id;
+	struct xe_hw_engine *hwe;
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+
+	if (!atomic_read(&xe->irq.enabled))
+		return IRQ_NONE;
+
+	for_each_tile(tile, xe, tile_id) {
+		struct xe_memirq *memirq = &tile->memirq;
+
+		/* Tiles without a memirq buffer have nothing to process. */
+		if (!memirq->bo)
+			continue;
+
+		for_each_gt(gt, xe, gt_id) {
+			/* for_each_gt walks the whole device; keep this tile's GTs only. */
+			if (gt->tile == tile) {
+				for_each_hw_engine(hwe, gt, hwe_id)
+					xe_memirq_hwe_handler(memirq, hwe);
+			}
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Reserve an MSI-X index in xe->irq.msix.indexes, storing @irq_buf as the
+ * entry for that index.
+ *
+ * With @dynamic_msix set, the index is picked from the range
+ * [NUM_OF_STATIC_MSIX, nvec - 1] and written back through @msix. Otherwise
+ * exactly the index passed in via @msix is reserved.
+ *
+ * Return: 0 on success, or the error returned by xa_alloc().
+ */
+static int xe_irq_msix_alloc_vector(struct xe_device *xe, void *irq_buf,
+				    bool dynamic_msix, u16 *msix)
+{
+	struct xa_limit range;
+	u32 index;
+	int err;
+
+	if (dynamic_msix)
+		range = XA_LIMIT(NUM_OF_STATIC_MSIX, xe->irq.msix.nvec - 1);
+	else
+		range = XA_LIMIT(*msix, *msix);
+
+	err = xa_alloc(&xe->irq.msix.indexes, &index, irq_buf, range, GFP_KERNEL);
+	if (!err && dynamic_msix)
+		*msix = index;
+
+	return err;
+}
+
+/* Give an MSI-X index back by erasing its entry from the index xarray. */
+static void xe_irq_msix_release_vector(struct xe_device *xe, u16 msix)
+{
+	struct xarray *indexes = &xe->irq.msix.indexes;
+
+	xa_erase(indexes, msix);
+}
+
+/*
+ * Translate MSI-X index @msix into a Linux IRQ number and request it with
+ * @handler, using @irq_buf as the dev_id cookie.
+ *
+ * Return: 0 on success, negative error code from pci_irq_vector() or
+ * request_irq() otherwise.
+ */
+static int xe_irq_msix_request_irq_internal(struct xe_device *xe, irq_handler_t handler,
+					    void *irq_buf, const char *name, u16 msix)
+{
+	int irq, err;
+
+	irq = pci_irq_vector(to_pci_dev(xe->drm.dev), msix);
+	if (irq < 0)
+		return irq;
+
+	err = request_irq(irq, handler, IRQF_SHARED, name, irq_buf);
+
+	return err < 0 ? err : 0;
+}
+
+/**
+ * xe_irq_msix_request_irq - Reserve an MSI-X index and request its IRQ
+ * @xe: the xe device
+ * @handler: interrupt handler to install
+ * @irq_buf: cookie stored for the index and passed to the handler
+ * @name: name used when requesting the IRQ
+ * @dynamic_msix: true to pick a free dynamic index, false to use *@msix
+ * @msix: index to use (static) or location receiving the chosen index
+ *
+ * If requesting the IRQ fails, the reserved index is released again.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf,
+			    const char *name, bool dynamic_msix, u16 *msix)
+{
+	int err;
+
+	err = xe_irq_msix_alloc_vector(xe, irq_buf, dynamic_msix, msix);
+	if (err)
+		return err;
+
+	err = xe_irq_msix_request_irq_internal(xe, handler, irq_buf, name, *msix);
+	if (!err)
+		return 0;
+
+	drm_err(&xe->drm, "Failed to request IRQ for MSI-X %u\n", *msix);
+	xe_irq_msix_release_vector(xe, *msix);
+
+	return err;
+}
+
+/**
+ * xe_irq_msix_free_irq - Free the IRQ of an MSI-X index and release the index
+ * @xe: the xe device
+ * @msix: MSI-X index to free
+ *
+ * Does nothing when no entry is stored for @msix. When the index cannot be
+ * mapped to an IRQ number, an error is logged and the index stays reserved.
+ */
+void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix)
+{
+	void *irq_buf = xa_load(&xe->irq.msix.indexes, msix);
+	int irq;
+
+	if (!irq_buf)
+		return;
+
+	irq = pci_irq_vector(to_pci_dev(xe->drm.dev), msix);
+	if (irq >= 0) {
+		/* The stored entry is the dev_id cookie the IRQ was requested with. */
+		free_irq(irq, irq_buf);
+		xe_irq_msix_release_vector(xe, msix);
+		return;
+	}
+
+	drm_err(&xe->drm, "MSI-X %u can't be released, there is no matching IRQ\n", msix);
+}
+
+/*
+ * Request the two statically assigned MSI-X interrupts: GUC2HOST_MSIX for
+ * guc2host_irq_handler() and DEFAULT_MSIX for
+ * xe_irq_msix_default_hwe_handler().
+ *
+ * If the second request fails, the first one is freed again so nothing is
+ * left registered.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int xe_irq_msix_request_irqs(struct xe_device *xe)
+{
+	int err;
+	u16 msix;
+
+	msix = GUC2HOST_MSIX;
+	err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe,
+				      DRIVER_NAME "-guc2host", false, &msix);
+	if (err)
+		return err;
+
+	msix = DEFAULT_MSIX;
+	err = xe_irq_msix_request_irq(xe, xe_irq_msix_default_hwe_handler, xe,
+				      DRIVER_NAME "-default-msix", false, &msix);
+	if (err) {
+		xe_irq_msix_free_irq(xe, GUC2HOST_MSIX);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Free every MSI-X interrupt that is still tracked in the index xarray and
+ * then destroy the xarray itself.
+ */
+static void xe_irq_msix_free(struct xe_device *xe)
+{
+	unsigned long msix;
+	void *irq_buf;
+
+	/* The entry value is unused here; xe_irq_msix_free_irq() looks it up by index. */
+	xa_for_each(&xe->irq.msix.indexes, msix, irq_buf)
+		xe_irq_msix_free_irq(xe, msix);
+	xa_destroy(&xe->irq.msix.indexes);
+}
+
+/*
+ * Call synchronize_irq() on the IRQ of every MSI-X index that is currently
+ * tracked in the index xarray.
+ */
+static void xe_irq_msix_synchronize_irq(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	unsigned long msix;
+	void *irq_buf;
+	int irq;
+
+	xa_for_each(&xe->irq.msix.indexes, msix, irq_buf) {
+		irq = pci_irq_vector(pdev, msix);
+		/* Don't hand an error code to synchronize_irq() as an IRQ number. */
+		if (irq < 0)
+			continue;
+
+		synchronize_irq(irq);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
index 067514e13675..a28bd577ba52 100644
--- a/drivers/gpu/drm/xe/xe_irq.h
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -6,13 +6,21 @@
#ifndef _XE_IRQ_H_
#define _XE_IRQ_H_
+#include <linux/interrupt.h>
+
+#define XE_IRQ_DEFAULT_MSIX 1
+
struct xe_device;
struct xe_tile;
struct xe_gt;
+int xe_irq_init(struct xe_device *xe);
int xe_irq_install(struct xe_device *xe);
void xe_irq_suspend(struct xe_device *xe);
void xe_irq_resume(struct xe_device *xe);
void xe_irq_enable_hwe(struct xe_gt *gt);
+int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf,
+ const char *name, bool dynamic_msix, u16 *msix);
+void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix);
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 4f64c7f4e68d..bbb9ffbf6367 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -25,6 +25,7 @@
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
+#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"
@@ -583,6 +584,7 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
struct xe_device *xe = gt_to_xe(hwe->gt);
+ u8 num_regs;
if (!xe_device_uses_memirq(xe))
return;
@@ -592,12 +594,18 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
- regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+ num_regs = xe_device_has_msix(xe) ? 3 : 2;
+ regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
+
+ if (xe_device_has_msix(xe)) {
+ regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
+ /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
+ }
}
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
@@ -875,7 +883,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size)
+ struct xe_vm *vm, u32 ring_size, u16 msix_vec)
{
struct xe_gt *gt = hwe->gt;
struct xe_tile *tile = gt_to_tile(gt);
@@ -944,6 +952,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_drm_client_add_bo(vm->xef->client, lrc->bo);
}
+ if (xe_device_has_msix(xe)) {
+ xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
+ xe_memirq_status_ptr(&tile->memirq, hwe));
+ xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
+ xe_memirq_source_ptr(&tile->memirq, hwe));
+ xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
+ }
+
if (xe_gt_has_indirect_ring_state(gt)) {
xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
__xe_lrc_indirect_ring_ggtt_addr(lrc));
@@ -1004,6 +1020,7 @@ err_lrc_finish:
* @hwe: Hardware Engine
* @vm: The VM (address space)
* @ring_size: LRC ring size
+ * @msix_vec: MSI-X interrupt vector (for platforms that support it)
*
* Allocate and initialize the Logical Ring Context (LRC).
*
@@ -1011,7 +1028,7 @@ err_lrc_finish:
* upon failure.
*/
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size)
+ u32 ring_size, u16 msix_vec)
{
struct xe_lrc *lrc;
int err;
@@ -1020,7 +1037,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
if (!lrc)
return ERR_PTR(-ENOMEM);
- err = xe_lrc_init(lrc, hwe, vm, ring_size);
+ err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec);
if (err) {
kfree(lrc);
return ERR_PTR(err);
@@ -1060,6 +1077,14 @@ u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}
+/*
+ * Read the ring start value of @lrc: from the indirect ring state when the
+ * LRC has one, otherwise from the CTX_RING_START context register.
+ */
+static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
+{
+	if (!xe_lrc_has_indirect_ring_state(lrc))
+		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
+
+	return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
+}
+
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
if (xe_lrc_has_indirect_ring_state(lrc))
@@ -1635,10 +1660,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
xe_vm_get(lrc->bo->vm);
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
+ snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
snapshot->head = xe_lrc_ring_head(lrc);
snapshot->tail.internal = lrc->ring.tail;
snapshot->tail.memory = xe_lrc_ring_tail(lrc);
+ snapshot->start = xe_lrc_ring_start(lrc);
snapshot->start_seqno = xe_lrc_start_seqno(lrc);
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
@@ -1692,11 +1719,14 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
return;
drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
+ drm_printf(p, "\tHW Ring address: 0x%08x\n",
+ snapshot->ring_addr);
drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
snapshot->indirect_context_desc);
drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
snapshot->tail.internal, snapshot->tail.memory);
+ drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
@@ -1758,5 +1788,20 @@ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ trace_xe_lrc_update_timestamp(lrc, *old_ts);
+
return lrc->ctx_timestamp;
}
+
+/**
+ * xe_lrc_ring_is_idle() - LRC is idle
+ * @lrc: Pointer to the lrc.
+ *
+ * Compare LRC ring head and tail to determine if idle.
+ *
+ * Return: True if ring is idle, False otherwise
+ */
+bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
+{
+	u32 head = xe_lrc_ring_head(lrc);
+	u32 tail = xe_lrc_ring_tail(lrc);
+
+	/* The ring is reported idle when head and tail are equal */
+	return head == tail;
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 40d8f6906d3e..4206e6a8b50a 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -25,8 +25,10 @@ struct xe_lrc_snapshot {
unsigned long lrc_size, lrc_offset;
u32 context_desc;
+ u32 ring_addr;
u32 indirect_context_desc;
u32 head;
+ u32 start;
struct {
u32 internal;
u32 memory;
@@ -40,7 +42,7 @@ struct xe_lrc_snapshot {
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size);
+ u32 ring_size, u16 msix_vec);
void xe_lrc_destroy(struct kref *ref);
/**
@@ -78,6 +80,8 @@ u32 xe_lrc_ring_head(struct xe_lrc *lrc);
u32 xe_lrc_ring_space(struct xe_lrc *lrc);
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+bool xe_lrc_ring_is_idle(struct xe_lrc *lrc);
+
u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
u32 *xe_lrc_regs(struct xe_lrc *lrc);
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
index daf56c846d03..8a77c2423555 100644
--- a/drivers/gpu/drm/xe/xe_macros.h
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -10,9 +10,13 @@
#define XE_WARN_ON WARN_ON
-#define XE_IOCTL_DBG(xe, cond) \
- ((cond) && (drm_dbg(&(xe)->drm, \
- "Ioctl argument check failed at %s:%d: %s", \
- __FILE__, __LINE__, #cond), 1))
+/*
+ * Evaluate @cond exactly once. When it is true, emit a drm_dbg() message on
+ * @xe's drm device carrying the file, line and the stringified condition.
+ * The statement expression yields the normalised result (0 or 1), so the
+ * macro can be used directly as the condition of an if ().
+ */
+#define XE_IOCTL_DBG(xe, cond) ({ \
+ int cond__ = !!(cond); \
+ if (cond__) \
+ drm_dbg(&(xe)->drm, \
+ "Ioctl argument check failed at %s:%d: %s", \
+ __FILE__, __LINE__, #cond); \
+ cond__; \
+})
#endif
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index f833da88150a..404fa2a456d5 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -155,13 +155,6 @@ static const char *guc_name(struct xe_guc *guc)
*
*/
-static void __release_xe_bo(struct drm_device *drm, void *arg)
-{
- struct xe_bo *bo = arg;
-
- xe_bo_unpin_map_no_vm(bo);
-}
-
static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq)
{
/*
@@ -184,14 +177,12 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64));
BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K));
- /* XXX: convert to managed bo */
- bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE |
- XE_BO_FLAG_NEEDS_UC |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ bo = xe_managed_bo_create_pin_map(xe, tile, bo_size,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_NEEDS_UC |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out;
@@ -215,7 +206,7 @@ static int memirq_alloc_pages(struct xe_memirq *memirq)
xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0),
XE_MEMIRQ_STATUS_OFFSET(0));
- return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo);
+ return 0;
out:
memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
@@ -442,6 +433,9 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
+
+ if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name))
+ xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 1b97d90aadda..278bc96cf593 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1506,7 +1506,7 @@ err_bb:
* using the default engine for the updates, they will be performed in the
* order they grab the job_mutex. If different engines are used, external
* synchronization is needed for overlapping updates to maintain page-table
- * consistency. Note that the meaing of "overlapping" is that the updates
+ * consistency. Note that the meaning of "overlapping" is that the updates
* touch the same page-table, which might be a higher-level page-directory.
* If no pipelining is needed, then updates may be performed by the cpu.
*
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index bfc3deebdaa2..07b27114be9a 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -19,7 +19,7 @@
struct xe_modparam xe_modparam = {
.probe_display = true,
- .guc_log_level = 5,
+ .guc_log_level = 3,
.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
.wedged_mode = 1,
/* the rest are 0 by default */
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 5cc0f6f9bc11..eeb96b5f49e2 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -16,7 +16,6 @@
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
-#include "regs/xe_lrc_layout.h"
#include "regs/xe_oa_regs.h"
#include "xe_assert.h"
#include "xe_bb.h"
@@ -28,7 +27,6 @@
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc_pc.h"
-#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_oa.h"
@@ -90,6 +88,8 @@ struct xe_oa_open_param {
struct drm_xe_sync __user *syncs_user;
int num_syncs;
struct xe_sync_entry *syncs;
+ size_t oa_buffer_size;
+ int wait_num_reports;
};
struct xe_oa_config_bo {
@@ -234,11 +234,10 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
{
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
+ u32 tail, hw_tail, partial_report_size, available;
int report_size = stream->oa_buffer.format->size;
- u32 tail, hw_tail;
unsigned long flags;
bool pollin;
- u32 partial_report_size;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
@@ -282,8 +281,8 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
stream->oa_buffer.tail = tail;
- pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail,
- stream->oa_buffer.head) >= report_size;
+ available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head);
+ pollin = available >= stream->wait_num_reports * report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -397,11 +396,19 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
- struct xe_mmio *mmio = &stream->gt->mmio;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
+ int size_exponent = __ffs(stream->oa_buffer.bo->size);
+ u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT;
+ struct xe_mmio *mmio = &stream->gt->mmio;
unsigned long flags;
+ /*
+ * If oa buffer size is more than 16MB (exponent greater than 24), the
+ * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set.
+ */
+ oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK,
+ size_exponent > 24 ? size_exponent - 20 : size_exponent - 17);
+
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
@@ -857,15 +864,12 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_file_put(stream->xef);
}
-static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
+static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
{
struct xe_bo *bo;
- BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE);
- BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M);
-
bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
- XE_OA_BUFFER_SIZE, ttm_bo_type_kernel,
+ size, ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -1043,6 +1047,13 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}
+/*
+ * Build the masked OAG_OA_DEBUG_BUF_SIZE_SELECT field: the bit is set only
+ * when the stream's OA buffer is larger than 16M, otherwise it is cleared.
+ */
+static u32 oag_buf_size_select(const struct xe_oa_stream *stream)
+{
+	u32 select = 0;
+
+	if (stream->oa_buffer.bo->size > SZ_16M)
+		select = OAG_OA_DEBUG_BUF_SIZE_SELECT;
+
+	return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, select);
+}
+
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
struct xe_mmio *mmio = &stream->gt->mmio;
@@ -1075,6 +1086,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
_MASKED_BIT_ENABLE(oa_debug) |
oag_report_ctx_switches(stream) |
+ oag_buf_size_select(stream) |
oag_configure_mmio_trigger(stream, true));
xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
@@ -1216,6 +1228,28 @@ static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
return 0;
}
+/*
+ * Property setter for the OA buffer size. Accepts only a power of two in the
+ * range [SZ_128K, SZ_128M] and stores it in @param; anything else is logged
+ * at debug level and rejected with -EINVAL.
+ */
+static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
+					 struct xe_oa_open_param *param)
+{
+	bool valid = is_power_of_2(value) && value >= SZ_128K && value <= SZ_128M;
+
+	if (valid) {
+		param->oa_buffer_size = value;
+		return 0;
+	}
+
+	drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value);
+	return -EINVAL;
+}
+
+/*
+ * Property setter for the number of reports to wait for. Zero is rejected,
+ * and so is any value that does not fit the int it is stored in; a valid
+ * value is recorded in @param.
+ *
+ * Return: 0 on success, -EINVAL for a rejected value.
+ */
+static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value,
+					   struct xe_oa_open_param *param)
+{
+	/*
+	 * param->wait_num_reports is an int: without the upper bound a u64
+	 * value of 2^32 or more would be truncated on assignment and could be
+	 * accepted as a different, unrelated count.
+	 */
+	if (!value || value > INT_MAX) {
+		drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value);
+		return -EINVAL;
+	}
+	param->wait_num_reports = value;
+	return 0;
+}
+
static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
struct xe_oa_open_param *param)
{
@@ -1236,6 +1270,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+ [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
+ [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports,
};
static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
@@ -1250,6 +1286,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+ [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval,
};
static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
@@ -1509,7 +1547,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg)
{
- struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, };
+ struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, };
void __user *uaddr = (void __user *)arg;
if (copy_to_user(uaddr, &info, sizeof(info)))
@@ -1595,7 +1633,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
}
/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
- if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) {
+ if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) {
drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
return -EINVAL;
}
@@ -1633,81 +1671,6 @@ static const struct file_operations xe_oa_fops = {
.mmap = xe_oa_mmap,
};
-static bool engine_supports_mi_query(struct xe_hw_engine *hwe)
-{
- return hwe->class == XE_ENGINE_CLASS_RENDER ||
- hwe->class == XE_ENGINE_CLASS_COMPUTE;
-}
-
-static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
-{
- u32 idx = *offset;
- u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
- bool found = false;
-
- idx++;
- for (; idx < len; idx += 2) {
- if (state[idx] == reg) {
- found = true;
- break;
- }
- }
-
- *offset = idx;
- return found;
-}
-
-#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \
- REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM))
-
-static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg)
-{
- struct xe_lrc *lrc = stream->exec_q->lrc[0];
- u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) +
- lrc->ring.size) / sizeof(u32);
- u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
- u32 *state = (u32 *)lrc->bo->vmap.vaddr;
-
- if (drm_WARN_ON(&stream->oa->xe->drm, !state))
- return U32_MAX;
-
- for (; offset < len; ) {
- if (IS_MI_LRI_CMD(state[offset])) {
- /*
- * We expect reg-value pairs in MI_LRI command, so
- * MI_LRI_LEN() should be even
- */
- drm_WARN_ON(&stream->oa->xe->drm,
- MI_LRI_LEN(state[offset]) & 0x1);
-
- if (xe_oa_find_reg_in_lri(state, reg, &offset, len))
- break;
- } else {
- offset++;
- }
- }
-
- return offset < len ? offset : U32_MAX;
-}
-
-static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream)
-{
- struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base);
- u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class];
-
- /* Do this only once. Failure is stored as offset of U32_MAX */
- if (offset)
- goto exit;
-
- offset = xe_oa_context_image_offset(stream, reg.addr);
- stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset;
-
- drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n",
- stream->hwe->name, offset);
-exit:
- return offset && offset != U32_MAX ? 0 : -ENODEV;
-}
-
static int xe_oa_stream_init(struct xe_oa_stream *stream,
struct xe_oa_open_param *param)
{
@@ -1726,6 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent;
stream->no_preempt = param->no_preempt;
+ stream->wait_num_reports = param->wait_num_reports;
stream->xef = xe_file_get(param->xef);
stream->num_syncs = param->num_syncs;
@@ -1739,20 +1703,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
stream->oa_buffer.circ_size =
- XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+ param->oa_buffer_size -
+ param->oa_buffer_size % stream->oa_buffer.format->size;
else
- stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
-
- if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
- /* If we don't find the context offset, just return error */
- ret = xe_oa_set_ctx_ctrl_offset(stream);
- if (ret) {
- drm_err(&stream->oa->xe->drm,
- "xe_oa_set_ctx_ctrl_offset failed for %s\n",
- stream->hwe->name);
- goto exit;
- }
- }
+ stream->oa_buffer.circ_size = param->oa_buffer_size;
stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set);
if (!stream->oa_config) {
@@ -1784,7 +1738,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
goto err_fw_put;
}
- ret = xe_oa_alloc_oa_buffer(stream);
+ ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size);
if (ret)
goto err_fw_put;
@@ -2081,6 +2035,17 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
}
+ if (!param.oa_buffer_size)
+ param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;
+
+ if (!param.wait_num_reports)
+ param.wait_num_reports = 1;
+ if (param.wait_num_reports > param.oa_buffer_size / f->size) {
+ drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports);
+ ret = -EINVAL;
+ goto err_exec_q;
+ }
+
ret = xe_oa_parse_syncs(oa, &param);
if (ret)
goto err_exec_q;
@@ -2198,6 +2163,7 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = {
{ .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */
{ .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */
{ .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */
+ { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */
{ .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */
{ .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */
{ .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index fea9d981e414..52e33c37d5ee 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -15,7 +15,7 @@
#include "regs/xe_reg_defs.h"
#include "xe_hw_engine_types.h"
-#define XE_OA_BUFFER_SIZE SZ_16M
+#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
enum xe_oa_report_header {
HDR_32_BIT = 0,
@@ -138,9 +138,6 @@ struct xe_oa {
/** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */
struct idr metrics_idr;
- /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */
- u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX];
-
/** @oa_formats: tracks all OA formats across platforms */
const struct xe_oa_format *oa_formats;
@@ -218,6 +215,9 @@ struct xe_oa_stream {
/** @pollin: Whether there is data available to read */
bool pollin;
+ /** @wait_num_reports: Number of reports to wait for before signalling pollin */
+ int wait_num_reports;
+
/** @periodic: Whether periodic sampling is currently enabled */
bool periodic;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6b7f77425c7f..39be74848e44 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -490,7 +490,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
* least basic xe_gt and xe_guc initialization.
*
* Since to obtain the value of GMDID_MEDIA we need to use the
- * media GuC, temporarly tweak the gt type.
+ * media GuC, temporarily tweak the gt type.
*/
xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED);
@@ -781,7 +781,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
* error injectable functions is proper handling of the error code by the
* caller for recovery, which is always the case here. The second
* requirement is that no state is changed before the first error return.
- * It is not strictly fullfilled for all initialization functions using the
+ * It is not strictly fulfilled for all initialization functions using the
* ALLOW_ERROR_INJECTION() macro but this is acceptable because for those
* error cases at probe time, the error code is simply propagated up by the
* caller. Therefore there is no consequence on those specific callers when
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index d95d9835de42..9333ce776a6e 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -217,7 +217,7 @@ out:
*
* It returns 0 on success, and -ERROR number on failure, -EINVAL if max
* frequency is higher then the minimal, and other errors directly translated
- * from the PCODE Error returs:
+ * from the PCODE Error returns:
* - -ENXIO: "Illegal Command"
* - -ETIMEDOUT: "Timed out"
* - -EINVAL: "Illegal Data"
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 40f7c844ed44..c9cc0c091dfd 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -7,6 +7,7 @@
#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>
@@ -390,7 +391,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
/*
* Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
- * also checks and delets bo entry from user fault list.
+ * also checks and deletes bo entry from user fault list.
*/
mutex_lock(&xe->mem_access.vram_userfault.lock);
list_for_each_entry_safe(bo, on,
@@ -414,8 +415,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_irq_suspend(xe);
- if (xe->d3cold.allowed)
- xe_display_pm_suspend_late(xe);
+ xe_display_pm_runtime_suspend_late(xe);
+
out:
if (err)
xe_display_pm_runtime_resume(xe);
@@ -607,7 +608,8 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
struct device *dev = xe->drm.dev;
return dev->power.runtime_status == RPM_SUSPENDING ||
- dev->power.runtime_status == RPM_RESUMING;
+ dev->power.runtime_status == RPM_RESUMING ||
+ pm_suspend_target_state != PM_SUSPEND_ON;
#else
return false;
#endif
@@ -738,9 +740,6 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
xe->d3cold.allowed = false;
mutex_unlock(&xe->d3cold.lock);
-
- drm_dbg(&xe->drm,
- "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
}
/**
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 797576690356..1ddcc7e79a93 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -136,6 +136,7 @@ err_kfree:
xe_pt_free(pt);
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO);
/**
* xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
@@ -275,7 +276,7 @@ struct xe_pt_stage_bind_walk {
/* Also input, but is updated during the walk*/
/** @curs: The DMA address cursor. */
struct xe_res_cursor *curs;
- /** @va_curs_start: The Virtual address coresponding to @curs->start */
+ /** @va_curs_start: The Virtual address corresponding to @curs->start */
u64 va_curs_start;
/* Output */
@@ -1850,6 +1851,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
return 0;
}
+ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -2130,6 +2132,7 @@ kill_vm_tile1:
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);
/**
* xe_pt_update_ops_fini() - Finish PT update operations
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 170ae72d1a7b..c059639613f7 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -23,6 +23,7 @@
#include "xe_guc_hwconfig.h"
#include "xe_macros.h"
#include "xe_mmio.h"
+#include "xe_oa.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"
@@ -670,7 +671,9 @@ static int query_oa_units(struct xe_device *xe,
du->oa_unit_id = u->oa_unit_id;
du->oa_unit_type = u->type;
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
- du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
+ DRM_XE_OA_CAPS_OA_BUFFER_SIZE |
+ DRM_XE_OA_CAPS_WAIT_NUM_REPORTS;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index c13123008e90..9475e3f74958 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -24,7 +24,6 @@
#include "xe_hw_engine_types.h"
#include "xe_macros.h"
#include "xe_mmio.h"
-#include "xe_reg_whitelist.h"
#include "xe_rtp_types.h"
static void reg_sr_fini(struct drm_device *drm, void *arg)
@@ -192,58 +191,6 @@ err_force_wake:
xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
}
-void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
-{
- struct xe_reg_sr *sr = &hwe->reg_whitelist;
- struct xe_gt *gt = hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_reg_sr_entry *entry;
- struct drm_printer p;
- u32 mmio_base = hwe->mmio_base;
- unsigned long reg;
- unsigned int slot = 0;
- unsigned int fw_ref;
-
- if (xa_empty(&sr->xa))
- return;
-
- drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
- goto err_force_wake;
-
- p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
- xa_for_each(&sr->xa, reg, entry) {
- if (slot == RING_MAX_NONPRIV_SLOTS) {
- xe_gt_err(gt,
- "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
- hwe->name, RING_MAX_NONPRIV_SLOTS);
- break;
- }
-
- xe_reg_whitelist_print_entry(&p, 0, reg, entry);
- xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
- reg | entry->set_bits);
- slot++;
- }
-
- /* And clear the rest just in case of garbage */
- for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
- u32 addr = RING_NOPID(mmio_base).addr;
-
- xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
- }
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return;
-
-err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n");
-}
-
/**
* xe_reg_sr_dump - print all save/restore entries
* @sr: Save/restore entries
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 3996934974fa..edab5d4e3ba5 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -10,7 +10,9 @@
#include "regs/xe_oa_regs.h"
#include "regs/xe_regs.h"
#include "xe_gt_types.h"
+#include "xe_gt_printk.h"
#include "xe_platform_types.h"
+#include "xe_reg_sr.h"
#include "xe_rtp.h"
#include "xe_step.h"
@@ -89,6 +91,40 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
{}
};
+/*
+ * Walk the engine's register whitelist and, for each entry, add a
+ * save/restore entry to hwe->reg_sr that targets the next
+ * RING_FORCE_TO_NONPRIV slot of this engine. Each whitelist entry is also
+ * printed through the GT debug printer. Once RING_MAX_NONPRIV_SLOTS slots
+ * have been used, an error is logged and the remaining entries are dropped.
+ */
+static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
+{
+	struct xe_reg_sr *whitelist = &hwe->reg_whitelist;
+	struct xe_reg_sr_entry *wl_entry;
+	unsigned int slot = 0;
+	struct drm_printer p;
+	unsigned long idx;
+
+	xe_gt_dbg(hwe->gt, "Add %s whitelist to engine\n", whitelist->name);
+	p = xe_gt_dbg_printer(hwe->gt);
+
+	xa_for_each(&whitelist->xa, idx, wl_entry) {
+		struct xe_reg_sr_entry sr_entry;
+
+		if (slot == RING_MAX_NONPRIV_SLOTS) {
+			xe_gt_err(hwe->gt,
+				  "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
+				  hwe->name, RING_MAX_NONPRIV_SLOTS);
+			break;
+		}
+
+		/* The slot register receives the whitelisted address plus its flag bits */
+		sr_entry = (struct xe_reg_sr_entry) {
+			.reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot),
+			.set_bits = wl_entry->reg.addr | wl_entry->set_bits,
+			.clr_bits = ~0u,
+			.read_mask = wl_entry->read_mask,
+		};
+
+		xe_reg_whitelist_print_entry(&p, 0, idx, wl_entry);
+		xe_reg_sr_add(&hwe->reg_sr, &sr_entry, hwe->gt);
+
+		slot++;
+	}
+}
+
/**
* xe_reg_whitelist_process_engine - process table of registers to whitelist
* @hwe: engine instance to process whitelist for
@@ -102,6 +138,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
+ whitelist_apply_to_hwe(hwe);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index b13d4d62f0b1..7a1c78fdfc92 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -340,3 +340,8 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
return dss >= dss_per_gslice;
}
+/* Match whenever the device owning @gt is not an SR-IOV VF; @hwe is not used. */
+bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt,
+			       const struct xe_hw_engine *hwe)
+{
+	if (IS_SRIOV_VF(gt_to_xe(gt)))
+		return false;
+
+	return true;
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 827d932b6908..38b9f13bba5e 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -131,7 +131,7 @@ struct xe_reg_sr;
* @ver_end__: Last graphics IP version to match
*
* Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
- * inclusive on boths sides
+ * inclusive on both sides
*
* Refer to XE_RTP_RULES() for expected usage.
*/
@@ -169,7 +169,7 @@ struct xe_reg_sr;
* @ver_end__: Last media IP version to match
*
* Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
- * inclusive on boths sides
+ * inclusive on both sides
*
* Refer to XE_RTP_RULES() for expected usage.
*/
@@ -476,4 +476,15 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
+/**
+ * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device
+ *
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if device is not VF, false otherwise.
+ */
+bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index ef10782af656..04e2f539ccd9 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -14,6 +14,7 @@
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_sriov_pf.h"
+#include "xe_sriov_vf.h"
/**
* xe_sriov_mode_to_string - Convert enum value to string.
@@ -114,6 +115,9 @@ int xe_sriov_init(struct xe_device *xe)
return err;
}
+ if (IS_SRIOV_VF(xe))
+ xe_sriov_vf_init_early(xe);
+
xe_assert(xe, !xe->sriov.wq);
xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
if (!xe->sriov.wq)
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
index 7d156ba82479..dd1df950b021 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
@@ -20,7 +20,7 @@
* is within a range of supported VF numbers (up to maximum number of VFs that
* driver can support, including VF0 that represents the PF itself).
*
- * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ * Note: Effective only on debug builds. See `Xe Asserts`_ for more information.
*/
#define xe_sriov_pf_assert_vfid(xe, vfid) \
xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe))
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
index c7b7ad4af5c8..ca94382a721e 100644
--- a/drivers/gpu/drm/xe/xe_sriov_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -9,6 +9,7 @@
#include <linux/build_bug.h>
#include <linux/mutex.h>
#include <linux/types.h>
+#include <linux/workqueue_types.h>
/**
* VFID - Virtual Function Identifier
@@ -56,4 +57,20 @@ struct xe_device_pf {
struct mutex master_lock;
};
+/**
+ * struct xe_device_vf - Xe Virtual Function related data
+ *
+ * The data in this structure is valid only if driver is running in the
+ * @XE_SRIOV_MODE_VF mode.
+ */
+struct xe_device_vf {
+ /** @migration: VF Migration state data */
+ struct {
+ /** @migration.worker: VF migration recovery worker */
+ struct work_struct worker;
+ /** @migration.gt_flags: Per-GT request flags for VF migration recovery */
+ unsigned long gt_flags;
+ } migration;
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
new file mode 100644
index 000000000000..c1275e64aa9c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+#include "xe_sriov_vf.h"
+
+/**
+ * DOC: VF restore procedure in PF KMD and VF KMD
+ *
+ * Restoring previously saved state of a VF is one of core features of
+ * SR-IOV. All major VM Management applications allow saving and restoring
+ * the VM state, and doing that to a VM which uses SRIOV VF as one of
+ * the accessible devices requires support from KMD on both PF and VF side.
+ * VMM initiates all required operations through VFIO module, which then
+ * translates them into PF KMD calls. This description will focus on these
+ * calls, leaving out the module which initiates these steps (VFIO).
+ *
+ * In order to start the restore procedure, GuC needs to keep the VF in a
+ * proper state. The PF driver can ensure GuC sets it to the VF_READY state
+ * by provisioning the VF, which in turn can be done after Function Level
+ * Reset of said VF (or after it was freshly created - in that case FLR
+ * is not needed). The FLR procedure ends with GuC sending message
+ * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC.
+ * After the provisioning is completed, the VF needs to be paused, and
+ * at that point the actual restore can begin.
+ *
+ * During VF Restore, state of several resources is restored. These may
+ * include local memory content (system memory is restored by VMM itself),
+ * values of MMIO registers, stateless compression metadata and others.
+ * The final resource which also needs restoring is state of the VF
+ * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE`
+ * message is used, with reference to the state blob to be consumed by
+ * GuC.
+ *
+ * Next, when VFIO is asked to set the VM into running state, the PF driver
+ * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this
+ * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the
+ * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform
+ * the VF KMD within the VM that it was migrated.
+ *
+ * As soon as Virtual GPU of the VM starts, the VF driver within receives
+ * the MIGRATED interrupt and schedules post-migration recovery worker.
+ * That worker queries GuC for new provisioning (using MMIO communication),
+ * and applies fixups to any non-virtualized resources used by the VF.
+ *
+ * When the VF driver is ready to continue operation on the newly connected
+ * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE`, which causes GuC to move
+ * the VF into the long-awaited `VF_RUNNING` state, and therefore to start
+ * handling CTB messages and scheduling workloads from the VF::
+ *
+ * PF GuC VF
+ * [ ] | |
+ * [ ] PF2GUC_VF_CONTROL(pause) | |
+ * [ ]---------------------------> [ ] |
+ * [ ] [ ] GuC sets new VF state to |
+ * [ ] [ ]------- VF_READY_PAUSED |
+ * [ ] [ ] | |
+ * [ ] [ ] <----- |
+ * [ ] success [ ] |
+ * [ ] <---------------------------[ ] |
+ * [ ] | |
+ * [ ] PF loads resources from the | |
+ * [ ]------- saved image supplied | |
+ * [ ] | | |
+ * [ ] <----- | |
+ * [ ] | |
+ * [ ] GUC_PF_OPCODE_VF_RESTORE | |
+ * [ ]---------------------------> [ ] |
+ * [ ] [ ] GuC loads contexts and CTB |
+ * [ ] [ ]------- state from image |
+ * [ ] [ ] | |
+ * [ ] [ ] <----- |
+ * [ ] [ ] |
+ * [ ] [ ] GuC sets new VF state to |
+ * [ ] [ ]------- VF_RESFIX_PAUSED |
+ * [ ] [ ] | |
+ * [ ] success [ ] <----- |
+ * [ ] <---------------------------[ ] |
+ * [ ] | |
+ * [ ] GUC_PF_TRIGGER_VF_RESUME | |
+ * [ ]---------------------------> [ ] |
+ * [ ] [ ] GuC sets new VF state to |
+ * [ ] [ ]------- VF_RESFIX_BLOCKED |
+ * [ ] [ ] | |
+ * [ ] [ ] <----- |
+ * [ ] [ ] |
+ * [ ] [ ] GUC_INTR_SW_INT_0 |
+ * [ ] success [ ]---------------------------> [ ]
+ * [ ] <---------------------------[ ] [ ]
+ * | | VF2GUC_QUERY_SINGLE_KLV [ ]
+ * | [ ] <---------------------------[ ]
+ * | [ ] [ ]
+ * | [ ] new VF provisioning [ ]
+ * | [ ]---------------------------> [ ]
+ * | | [ ]
+ * | | VF driver applies post [ ]
+ * | | migration fixups -------[ ]
+ * | | | [ ]
+ * | | -----> [ ]
+ * | | [ ]
+ * | | VF2GUC_NOTIFY_RESFIX_DONE [ ]
+ * | [ ] <---------------------------[ ]
+ * | [ ] [ ]
+ * | [ ] GuC sets new VF state to [ ]
+ * | [ ]------- VF_RUNNING [ ]
+ * | [ ] | [ ]
+ * | [ ] <----- [ ]
+ * | [ ] success [ ]
+ * | [ ]---------------------------> [ ]
+ * | | |
+ * | | |
+ */
+
+static void migration_worker_func(struct work_struct *w);
+
+/**
+ * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
+ * @xe: the &xe_device to initialize
+ */
+void xe_sriov_vf_init_early(struct xe_device *xe)
+{
+ INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+}
+
+/**
+ * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
+ * @xe: the &xe_device struct instance
+ *
+ * After migration, we need to re-query all VF configuration to make sure
+ * they match previous provisioning. Note that most of VF provisioning
+ * shall be the same, except GGTT range, since GGTT is not virtualized per-VF.
+ *
+ * Returns: 0 if the operation completed successfully, or a negative error
+ * code otherwise.
+ */
+static int vf_post_migration_requery_guc(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int err, ret = 0;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_vf_query_config(gt);
+ ret = ret ?: err;
+ }
+
+ return ret;
+}
+
+/*
+ * vf_post_migration_imminent - Check if post-restore recovery is coming.
+ * @xe: the &xe_device struct instance
+ *
+ * Return: True if migration recovery worker will soon be running. Any worker currently
+ * executing does not affect the result.
+ */
+static bool vf_post_migration_imminent(struct xe_device *xe)
+{
+ return xe->sriov.vf.migration.gt_flags != 0 ||
+ work_pending(&xe->sriov.vf.migration.worker);
+}
+
+/*
+ * Notify all GuCs that applying the resource fixups has finished.
+ */
+static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, xe, id) {
+ if (vf_post_migration_imminent(xe))
+ goto skip;
+ xe_gt_sriov_vf_notify_resfix_done(gt);
+ }
+ return;
+
+skip:
+ drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
+}
+
+static void vf_post_migration_recovery(struct xe_device *xe)
+{
+ int err;
+
+ drm_dbg(&xe->drm, "migration recovery in progress\n");
+ xe_pm_runtime_get(xe);
+ err = vf_post_migration_requery_guc(xe);
+ if (vf_post_migration_imminent(xe))
+ goto defer;
+ if (unlikely(err))
+ goto fail;
+
+ /* FIXME: add the recovery steps */
+ vf_post_migration_notify_resfix_done(xe);
+ xe_pm_runtime_put(xe);
+ drm_notice(&xe->drm, "migration recovery ended\n");
+ return;
+defer:
+ xe_pm_runtime_put(xe);
+ drm_dbg(&xe->drm, "migration recovery deferred\n");
+ return;
+fail:
+ xe_pm_runtime_put(xe);
+ drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
+ xe_device_declare_wedged(xe);
+}
+
+static void migration_worker_func(struct work_struct *w)
+{
+ struct xe_device *xe = container_of(w, struct xe_device,
+ sriov.vf.migration.worker);
+
+ vf_post_migration_recovery(xe);
+}
+
+static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, xe, id) {
+ if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
+ xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
+ * @xe: the &xe_device to start recovery on
+ *
+ * This function shall be called only by VF.
+ */
+void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
+{
+ bool started;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ if (!vf_ready_to_recovery_on_all_gts(xe))
+ return;
+
+ WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
+ /* Ensure other threads see that no flags are set now. */
+ smp_mb();
+
+ started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
+ drm_info(&xe->drm, "VF migration recovery %s\n", started ?
+ "scheduled" : "already in progress");
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
new file mode 100644
index 000000000000..7b8622cff2b7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VF_H_
+#define _XE_SRIOV_VF_H_
+
+struct xe_device;
+
+void xe_sriov_vf_init_early(struct xe_device *xe);
+void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 91130ad8999c..d5281de04d54 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -211,6 +211,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
__string(dev, __dev_name_eq(job->q))
__field(u32, seqno)
__field(u32, lrc_seqno)
+ __field(u8, gt_id)
__field(u16, guc_id)
__field(u32, guc_state)
__field(u32, flags)
@@ -223,6 +224,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
__assign_str(dev);
__entry->seqno = xe_sched_job_seqno(job);
__entry->lrc_seqno = xe_sched_job_lrc_seqno(job);
+ __entry->gt_id = job->q->gt->info.id;
__entry->guc_id = job->q->guc->id;
__entry->guc_state =
atomic_read(&job->q->guc->state);
@@ -232,9 +234,9 @@ DECLARE_EVENT_CLASS(xe_sched_job,
__entry->batch_addr = (u64)job->ptrs[0].batch_addr;
),
- TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+ TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, gt=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
__get_str(dev), __entry->fence, __entry->seqno,
- __entry->lrc_seqno, __entry->guc_id,
+ __entry->lrc_seqno, __entry->gt_id, __entry->guc_id,
__entry->batch_addr, __entry->guc_state,
__entry->flags, __entry->error)
);
@@ -282,6 +284,7 @@ DECLARE_EVENT_CLASS(xe_sched_msg,
__string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data)))
__field(u32, opcode)
__field(u16, guc_id)
+ __field(u8, gt_id)
),
TP_fast_assign(
@@ -289,9 +292,11 @@ DECLARE_EVENT_CLASS(xe_sched_msg,
__entry->opcode = msg->opcode;
__entry->guc_id =
((struct xe_exec_queue *)msg->private_data)->guc->id;
+ __entry->gt_id =
+ ((struct xe_exec_queue *)msg->private_data)->gt->info.id;
),
- TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id,
+ TP_printk("dev=%s, gt=%u guc_id=%d, opcode=%u", __get_str(dev), __entry->gt_id, __entry->guc_id,
__entry->opcode)
);
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index 30a3cfbaaa09..ea50fee50c7d 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -48,6 +48,11 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
TP_ARGS(bo)
);
+DEFINE_EVENT(xe_bo, xe_bo_validate,
+ TP_PROTO(struct xe_bo *bo),
+ TP_ARGS(bo)
+);
+
TRACE_EVENT(xe_bo_move,
TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement,
bool move_lacks_source),
@@ -55,8 +60,8 @@ TRACE_EVENT(xe_bo_move,
TP_STRUCT__entry(
__field(struct xe_bo *, bo)
__field(size_t, size)
- __field(u32, new_placement)
- __field(u32, old_placement)
+ __string(new_placement_name, xe_mem_type_to_name[new_placement])
+ __string(old_placement_name, xe_mem_type_to_name[old_placement])
__string(device_id, __dev_name_bo(bo))
__field(bool, move_lacks_source)
),
@@ -64,15 +69,15 @@ TRACE_EVENT(xe_bo_move,
TP_fast_assign(
__entry->bo = bo;
__entry->size = bo->size;
- __entry->new_placement = new_placement;
- __entry->old_placement = old_placement;
+ __assign_str(new_placement_name);
+ __assign_str(old_placement_name);
__assign_str(device_id);
__entry->move_lacks_source = move_lacks_source;
),
TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s",
__entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size,
- xe_mem_type_to_name[__entry->old_placement],
- xe_mem_type_to_name[__entry->new_placement], __get_str(device_id))
+ __get_str(old_placement_name),
+ __get_str(new_placement_name), __get_str(device_id))
);
DECLARE_EVENT_CLASS(xe_vma,
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.c b/drivers/gpu/drm/xe/xe_trace_lrc.c
new file mode 100644
index 000000000000..ab9b7e2970bc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "xe_trace_lrc.h"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h
new file mode 100644
index 000000000000..5c669a0b2180
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xe
+
+#if !defined(_XE_TRACE_LRC_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _XE_TRACE_LRC_H_
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+#include "xe_gt_types.h"
+#include "xe_lrc.h"
+#include "xe_lrc_types.h"
+
+#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev)
+
+TRACE_EVENT(xe_lrc_update_timestamp,
+ TP_PROTO(struct xe_lrc *lrc, uint32_t old),
+ TP_ARGS(lrc, old),
+ TP_STRUCT__entry(
+ __field(struct xe_lrc *, lrc)
+ __field(u32, old)
+ __field(u32, new)
+ __string(name, lrc->fence_ctx.name)
+ __string(device_id, __dev_name_lrc(lrc))
+ ),
+
+ TP_fast_assign(
+ __entry->lrc = lrc;
+ __entry->old = old;
+ __entry->new = lrc->ctx_timestamp;
+ __assign_str(name);
+ __assign_str(device_id);
+ ),
+ TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s",
+ __entry->lrc, __get_str(name),
+ __entry->old, __entry->new,
+ __get_str(device_id))
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
+#define TRACE_INCLUDE_FILE xe_trace_lrc
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 423b261ea743..f4a16e5fa770 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -5,6 +5,7 @@
*/
#include <drm/drm_managed.h>
+#include <drm/drm_drv.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
@@ -52,7 +53,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
struct xe_ttm_vram_mgr_resource *vres;
struct drm_buddy *mm = &mgr->mm;
- u64 size, remaining_size, min_page_size;
+ u64 size, min_page_size;
unsigned long lpfn;
int err;
@@ -98,17 +99,6 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
goto error_fini;
}
- if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */
- err = -EINVAL;
- goto error_fini;
- }
-
- if (WARN_ON((size > SZ_2G &&
- (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) {
- err = -EINVAL;
- goto error_fini;
- }
-
if (WARN_ON(!IS_ALIGNED(size, min_page_size))) {
err = -EINVAL;
goto error_fini;
@@ -116,12 +106,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
mutex_lock(&mgr->lock);
if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) {
- mutex_unlock(&mgr->lock);
err = -ENOSPC;
- goto error_fini;
+ goto error_unlock;
}
- if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn &&
+ if (place->fpfn + (size >> PAGE_SHIFT) != lpfn &&
place->flags & TTM_PL_FLAG_CONTIGUOUS) {
size = roundup_pow_of_two(size);
min_page_size = size;
@@ -129,25 +118,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
}
- remaining_size = size;
- do {
- /*
- * Limit maximum size to 2GiB due to SG table limitations.
- * FIXME: Should maybe be handled as part of sg construction.
- */
- u64 alloc_size = min_t(u64, remaining_size, SZ_2G);
-
- err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
- (u64)lpfn << PAGE_SHIFT,
- alloc_size,
- min_page_size,
- &vres->blocks,
- vres->flags);
- if (err)
- goto error_free_blocks;
-
- remaining_size -= alloc_size;
- } while (remaining_size);
+ err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+ (u64)lpfn << PAGE_SHIFT, size,
+ min_page_size, &vres->blocks, vres->flags);
+ if (err)
+ goto error_unlock;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
@@ -194,9 +169,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
*res = &vres->base;
return 0;
-
-error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks, 0);
+error_unlock:
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
@@ -339,6 +312,13 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
struct ttm_resource_manager *man = &mgr->manager;
int err;
+ if (mem_type != XE_PL_STOLEN) {
+ const char *name = mem_type == XE_PL_VRAM0 ? "vram0" : "vram1";
+ man->cg = drmm_cgroup_register_region(&xe->drm, name, size);
+ if (IS_ERR(man->cg))
+ return PTR_ERR(man->cg);
+ }
+
man->func = &xe_ttm_vram_mgr_func;
mgr->mem_type = mem_type;
mutex_init(&mgr->lock);
@@ -393,7 +373,8 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
xe_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
- xe_res_next(&cursor, cursor.size);
+ /* Limit maximum size to 2GiB due to SG table limitations. */
+ xe_res_next(&cursor, min_t(u64, cursor.size, SZ_2G));
}
r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
@@ -413,7 +394,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
xe_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
phys_addr_t phys = cursor.start + tile->mem.vram.io_start;
- size_t size = cursor.size;
+ size_t size = min_t(u64, cursor.size, SZ_2G);
dma_addr_t addr;
addr = dma_map_resource(dev, phys, size, dir,
@@ -426,7 +407,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
sg_dma_address(sg) = addr;
sg_dma_len(sg) = size;
- xe_res_next(&cursor, cursor.size);
+ xe_res_next(&cursor, size);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 0d8caa0e7354..ad3b35a0e6eb 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -92,7 +92,7 @@ struct xe_uc_fw {
const enum xe_uc_fw_status status;
/**
* @__status: private firmware load status - only to be used
- * by firmware laoding code
+ * by firmware loading code
*/
enum xe_uc_fw_status __status;
};
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c99380271de6..690330352d4c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -10,7 +10,6 @@
#include <drm/drm_exec.h>
#include <drm/drm_print.h>
-#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
#include <linux/ascii85.h>
@@ -733,13 +732,14 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
vops->pt_update_ops[i].ops =
kmalloc_array(vops->pt_update_ops[i].num_ops,
sizeof(*vops->pt_update_ops[i].ops),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!vops->pt_update_ops[i].ops)
return array_of_binds ? -ENOBUFS : -ENOMEM;
}
return 0;
}
+ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
@@ -1024,7 +1024,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
/*
* Since userptr pages are not pinned, we can't remove
- * the notifer until we're sure the GPU is not accessing
+ * the notifier until we're sure the GPU is not accessing
* them anymore
*/
mmu_interval_notifier_remove(&userptr->notifier);
@@ -1352,6 +1352,7 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
return 0;
}
+ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
static void xe_vm_free_scratch(struct xe_vm *vm)
{
@@ -1978,6 +1979,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
return ops;
}
+ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
u16 pat_index, unsigned int flags)
@@ -2105,7 +2107,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
}
}
- /* Adjust for partial unbind after removin VMA from VM */
+ /* Adjust for partial unbind after removing VMA from VM */
if (!err) {
op->base.remap.unmap->va->va.addr = op->remap.start;
op->base.remap.unmap->va->va.range = op->remap.range;
@@ -2357,13 +2359,15 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
bool validate)
{
struct xe_bo *bo = xe_vma_bo(vma);
+ struct xe_vm *vm = xe_vma_vm(vma);
int err = 0;
if (bo) {
if (!bo->vm)
err = drm_exec_lock_obj(exec, &bo->ttm.base);
if (!err && validate)
- err = xe_bo_validate(bo, xe_vma_vm(vma), true);
+ err = xe_bo_validate(bo, vm,
+ !xe_vm_in_preempt_fence_mode(vm));
}
return err;
@@ -2697,6 +2701,7 @@ unlock:
drm_exec_fini(&exec);
return err;
}
+ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
#define SUPPORTED_FLAGS_STUB \
(DRM_XE_VM_BIND_FLAG_READONLY | \
@@ -2733,7 +2738,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
*bind_ops = kvmalloc_array(args->num_binds,
sizeof(struct drm_xe_vm_bind_op),
- GFP_KERNEL | __GFP_ACCOUNT);
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!*bind_ops)
return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
@@ -2973,14 +2979,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (args->num_binds) {
bos = kvcalloc(args->num_binds, sizeof(*bos),
- GFP_KERNEL | __GFP_ACCOUNT);
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!bos) {
err = -ENOMEM;
goto release_vm_lock;
}
ops = kvcalloc(args->num_binds, sizeof(*ops),
- GFP_KERNEL | __GFP_ACCOUNT);
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!ops) {
err = -ENOMEM;
goto release_vm_lock;
@@ -3303,7 +3311,6 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
for (int i = 0; i < snap->num_snaps; i++) {
struct xe_bo *bo = snap->snap[i].bo;
- struct iosys_map src;
int err;
if (IS_ERR(snap->snap[i].data))
@@ -3316,16 +3323,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
}
if (bo) {
- xe_bo_lock(bo, false);
- err = ttm_bo_vmap(&bo->ttm, &src);
- if (!err) {
- xe_map_memcpy_from(xe_bo_device(bo),
- snap->snap[i].data,
- &src, snap->snap[i].bo_ofs,
- snap->snap[i].len);
- ttm_bo_vunmap(&bo->ttm, &src);
- }
- xe_bo_unlock(bo);
+ err = xe_bo_read(bo, snap->snap[i].bo_ofs,
+ snap->snap[i].data, snap->snap[i].len);
} else {
void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index c864dba35e1d..23adb7442881 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -17,7 +17,6 @@ struct drm_printer;
struct drm_file;
struct ttm_buffer_object;
-struct ttm_validate_buffer;
struct xe_exec_queue;
struct xe_file;
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index 4d33f310b653..078786958403 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -64,8 +64,8 @@
* update page level 2 PDE[1] to page level 3b phys address (GPU)
*
* bind BO2 0x1ff000-0x201000
- * update page level 3a PTE[511] to BO2 phys addres (GPU)
- * update page level 3b PTE[0] to BO2 phys addres + 0x1000 (GPU)
+ * update page level 3a PTE[511] to BO2 phys address (GPU)
+ * update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU)
*
* GPU bypass
* ~~~~~~~~~~
@@ -192,7 +192,7 @@
*
* If a VM is in fault mode (TODO: link to fault mode), new bind operations that
* create mappings are by default deferred to the page fault handler (first
- * use). This behavior can be overriden by setting the flag
+ * use). This behavior can be overridden by setting the flag
* DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
* immediately.
*
@@ -209,7 +209,7 @@
*
* Since this a core kernel managed memory the kernel can move this memory
* whenever it wants. We register an invalidation MMU notifier to alert XE when
- * a user poiter is about to move. The invalidation notifier needs to block
+ * a user pointer is about to move. The invalidation notifier needs to block
* until all pending users (jobs or compute mode engines) of the userptr are
* idle to ensure no faults. This done by waiting on all of VM's dma-resv slots.
*
@@ -252,7 +252,7 @@
* Rebind worker
* -------------
*
- * The rebind worker is very similar to an exec. It is resposible for rebinding
+ * The rebind worker is very similar to an exec. It is responsible for rebinding
* evicted BOs or userptrs, waiting on those operations, installing new preempt
* fences, and finally resuming executing of engines in the VM.
*
@@ -317,11 +317,11 @@
* are not allowed, only long running workloads and ULLS are enabled on a faulting
* VM.
*
- * Defered VM binds
+ * Deferred VM binds
* ----------------
*
* By default, on a faulting VM binds just allocate the VMA and the actual
- * updating of the page tables is defered to the page fault handler. This
+ * updating of the page tables is deferred to the page fault handler. This
* behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in
* the VM bind which will then do the bind immediately.
*
@@ -500,18 +500,18 @@
* Slot waiting
* ------------
*
- * 1. The exection of all jobs from kernel ops shall wait on all slots
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
* (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
* kernel op is operating on external or private BO)
*
- * 2. In non-compute mode, the exection of all jobs from rebinds in execs shall
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
* wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
* (depends on if the rebind is operatiing on an external or private BO)
*
- * 3. In non-compute mode, the exection of all jobs from execs shall wait on the
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on the
* last rebind job
*
- * 4. In compute mode, the exection of all jobs from rebinds in the rebind
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
* worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
* or VM (depends on if rebind is operating on external or private BO)
*
diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c
new file mode 100644
index 000000000000..b378848d3b7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vsec.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright © 2024 Intel Corporation */
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/cleanup.h>
+#include <linux/errno.h>
+#include <linux/intel_vsec.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_drv.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_pm.h"
+#include "xe_vsec.h"
+
+#include "regs/xe_pmt.h"
+
+/*
+ * PMT GUID value for BMG devices: the Device ID field of a decoded GUID is
+ * compared against this in xe_guid_decode(). NOTE: this is NOT a PCI id
+ */
+#define BMG_DEVICE_ID 0xE2F8
+
+/* BMG telemetry header: two entries located at BMG_DISCOVERY_OFFSET. */
+static struct intel_vsec_header bmg_telemetry = {
+	.id = VSEC_ID_TELEMETRY,
+	.tbir = 0,
+	.offset = BMG_DISCOVERY_OFFSET,
+	.length = 0x10,
+	.num_entries = 2,
+	.entry_size = 4,
+};
+
+/* BMG PUNIT crashlog header: one entry, 0x60 past BMG_DISCOVERY_OFFSET. */
+static struct intel_vsec_header bmg_punit_crashlog = {
+	.id = VSEC_ID_CRASHLOG,
+	.tbir = 0,
+	.offset = BMG_DISCOVERY_OFFSET + 0x60,
+	.length = 0x10,
+	.num_entries = 1,
+	.entry_size = 4,
+};
+
+/* BMG OOBMSM crashlog header: one entry, 0x78 past BMG_DISCOVERY_OFFSET. */
+static struct intel_vsec_header bmg_oobmsm_crashlog = {
+	.id = VSEC_ID_CRASHLOG,
+	.tbir = 0,
+	.offset = BMG_DISCOVERY_OFFSET + 0x78,
+	.length = 0x10,
+	.num_entries = 1,
+	.entry_size = 4,
+};
+
+/*
+ * All VSEC headers exposed for BMG (telemetry plus the two crashlog
+ * instances). The list ends with a NULL entry.
+ */
+static struct intel_vsec_header *bmg_capabilities[] = {
+	&bmg_telemetry,
+	&bmg_punit_crashlog,
+	&bmg_oobmsm_crashlog,
+	NULL,
+};
+
+/* Index into xe_vsec_info[]; 0 means the platform has no VSEC support here. */
+enum xe_vsec {
+	XE_VSEC_UNKNOWN = 0,
+	XE_VSEC_BMG = 1,
+};
+
+/*
+ * Per-platform VSEC description, indexed by enum xe_vsec. The
+ * XE_VSEC_UNKNOWN slot is left zero-initialized, so its headers pointer is
+ * NULL. An empty element follows the last platform entry.
+ */
+static struct intel_vsec_platform_info xe_vsec_info[] = {
+	[XE_VSEC_BMG] = {
+		.headers = bmg_capabilities,
+		.caps = VSEC_CAP_TELEMETRY | VSEC_CAP_CRASHLOG,
+	},
+	{ }
+};
+
+/*
+ * The GUID will have the following bits to decode:
+ * [3:0] - Telemetry space iteration number (0, 1, ...)
+ * [7:4] - Segment (SEGMENT_INDEPENDENT-0, Client-1, Server-2)
+ * [11:8] - SOC_SKU
+ * [27:12] - Device ID, changes for each down-bin SKU
+ * [29:28] - Capability Type (Crashlog-0, Telemetry Aggregator-1, Watcher-2)
+ * [31:30] - Record-ID (0-PUNIT, 1-OOBMSM_0, 2-OOBMSM_1)
+ */
+#define GUID_TELEM_ITERATION GENMASK(3, 0)
+#define GUID_SEGMENT GENMASK(7, 4)
+#define GUID_SOC_SKU GENMASK(11, 8)
+#define GUID_DEVICE_ID GENMASK(27, 12)
+#define GUID_CAP_TYPE GENMASK(29, 28)
+#define GUID_RECORD_ID GENMASK(31, 30)
+
+#define PUNIT_TELEMETRY_OFFSET 0x0200 /* offset for record PUNIT, capability TELEMETRY */
+#define PUNIT_WATCHER_OFFSET 0x14A0 /* offset for record PUNIT, capability WATCHER */
+#define OOBMSM_0_WATCHER_OFFSET 0x18D8 /* offset for record OOBMSM_0, capability WATCHER */
+#define OOBMSM_1_TELEMETRY_OFFSET 0x1000 /* offset for record OOBMSM_1, capability TELEMETRY */
+
+/* Values of the GUID Record-ID field (GUID_RECORD_ID). */
+enum record_id {
+	PUNIT = 0,
+	OOBMSM_0 = 1,
+	OOBMSM_1 = 2,
+};
+
+/* Values of the GUID Capability Type field (GUID_CAP_TYPE). */
+enum capability {
+	CRASHLOG = 0,
+	TELEMETRY = 1,
+	WATCHER = 2,
+};
+
+/*
+ * Translate a PMT GUID into a region index and a byte offset.
+ *
+ * @guid:   GUID to decode (see the GUID_* field masks)
+ * @index:  out, region index selected by the record id / capability type
+ * @offset: out, byte offset associated with the record id / capability type
+ *
+ * Crashlog GUIDs map to index 2 for PUNIT and index 4 for any other record
+ * id, always with offset 0. Telemetry and watcher GUIDs map to index 0 for
+ * PUNIT and index 1 for OOBMSM_0/OOBMSM_1; combinations without a dedicated
+ * offset constant keep offset 0.
+ *
+ * Returns 0 on success, -ENODEV when the device id field is not
+ * BMG_DEVICE_ID, and -EINVAL for an unknown capability type or record id.
+ * On -ENODEV and on the capability-type -EINVAL neither output is written;
+ * on the record-id -EINVAL *offset has already been zeroed while *index is
+ * left untouched.
+ */
+static int xe_guid_decode(u32 guid, int *index, u32 *offset)
+{
+	const u32 dev = FIELD_GET(GUID_DEVICE_ID, guid);
+	const u32 cap = FIELD_GET(GUID_CAP_TYPE, guid);
+	const u32 rec = FIELD_GET(GUID_RECORD_ID, guid);
+
+	if (dev != BMG_DEVICE_ID)
+		return -ENODEV;
+
+	if (cap > WATCHER)
+		return -EINVAL;
+
+	*offset = 0;
+
+	if (cap == CRASHLOG) {
+		if (rec == PUNIT)
+			*index = 2;
+		else
+			*index = 4;
+		return 0;
+	}
+
+	/* Only TELEMETRY and WATCHER reach this point. */
+	if (rec == PUNIT) {
+		*index = 0;
+		*offset = (cap == TELEMETRY) ? PUNIT_TELEMETRY_OFFSET :
+					       PUNIT_WATCHER_OFFSET;
+		return 0;
+	}
+
+	if (rec == OOBMSM_0) {
+		*index = 1;
+		if (cap == WATCHER)
+			*offset = OOBMSM_0_WATCHER_OFFSET;
+		return 0;
+	}
+
+	if (rec == OOBMSM_1) {
+		*index = 1;
+		if (cap == TELEMETRY)
+			*offset = OOBMSM_1_TELEMETRY_OFFSET;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Read PMT telemetry bytes from the device MMIO space.
+ *
+ * @pdev:        PCI device; resolved to its xe_device
+ * @guid:        PMT GUID, decoded by xe_guid_decode() into a region index
+ *               and a base offset
+ * @data:        destination buffer; receives @count bytes
+ * @user_offset: byte offset added on top of the GUID-derived offset
+ * @count:       number of bytes to copy
+ *
+ * The read address is xe->mmio.regs + BMG_TELEMETRY_OFFSET plus both offsets.
+ * xe->pmt.lock is held (scope guard) from before the remap index is
+ * programmed until the function returns, so the remap write and the copy are
+ * not interleaved with another caller taking the same lock.
+ *
+ * Returns @count on success, the xe_guid_decode() error for a GUID it
+ * rejects, or -ENODATA when xe_pm_runtime_get_if_active() does not grant a
+ * runtime PM reference.
+ */
+static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset,
+			     u32 count)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET;
+	/* int, not u32: xe_guid_decode() takes an 'int *' for the index */
+	int mem_region;
+	u32 offset;
+	int ret;
+
+	ret = xe_guid_decode(guid, &mem_region, &offset);
+	if (ret)
+		return ret;
+
+	telem_addr += offset + user_offset;
+
+	guard(mutex)(&xe->pmt.lock);
+
+	/* indicate that we are not at an appropriate power level */
+	if (!xe_pm_runtime_get_if_active(xe))
+		return -ENODATA;
+
+	/* set SoC re-mapper index register based on GUID memory region */
+	xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS,
+		      REG_FIELD_PREP(SG_REMAP_BITS, mem_region));
+
+	memcpy_fromio(data, telem_addr, count);
+	xe_pm_runtime_put(xe);
+
+	return count;
+}
+
+/* PMT callbacks attached as priv_data for BMG in xe_vsec_init(). */
+static struct pmt_callbacks xe_pmt_cb = { .read_telem = xe_pmt_telem_read };
+
+/*
+ * Map from xe platform to enum xe_vsec. Platforms without an explicit entry
+ * are zero-initialized, i.e. XE_VSEC_UNKNOWN.
+ */
+static const int vsec_platforms[] = { [XE_BATTLEMAGE] = XE_VSEC_BMG };
+
+/*
+ * Look up the VSEC variant for @xe's platform. Platforms past
+ * XE_BATTLEMAGE (the last slot of vsec_platforms[]) are reported as
+ * XE_VSEC_UNKNOWN without indexing the table.
+ */
+static enum xe_vsec get_platform_info(struct xe_device *xe)
+{
+	const bool in_table = xe->info.platform <= XE_BATTLEMAGE;
+
+	return in_table ? vsec_platforms[xe->info.platform] : XE_VSEC_UNKNOWN;
+}
+
+/**
+ * xe_vsec_init - Initialize resources and add intel_vsec auxiliary
+ * interface
+ * @xe: valid xe instance
+ *
+ * Does nothing when the platform has no VSEC entry or the entry carries no
+ * headers.
+ */
+void xe_vsec_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	enum xe_vsec platform = get_platform_info(xe);
+	struct intel_vsec_platform_info *info;
+
+	if (platform == XE_VSEC_UNKNOWN)
+		return;
+
+	info = &xe_vsec_info[platform];
+	if (!info->headers)
+		return;
+
+	/* Only BMG has platform-specific private data (the PMT callbacks). */
+	if (platform == XE_VSEC_BMG)
+		info->priv_data = &xe_pmt_cb;
+
+	/*
+	 * Register a VSEC. Cleanup is handled using device managed
+	 * resources.
+	 */
+	intel_vsec_register(pdev, info);
+}
+MODULE_IMPORT_NS("INTEL_VSEC");
diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h
new file mode 100644
index 000000000000..5777c53faec2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vsec.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright © 2024 Intel Corporation */
+
+#ifndef _XE_VSEC_H_
+#define _XE_VSEC_H_
+
+struct xe_device;
+
+void xe_vsec_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 02cf647f86d8..570fe0376402 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -607,6 +607,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
},
+ { XE_RTP_NAME("16024792527"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER,
+ SMP_FORCE_128B_OVERFETCH))
+ },
{}
};
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index bcd04464b85e..40438c3d9b72 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,3 +1,4 @@
+1607983814 GRAPHICS_VERSION_RANGE(1200, 1210)
22012773006 GRAPHICS_VERSION_RANGE(1200, 1250)
14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
PLATFORM(DG2)
@@ -33,7 +34,7 @@
GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
- MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
+ MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
22019338487_display PLATFORM(LUNARLAKE)
16023588340 GRAPHICS_VERSION(2001)
14019789679 GRAPHICS_VERSION(1255)