summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/gaudi/gaudi.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c248
1 files changed, 222 insertions, 26 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f6cfd6e075e2..0d2edb25c92a 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -354,6 +354,10 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
u32 size, u64 val);
+static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
+ u32 num_regs, u32 val);
+static int gaudi_schedule_register_memset(struct hl_device *hdev,
+ u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -913,11 +917,17 @@ static void gaudi_sob_group_hw_reset(struct kref *ref)
struct gaudi_hw_sob_group *hw_sob_group =
container_of(ref, struct gaudi_hw_sob_group, kref);
struct hl_device *hdev = hw_sob_group->hdev;
- int i;
+ u64 base_addr;
+ int rc;
- for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
- WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
- (hw_sob_group->base_sob_id + i) * 4, 0);
+ base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+ hw_sob_group->base_sob_id * 4;
+ rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
+ base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
+ if (rc)
+ dev_err(hdev->dev,
+ "failed resetting sob group - sob base %u, count %u",
+ hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
kref_init(&hw_sob_group->kref);
}
@@ -1008,6 +1018,8 @@ static void gaudi_collective_master_init_job(struct hl_device *hdev,
cprop->hw_sob_group[sob_group_offset].base_sob_id;
master_monitor = prop->collective_mstr_mon_id[0];
+ cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
+
dev_dbg(hdev->dev,
"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
master_sob_base, cprop->mstr_sob_mask[0],
@@ -5583,31 +5595,206 @@ release_cb:
return rc;
}
-static void gaudi_restore_sm_registers(struct hl_device *hdev)
+static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
+ u32 num_regs, u32 val)
+{
+ struct packet_msg_long *pkt;
+ struct hl_cs_job *job;
+ u32 cb_size, ctl;
+ struct hl_cb *cb;
+ int i, rc;
+
+ cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
+
+ if (cb_size > SZ_2M) {
+ dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
+ return -ENOMEM;
+ }
+
+ cb = hl_cb_kernel_create(hdev, cb_size, false);
+ if (!cb)
+ return -EFAULT;
+
+ pkt = cb->kernel_address;
+
+ ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
+ for (i = 0; i < num_regs ; i++, pkt++) {
+ pkt->ctl = cpu_to_le32(ctl);
+ pkt->value = cpu_to_le32(val);
+ pkt->addr = cpu_to_le64(reg_base + (i * 4));
+ }
+
+ job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+ if (!job) {
+ dev_err(hdev->dev, "Failed to allocate a new job\n");
+ rc = -ENOMEM;
+ goto release_cb;
+ }
+
+ job->id = 0;
+ job->user_cb = cb;
+ atomic_inc(&job->user_cb->cs_cnt);
+ job->user_cb_size = cb_size;
+ job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
+ job->patched_cb = job->user_cb;
+ job->job_cb_size = cb_size;
+
+ hl_debugfs_add_job(hdev, job);
+
+ rc = gaudi_send_job_on_qman0(hdev, job);
+ hl_debugfs_remove_job(hdev, job);
+ kfree(job);
+ atomic_dec(&cb->cs_cnt);
+
+release_cb:
+ hl_cb_put(cb);
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+ return rc;
+}
+
+static int gaudi_schedule_register_memset(struct hl_device *hdev,
+ u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
{
+ struct hl_ctx *ctx = hdev->compute_ctx;
+ struct hl_pending_cb *pending_cb;
+ struct packet_msg_long *pkt;
+ u32 cb_size, ctl;
+ struct hl_cb *cb;
int i;
- for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
- WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
- WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
- WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
+ /* If no compute context available or context is going down
+ * memset registers directly
+ */
+ if (!ctx || kref_read(&ctx->refcount) == 0)
+ return gaudi_memset_registers(hdev, reg_base, num_regs, val);
+
+ cb_size = (sizeof(*pkt) * num_regs) +
+ sizeof(struct packet_msg_prot) * 2;
+
+ if (cb_size > SZ_2M) {
+ dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
+ return -ENOMEM;
+ }
+
+ pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
+ if (!pending_cb)
+ return -ENOMEM;
+
+ cb = hl_cb_kernel_create(hdev, cb_size, false);
+ if (!cb) {
+ kfree(pending_cb);
+ return -EFAULT;
+ }
+
+ pkt = cb->kernel_address;
+
+ ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
+ for (i = 0; i < num_regs ; i++, pkt++) {
+ pkt->ctl = cpu_to_le32(ctl);
+ pkt->value = cpu_to_le32(val);
+ pkt->addr = cpu_to_le64(reg_base + (i * 4));
+ }
+
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+ pending_cb->cb = cb;
+ pending_cb->cb_size = cb_size;
+ /* The queue ID MUST be an external queue ID. Otherwise, we will
+ * have undefined behavior
+ */
+ pending_cb->hw_queue_id = hw_queue_id;
+
+ spin_lock(&ctx->pending_cb_lock);
+ list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
+ spin_unlock(&ctx->pending_cb_lock);
+
+ return 0;
+}
+
+static int gaudi_restore_sm_registers(struct hl_device *hdev)
+{
+ u64 base_addr;
+ u32 num_regs;
+ int rc;
+
+ base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
+ num_regs = NUM_OF_SOB_IN_BLOCK;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
+ }
+
+ base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
+ num_regs = NUM_OF_SOB_IN_BLOCK;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
+ }
+
+ base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
+ num_regs = NUM_OF_SOB_IN_BLOCK;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
}
- for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
- WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
- WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
- WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
+ base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
+ num_regs = NUM_OF_MONITORS_IN_BLOCK;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
}
- i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
+ base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
+ num_regs = NUM_OF_MONITORS_IN_BLOCK;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
+ }
- for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
- WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
+ base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
+ num_regs = NUM_OF_MONITORS_IN_BLOCK;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
+ }
+
+ base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+ (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
+ num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
+ }
- i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
+ base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
+ (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
+ num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
+ rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+ if (rc) {
+ dev_err(hdev->dev, "failed resetting SM registers");
+ return -ENOMEM;
+ }
- for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
- WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
+ return 0;
}
static void gaudi_restore_dma_registers(struct hl_device *hdev)
@@ -5664,18 +5851,23 @@ static void gaudi_restore_qm_registers(struct hl_device *hdev)
}
}
-static void gaudi_restore_user_registers(struct hl_device *hdev)
+static int gaudi_restore_user_registers(struct hl_device *hdev)
{
- gaudi_restore_sm_registers(hdev);
+ int rc;
+
+ rc = gaudi_restore_sm_registers(hdev);
+ if (rc)
+ return rc;
+
gaudi_restore_dma_registers(hdev);
gaudi_restore_qm_registers(hdev);
+
+ return 0;
}
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
- gaudi_restore_user_registers(hdev);
-
- return 0;
+ return gaudi_restore_user_registers(hdev);
}
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
@@ -8203,12 +8395,16 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
+ int rc;
dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
hw_sob->sob_id);
- WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
- 0);
+ rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
+ CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+ hw_sob->sob_id * 4, 1, 0);
+ if (rc)
+ dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
kref_init(&hw_sob->kref);
}