Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
136 files changed, 10151 insertions, 4378 deletions
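Note: the headline structural change in this series is splitting JPEG decode out of the VCN block into a standalone IP block (amdgpu_jpeg.o plus jpeg_v1_0/v2_0/v2_5, added in the Makefile hunk below). The new amdgpu_jpeg.h header is not among the hunks shown here; purely as an orientation aid, here is a hypothetical sketch of the state it introduces, reconstructed from nothing more than the fields the later amdgpu_ctx.c hunks dereference (every size, and any member not named there, is a guess):

/* Hypothetical reconstruction -- amdgpu_jpeg.h itself is not part of
 * the hunks below.  Field names are taken from the dereferences in the
 * amdgpu_ctx.c changes (num_jpeg_inst, harvest_config, inst[i].ring_dec,
 * jpeg_sched, num_jpeg_sched); the instance count is assumed.
 */
struct amdgpu_jpeg_inst {
    struct amdgpu_ring ring_dec;          /* one decode ring per instance */
};

struct amdgpu_jpeg {
    uint8_t num_jpeg_inst;
    struct amdgpu_jpeg_inst inst[2];      /* assumed maximum of two instances */
    unsigned harvest_config;              /* bit i set = instance i harvested */
    struct drm_gpu_scheduler *jpeg_sched[2];
    uint32_t num_jpeg_sched;
};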
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index ca0e435559d5..c2bbcdd9c875 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -120,6 +120,7 @@ amdgpu-y += \ amdgpu_rlc.o \ gfx_v8_0.o \ gfx_v9_0.o \ + gfx_v9_4.o \ gfx_v10_0.o # add async DMA block @@ -147,12 +148,16 @@ amdgpu-y += \ vce_v3_0.o \ vce_v4_0.o -# add VCN block +# add VCN and JPEG block amdgpu-y += \ amdgpu_vcn.o \ vcn_v1_0.o \ vcn_v2_0.o \ - vcn_v2_5.o + vcn_v2_5.o \ + amdgpu_jpeg.o \ + jpeg_v1_0.o \ + jpeg_v2_0.o \ + jpeg_v2_5.o # add ATHUB block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0c229a92a24b..da3bcff61b97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -69,6 +69,7 @@ #include "amdgpu_uvd.h" #include "amdgpu_vce.h" #include "amdgpu_vcn.h" +#include "amdgpu_jpeg.h" #include "amdgpu_mn.h" #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" @@ -89,6 +90,7 @@ #include "amdgpu_mes.h" #include "amdgpu_umc.h" #include "amdgpu_mmhub.h" +#include "amdgpu_df.h" #define MAX_GPU_INSTANCE 16 @@ -588,6 +590,8 @@ struct amdgpu_asic_funcs { bool (*need_reset_on_init)(struct amdgpu_device *adev); /* PCIe replay counter */ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); + /* device supports BACO */ + bool (*supports_baco)(struct amdgpu_device *adev); }; /* @@ -633,9 +637,8 @@ struct amdgpu_fw_vram_usage { struct amdgpu_bo *reserved_bo; void *va; - /* Offset on the top of VRAM, used as c2p write buffer. + /* GDDR6 training support flag. */ - u64 mem_train_fb_loc; bool mem_train_support; }; @@ -662,29 +665,6 @@ struct amdgpu_mmio_remap { resource_size_t bus_addr; }; -struct amdgpu_df_funcs { - void (*sw_init)(struct amdgpu_device *adev); - void (*sw_fini)(struct amdgpu_device *adev); - void (*enable_broadcast_mode)(struct amdgpu_device *adev, - bool enable); - u32 (*get_fb_channel_number)(struct amdgpu_device *adev); - u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, - bool enable); - int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, - int is_enable); - int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, - int is_disable); - void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, - uint64_t *count); - uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); - void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, - uint32_t ficadl_val, uint32_t ficadh_val); -}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, @@ -704,6 +684,7 @@ enum amd_hw_ip_block_type { MP1_HWIP, UVD_HWIP, VCN_HWIP = UVD_HWIP, + JPEG_HWIP = VCN_HWIP, VCE_HWIP, DF_HWIP, DCE_HWIP, @@ -899,6 +880,9 @@ struct amdgpu_device { /* vcn */ struct amdgpu_vcn vcn; + /* jpeg */ + struct amdgpu_jpeg jpeg; + /* firmwares */ struct amdgpu_firmware firmware; @@ -924,6 +908,9 @@ struct amdgpu_device { bool enable_mes; struct amdgpu_mes mes; + /* df */ + struct amdgpu_df df; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -937,8 +924,6 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; - 
const struct amdgpu_df_funcs *df_funcs; - /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; @@ -982,6 +967,11 @@ struct amdgpu_device { /* device pstate */ int pstate; + /* enable runtime pm on the device */ + bool runpm; + + bool pm_sysfs_en; + bool ucode_sysfs_en; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1019,10 +1009,14 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define AMDGPU_REGS_IDX (1<<0) #define AMDGPU_REGS_NO_KIQ (1<<1) +#define AMDGPU_REGS_KIQ (1<<2) #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_KIQ) +#define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_KIQ) + #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1117,6 +1111,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev)) + #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); /* Common functions */ @@ -1133,9 +1129,12 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size); -bool amdgpu_device_is_px(struct drm_device *dev); +bool amdgpu_device_supports_boco(struct drm_device *dev); +bool amdgpu_device_supports_baco(struct drm_device *dev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_device_baco_enter(struct drm_device *dev); +int amdgpu_device_baco_exit(struct drm_device *dev); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) @@ -1173,8 +1172,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv); void amdgpu_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); -int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon); -int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); +int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); +int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe); void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 82155ac3288a..12247a32f9ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -527,7 +527,7 @@ static int acp_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = state == AMD_PG_STATE_GATE ? 
true : false; + bool enable = (state == AMD_PG_STATE_GATE); if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d3da9dde4ee1..8609287620ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -613,15 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (is_support_sw_smu(adev)) - smu_switch_power_profile(&adev->smu, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); - else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) - amdgpu_dpm_switch_power_profile(adev, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); } bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) @@ -634,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + if (adev->family == AMDGPU_FAMILY_AI) { + int i; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + } else { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + } + + return 0; +} + +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t flush_type = 0; + bool all_hub = false; + + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; + + if (adev->family == AMDGPU_FAMILY_AI) + all_hub = true; + + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); +} + bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 069d5d230810..47b0f2957d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index b6713e0ed1b2..4bcc175a149d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -46,6 +46,8 @@ #include "soc15.h" #include "soc15d.h" #include "amdgpu_amdkfd_gfx_v9.h" +#include "gfxhub_v1_0.h" +#include "mmhub_v9_4.h" #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -69,32 +71,56 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[8] = { - SOC15_REG_OFFSET(SDMA0, 0, - mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA1, 0, - mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA2, 0, - 
mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA3, 0, - mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA4, 0, - mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA5, 0, - mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA6, 0, - mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA7, 0, - mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL - }; - - uint32_t retval = sdma_engine_reg_base[engine_id] + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; + + switch (engine_id) { + default: + dev_warn(adev->dev, + "Invalid sdma engine id (%d), using engine id 0\n", + engine_id); + /* fall through */ + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; + break; + case 4: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; + break; + case 5: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; + break; + case 6: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; + break; + case 7: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + queue_id, sdma_rlc_reg_offset); - return retval; + return sdma_rlc_reg_offset; } static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, @@ -258,11 +284,28 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } +static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); + + gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); +} + const struct kfd2kgd_calls arcturus_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -277,8 +320,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, - .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, + .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 
61cd707158e4..a7b17c8deb00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -107,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -268,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -332,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, lower_32_bits((uint64_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uint64_t)wptr)); - pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -350,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v10_compute_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + 
PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + static int kgd_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -686,71 +725,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); - return 0; -} - static int kgd_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -817,6 +791,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, + .hiq_mqd_load = kgd_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -832,7 +807,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .get_tile_config = amdgpu_amdkfd_get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6e6f0a99ec06..8f052e98a3c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -696,45 +696,6 @@ static void 
set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid\n"); - return 0; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - /** * read_vmid_from_vmfault_reg - read vmid from register * @@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index bfbddedb2380..19a10db93d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return -EINVAL; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 47c853ef1051..8562afe5b761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -40,7 +40,6 @@ #include "soc15d.h" #include 
"mmhub_v1_0.h" #include "gfxhub_v1_0.h" -#include "gmc_v9_0.h" enum hqd_dequeue_request_type { @@ -104,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -259,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -324,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -340,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v9_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + 
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -618,100 +655,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid, i; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - uint32_t flush_type = 0; - - if (adev->in_gpu_reset) - return -EIO; - if (adev->gmc.xgmi.num_physical_nodes && - adev->asic_type == CHIP_VEGA20) - flush_type = 2; - - if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid, flush_type); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - i, flush_type); - break; - } - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int i; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. 
- * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - - return 0; -} - int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -758,8 +701,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, return 0; } -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, + uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -769,16 +712,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi return; } - /* TODO: take advantage of per-process address space size. For - * now, all processes share the same address space size, like - * on GFX8 and older. - */ - if (adev->asic_type == CHIP_ARCTURUS) { - /* Two MMHUBs */ - mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); - mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); - } else - mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } @@ -788,6 +722,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -803,7 +738,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index d9e9ad22b2bd..63d3e6683dfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -33,6 +33,9 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); @@ -57,9 +60,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, uint8_t vmid, uint16_t *p_pasid); -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 888209eb8cec..fa8ac9d19a7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -85,7 
+85,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, } /* Set memory usage limits. Current, limits are - * System (TTM + userptr) memory - 3/4th System RAM + * System (TTM + userptr) memory - 15/16th System RAM * TTM memory - 3/8th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) @@ -98,7 +98,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2); + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), @@ -358,7 +358,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(NULL, sync, vm->last_update, false); + return amdgpu_sync_fence(sync, vm->last_update, false); } static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) @@ -750,7 +750,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + amdgpu_sync_fence(sync, bo_va->last_pt_update, false); return 0; } @@ -769,7 +769,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -1674,10 +1674,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) { struct amdkfd_process_info *process_info = mem->process_info; - int invalid, evicted_bos; + int evicted_bos; int r = 0; - invalid = atomic_inc_return(&mem->invalid); + atomic_inc(&mem->invalid); evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ @@ -2048,7 +2048,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. 
Try again\n"); goto validate_map_fail; @@ -2129,6 +2129,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem return -ENOMEM; mutex_init(&(*mem)->lock); + INIT_LIST_HEAD(&(*mem)->bo_va_list); (*mem)->bo = amdgpu_bo_ref(gws_bo); (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; (*mem)->process_info = process_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 72232fccf61a..fdd52d86a4d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -338,17 +338,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * path_size += le16_to_cpu(path->usSize); if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, con_obj_type; - - con_obj_id = + uint8_t con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - con_obj_num = - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) - >> ENUM_ID_SHIFT; - con_obj_type = - (le16_to_cpu(path->usConnObjectId) & - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; /* Skip TV/CV support */ if ((le16_to_cpu(path->usDeviceTag) == @@ -373,15 +365,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_id, grph_obj_num, grph_obj_type; - - grph_obj_id = - (le16_to_cpu(path->usGraphicObjIds[j]) & - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - grph_obj_num = - (le16_to_cpu(path->usGraphicObjIds[j]) & - ENUM_ID_MASK) >> ENUM_ID_SHIFT; - grph_obj_type = + uint8_t grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; @@ -2038,7 +2022,7 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) if (adev->is_atom_fw) { amdgpu_atomfirmware_scratch_regs_init(adev); amdgpu_atomfirmware_allocate_fb_scratch(adev); - ret = amdgpu_atomfirmware_get_mem_train_fb_loc(adev); + ret = amdgpu_atomfirmware_get_mem_train_info(adev); if (ret) { DRM_ERROR("Failed to get mem train fb location.\n"); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index ff4eb96bdfb5..58f9d8c3a17a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -525,16 +525,12 @@ static int gddr6_mem_train_support(struct amdgpu_device *adev) return ret; } -int amdgpu_atomfirmware_get_mem_train_fb_loc(struct amdgpu_device *adev) +int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev) { struct atom_context *ctx = adev->mode_info.atom_context; - unsigned char *bios = ctx->bios; - struct vram_reserve_block *reserved_block; - int index, block_number; + int index; uint8_t frev, crev; uint16_t data_offset, size; - uint32_t start_address_in_kb; - uint64_t offset; int ret; adev->fw_vram_usage.mem_train_support = false; @@ -569,32 +565,6 @@ int amdgpu_atomfirmware_get_mem_train_fb_loc(struct amdgpu_device *adev) return -EINVAL; } - reserved_block = (struct vram_reserve_block *) - (bios + data_offset + sizeof(struct atom_common_table_header)); - block_number = ((unsigned int)size - sizeof(struct atom_common_table_header)) - / sizeof(struct vram_reserve_block); - reserved_block += (block_number > 0) ? 
block_number-1 : 0; - DRM_DEBUG("block_number:0x%04x, last block: 0x%08xkb sz, %dkb fw, %dkb drv.\n", - block_number, - le32_to_cpu(reserved_block->start_address_in_kb), - le16_to_cpu(reserved_block->used_by_firmware_in_kb), - le16_to_cpu(reserved_block->used_by_driver_in_kb)); - if (reserved_block->used_by_firmware_in_kb > 0) { - start_address_in_kb = le32_to_cpu(reserved_block->start_address_in_kb); - offset = (uint64_t)start_address_in_kb * ONE_KiB; - if ((offset & (ONE_MiB - 1)) < (4 * ONE_KiB + 1) ) { - offset -= ONE_MiB; - } - - offset &= ~(ONE_MiB - 1); - adev->fw_vram_usage.mem_train_fb_loc = offset; - adev->fw_vram_usage.mem_train_support = true; - DRM_DEBUG("mem_train_fb_loc:0x%09llx.\n", offset); - ret = 0; - } else { - DRM_ERROR("used_by_firmware_in_kb is 0!\n"); - ret = -EINVAL; - } - - return ret; + adev->fw_vram_usage.mem_train_support = true; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index f871af5ea6f3..434fe2fa0089 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -31,7 +31,7 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, int *vram_width, int *vram_type, int *vram_vendor); -int amdgpu_atomfirmware_get_mem_train_fb_loc(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5ca905b4a0fb..a52a084158b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -795,29 +795,23 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(adev, &p->job->sync, - fpriv->prt_va->last_pt_update, false); + r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); if (r) return r; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { - struct dma_fence *f; - bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f, false); + r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_fence *f; - /* ignore duplicates */ bo = ttm_to_amdgpu_bo(e->tv.bo); if (!bo) @@ -831,8 +825,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f, false); + r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -845,7 +838,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false); + r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); if (r) return r; @@ -916,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (parser->entity && parser->entity != entity) return -EINVAL; + /* Return if there is no run queue associated with this entity. 
+ * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + parser->entity = entity; ring = to_amdgpu_ring(entity->rq->sched); @@ -987,7 +985,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence, true); dma_fence_put(fence); if (r) return r; @@ -1009,7 +1007,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence, true); dma_fence_put(fence); return r; @@ -1236,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, goto error_abort; } - job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6614d8a6f4c8..94a6c42f29ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { [AMDGPU_HW_IP_VCN_JPEG] = 1, }; -static int amdgpu_ctx_total_num_entities(void) -{ - unsigned i, num_entities = 0; - - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) - num_entities += amdgpu_ctx_num_entities[i]; - - return num_entities; -} - static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum drm_sched_priority priority) { + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) + return -EINVAL; + /* NORMAL and below are accessible by everyone */ if (priority <= DRM_SCHED_PRIORITY_NORMAL) return 0; @@ -68,47 +61,94 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp, return -EACCES; } +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring) +{ + struct amdgpu_device *adev = ctx->adev; + struct amdgpu_ctx_entity *entity; + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; + unsigned num_scheds = 0; + enum drm_sched_priority priority; + int r; + + entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), + GFP_KERNEL); + if (!entity) + return -ENOMEM; + + entity->sequence = 1; + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? 
+ ctx->init_priority : ctx->override_priority; + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + sched = &adev->gfx.gfx_ring[0].sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_COMPUTE: + scheds = adev->gfx.compute_sched; + num_scheds = adev->gfx.num_compute_sched; + break; + case AMDGPU_HW_IP_DMA: + scheds = adev->sdma.sdma_sched; + num_scheds = adev->sdma.num_sdma_sched; + break; + case AMDGPU_HW_IP_UVD: + sched = &adev->uvd.inst[0].ring.sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_VCE: + sched = &adev->vce.ring[0].sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_UVD_ENC: + sched = &adev->uvd.inst[0].ring_enc[0].sched; + scheds = &sched; + num_scheds = 1; + break; + case AMDGPU_HW_IP_VCN_DEC: + scheds = adev->vcn.vcn_dec_sched; + num_scheds = adev->vcn.num_vcn_dec_sched; + break; + case AMDGPU_HW_IP_VCN_ENC: + scheds = adev->vcn.vcn_enc_sched; + num_scheds = adev->vcn.num_vcn_enc_sched; + break; + case AMDGPU_HW_IP_VCN_JPEG: + scheds = adev->jpeg.jpeg_sched; + num_scheds = adev->jpeg.num_jpeg_sched; + break; + } + + r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds, + &ctx->guilty); + if (r) + goto error_free_entity; + + ctx->entities[hw_ip][ring] = entity; + return 0; + +error_free_entity: + kfree(entity); + + return r; +} + static int amdgpu_ctx_init(struct amdgpu_device *adev, enum drm_sched_priority priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { - unsigned num_entities = amdgpu_ctx_total_num_entities(); - unsigned i, j, k; int r; - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) - return -EINVAL; - r = amdgpu_ctx_priority_permit(filp, priority); if (r) return r; memset(ctx, 0, sizeof(*ctx)); - ctx->adev = adev; - - ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities, - sizeof(struct dma_fence*), GFP_KERNEL); - if (!ctx->fences) - return -ENOMEM; - - ctx->entities[0] = kcalloc(num_entities, - sizeof(struct amdgpu_ctx_entity), - GFP_KERNEL); - if (!ctx->entities[0]) { - r = -ENOMEM; - goto error_free_fences; - } - - for (i = 0; i < num_entities; ++i) { - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; - entity->sequence = 1; - entity->fences = &ctx->fences[amdgpu_sched_jobs * i]; - } - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) - ctx->entities[i] = ctx->entities[i - 1] + - amdgpu_ctx_num_entities[i - 1]; + ctx->adev = adev; kref_init(&ctx->refcount); spin_lock_init(&ctx->ring_lock); @@ -120,114 +160,49 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->init_priority = priority; ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { - struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; - struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; - unsigned num_rings = 0; - unsigned num_rqs = 0; - - switch (i) { - case AMDGPU_HW_IP_GFX: - rings[0] = &adev->gfx.gfx_ring[0]; - num_rings = 1; - break; - case AMDGPU_HW_IP_COMPUTE: - for (j = 0; j < adev->gfx.num_compute_rings; ++j) - rings[j] = &adev->gfx.compute_ring[j]; - num_rings = adev->gfx.num_compute_rings; - break; - case AMDGPU_HW_IP_DMA: - for (j = 0; j < adev->sdma.num_instances; ++j) - rings[j] = &adev->sdma.instance[j].ring; - num_rings = adev->sdma.num_instances; - break; - case AMDGPU_HW_IP_UVD: - rings[0] = &adev->uvd.inst[0].ring; - num_rings = 1; - break; - case AMDGPU_HW_IP_VCE: - rings[0] = &adev->vce.ring[0]; - num_rings = 1; - break; - case AMDGPU_HW_IP_UVD_ENC: - rings[0] = &adev->uvd.inst[0].ring_enc[0]; - num_rings = 1; - break; - case AMDGPU_HW_IP_VCN_DEC: - for (j = 
0; j < adev->vcn.num_vcn_inst; ++j) { - if (adev->vcn.harvest_config & (1 << j)) - continue; - rings[num_rings++] = &adev->vcn.inst[j].ring_dec; - } - break; - case AMDGPU_HW_IP_VCN_ENC: - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { - if (adev->vcn.harvest_config & (1 << j)) - continue; - for (k = 0; k < adev->vcn.num_enc_rings; ++k) - rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k]; - } - break; - case AMDGPU_HW_IP_VCN_JPEG: - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { - if (adev->vcn.harvest_config & (1 << j)) - continue; - rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg; - } - break; - } + return 0; - for (j = 0; j < num_rings; ++j) { - if (!rings[j]->adev) - continue; +} - rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority]; - } +static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) +{ - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) - r = drm_sched_entity_init(&ctx->entities[i][j].entity, - rqs, num_rqs, &ctx->guilty); - if (r) - goto error_cleanup_entities; - } + int i; - return 0; + if (!entity) + return; -error_cleanup_entities: - for (i = 0; i < num_entities; ++i) - drm_sched_entity_destroy(&ctx->entities[0][i].entity); - kfree(ctx->entities[0]); + for (i = 0; i < amdgpu_sched_jobs; ++i) + dma_fence_put(entity->fences[i]); -error_free_fences: - kfree(ctx->fences); - ctx->fences = NULL; - return r; + kfree(entity); } static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_device *adev = ctx->adev; unsigned i, j; if (!adev) return; - for (i = 0; i < num_entities; ++i) - for (j = 0; j < amdgpu_sched_jobs; ++j) - dma_fence_put(ctx->entities[0][i].fences[j]); - kfree(ctx->fences); - kfree(ctx->entities[0]); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { + amdgpu_ctx_fini_entity(ctx->entities[i][j]); + ctx->entities[i][j] = NULL; + } + } mutex_destroy(&ctx->lock); - kfree(ctx); } int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, u32 ring, struct drm_sched_entity **entity) { + int r; + if (hw_ip >= AMDGPU_HW_IP_NUM) { DRM_ERROR("unknown HW IP type: %d\n", hw_ip); return -EINVAL; @@ -244,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, return -EINVAL; } - *entity = &ctx->entities[hw_ip][ring].entity; + if (ctx->entities[hw_ip][ring] == NULL) { + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring); + if (r) + return r; + } + + *entity = &ctx->entities[hw_ip][ring]->entity; return 0; } @@ -284,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; - unsigned num_entities; - u32 i; + u32 i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + if (!ctx->entities[i][j]) + continue; - num_entities = amdgpu_ctx_total_num_entities(); - for (i = 0; i < num_entities; i++) - drm_sched_entity_destroy(&ctx->entities[0][i].entity); + drm_sched_entity_destroy(&ctx->entities[i][j]->entity); + } + } amdgpu_ctx_fini(ref); } @@ -521,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, enum drm_sched_priority priority) { - unsigned num_entities = amdgpu_ctx_total_num_entities(); enum drm_sched_priority ctx_prio; - unsigned i; + unsigned i, j; ctx->override_priority = 
priority;
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct drm_sched_entity *entity;
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+			if (!ctx->entities[i][j])
+				continue;
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			entity = &ctx->entities[i][j]->entity;
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 	}
 }
@@ -569,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 	idp = &mgr->ctx_handles;
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -591,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 	idp = &mgr->ctx_handles;
@@ -604,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }
@@ -627,3 +627,45 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 	idr_destroy(&mgr->ctx_handles);
 	mutex_destroy(&mgr->lock);
 }
+
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
+{
+	int i, j;
+
+	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+		adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
+		adev->gfx.num_gfx_sched++;
+	}
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
+		adev->gfx.num_compute_sched++;
+	}
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
+		adev->sdma.num_sdma_sched++;
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
+			&adev->vcn.inst[i].ring_dec.sched;
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+			adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
+				&adev->vcn.inst[i].ring_enc[j].sched;
+	}
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+		adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
+			&adev->jpeg.inst[i].ring_dec.sched;
+	}
+}
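A note on the amdgpu_ctx rework just above: entities are now created lazily on first use (amdgpu_ctx_init_entity), and the per-entity fence ring moves from a separately allocated struct dma_fence **fences into a flexible array member sized in a single allocation. A minimal sketch of that allocation idiom, with hypothetical example_* names standing in for the driver's own:

/* Sketch of the flexible-array idiom the new amdgpu_ctx_entity uses:
 * one kcalloc() covers the struct plus its trailing fence ring, mirroring
 * kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), ...)
 * in the hunk above.  example_* names are illustrative only.
 */
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/dma-fence.h>
#include <drm/gpu_scheduler.h>

struct example_entity {
	uint64_t sequence;
	struct drm_sched_entity entity;
	struct dma_fence *fences[];	/* flexible array member */
};

static struct example_entity *example_entity_alloc(unsigned int num_fences)
{
	/* offsetof(..., fences[n]) == sizeof(struct) + n * sizeof(ptr) */
	return kcalloc(1, offsetof(struct example_entity, fences[num_fences]),
		       GFP_KERNEL);
}

Compared with the old layout, where one shared ctx->fences slab was carved into per-entity windows, this keeps each entity's fences contiguous with the entity itself and lets unused (hw_ip, ring) slots stay NULL until amdgpu_ctx_get_entity first touches them.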
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index da808633732b..de490f183af2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -29,10 +29,12 @@ struct drm_device; struct drm_file; struct amdgpu_fpriv; +#define AMDGPU_MAX_ENTITY_NUM 4 + struct amdgpu_ctx_entity { uint64_t sequence; - struct dma_fence **fences; struct drm_sched_entity entity; + struct dma_fence *fences[]; }; struct amdgpu_ctx { @@ -42,8 +44,7 @@ struct amdgpu_ctx { unsigned reset_counter_query; uint32_t vram_lost_counter; spinlock_t ring_lock; - struct dma_fence **fences; - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM]; + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM]; bool preamble_presented; enum drm_sched_priority init_priority; enum drm_sched_priority override_priority; @@ -87,4 +88,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_init_sched(struct amdgpu_device *adev); + + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8e6726e0d035..f24ed9a1a3e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -26,6 +26,7 @@ #include <linux/kthread.h> #include <linux/pci.h> #include <linux/uaccess.h> +#include <linux/pm_runtime.h> #include <drm/drm_debugfs.h> @@ -129,7 +130,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, sh_bank = 0xFFFFFFFF; if (instance_bank == 0x3FF) instance_bank = 0xFFFFFFFF; - use_bank = 1; + use_bank = true; } else if (*pos & (1ULL << 61)) { me = (*pos & GENMASK_ULL(33, 24)) >> 24; @@ -137,17 +138,24 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, queue = (*pos & GENMASK_ULL(53, 44)) >> 44; vmid = (*pos & GENMASK_ULL(58, 54)) >> 54; - use_ring = 1; + use_ring = true; } else { - use_bank = use_ring = 0; + use_bank = use_ring = false; } *pos &= (1UL << 22) - 1; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, sh_bank, instance_bank); @@ -193,6 +201,9 @@ end: if (pm_pg_lock) mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_PCIE(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -276,12 +297,19 @@ static ssize_t 
amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_PCIE(*pos >> 2, value); @@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_DIDT(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_DIDT(*pos >> 2, value); @@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_SMC(*pos); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_SMC(*pos, value); @@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, idx = *pos >> 2; valuesize = sizeof(values); + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + 
return r; + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, wave = (*pos & GENMASK_ULL(36, 31)) >> 31; simd = (*pos & GENMASK_ULL(44, 37)) >> 37; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (!x) return -EINVAL; @@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, if (!data) return -ENOMEM; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + while (size) { uint32_t value; @@ -859,6 +953,10 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; + /* Avoid accidentally unparking the sched thread during GPU reset */ mutex_lock(&adev->lock_reset); @@ -889,6 +987,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) mutex_unlock(&adev->lock_reset); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -907,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -917,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c17505fba988..39cd545976b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -66,6 +66,7 @@ #include "amdgpu_pmu.h" #include <linux/suspend.h> +#include <drm/task_barrier.h> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); @@ -137,14 +138,14 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO, static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
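All of the debugfs hooks touched above follow the same runtime-PM discipline: take a reference with pm_runtime_get_sync() before touching the hardware, then rearm the autosuspend timer and drop the reference on every exit path. A minimal sketch of that guard pattern, with example_show() standing in for the real register access (hypothetical helper, not part of this patch):

static int example_debugfs_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = ((struct drm_info_node *)m->private)->minor->dev;
	int r;

	/* wake the GPU (and keep it awake) before any MMIO access */
	r = pm_runtime_get_sync(dev->dev);
	if (r < 0)
		return r;

	example_show(m, dev->dev_private);	/* hypothetical hardware access */

	/* restart the autosuspend countdown, then drop our reference */
	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);
	return 0;
}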
/** - * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control + * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control * * @dev: drm_device pointer * * Returns true if the device is a dGPU with HG/PX power control, * otherwise return false. */ -bool amdgpu_device_is_px(struct drm_device *dev) +bool amdgpu_device_supports_boco(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; @@ -154,6 +155,21 @@ bool amdgpu_device_is_px(struct drm_device *dev) } /** + * amdgpu_device_supports_baco - Does the device support BACO + * + * @dev: drm_device pointer + * + * Returns true if the device supports BACO, + * otherwise return false. + */ +bool amdgpu_device_supports_baco(struct drm_device *dev) +{ + struct amdgpu_device *adev = dev->dev_private; + + return amdgpu_asic_supports_baco(adev); +} + +/** * VRAM access helper functions. * * amdgpu_device_vram_access - read/write a buffer in vram @@ -200,8 +216,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) - return amdgpu_virt_kiq_rreg(adev, reg); + if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) + return amdgpu_kiq_rreg(adev, reg); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -278,8 +294,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, adev->last_mm_index = v; } - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) - return amdgpu_virt_kiq_wreg(adev, reg, v); + if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))) + return amdgpu_kiq_wreg(adev, reg, v); if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -969,7 +985,7 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) { struct sysinfo si; - bool is_os_64 = (sizeof(void *) == 8) ? 
true : false; + bool is_os_64 = (sizeof(void *) == 8); uint64_t total_memory; uint64_t dram_size_seven_GB = 0x1B8000000; uint64_t dram_size_three_GB = 0xB8000000; @@ -1016,8 +1032,6 @@ def_value: */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { - int ret = 0; - if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -1057,7 +1071,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - return ret; + return 0; } /** @@ -1072,8 +1086,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) { struct drm_device *dev = pci_get_drvdata(pdev); + int r; - if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF) + if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF) return; if (state == VGA_SWITCHEROO_ON) { @@ -1081,7 +1096,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero /* don't suspend or resume card normally */ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - amdgpu_device_resume(dev, true, true); + pci_set_power_state(dev->pdev, PCI_D0); + pci_restore_state(dev->pdev); + r = pci_enable_device(dev->pdev); + if (r) + DRM_WARN("pci_enable_device failed (%d)\n", r); + amdgpu_device_resume(dev, true); dev->switch_power_state = DRM_SWITCH_POWER_ON; drm_kms_helper_poll_enable(dev); @@ -1089,7 +1109,11 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero pr_info("amdgpu: switched off\n"); drm_kms_helper_poll_disable(dev); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - amdgpu_device_suspend(dev, true, true); + amdgpu_device_suspend(dev, true); + pci_save_state(dev->pdev); + /* Shut down the device */ + pci_disable_device(dev->pdev); + pci_set_power_state(dev->pdev, PCI_D3cold); dev->switch_power_state = DRM_SWITCH_POWER_OFF; } } @@ -1527,7 +1551,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } parse_soc_bounding_box: -#ifdef CONFIG_DRM_AMD_DC_DCN2_0 /* * soc bounding box info is not integrated in discovery table, * we always need to parse it from gpu info firmware. 
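The two helpers introduced above let the driver tell the platform-controlled power-off path (BOCO, via ATPX/_PR3) apart from the chip-internal one (BACO, driven through the SMU). A hedged sketch of how a runtime power-down decision can be layered on them (illustrative only; the real dispatch is in amdgpu_pmops_runtime_suspend() later in this patch):

/* illustrative sketch, assuming only the helpers added by this patch */
static int example_runtime_powerdown(struct drm_device *dev)
{
	int r;

	if (amdgpu_device_supports_boco(dev))
		/* platform (ATPX/_PR3) will cut power to the whole dGPU */
		return amdgpu_device_suspend(dev, false);

	if (amdgpu_device_supports_baco(dev)) {
		r = amdgpu_device_suspend(dev, false);
		if (r)
			return r;
		/* bus stays active, the chip powers off through the SMU */
		return amdgpu_device_baco_enter(dev);
	}

	return -ENOTSUPP;
}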
@@ -1538,7 +1561,6 @@ parse_soc_bounding_box: le32_to_cpu(hdr->header.ucode_array_offset_bytes)); adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; } -#endif break; } default: @@ -1787,7 +1809,8 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) } } - r = amdgpu_pm_load_smu_firmware(adev, &smu_version); + if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) + r = amdgpu_pm_load_smu_firmware(adev, &smu_version); return r; } @@ -1854,6 +1877,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_init_data_exchange(adev); + r = amdgpu_ib_pool_init(adev); if (r) { dev_err(adev->dev, "IB initialization failed (%d).\n", r); @@ -1895,11 +1921,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_amdkfd_device_init(adev); init_failed: - if (amdgpu_sriov_vf(adev)) { - if (!r) - amdgpu_virt_init_data_exchange(adev); + if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, true); - } return r; } @@ -1938,6 +1961,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * amdgpu_device_set_cg_state - set clockgating for amdgpu device * * @adev: amdgpu_device pointer + * @state: clockgating state (gate or ungate) * * The list of all the hardware IPs that make up the asic is walked and the * set_clockgating_state callbacks are run. @@ -1962,6 +1986,7 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev, if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, @@ -1992,6 +2017,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */ r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, @@ -2319,14 +2345,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; /* handle putting the SMC in the appropriate state */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - if (is_support_sw_smu(adev)) { - r = smu_set_mp1_state(&adev->smu, adev->mp1_state); - } else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_mp1_state) { - r = adev->powerplay.pp_funcs->set_mp1_state( - adev->powerplay.pp_handle, - adev->mp1_state); - } + r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); if (r) { DRM_ERROR("SMC failed to set mp1 state %d, %d\n", adev->mp1_state, r); @@ -2413,7 +2432,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_GFX, AMD_IP_BLOCK_TYPE_SDMA, AMD_IP_BLOCK_TYPE_UVD, - AMD_IP_BLOCK_TYPE_VCE + AMD_IP_BLOCK_TYPE_VCE, + AMD_IP_BLOCK_TYPE_VCN }; for (i = 0; i < ARRAY_SIZE(ip_order); i++) { @@ -2428,7 +2448,11 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) block->status.hw) continue; - r = block->version->funcs->hw_init(adev); + if 
(block->version->type == AMD_IP_BLOCK_TYPE_SMC) + r = block->version->funcs->resume(adev); + else + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; @@ -2600,20 +2624,19 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) +#if defined(CONFIG_DRM_AMD_DC_DCN) case CHIP_RAVEN: -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN2_1) case CHIP_RENOIR: #endif return amdgpu_dc != 0; #endif default: + if (amdgpu_dc > 0) + DRM_INFO("Display Core has been requested via kernel parameter " + "but isn't supported by ASIC, ignoring\n"); return false; } } @@ -2638,8 +2661,38 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) { struct amdgpu_device *adev = container_of(__work, struct amdgpu_device, xgmi_reset_work); + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + + /* It's a bug to not have a hive within this function */ + if (WARN_ON(!hive)) + return; + + /* + * Use task barrier to synchronize all xgmi reset works across the + * hive. task_barrier_enter and task_barrier_exit will block + * until all the threads running the xgmi reset works reach + * those points. task_barrier_full will do both blocks. + */ + if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + + task_barrier_enter(&hive->tb); + adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev); + + if (adev->asic_reset_res) + goto fail; + + task_barrier_exit(&hive->tb); + adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev); + + if (adev->asic_reset_res) + goto fail; + } else { + + task_barrier_full(&hive->tb); + adev->asic_reset_res = amdgpu_asic_reset(adev); + } - adev->asic_reset_res = amdgpu_asic_reset(adev); +fail: if (adev->asic_reset_res) DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev->ddev->unique); @@ -2731,7 +2784,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags) { int r, i; - bool runtime = false; + bool boco = false; u32 max_MBps; adev->shutdown = false; @@ -2754,7 +2807,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; - adev->vm_manager.vm_pte_num_rqs = 0; + adev->vm_manager.vm_pte_num_scheds = 0; adev->gmc.gmc_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -2794,9 +2847,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); - mutex_init(&adev->notifier_lock); - mutex_init(&adev->virt.dpm_mutex); mutex_init(&adev->psp.mutex); + mutex_init(&adev->notifier_lock); r = amdgpu_device_check_arguments(adev); if (r) @@ -2902,12 +2954,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, * ignore it */ vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); - if (amdgpu_device_is_px(ddev)) - runtime = true; - if (!pci_is_thunderbolt_attached(adev->pdev)) + if (amdgpu_device_supports_boco(ddev)) + boco = true; + if (amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && + !pci_is_thunderbolt_attached(adev->pdev)) 
vga_switcheroo_register_client(adev->pdev, - &amdgpu_switcheroo_ops, runtime); - if (runtime) + &amdgpu_switcheroo_ops, boco); + if (boco) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); if (amdgpu_emu_mode == 1) { @@ -2994,11 +3049,17 @@ fence_driver_init: } dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); - if (amdgpu_virt_request_full_gpu(adev, false)) - amdgpu_virt_release_full_gpu(adev, false); goto failed; } + DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se, + adev->gfx.config.max_cu_per_sh, + adev->gfx.cu_info.number); + + amdgpu_ctx_init_sched(adev); + adev->accel_working = true; amdgpu_vm_check_compute_bug(adev); @@ -3013,16 +3074,19 @@ fence_driver_init: amdgpu_fbdev_init(adev); - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) - amdgpu_pm_virt_sysfs_init(adev); - r = amdgpu_pm_sysfs_init(adev); - if (r) + if (r) { + adev->pm_sysfs_en = false; DRM_ERROR("registering pm debugfs failed (%d).\n", r); + } else + adev->pm_sysfs_en = true; r = amdgpu_ucode_sysfs_init(adev); - if (r) + if (r) { + adev->ucode_sysfs_en = false; DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + } else + adev->ucode_sysfs_en = true; r = amdgpu_debugfs_gem_init(adev); if (r) @@ -3091,7 +3155,7 @@ fence_driver_init: failed: amdgpu_vf_error_trans_all(adev); - if (runtime) + if (boco) vga_switcheroo_fini_domain_pm_ops(adev->dev); return r; @@ -3122,7 +3186,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) drm_atomic_helper_shutdown(adev->ddev); } amdgpu_fence_driver_fini(adev); - amdgpu_pm_sysfs_fini(adev); + if (adev->pm_sysfs_en) + amdgpu_pm_sysfs_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_device_ip_fini(adev); if (adev->firmware.gpu_info_fw) { @@ -3139,9 +3204,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) kfree(adev->bios); adev->bios = NULL; - if (!pci_is_thunderbolt_attached(adev->pdev)) + if (amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && + !pci_is_thunderbolt_attached(adev->pdev)) vga_switcheroo_unregister_client(adev->pdev); - if (adev->flags & AMD_IS_PX) + if (amdgpu_device_supports_boco(adev->ddev)) vga_switcheroo_fini_domain_pm_ops(adev->dev); vga_client_register(adev->pdev, NULL, NULL, NULL); if (adev->rio_mem) @@ -3150,12 +3218,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) - amdgpu_pm_virt_sysfs_fini(adev); amdgpu_debugfs_regs_cleanup(adev); device_remove_file(adev->dev, &dev_attr_pcie_replay_count); - amdgpu_ucode_sysfs_fini(adev); + if (adev->ucode_sysfs_en) + amdgpu_ucode_sysfs_fini(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); amdgpu_debugfs_preempt_cleanup(adev); @@ -3178,7 +3245,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) * Returns 0 for success or an error on failure. * Called at driver suspend. 
*/ -int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) +int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev; struct drm_crtc *crtc; @@ -3261,13 +3328,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - if (suspend) { - pci_save_state(dev->pdev); - /* Shut down the device */ - pci_disable_device(dev->pdev); - pci_set_power_state(dev->pdev, PCI_D3hot); - } - return 0; } @@ -3282,7 +3342,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) * Returns 0 for success or an error on failure. * Called at driver resume. */ -int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) +int amdgpu_device_resume(struct drm_device *dev, bool fbcon) { struct drm_connector *connector; struct drm_connector_list_iter iter; @@ -3293,14 +3353,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (resume) { - pci_set_power_state(dev->pdev, PCI_D0); - pci_restore_state(dev->pdev); - r = pci_enable_device(dev->pdev); - if (r) - return r; - } - /* post card */ if (amdgpu_device_need_post(adev)) { r = amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -3639,13 +3691,12 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_amdkfd_pre_reset(adev); - /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) goto error; + amdgpu_virt_init_data_exchange(adev); /* we need recover gart prior to run SMC/CP/SDMA resume */ amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); @@ -3663,7 +3714,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_amdkfd_post_reset(adev); error: - amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { amdgpu_inc_vram_lost(adev); @@ -3709,6 +3759,11 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: break; default: goto disabled; @@ -3785,7 +3840,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /* For XGMI run all resets in parallel to speed up the process */ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) r = -EALREADY; } else r = amdgpu_asic_reset(tmp_adev); @@ -3797,7 +3852,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, } } - /* For XGMI wait for all PSP resets to complete before proceed */ + /* For XGMI wait for all resets to complete before proceeding */ if (!r) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { @@ -3811,6 +3866,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, } } + if (!r && amdgpu_ras_intr_triggered()) + amdgpu_ras_intr_cleared(); list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (need_full_reset) { @@ -3899,7 +3956,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) mutex_lock(&adev->lock_reset); atomic_inc(&adev->gpu_reset_counter); - adev->in_gpu_reset = 1; + adev->in_gpu_reset = true; switch (amdgpu_asic_reset_method(adev)) { case 
AMD_RESET_METHOD_MODE1: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; @@ -3919,7 +3976,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) { amdgpu_vf_error_trans_all(adev); adev->mp1_state = PP_MP1_STATE_NONE; - adev->in_gpu_reset = 0; + adev->in_gpu_reset = false; mutex_unlock(&adev->lock_reset); } @@ -3943,12 +4000,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_device *tmp_adev = NULL; int i, r = 0; bool in_ras_intr = amdgpu_ras_intr_triggered(); + bool use_baco = + (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? + true : false; /* * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { + if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); @@ -3959,7 +4019,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); - dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset"); + dev_info(adev->dev, "GPU %s begin!\n", + (in_ras_intr && !use_baco) ? "jobs stop":"reset"); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -4026,7 +4087,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_unregister_gpu_instance(tmp_adev); /* disable ras on ALL IPs */ - if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) + if (!(in_ras_intr && !use_baco) && + amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -4037,13 +4099,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_stop(&ring->sched, job ? &job->base : NULL); - if (in_ras_intr) + if (in_ras_intr && !use_baco) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } - if (in_ras_intr) + if (in_ras_intr && !use_baco) goto skip_sched_resume; /* @@ -4136,7 +4198,7 @@ skip_hw_reset: skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ - if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) + if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } @@ -4285,3 +4347,35 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } +int amdgpu_device_baco_enter(struct drm_device *dev) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (!amdgpu_device_supports_baco(adev->ddev)) + return -ENOTSUPP; + + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, false); + + return amdgpu_dpm_baco_enter(adev); +} + +int amdgpu_device_baco_exit(struct drm_device *dev) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; + + if (!amdgpu_device_supports_baco(adev->ddev)) + return -ENOTSUPP; + + ret = amdgpu_dpm_baco_exit(adev); + if (ret) + return ret; + + if (ras && ras->supported) + adev->nbio.funcs->enable_doorbell_interrupt(adev, true); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h new file mode 100644 index 000000000000..057f6ea645d7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DF_H__ +#define __AMDGPU_DF_H__ + +struct amdgpu_df_hash_status { + bool hash_64k; + bool hash_2m; + bool hash_1g; +}; + +struct amdgpu_df_funcs { + void (*sw_init)(struct amdgpu_device *adev); + void (*sw_fini)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, + bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); + uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); + void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val); + uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev, + uint32_t df_inst); + uint32_t (*get_df_inst_id)(struct amdgpu_device *adev); +}; + +struct amdgpu_df { + struct amdgpu_df_hash_status hash_status; + const struct amdgpu_df_funcs *funcs; +}; + +#endif /* __AMDGPU_DF_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 3cadb0b76f22..6d520a3eec40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -513,13 +513,23 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * will not allow USWC mappings. * Also, don't allow GTT domain if the BO doesn't have USWC flag set. */ - if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN && - (adev->flags & AMD_IS_APU) && - (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && + if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && - amdgpu_device_asic_has_dc_support(adev->asic_type)) - domain |= AMDGPU_GEM_DOMAIN_GTT; + amdgpu_device_asic_has_dc_support(adev->asic_type)) { + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + case CHIP_RAVEN: + /* enable S/G on PCO and RV2 */ + if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + default: + break; + } + } #endif return domain;
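The rewritten check above replaces an ASIC-range test with an explicit per-chip list, so scatter/gather display (scanout from GTT) is only offered where it is known to work: Carrizo, Stoney, and the PCO/RV2 revisions of Raven. A small consumer-side sketch (example_pin_fb() is hypothetical; amdgpu_bo_pin() is the existing pin helper):

/* illustrative: picking pin domains for a framebuffer from the mask */
static int example_pin_fb(struct amdgpu_device *adev, struct amdgpu_bo *bo,
			  u64 bo_flags)
{
	u32 domains = amdgpu_display_supported_domains(adev, bo_flags);

	/* allow the GTT fallback only when S/G display is supported */
	if (domains & AMDGPU_GEM_DOMAIN_GTT)
		return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM |
					 AMDGPU_GEM_DOMAIN_GTT);

	return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
}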
@@ -690,7 +700,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct amdgpu_encoder *amdgpu_encoder; struct drm_connector *connector; - struct amdgpu_connector *amdgpu_connector; u32 src_v = 1, dst_v = 1; u32 src_h = 1, dst_h = 1; @@ -702,7 +711,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, continue; amdgpu_encoder = to_amdgpu_encoder(encoder); connector = amdgpu_get_connector_for_encoder(encoder); - amdgpu_connector = to_amdgpu_connector(connector); /* set scaling */ if (amdgpu_encoder->rmx_type == RMX_OFF) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e2eec7b66334..a59cd47aa6c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -360,10 +360,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, return ERR_PTR(-EPERM); buf = drm_gem_prime_export(gobj, flags); - if (!IS_ERR(buf)) { - buf->file->f_mapping = gobj->dev->anon_inode->i_mapping; + if (!IS_ERR(buf)) buf->ops = &amdgpu_dmabuf_ops; - } return buf; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 9cc270efee7c..a2e8c3dfb4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -946,20 +946,63 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block bool swsmu = is_support_sw_smu(adev); switch (block_type) { - case AMD_IP_BLOCK_TYPE_GFX: case AMD_IP_BLOCK_TYPE_UVD: - case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_VCE: + if (swsmu) { + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) { + /* + * TODO: need a better lock mechanism + * + * Here adev->pm.mutex lock protection is enforced on + * UVD and VCE cases only. Since for other cases, there + * may already be lock protection in amdgpu_pm.c. + * This is a quick fix for the deadlock issue below. + * INFO: task ocltst:2028 blocked for more than 120 seconds. + * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu + * echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + * ocltst D 0 2028 2026 0x00000000 + * Call Trace: + * __schedule+0x2c0/0x870 + * schedule+0x2c/0x70 + * schedule_preempt_disabled+0xe/0x10 + * __mutex_lock.isra.9+0x26d/0x4e0 + * __mutex_lock_slowpath+0x13/0x20 + * ? 
__mutex_lock_slowpath+0x13/0x20 + * mutex_lock+0x2f/0x40 + * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu] + * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu] + * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu] + * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu] + * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu] + * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu] + */ + mutex_lock(&adev->pm.mutex); + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + mutex_unlock(&adev->pm.mutex); + } + break; + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_SDMA: if (swsmu) ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); - else + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); break; + case AMD_IP_BLOCK_TYPE_JPEG: + if (swsmu) + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + break; case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: - ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); break; default: @@ -968,3 +1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block return ret; } + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + } + + return ret; +} + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_exit(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + } + + return ret; +} + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_set_mp1_state(&adev->smu, mp1_state); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + ret = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + mp1_state); + } + + return ret; +} + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + bool baco_cap; + + if (is_support_sw_smu(adev)) { + return smu_baco_is_support(smu); + } else { + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) + return false; + + if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap)) + return false; + + return baco_cap ? 
true : false; + } +} + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) { + return smu_mode2_reset(smu); + } else { + if (!pp_funcs || !pp_funcs->asic_reset_mode_2) + return -ENOENT; + + return pp_funcs->asic_reset_mode_2(pp_handle); + } +} + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + dev_info(adev->dev, "GPU BACO reset\n"); + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + if (ret) + return ret; + + ret = smu_baco_exit(smu); + if (ret) + return ret; + } else { + if (!pp_funcs + || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + if (ret) + return ret; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) + ret = smu_switch_power_profile(&adev->smu, type, en); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) + ret = adev->powerplay.pp_funcs->switch_power_profile( + adev->powerplay.pp_handle, type, en); + + return ret; +} + +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate) +{ + int ret = 0; + + if (is_support_sw_smu_xgmi(adev)) + ret = smu_set_xgmi_pstate(&adev->smu, pstate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 2cfb677272af..902ca6c00cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -341,10 +341,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_switch_power_profile(adev, type, en) \ - ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type, en)) - #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) @@ -517,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate); + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en); + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state); + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 01a793a0cbf7..94e2fd758e01 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1004,7 +1004,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ - {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, @@ -1147,7 +1147,7 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - return amdgpu_device_suspend(drm_dev, true, true); + return amdgpu_device_suspend(drm_dev, true); } static int amdgpu_pmops_resume(struct device *dev) @@ -1155,13 +1155,14 @@ static int amdgpu_pmops_resume(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); /* GPU comes up enabled by the bios on resume */ - if (amdgpu_device_is_px(drm_dev)) { + if (amdgpu_device_supports_boco(drm_dev) || + amdgpu_device_supports_baco(drm_dev)) { pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); } - return amdgpu_device_resume(drm_dev, true, true); + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_freeze(struct device *dev) @@ -1170,7 +1171,7 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_dev->dev_private; int r; - r = amdgpu_device_suspend(drm_dev, false, true); + r = amdgpu_device_suspend(drm_dev, true); if (r) return r; return amdgpu_asic_reset(adev); @@ -1180,46 +1181,66 @@ static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - return amdgpu_device_resume(drm_dev, false, true); + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - return amdgpu_device_suspend(drm_dev, true, true); + return amdgpu_device_suspend(drm_dev, true); } static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - return amdgpu_device_resume(drm_dev, false, true); + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); - int ret; + struct amdgpu_device *adev = drm_dev->dev_private; + int ret, i; - if (!amdgpu_device_is_px(drm_dev)) { + if (!adev->runpm) { pm_runtime_forbid(dev); return -EBUSY; } - drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* wait for all rings to drain before suspending */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + if (ring && ring->sched.ready) { + ret = amdgpu_fence_wait_empty(ring); + if (ret) + return -EBUSY; + } + } + + if (amdgpu_device_supports_boco(drm_dev)) + drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; drm_kms_helper_poll_disable(drm_dev); - ret = amdgpu_device_suspend(drm_dev, false, false); - pci_save_state(pdev); - pci_disable_device(pdev); - pci_ignore_hotplug(pdev); - if (amdgpu_is_atpx_hybrid()) - pci_set_power_state(pdev, PCI_D3cold); - else if (!amdgpu_has_atpx_dgpu_power_cntl()) - pci_set_power_state(pdev, PCI_D3hot); - drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; + ret = amdgpu_device_suspend(drm_dev, false); + if (amdgpu_device_supports_boco(drm_dev)) { + /* Only need to handle PCI state in the driver for ATPX + * PCI core handles it for _PR3. 
+ */ + if (amdgpu_is_atpx_hybrid()) { + pci_ignore_hotplug(pdev); + } else { + pci_save_state(pdev); + pci_disable_device(pdev); + pci_ignore_hotplug(pdev); + pci_set_power_state(pdev, PCI_D3cold); + } + drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; + } else if (amdgpu_device_supports_baco(drm_dev)) { + amdgpu_device_baco_enter(drm_dev); + } return 0; } @@ -1228,34 +1249,45 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_dev->dev_private; int ret; - if (!amdgpu_device_is_px(drm_dev)) + if (!adev->runpm) return -EINVAL; - drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - - if (amdgpu_is_atpx_hybrid() || - !amdgpu_has_atpx_dgpu_power_cntl()) - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - ret = pci_enable_device(pdev); - if (ret) - return ret; - pci_set_master(pdev); - - ret = amdgpu_device_resume(drm_dev, false, false); + if (amdgpu_device_supports_boco(drm_dev)) { + drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + + /* Only need to handle PCI state in the driver for ATPX + * PCI core handles it for _PR3. + */ + if (amdgpu_is_atpx_hybrid()) { + pci_set_master(pdev); + } else { + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + ret = pci_enable_device(pdev); + if (ret) + return ret; + pci_set_master(pdev); + } + } else if (amdgpu_device_supports_baco(drm_dev)) { + amdgpu_device_baco_exit(drm_dev); + } + ret = amdgpu_device_resume(drm_dev, false); drm_kms_helper_poll_enable(drm_dev); - drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; + if (amdgpu_device_supports_boco(drm_dev)) + drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; return 0; } static int amdgpu_pmops_runtime_idle(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_dev->dev_private; struct drm_crtc *crtc; - if (!amdgpu_device_is_px(drm_dev)) { + if (!adev->runpm) { pm_runtime_forbid(dev); return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 143753d237e7..2672dc64a310 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -69,7 +69,7 @@ amdgpufb_release(struct fb_info *info, int user) return 0; } -static struct fb_ops amdgpufb_ops = { +static const struct fb_ops amdgpufb_ops = { .owner = THIS_MODULE, DRM_FB_HELPER_DEFAULT_OPS, .fb_open = amdgpufb_open, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 377fe20bce23..3c01252b1e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -34,6 +34,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/firmware.h> +#include <linux/pm_runtime.h> #include <drm/drm_debugfs.h> @@ -154,7 +155,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); - + pm_runtime_get_noresume(adev->ddev->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct dma_fence *old; @@ -234,6 +235,7 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) bool amdgpu_fence_process(struct amdgpu_ring *ring) { struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct amdgpu_device *adev = ring->adev; uint32_t seq, last_seq; 
int r; @@ -274,6 +276,8 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) BUG(); dma_fence_put(fence); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); } while (last_seq != seq); return true; @@ -737,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return 0; seq_printf(m, "gpu recover\n"); amdgpu_device_gpu_recover(adev, NULL); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 19705e399905..e01e681d2a60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -302,6 +302,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, * @pages: number of pages to bind * @pagelist: pages to bind * @dma_addr: DMA addresses of pages + * @flags: page table entry flags * * Binds the requested pages to the gart page table * (all asics). diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e00b46180d2e..0f960b498792 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -296,7 +296,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); + r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); if (r) return r; @@ -321,7 +321,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -543,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; - if (!is_support_sw_smu(adev) && - (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->set_powergating_by_smu)) - return; - - mutex_lock(&adev->gfx.gfx_off_mutex); if (!enable) @@ -641,7 +635,7 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); if (adev->gfx.funcs->query_ras_error_count) adev->gfx.funcs->query_ras_error_count(adev, err_data); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); } return AMDGPU_RAS_SUCCESS; } @@ -664,3 +658,95 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } + +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_rreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_rreg(ring, reg); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. 
this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which causes + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_read; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_read; + + return adev->wb.wb[kiq->reg_val_offs]; + +failed_kiq_read: + pr_err("failed to read reg:%x\n", reg); + return ~0; +} + +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_wreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_wreg(ring, reg, v); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which causes + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_write; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_write; + + return; + +failed_kiq_write: + pr_err("failed to write reg:%x\n", reg); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 0ae0a2715b0d..ca17ffb01301 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { @@ -90,6 +94,7 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; + uint32_t reg_val_offs; }; /* @@ -269,8 +274,12 @@ struct amdgpu_gfx { bool me_fw_write_wait; bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; + struct drm_gpu_scheduler *gfx_sched[AMDGPU_MAX_GFX_RINGS]; + uint32_t num_gfx_sched; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; + struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS]; + uint32_t num_compute_sched; unsigned num_compute_rings; struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; @@ -367,4 +376,6 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); #endif
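amdgpu_kiq_rreg() and amdgpu_kiq_wreg() above funnel register access through the KIQ ring instead of MMIO: a read/write packet is queued under the ring lock, a polling fence is emitted, and the caller polls (then retries with msleep() backoff) until the CP has executed the packet, with read results landing in the per-KIQ writeback slot. The round trip reduced to its skeleton (illustrative only; locking kept, retry and error handling omitted):

/* skeleton of the KIQ read round trip sketched from the code above */
static uint32_t example_kiq_read(struct amdgpu_device *adev, uint32_t reg)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;
	unsigned long flags;
	uint32_t seq;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg);	/* CP writes the result to wb */
	amdgpu_fence_emit_polling(ring, &seq);	/* sequence number to poll on */
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	if (amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT) < 1)
		return ~0;	/* timed out; the real code retries first */

	return adev->wb.wb[kiq->reg_val_offs];	/* per-KIQ writeback slot */
}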
uint32_t v); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a12f33c0f5df..5884ab590486 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -223,7 +223,7 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) u64 size_af, size_bf; if (amdgpu_sriov_vf(adev)) { - mc->agp_start = 0xffffffff; + mc->agp_start = 0xffffffffffff; mc->agp_end = 0x0; mc->agp_size = 0; @@ -333,3 +333,43 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) amdgpu_mmhub_ras_fini(adev); amdgpu_xgmi_ras_fini(adev); } + + /* + * The latest engine allocation on gfx9/10 is: + * Engine 2, 3: firmware + * Engine 0, 1, 4~16: amdgpu ring, + * subject to change when ring number changes + * Engine 17: Gart flushes + */ +#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 +#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 + +int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, + GFXHUB_FREE_VM_INV_ENGS_BITMAP}; + unsigned i; + unsigned vmhub, inv_eng; + + for (i = 0; i < adev->num_rings; ++i) { + ring = adev->rings[i]; + vmhub = ring->funcs->vmhub; + + inv_eng = ffs(vm_inv_engs[vmhub]); + if (!inv_eng) { + dev_err(adev->dev, "no VM inv eng for ring %s\n", + ring->name); + return -EINVAL; + } + + ring->vm_inv_eng = inv_eng - 1; + vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); + + dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", + ring->name, ring->vm_inv_eng, ring->funcs->vmhub); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index b499a3de8bb6..d3c27a3c43f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -216,6 +219,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) @@ -267,5 +273,6 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); +int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 6f9289735e31..3a67f6c046d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
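The engine allocator in amdgpu_gmc_allocate_vm_inv_eng() above is plain ffs() arithmetic over the free-engine bitmap: 0x1FFF3 leaves engines 0, 1 and 4-16 available while keeping 2, 3 (firmware) and 17 (GART flushes) reserved. A standalone, userspace-runnable sketch of the same walk:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	int main(void)
	{
		unsigned int free_engs = 0x1FFF3;	/* GFXHUB_FREE_VM_INV_ENGS_BITMAP */
		int ring;

		for (ring = 0; ring < 5; ring++) {
			int eng = ffs(free_engs);	/* 1-based index of lowest free bit */

			if (!eng)
				return 1;	/* mirrors the -EINVAL path above */
			free_engs &= ~(1u << (eng - 1));
			printf("ring %d -> inv eng %d\n", ring, eng - 1);
		}
		return 0;	/* allocates engines 0, 1, 4, 5, 6 in that order */
	}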
@@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, int r; if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) - return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false); + return amdgpu_sync_fence(sync, ring->vmid_wait, false); fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); if (!fences) @@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_sync_fence(adev, sync, &array->base, false); + r = amdgpu_sync_fence(sync, &array->base, false); dma_fence_put(ring->vmid_wait); ring->vmid_wait = &array->base; return r; @@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(adev, sync, tmp, false); + r = amdgpu_sync_fence(sync, tmp, false); return r; } needs_flush = true; @@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence, false); if (r) return r; @@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Good, we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence, false); if (r) return r; @@ -435,8 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id = idle; /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(ring->adev, &id->active, - fence, false); + r = amdgpu_sync_fence(&id->active, fence, false); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 6d8f05511aba..111a301ce878 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -66,7 +66,6 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, if (ih->ring == NULL) return -ENOMEM; - memset((void *)ih->ring, 0, ih->ring_size + 8); ih->gpu_addr = dma_addr; ih->wptr_addr = dma_addr + ih->ring_size; ih->wptr_cpu = &ih->ring[ih->ring_size / 4]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 30d540d23b77..5ed4227f304b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -55,6 +55,7 @@ #include "amdgpu_connectors.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_ras.h" #include <linux/pm_runtime.h> @@ -162,13 +163,15 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) * register to check whether the interrupt is triggered or not, and properly * ack the interrupt if it is there */ - if (adev->nbio.funcs && - adev->nbio.funcs->handle_ras_controller_intr_no_bifring) - adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); - - if (adev->nbio.funcs && - adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) - adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) { + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_controller_intr_no_bifring) + adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); + + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) + 
adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); + } return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4fb20e870e63..d42be880a236 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; - job->owner = owner; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); priority = job->base.s_priority; @@ -193,8 +192,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, fence = amdgpu_sync_get_fence(&job->sync, &explicit); if (fence && explicit) { if (drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(ring->adev, &job->sched_sync, - fence, false); + r = amdgpu_sync_fence(&job->sched_sync, fence, false); if (r) DRM_ERROR("Error adding fence (%d)\n", r); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index dc7ee9358dcd..3f7b8433d179 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -49,7 +49,6 @@ struct amdgpu_job { uint32_t preamble_status; uint32_t preemption_status; uint32_t num_ibs; - void *owner; bool vm_needs_flush; uint64_t vm_pd_addr; unsigned vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c new file mode 100644 index 000000000000..5727f00afc8e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -0,0 +1,211 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15d.h" +#include "soc15_common.h" + +#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000) + +static void amdgpu_jpeg_idle_work_handler(struct work_struct *work); + +int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) +{ + INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler); + + return 0; +} + +int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) +{ + int i; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec); + } + + return 0; +} + +int amdgpu_jpeg_suspend(struct amdgpu_device *adev) +{ + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + return 0; +} + +int amdgpu_jpeg_resume(struct amdgpu_device *adev) +{ + return 0; +} + +static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, jpeg.idle_work.work); + unsigned int fences = 0; + unsigned int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec); + } + + if (fences == 0) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + else + schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); +} + +void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (set_clocks) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_UNGATE); +} + +void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring) +{ + schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); +} + +int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) + return r; + + amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + +static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + const unsigned ib_size_dw = 16; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + + ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); + ib->ptr[1] = 0xDEADBEEF; + for (i = 2; i < 16; i += 2) { + ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err; + + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + 
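The begin_use/end_use pair above implements the usual lazy power-gating scheme: the first user after an idle period finds no pending idle work to cancel, so it ungates the block; every end_use re-arms the timer so the block gates itself after JPEG_IDLE_TIMEOUT of silence. A sketch of how an IP file is expected to wire these hooks into its ring (.begin_use/.end_use are existing amdgpu_ring_funcs members; the rest of the initializer is elided):

	/* Sketch: wiring the idle-gating hooks, jpeg_v2_0-style. */
	static const struct amdgpu_ring_funcs jpeg_example_dec_ring_funcs = {
		/* ...emit/test callbacks elided... */
		.begin_use = amdgpu_jpeg_ring_begin_use,	/* cancel idle work; ungate if idle */
		.end_use = amdgpu_jpeg_ring_end_use,		/* re-arm JPEG_IDLE_TIMEOUT */
	};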
struct dma_fence *fence = NULL; + long r = 0; + + r = amdgpu_jpeg_dec_set_reg(ring, 1, &fence); + if (r) + goto error; + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + r = -ETIMEDOUT; + goto error; + } else if (r < 0) { + goto error; + } else { + r = 0; + } + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + dma_fence_put(fence); +error: + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h new file mode 100644 index 000000000000..bd9ef9cc86de --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -0,0 +1,64 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
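Both JPEG tests above use the same handshake: seed the pitch register with 0xCAFEDEAD over MMIO, have the engine write 0xDEADBEEF (via a ring packet in test_ring, via an IB in test_ib), then poll MMIO until the value flips. Stripped to its core, against a hypothetical scratch register offset:

	/* Sketch of the seed/submit/poll handshake shared by both tests. */
	static int scratch_handshake_test(struct amdgpu_ring *ring, uint32_t reg)
	{
		struct amdgpu_device *adev = ring->adev;
		unsigned int i;
		int r;

		WREG32(reg, 0xCAFEDEAD);		/* known "stale" seed */

		r = amdgpu_ring_alloc(ring, 3);
		if (r)
			return r;
		amdgpu_ring_write(ring, PACKET0(reg, 0));
		amdgpu_ring_write(ring, 0xDEADBEEF);	/* engine writes this on execute */
		amdgpu_ring_commit(ring);

		for (i = 0; i < adev->usec_timeout; i++) {
			if (RREG32(reg) == 0xDEADBEEF)
				return 0;		/* ring is alive */
			udelay(1);
		}
		return -ETIMEDOUT;
	}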
+ * + */ + +#ifndef __AMDGPU_JPEG_H__ +#define __AMDGPU_JPEG_H__ + +#define AMDGPU_MAX_JPEG_INSTANCES 2 + +#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) +#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) + +struct amdgpu_jpeg_reg{ + unsigned jpeg_pitch; +}; + +struct amdgpu_jpeg_inst { + struct amdgpu_ring ring_dec; + struct amdgpu_irq_src irq; + struct amdgpu_jpeg_reg external; +}; + +struct amdgpu_jpeg { + uint8_t num_jpeg_inst; + struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; + struct amdgpu_jpeg_reg internal; + struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES]; + uint32_t num_jpeg_sched; + unsigned harvest_config; + struct delayed_work idle_work; + enum amd_powergating_state cur_state; +}; + +int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); +int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev); +int amdgpu_jpeg_suspend(struct amdgpu_device *adev); +int amdgpu_jpeg_resume(struct amdgpu_device *adev); + +void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring); + +int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +#endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b6db28a570c2..60591dbc2097 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -91,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (amdgpu_sriov_vf(adev)) amdgpu_virt_request_full_gpu(adev, false); - if (amdgpu_device_is_px(dev)) { + if (adev->runpm) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -150,8 +150,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) } dev->dev_private = (void *)adev; - if ((amdgpu_runtime_pm != 0) && - amdgpu_has_atpx() && + if (amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && ((flags & AMD_IS_APU) == 0) && @@ -170,6 +169,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) goto out; } + if (amdgpu_device_supports_boco(dev) && + (amdgpu_runtime_pm != 0)) /* enable runpm by default */ + adev->runpm = true; + else if (amdgpu_device_supports_baco(dev) && + (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 */ + adev->runpm = true; + /* Call ACPI methods: require modeset init * but failure is not fatal */ @@ -180,7 +186,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - if (amdgpu_device_is_px(dev)) { + if (adev->runpm) { dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP); pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); @@ -193,7 +199,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ - if (adev->rmmio && amdgpu_device_is_px(dev)) + if (adev->rmmio && adev->runpm) pm_runtime_put_noidle(dev->dev); amdgpu_driver_unload_kms(dev); } @@ -293,6 +299,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->dm.dmcu_fw_version; fw_info->feature = 0; break; + case AMDGPU_INFO_FW_DMCUB: + fw_info->ver = adev->dm.dmcub_fw_version; + fw_info->feature = 0; + break; default: return -EINVAL; } @@ -396,12 +406,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, ib_size_alignment = 1; break; case AMDGPU_HW_IP_VCN_JPEG: - type = 
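The two runpm branches in amdgpu_driver_load_kms above encode a simple policy: BOCO/ATPX platforms get runtime PM whenever it is not explicitly disabled, while BACO-only boards need an explicit opt-in. Collapsed into one expression (a sketch; the patch keeps the two if/else-if arms):

	/* amdgpu_runtime_pm: -1 = auto (default), 0 = off, 1 = force on */
	adev->runpm = (amdgpu_device_supports_boco(dev) && amdgpu_runtime_pm != 0) ||
		      (amdgpu_device_supports_baco(dev) && amdgpu_runtime_pm > 0);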
AMD_IP_BLOCK_TYPE_VCN; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->uvd.harvest_config & (1 << i)) + type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? + AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) continue; - if (adev->vcn.inst[i].ring_jpeg.sched.ready) + if (adev->jpeg.inst[i].ring_dec.sched.ready) ++num_rings; } ib_start_alignment = 16; @@ -517,9 +529,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_VCN_DEC: case AMDGPU_HW_IP_VCN_ENC: - case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; break; + case AMDGPU_HW_IP_VCN_JPEG: + type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? + AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; + break; default: return -EINVAL; } @@ -688,10 +703,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (adev->pm.dpm_enabled) { dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; - } else if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && - adev->virt.ops->get_pp_clk) { - dev_info.max_engine_clock = amdgpu_virt_get_sclk(adev, false) * 10; - dev_info.max_memory_clock = amdgpu_virt_get_mclk(adev, false) * 10; } else { dev_info.max_engine_clock = adev->clock.default_sclk * 10; dev_info.max_memory_clock = adev->clock.default_mclk * 10; @@ -1394,6 +1405,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* DMCUB */ + query_fw.fw_type = AMDGPU_INFO_FW_DMCUB; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index f205f56e3358..b03b1eb7ba04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -37,6 +37,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/nospec.h> +#include <linux/pm_runtime.h> #include "hwmgr.h" #define WIDTH_4K 3840 @@ -158,6 +159,14 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) @@ -170,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, pm = adev->pm.dpm.user_state; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : (pm == POWER_STATE_TYPE_BALANCED) ? 
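Every sysfs handler reworked in amdgpu_pm.c below follows the same bracket: a VF bailout before anything else, pm_runtime_get_sync() to wake the device, the hardware access, then mark_last_busy/put_autosuspend on every exit. The skeleton, show-side (a sketch; the hardware access is a placeholder):

	static ssize_t example_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		struct drm_device *ddev = dev_get_drvdata(dev);
		struct amdgpu_device *adev = ddev->dev_private;
		ssize_t size;
		int ret;

		if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
			return 0;			/* VF without PP control: empty file */

		ret = pm_runtime_get_sync(ddev->dev);	/* wake the GPU if suspended */
		if (ret < 0)
			return ret;

		size = snprintf(buf, PAGE_SIZE, "...\n");	/* hw access goes here */

		pm_runtime_mark_last_busy(ddev->dev);	/* restart the autosuspend timer */
		pm_runtime_put_autosuspend(ddev->dev);
		return size;
	}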
"balanced" : "performance"); @@ -183,6 +195,10 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; if (strncmp("battery", buf, strlen("battery")) == 0) state = POWER_STATE_TYPE_BATTERY; @@ -190,10 +206,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, state = POWER_STATE_TYPE_BALANCED; else if (strncmp("performance", buf, strlen("performance")) == 0) state = POWER_STATE_TYPE_PERFORMANCE; - else { - count = -EINVAL; - goto fail; - } + else + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); @@ -206,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, adev->pm.dpm.user_state = state; mutex_unlock(&adev->pm.mutex); - /* Can't set dpm state when the card is off */ - if (!(adev->flags & AMD_IS_PX) || - (ddev->switch_power_state == DRM_SWITCH_POWER_ON)) - amdgpu_pm_compute_clocks(adev); + amdgpu_pm_compute_clocks(adev); } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -282,13 +299,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_dpm_forced_level level = 0xff; + int ret; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return snprintf(buf, PAGE_SIZE, "off\n"); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); @@ -297,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, else level = adev->pm.dpm.forced_level; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : (level == AMD_DPM_FORCED_LEVEL_LOW) ? 
"low" : @@ -320,9 +341,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, enum amd_dpm_forced_level current_level = 0xff; int ret = 0; - /* Can't force performance level when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; if (strncmp("low", buf, strlen("low")) == 0) { @@ -344,30 +363,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, } else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) { level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; } else { - count = -EINVAL; - goto fail; + return -EINVAL; } - /* handle sriov case here */ - if (amdgpu_sriov_vf(adev)) { - if (amdgim_is_hwperf(adev) && - adev->virt.ops->force_dpm_level) { - mutex_lock(&adev->pm.mutex); - adev->virt.ops->force_dpm_level(adev, level); - mutex_unlock(&adev->pm.mutex); - return count; - } else { - return -EINVAL; - } - } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) current_level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) current_level = amdgpu_dpm_get_performance_level(adev); - if (current_level == level) + if (current_level == level) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; + } /* profile_exit setting is valid only when current mode is in profile mode */ if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | @@ -376,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { pr_err("Currently not in any profile mode!\n"); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } if (is_support_sw_smu(adev)) { ret = smu_force_performance_level(&adev->smu, level); - if (ret) - count = -EINVAL; + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { - count = -EINVAL; mutex_unlock(&adev->pm.mutex); - goto fail; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; } ret = amdgpu_dpm_force_performance_level(adev, level); - if (ret) - count = -EINVAL; - else + if (ret) { + mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } else { adev->pm.dpm.forced_level = level; + } mutex_unlock(&adev->pm.mutex); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); -fail: return count; } @@ -411,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_get_power_num_states(&adev->smu, &data); if (ret) @@ -418,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, } else if (adev->powerplay.pp_funcs->get_pp_num_states) amdgpu_dpm_get_pp_num_states(adev, &data); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); for (i = 0; i < data.nums; i++) buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d 
%s\n", i, @@ -440,6 +470,13 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, enum amd_pm_state_type pm = 0; int i = 0, ret = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { pm = smu_get_current_power_state(smu); ret = smu_get_power_num_states(smu, &data); @@ -451,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, amdgpu_dpm_get_pp_num_states(adev, &data); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + for (i = 0; i < data.nums; i++) { if (pm == data.states[i]) break; @@ -469,6 +509,9 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + if (adev->pp_force_state_enabled) return amdgpu_get_pp_cur_state(dev, attr, buf); else @@ -486,6 +529,9 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, unsigned long idx; int ret; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + if (strlen(buf) == 1) adev->pp_force_state_enabled = false; else if (is_support_sw_smu(adev)) @@ -495,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct pp_states_info data; ret = kstrtoul(buf, 0, &idx); - if (ret || idx >= ARRAY_SIZE(data.states)) { - count = -EINVAL; - goto fail; - } + if (ret || idx >= ARRAY_SIZE(data.states)) + return -EINVAL; + idx = array_index_nospec(idx, ARRAY_SIZE(data.states)); amdgpu_dpm_get_pp_num_states(adev, &data); state = data.states[idx]; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { @@ -510,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); } -fail: + return count; } @@ -533,17 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; char *table = NULL; - int size; + int size, ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { size = smu_sys_get_pp_table(&adev->smu, (void **)&table); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (size < 0) return size; - } - else if (adev->powerplay.pp_funcs->get_pp_table) + } else if (adev->powerplay.pp_funcs->get_pp_table) { size = amdgpu_dpm_get_pp_table(adev, &table); - else + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (size < 0) + return size; + } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return 0; + } if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; @@ -562,13 +629,26 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, 
(void *)buf, count); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return ret; + } } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -654,6 +734,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, const char delimiter[3] = {' ', '\n', '\0'}; uint32_t type; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + if (count > 127) return -EINVAL; @@ -689,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else { if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } if (type == PP_OD_COMMIT_DPM_TABLE) { @@ -708,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -724,24 +823,33 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - uint32_t size = 0; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); - return size; } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); - return size; } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return size; } /** @@ -770,21 +878,36 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev, uint64_t featuremask; int ret; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + ret = kstrtou64(buf, 0, &featuremask); if (ret) return -EINVAL; pr_debug("featuremask = 0x%llx\n", featuremask); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = 
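Because the runtime-PM bracket is open-coded, every error return between get and put has to repeat the mark_last_busy/put_autosuspend pair, as the stores above do before each return -EINVAL. Funneling the hardware access through one helper gives a single exit instead (a sketch of the alternative, not what the patch does; parse_and_apply() is hypothetical):

	static ssize_t example_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
	{
		struct drm_device *ddev = dev_get_drvdata(dev);
		int ret;

		ret = pm_runtime_get_sync(ddev->dev);
		if (ret < 0)
			return ret;

		ret = parse_and_apply(ddev->dev_private, buf, count);	/* hypothetical */

		pm_runtime_mark_last_busy(ddev->dev);
		pm_runtime_put_autosuspend(ddev->dev);

		return ret ? ret : count;
	}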
amdgpu_dpm_set_ppfeature_status(adev, featuremask); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -795,13 +918,27 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; - if (is_support_sw_smu(adev)) { - return smu_sys_get_pp_feature_mask(&adev->smu, buf); - } else if (adev->powerplay.pp_funcs->get_ppfeature_status) - return amdgpu_dpm_get_ppfeature_status(adev, buf); + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + + if (is_support_sw_smu(adev)) + size = smu_sys_get_pp_feature_mask(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_ppfeature_status) + size = amdgpu_dpm_get_ppfeature_status(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); - return snprintf(buf, PAGE_SIZE, "\n"); + return size; } /** @@ -840,17 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && - adev->virt.ops->get_pp_clk) - return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /* @@ -899,18 +1046,25 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, int ret; uint32_t mask = 0; - if (amdgpu_sriov_vf(adev)) - return 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -923,17 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; - if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && - adev->virt.ops->get_pp_clk) - return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if 
(adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, @@ -943,21 +1107,28 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - int ret; uint32_t mask = 0; + int ret; - if (amdgpu_sriov_vf(adev)) - return 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -970,13 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, @@ -989,14 +1174,26 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1010,13 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, 
PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, @@ -1029,14 +1240,26 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1050,13 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, @@ -1069,14 +1306,26 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1090,13 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); + size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, @@ -1109,14 +1372,26 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return 
ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1131,12 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1150,12 +1436,17 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, int ret; long int value; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); @@ -1171,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1182,12 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1201,12 +1505,17 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, int ret; long int value; + if (amdgpu_sriov_vf(adev)) + return 0; + ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); @@ -1222,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1252,13 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) - return smu_get_power_profile_mode(&adev->smu, buf); + size = smu_get_power_profile_mode(&adev->smu, buf); else if (adev->powerplay.pp_funcs->get_power_profile_mode) - return amdgpu_dpm_get_power_profile_mode(adev, buf); + size = 
amdgpu_dpm_get_power_profile_mode(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); - return snprintf(buf, PAGE_SIZE, "\n"); + return size; } @@ -1283,7 +1608,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, tmp[1] = '\0'; ret = kstrtol(tmp, 0, &profile_mode); if (ret) - goto fail; + return -EINVAL; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return -EINVAL; if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { if (count < 2 || count > 127) @@ -1295,23 +1623,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, while (tmp_str[0]) { sub_str = strsep(&tmp_str, delimiter); ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; parameter_size++; while (isspace(*tmp_str)) tmp_str++; } } parameter[parameter_size] = profile_mode; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (!ret) return count; -fail: + return -EINVAL; } @@ -1331,10 +1666,20 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1357,10 +1702,20 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1386,8 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint64_t count0, count1; + int ret; + + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; amdgpu_asic_get_pcie_usage(adev, &count0, &count1); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n", count0, count1, pcie_get_mps(adev->pdev)); } @@ -1409,6 +1776,9 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + if (adev->unique_id) return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); @@ -1472,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - 
struct drm_device *ddev = adev->ddev; int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); - /* Can't get temperature when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (channel >= PP_TEMP_MAX) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_EDGE: /* get current edge temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_MEM: /* get current memory temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, (void *)&temp, &size); - if (r) - return r; + break; + default: + r = -EINVAL; break; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (r) + return r; + return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1603,15 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode); } @@ -1621,27 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, size_t count) { struct amdgpu_device *adev = dev_get_drvdata(dev); - int err; + int err, ret; int value; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - err = kstrtoint(buf, 10, &value); if (err) return err; + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, value); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1668,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; } err = kstrtou32(buf, 10, 
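The temperature handler above maps the hwmon channel index onto a PP sensor with a three-way switch; the same mapping as a lookup table (a hypothetical restatement of the switch, not a behavior change):

	/* channel -> sensor, equivalent to the switch in amdgpu_hwmon_show_temp */
	static const int temp_sensor_for_channel[PP_TEMP_MAX] = {
		[PP_TEMP_JUNCTION] = AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
		[PP_TEMP_EDGE]     = AMDGPU_PP_SENSOR_EDGE_TEMP,
		[PP_TEMP_MEM]      = AMDGPU_PP_SENSOR_MEM_TEMP,
	};

	/* then, inside the handler, after the channel bound check: */
	r = amdgpu_dpm_read_sensor(adev, temp_sensor_for_channel[channel],
				   (void *)&temp, &size);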
&value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } value = (value * 100) / 255; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_percent(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->set_fan_speed_percent) err = amdgpu_dpm_set_fan_speed_percent(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1708,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->get_fan_speed_percent) err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; speed = (speed * 255) / 100; @@ -1736,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", speed); } @@ -1763,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1780,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1796,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if 
(is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", rpm); } @@ -1823,32 +2242,40 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - if (pwm_mode != AMD_FAN_CTRL_MANUAL) + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -ENODATA; - - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + } err = kstrtou32(buf, 10, &value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_rpm(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) err = amdgpu_dpm_set_fan_speed_rpm(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1859,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 
0 : 1); } @@ -1881,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - - err = kstrtoint(buf, 10, &value); if (err) return err; @@ -1898,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, else return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1914,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddgfx; int r, size = sizeof(vddgfx); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1944,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddnb; int r, size = sizeof(vddnb); @@ -1952,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, if (!(adev->flags & AMD_IS_APU)) return -EINVAL; - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1978,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 query = 0; int r, size = sizeof(u32); unsigned uw; - /* Can't get power when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2013,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, true, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 
1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, @@ -2031,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, false, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } @@ -2053,19 +2524,29 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, int err; u32 value; + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + err = kstrtou32(buf, 10, &value); if (err) return err; value = value / 1000000; /* convert to Watt */ - if (is_support_sw_smu(adev)) { + + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + + if (is_support_sw_smu(adev)) err = smu_set_power_limit(&adev->smu, value); - } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - } else { + else err = -EINVAL; - } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); if (err) return err; @@ -2078,18 +2559,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t sclk; int r, size = sizeof(sclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2108,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t mclk; int r, size = sizeof(mclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; 
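/*
 * Annotation: the bracket just above repeats across every hwmon handler
 * in this patch -- take a runtime-PM reference before touching the
 * hardware, drop it on every exit path. A minimal sketch of the
 * converted shape; amdgpu_hwmon_show_example() and its choice of sensor
 * are placeholders, not part of the patch:
 */

#include <linux/pm_runtime.h>

static ssize_t amdgpu_hwmon_show_example(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	struct amdgpu_device *adev = dev_get_drvdata(dev);
	u32 value = 0;
	int r, size = sizeof(value);

	/* wake the GPU; this replaces the old AMD_IS_PX power-state test */
	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
				   (void *)&value, &size);

	/* re-arm the autosuspend timer and drop the reference */
	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	if (r)
		return r;

	return snprintf(buf, PAGE_SIZE, "%u\n", value);
}

/*
 * Once pm_runtime_get_sync() has succeeded, every error path must still
 * reach the put -- which is why the early returns inside the temperature
 * switch statement above were converted to break.
 */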
	/* get the sclk */
	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
				   (void *)&mclk, &size);
+
+	pm_runtime_mark_last_busy(adev->ddev->dev);
+	pm_runtime_put_autosuspend(adev->ddev->dev);
+
	if (r)
		return r;

@@ -2299,6 +2784,23 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
	struct amdgpu_device *adev = dev_get_drvdata(dev);
	umode_t effective_mode = attr->mode;

+	/* under multi-vf mode, the hwmon attributes are all not supported */
+	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+		return 0;
+
+	/* there is no fan under pp one vf mode */
+	if (amdgpu_sriov_is_pp_one_vf(adev) &&
+	    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+	     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+	     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+	     attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
+		return 0;
+
	/* Skip fan attributes if fan is not present */
	if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
	    attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||

@@ -2666,17 +3168,12 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
 void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
 {
	int ret = 0;
-	if (is_support_sw_smu(adev)) {
-	    ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_UVD, enable);
-	    if (ret)
-		DRM_ERROR("[SW SMU]: dpm enable uvd failed, state = %s, ret = %d. \n",
-			  enable ? "true" : "false", ret);
-	} else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
-		/* enable/disable UVD */
-		mutex_lock(&adev->pm.mutex);
-		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
-		mutex_unlock(&adev->pm.mutex);
-	}
+
+	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
+	if (ret)
+		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
+			  enable ? "enable" : "disable", ret);
+
	/* enable/disable Low Memory PState for UVD (4k videos) */
	if (adev->asic_type == CHIP_STONEY &&
	    adev->uvd.decode_image_width >= WIDTH_4K) {

@@ -2693,17 +3190,11 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
 void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 {
	int ret = 0;
-	if (is_support_sw_smu(adev)) {
-	    ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCE, enable);
-	    if (ret)
-		DRM_ERROR("[SW SMU]: dpm enable vce failed, state = %s, ret = %d. \n",
-			  enable ? "true" : "false", ret);
-	} else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
-		/* enable/disable VCE */
-		mutex_lock(&adev->pm.mutex);
-		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
-		mutex_unlock(&adev->pm.mutex);
-	}
+
+	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
+	if (ret)
+		DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
+			  enable ?
"enable" : "disable", ret); } void amdgpu_pm_print_power_states(struct amdgpu_device *adev) @@ -2718,42 +3209,14 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) } -int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev) +void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable) { int ret = 0; - if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) - return ret; - - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_sclk\n"); - return ret; - } - - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_mclk\n"); - return ret; - } - - ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level); - if (ret) { - DRM_ERROR("failed to create device file for dpm state\n"); - return ret; - } - - return ret; -} - -void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev) -{ - if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) - return; - - device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); - device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); - device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable); + if (ret) + DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n", + enable ? "enable" : "disable", ret); } int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) @@ -3163,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - struct drm_device *ddev = adev->ddev; u32 flags = 0; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); @@ -3173,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) if (!adev->pm.dpm_enabled) { seq_printf(m, "dpm not enabled\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); return 0; } - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { - seq_printf(m, "PX asic powered off\n"); - } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { + + if (!is_support_sw_smu(adev) && + adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); else seq_printf(m, "Debugfs support not implemented for this asic\n"); mutex_unlock(&adev->pm.mutex); + r = 0; } else { - return amdgpu_debugfs_pm_info_pp(m, adev); + r = amdgpu_debugfs_pm_info_pp(m, adev); } - return 0; + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return r; } static const struct drm_info_list amdgpu_pm_info_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index ef31448ee8d8..3da1da277805 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -41,5 +41,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_thermal_work_handler(struct work_struct *work); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); 
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable); +void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 0e6dba9f60f0..07914e34bc25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: if (!(flags & PERF_EF_RELOAD)) - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0); break; default: break; @@ -101,13 +101,13 @@ static void amdgpu_perf_read(struct perf_event *event) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf, + pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf, &count); break; default: count = 0; break; - }; + } } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev); local64_add(count - prev, &event->count); @@ -126,11 +126,11 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0); break; default: break; - }; + } WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; @@ -156,11 +156,11 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); break; default: return 0; - }; + } if (retval) return retval; @@ -184,11 +184,11 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1); break; default: break; - }; + } perf_event_update_userpage(event); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e1b8d8daeafc..3a1570dafe34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -158,7 +158,7 @@ psp_cmd_submit_buf(struct psp_context *psp, memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); index = atomic_inc_return(&psp->fence_value); - ret = psp_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index); + ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index); if (ret) { atomic_dec(&psp->fence_value); mutex_unlock(&psp->mutex); @@ -191,9 +191,9 @@ psp_cmd_submit_buf(struct psp_context *psp, if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_DEBUG_DRIVER("psp command (0x%X) failed and response status is (0x%X)\n", + DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n", psp->cmd_buf_mem->cmd_id, - psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); + psp->cmd_buf_mem->resp.status); if (!timeout) { mutex_unlock(&psp->mutex); return -EINVAL; @@ -318,35 +318,17 @@ static int psp_tmr_load(struct psp_context *psp) return ret; } -static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t asd_mc, uint64_t asd_mc_shared, - uint32_t size, uint32_t shared_size) +static void 
psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t asd_mc, uint32_t size) { cmd->cmd_id = GFX_CMD_ID_LOAD_ASD; cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc); cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc); cmd->cmd.cmd_load_ta.app_len = size; - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - -static int psp_asd_init(struct psp_context *psp) -{ - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for shared ASD <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->asd_shared_bo, - &psp->asd_shared_mc_addr, - &psp->asd_shared_buf); - - return ret; + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = 0; + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 0; + cmd->cmd.cmd_load_ta.cmd_buf_len = 0; } static int psp_asd_load(struct psp_context *psp) @@ -368,11 +350,49 @@ static int psp_asd_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size); - psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr, - psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); + psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->asd_ucode_size); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + if (!ret) { + psp->asd_context.asd_initialized = true; + psp->asd_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = session_id; +} + +static int psp_asd_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->asd_context.asd_initialized) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (!ret) + psp->asd_context.asd_initialized = false; kfree(cmd); @@ -407,18 +427,20 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, return ret; } -static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, - uint32_t xgmi_ta_size, uint32_t shared_size) +static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t ta_bin_mc, + uint32_t ta_bin_size, + uint64_t ta_shared_mc, + uint32_t ta_shared_size) { - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc); - cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size; + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); + cmd->cmd.cmd_load_ta.app_len = ta_bin_size; - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 
upper_32_bits(ta_shared_mc); + cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size; } static int psp_xgmi_init_shared_buf(struct psp_context *psp) @@ -438,6 +460,36 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp) return ret; } +static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; +} + +int psp_ta_invoke(struct psp_context *psp, + uint32_t ta_cmd_id, + uint32_t session_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + static int psp_xgmi_load(struct psp_context *psp) { int ret; @@ -446,8 +498,6 @@ static int psp_xgmi_load(struct psp_context *psp) /* * TODO: bypass the loading in sriov for now */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) @@ -456,9 +506,11 @@ static int psp_xgmi_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); - psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->xgmi_context.xgmi_shared_mc_addr, - psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE); + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_xgmi_ucode_size, + psp->xgmi_context.xgmi_shared_mc_addr, + PSP_XGMI_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -473,29 +525,25 @@ static int psp_xgmi_load(struct psp_context *psp) return ret; } -static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t xgmi_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; - cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id; -} - static int psp_xgmi_unload(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + struct amdgpu_device *adev = psp->adev; + + /* XGMI TA unload currently is not supported on Arcturus */ + if (adev->asic_type == CHIP_ARCTURUS) + return 0; /* * TODO: bypass the unloading in sriov for now */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; - psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -505,40 +553,9 @@ static int psp_xgmi_unload(struct psp_context *psp) return ret; } -static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t xgmi_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - int ret; - struct psp_gfx_cmd_resp *cmd; - - /* - * TODO: bypass the loading in sriov for now - */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->xgmi_context.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, - 
psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; + return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id); } static int psp_xgmi_terminate(struct psp_context *psp) @@ -594,20 +611,6 @@ static int psp_xgmi_initialize(struct psp_context *psp) } // ras begin -static void psp_prep_ras_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t ras_ta_mc, uint64_t ras_mc_shared, - uint32_t ras_ta_size, uint32_t shared_size) -{ - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ras_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ras_ta_mc); - cmd->cmd.cmd_load_ta.app_len = ras_ta_size; - - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ras_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ras_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - static int psp_ras_init_shared_buf(struct psp_context *psp) { int ret; @@ -643,15 +646,17 @@ static int psp_ras_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); - psp_prep_ras_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ras.ras_shared_mc_addr, - psp->ta_ras_ucode_size, PSP_RAS_SHARED_MEM_SIZE); + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_ras_ucode_size, + psp->ras.ras_shared_mc_addr, + PSP_RAS_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->ras.ras_initialized = 1; + psp->ras.ras_initialized = true; psp->ras.session_id = cmd->resp.session_id; } @@ -660,13 +665,6 @@ static int psp_ras_load(struct psp_context *psp) return ret; } -static void psp_prep_ras_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ras_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; - cmd->cmd.cmd_unload_ta.session_id = ras_session_id; -} - static int psp_ras_unload(struct psp_context *psp) { int ret; @@ -682,7 +680,7 @@ static int psp_ras_unload(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_ras_ta_unload_cmd_buf(cmd, psp->ras.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -692,40 +690,15 @@ static int psp_ras_unload(struct psp_context *psp) return ret; } -static void psp_prep_ras_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t ras_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = ras_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - int ret; - struct psp_gfx_cmd_resp *cmd; - /* * TODO: bypass the loading in sriov for now */ if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_ras_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->ras.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; + return psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id); } int psp_ras_enable_features(struct psp_context *psp, @@ -771,7 +744,7 @@ static int psp_ras_terminate(struct psp_context *psp) if (ret) return ret; - psp->ras.ras_initialized = 0; + psp->ras.ras_initialized = false; /* free ras shared memory */ amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo, @@ -812,24 +785,6 @@ static int psp_ras_initialize(struct psp_context *psp) // ras end // HDCP 
start -static void psp_prep_hdcp_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t hdcp_ta_mc, - uint64_t hdcp_mc_shared, - uint32_t hdcp_ta_size, - uint32_t shared_size) -{ - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(hdcp_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(hdcp_ta_mc); - cmd->cmd.cmd_load_ta.app_len = hdcp_ta_size; - - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = - lower_32_bits(hdcp_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = - upper_32_bits(hdcp_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - static int psp_hdcp_init_shared_buf(struct psp_context *psp) { int ret; @@ -866,15 +821,16 @@ static int psp_hdcp_load(struct psp_context *psp) memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr, psp->ta_hdcp_ucode_size); - psp_prep_hdcp_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->hdcp_context.hdcp_shared_mc_addr, - psp->ta_hdcp_ucode_size, - PSP_HDCP_SHARED_MEM_SIZE); + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_hdcp_ucode_size, + psp->hdcp_context.hdcp_shared_mc_addr, + PSP_HDCP_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->hdcp_context.hdcp_initialized = 1; + psp->hdcp_context.hdcp_initialized = true; psp->hdcp_context.session_id = cmd->resp.session_id; } @@ -910,12 +866,6 @@ static int psp_hdcp_initialize(struct psp_context *psp) return 0; } -static void psp_prep_hdcp_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t hdcp_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; - cmd->cmd.cmd_unload_ta.session_id = hdcp_session_id; -} static int psp_hdcp_unload(struct psp_context *psp) { @@ -932,7 +882,7 @@ static int psp_hdcp_unload(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_hdcp_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -941,39 +891,15 @@ static int psp_hdcp_unload(struct psp_context *psp) return ret; } -static void psp_prep_hdcp_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t hdcp_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = hdcp_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - int ret; - struct psp_gfx_cmd_resp *cmd; - /* * TODO: bypass the loading in sriov for now */ if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_hdcp_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->hdcp_context.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; + return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id); } static int psp_hdcp_terminate(struct psp_context *psp) @@ -993,7 +919,7 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (ret) return ret; - psp->hdcp_context.hdcp_initialized = 0; + psp->hdcp_context.hdcp_initialized = false; /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, @@ -1005,22 +931,6 @@ static int psp_hdcp_terminate(struct psp_context *psp) // HDCP end // DTM start -static void psp_prep_dtm_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t dtm_ta_mc, - uint64_t dtm_mc_shared, - uint32_t dtm_ta_size, - 
uint32_t shared_size) -{ - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(dtm_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(dtm_ta_mc); - cmd->cmd.cmd_load_ta.app_len = dtm_ta_size; - - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(dtm_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(dtm_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - static int psp_dtm_init_shared_buf(struct psp_context *psp) { int ret; @@ -1056,15 +966,16 @@ static int psp_dtm_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); - psp_prep_dtm_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->dtm_context.dtm_shared_mc_addr, - psp->ta_dtm_ucode_size, - PSP_DTM_SHARED_MEM_SIZE); + psp_prep_ta_load_cmd_buf(cmd, + psp->fw_pri_mc_addr, + psp->ta_dtm_ucode_size, + psp->dtm_context.dtm_shared_mc_addr, + PSP_DTM_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->dtm_context.dtm_initialized = 1; + psp->dtm_context.dtm_initialized = true; psp->dtm_context.session_id = cmd->resp.session_id; } @@ -1102,39 +1013,15 @@ static int psp_dtm_initialize(struct psp_context *psp) return 0; } -static void psp_prep_dtm_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t dtm_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = dtm_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - int ret; - struct psp_gfx_cmd_resp *cmd; - /* * TODO: bypass the loading in sriov for now */ if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_dtm_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->dtm_context.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; + return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id); } static int psp_dtm_terminate(struct psp_context *psp) @@ -1154,7 +1041,7 @@ static int psp_dtm_terminate(struct psp_context *psp) if (ret) return ret; - psp->dtm_context.dtm_initialized = 0; + psp->dtm_context.dtm_initialized = false; /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, @@ -1211,45 +1098,6 @@ static int psp_hw_start(struct psp_context *psp) return ret; } - ret = psp_asd_init(psp); - if (ret) { - DRM_ERROR("PSP asd init failed!\n"); - return ret; - } - - ret = psp_asd_load(psp); - if (ret) { - DRM_ERROR("PSP load asd failed!\n"); - return ret; - } - - if (adev->gmc.xgmi.num_physical_nodes > 1) { - ret = psp_xgmi_initialize(psp); - /* Warning the XGMI seesion initialize failure - * Instead of stop driver initialization - */ - if (ret) - dev_err(psp->adev->dev, - "XGMI: Failed to initialize XGMI session\n"); - } - - if (psp->adev->psp.ta_fw) { - ret = psp_ras_initialize(psp); - if (ret) - dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); - - ret = psp_hdcp_initialize(psp); - if (ret) - dev_err(psp->adev->dev, - "HDCP: Failed to initialize HDCP\n"); - - ret = psp_dtm_initialize(psp); - if (ret) - dev_err(psp->adev->dev, - "DTM: Failed to initialize DTM\n"); - } - return 0; } @@ -1329,6 +1177,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_VCN: *type = 
GFX_FW_TYPE_VCN; break; + case AMDGPU_UCODE_ID_VCN1: + *type = GFX_FW_TYPE_VCN1; + break; case AMDGPU_UCODE_ID_DMCU_ERAM: *type = GFX_FW_TYPE_DMCU_ERAM; break; @@ -1341,6 +1192,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_VCN1_RAM: *type = GFX_FW_TYPE_VCN1_RAM; break; + case AMDGPU_UCODE_ID_DMCUB: + *type = GFX_FW_TYPE_DMUB; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -1470,7 +1324,8 @@ out: || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_SMC)) /*skip ucode loading in SRIOV VF */ continue; @@ -1519,16 +1374,13 @@ static int psp_load_fw(struct amdgpu_device *adev) if (!psp->cmd) return -ENOMEM; - /* this fw pri bo is not used under SRIOV */ - if (!amdgpu_sriov_vf(psp->adev)) { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); - if (ret) - goto failed; - } + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + goto failed; ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, @@ -1562,6 +1414,39 @@ skip_memalloc: if (ret) goto failed; + ret = psp_asd_load(psp); + if (ret) { + DRM_ERROR("PSP load asd failed!\n"); + return ret; + } + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } + + if (psp->adev->psp.ta_fw) { + ret = psp_ras_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "RAS: Failed to initialize RAS\n"); + + ret = psp_hdcp_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "HDCP: Failed to initialize HDCP\n"); + + ret = psp_dtm_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "DTM: Failed to initialize DTM\n"); + } + return 0; failed: @@ -1619,6 +1504,8 @@ static int psp_hw_fini(void *handle) psp_hdcp_terminate(psp); } + psp_asd_unload(psp); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; @@ -1627,8 +1514,6 @@ static int psp_hw_fini(void *handle) &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); - amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr, - &psp->asd_shared_buf); amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); @@ -1704,6 +1589,39 @@ static int psp_resume(void *handle) if (ret) goto failed; + ret = psp_asd_load(psp); + if (ret) { + DRM_ERROR("PSP load asd failed!\n"); + goto failed; + } + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } + + if (psp->adev->psp.ta_fw) { + ret = psp_ras_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "RAS: Failed to initialize RAS\n"); + + ret = psp_hdcp_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "HDCP: Failed to initialize HDCP\n"); + + ret = psp_dtm_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "DTM: Failed to initialize DTM\n"); + } + mutex_unlock(&adev->firmware.mutex); return 0; @@ -1758,6 +1676,56 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, return psp_execute_np_fw_load(&adev->psp, &ucode); } +int psp_ring_cmd_submit(struct psp_context *psp, + uint64_t cmd_buf_mc_addr, + uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame *write_frame; + struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; + struct amdgpu_device *adev = psp->adev; + uint32_t ring_size_dw = ring->ring_size / 4; + uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + + /* KM (GPCOM) prepare write pointer */ + psp_write_ptr_reg = psp_ring_get_wptr(psp); + + /* Update KM RB frame pointer to new frame */ + /* write_frame ptr increments by size of rb_frame in bytes */ + /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ + if ((psp_write_ptr_reg % ring_size_dw) == 0) + write_frame = ring_buffer_start; + else + write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } + + /* Initialize KM RB frame */ + memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); + write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); + write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); + write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); + write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; + psp_ring_set_wptr(psp, psp_write_ptr_reg); + return 0; +} + static bool psp_check_fw_loading_status(struct amdgpu_device *adev, enum AMDGPU_UCODE_ID ucode_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 09c5474ebcc3..611021514c52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -32,7 +32,6 @@ #define PSP_FENCE_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000 -#define PSP_ASD_SHARED_MEM_SIZE 0x4000 #define PSP_XGMI_SHARED_MEM_SIZE 0x4000 #define PSP_RAS_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 @@ -94,9 +93,6 @@ struct psp_funcs enum psp_ring_type ring_type); int (*ring_destroy)(struct psp_context *psp, enum psp_ring_type ring_type); - int (*cmd_submit)(struct psp_context *psp, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index); bool (*compare_sram_data)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, enum AMDGPU_UCODE_ID ucode_type); @@ -116,6 +112,8 @@ struct psp_funcs int (*mem_training_init)(struct psp_context *psp); void (*mem_training_fini)(struct psp_context *psp); int (*mem_training)(struct psp_context *psp, uint32_t ops); + uint32_t (*ring_get_wptr)(struct psp_context *psp); + void (*ring_set_wptr)(struct psp_context *psp, uint32_t value); }; #define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 @@ -131,6 +129,11 @@ struct psp_xgmi_topology_info { struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; }; +struct psp_asd_context { + bool asd_initialized; + uint32_t session_id; +}; + struct psp_xgmi_context { uint8_t initialized; uint32_t session_id; @@ -169,6 +172,8 @@ struct psp_dtm_context { #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 #define GDDR6_MEM_TRAINING_OFFSET 0x8000 +/*Define the VRAM size that will be encroached by BIST training.*/ +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 enum psp_memory_training_init_flag { PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, @@ -199,7 +204,6 @@ struct psp_memory_training_context { /*vram offset of the p2c training data*/ u64 p2c_train_data_offset; - struct amdgpu_bo *p2c_bo; /*vram offset of the c2p training data*/ u64 c2p_train_data_offset; @@ -239,15 +243,12 @@ struct psp_context struct amdgpu_bo *tmr_bo; uint64_t tmr_mc_addr; - /* asd firmware and buffer */ + /* asd firmware */ const struct firmware *asd_fw; uint32_t asd_fw_version; uint32_t asd_feature_version; uint32_t asd_ucode_size; uint8_t *asd_start_addr; - struct amdgpu_bo *asd_shared_bo; - uint64_t asd_shared_mc_addr; - void *asd_shared_buf; /* fence buffer */ struct amdgpu_bo *fence_buf_bo; @@ -282,6 +283,7 @@ struct psp_context uint32_t ta_dtm_ucode_size; uint8_t *ta_dtm_start_addr; + struct psp_asd_context asd_context; struct psp_xgmi_context xgmi_context; struct psp_ras_context ras; struct psp_hdcp_context hdcp_context; @@ -300,8 +302,6 @@ struct amdgpu_psp_funcs { #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) -#define psp_cmd_submit(psp, cmd_mc, fence_mc, index) \ - (psp)->funcs->cmd_submit((psp), (cmd_mc), (fence_mc), (index)) #define psp_compare_sram_data(psp, ucode, type) \ (psp)->funcs->compare_sram_data((psp), (ucode), (type)) #define psp_init_microcode(psp) \ @@ -346,6 +346,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->ras_cure_posion ? 
\ (psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL) +#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp)) +#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value)) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -372,4 +375,8 @@ int psp_rlc_autoload_start(struct psp_context *psp); extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, uint32_t value); +int psp_ring_cmd_submit(struct psp_context *psp, + uint64_t cmd_buf_mc_addr, + uint64_t fence_mc_addr, + int index); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 404483437bd3..cef94e2169fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -198,9 +198,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, - struct ras_common_if *head); - /** * DOC: AMDGPU RAS debugfs control interface * @@ -318,7 +315,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * default: ret = -EINVAL; break; - }; + } if (ret) return -EINVAL; @@ -445,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, } /* return an obj equal to head, or the first when head is NULL */ -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, struct ras_common_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -689,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; + int i; if (!obj) return -EINVAL; @@ -703,6 +701,13 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, if (adev->umc.funcs->query_ras_error_address) adev->umc.funcs->query_ras_error_address(adev, &err_data); break; + case AMDGPU_RAS_BLOCK__SDMA: + if (adev->sdma.funcs->query_ras_error_count) { + for (i = 0; i < adev->sdma.num_instances; i++) + adev->sdma.funcs->query_ras_error_count(adev, i, + &err_data); + } + break; case AMDGPU_RAS_BLOCK__GFX: if (adev->gfx.funcs->query_ras_error_count) adev->gfx.funcs->query_ras_error_count(adev, &err_data); @@ -737,6 +742,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, return 0; } +uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) +{ + uint32_t df_inst_id; + + if ((!adev->df.funcs) || + (!adev->df.funcs->get_df_inst_id) || + (!adev->df.funcs->get_dram_base_addr)) + return addr; + + df_inst_id = adev->df.funcs->get_df_inst_id(adev); + + return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id); +} + /* wrapper of psp_ras_trigger_error */ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info) @@ -754,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, if (!obj) return -EINVAL; + /* Calculate XGMI relative offset */ + if (adev->gmc.xgmi.num_physical_nodes > 1) { + block_info.address = get_xgmi_relative_phy_addr(adev, + block_info.address); + } + switch (info->head.block) { case AMDGPU_RAS_BLOCK__GFX: if (adev->gfx.funcs->ras_error_inject) @@ -1314,6 +1339,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, data = con->eh_data; if (!data || data->count == 0) { *bps = NULL; 
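/*
 * Worked example for the XGMI rebasing introduced above (the numbers are
 * invented for illustration): on a multi-node hive where the local DF
 * instance reports a DRAM base of 0x0000000800000000, an injection aimed
 * at local offset 0x1000 reaches the RAS TA as
 *
 *	addr = get_xgmi_relative_phy_addr(adev, 0x1000)
 *	     = 0x1000 + get_dram_base_addr(adev, df_inst_id)
 *	     = 0x0000000800001000
 *
 * On ASICs whose df.funcs table lacks get_df_inst_id() or
 * get_dram_base_addr() the helper deliberately returns the address
 * unchanged, and single-node devices skip the fix-up entirely because
 * amdgpu_ras_error_inject() only rebases when
 * gmc.xgmi.num_physical_nodes > 1.
 */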
+ ret = -EINVAL; goto out; } @@ -1347,7 +1373,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_ras *ras = container_of(work, struct amdgpu_ras, recovery_work); - amdgpu_device_gpu_recover(ras->adev, 0); + if (amdgpu_device_should_recover_gpu(ras->adev)) + amdgpu_device_gpu_recover(ras->adev, 0); atomic_set(&ras->in_recovery, 0); } @@ -1687,7 +1714,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, *supported = 0; if (amdgpu_sriov_vf(adev) || - adev->asic_type != CHIP_VEGA20) + (adev->asic_type != CHIP_VEGA20 && + adev->asic_type != CHIP_ARCTURUS)) return; if (adev->is_atom_fw && @@ -1872,7 +1900,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev) * See feature_enable_on_boot */ amdgpu_ras_disable_all_features(adev, 1); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); } } @@ -1935,6 +1963,6 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); - amdgpu_ras_reset_gpu(adev, false); + amdgpu_ras_reset_gpu(adev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f80fd3428c98..a5fe29a9373e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -494,8 +494,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, - bool is_baco) +static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -611,6 +610,9 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head); + extern atomic_t amdgpu_ras_in_intr; static inline bool amdgpu_ras_intr_triggered(void) @@ -618,6 +620,11 @@ static inline bool amdgpu_ras_intr_triggered(void) return !!atomic_read(&amdgpu_ras_in_intr); } +static inline void amdgpu_ras_intr_cleared(void) +{ + atomic_set(&amdgpu_ras_in_intr, 0); +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 6010999d9020..a2ee30b16212 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -160,7 +160,7 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); return AMDGPU_RAS_SUCCESS; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 761ff8be6314..485335267d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,8 +50,18 @@ struct amdgpu_sdma_instance { bool burst_nop; }; +struct amdgpu_sdma_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev, + void *ras_ih_info); + void (*ras_fini)(struct amdgpu_device *adev); + int (*query_ras_error_count)(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status); +}; + struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; + struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES]; + 
uint32_t num_sdma_sched; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; struct amdgpu_irq_src ecc_irq; @@ -59,6 +69,7 @@ struct amdgpu_sdma { uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; + const struct amdgpu_sdma_ras_funcs *funcs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 95e5e93edd18..a09b6b9c27d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -129,7 +129,8 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep, * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. */ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, + bool explicit) { struct amdgpu_sync_entry *e; @@ -151,19 +152,18 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, * amdgpu_sync_fence - remember to sync to this fence * * @sync: sync object to add fence to - * @fence: fence to sync to + * @f: fence to sync to + * @explicit: if this is an explicit dependency * + * Add the fence to the sync object. */ -int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f, bool explicit) +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + bool explicit) { struct amdgpu_sync_entry *e; if (!f) return 0; - if (amdgpu_sync_same_dev(adev, f) && - amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM) - amdgpu_sync_keep_later(&sync->last_vm_update, f); if (amdgpu_sync_add_later(sync, f, explicit)) return 0; @@ -180,6 +180,24 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, } /** + * amdgpu_sync_vm_fence - remember to sync to this VM fence + * + * @adev: amdgpu device + * @sync: sync object to add fence to + * @fence: the VM fence to add + * + * Add the fence to the sync object and remember it as VM update. + */ +int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) +{ + if (!fence) + return 0; + + amdgpu_sync_keep_later(&sync->last_vm_update, fence); + return amdgpu_sync_fence(sync, fence, false); +} + +/** * amdgpu_sync_resv - sync to a reservation object * * @sync: sync object to add fences from reservation object to @@ -204,7 +222,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, /* always sync to the exclusive fence */ f = dma_resv_get_excl(resv); - r = amdgpu_sync_fence(adev, sync, f, false); + r = amdgpu_sync_fence(sync, f, false); flist = dma_resv_get_list(resv); if (!flist || r) @@ -222,13 +240,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, continue; if (amdgpu_sync_same_dev(adev, f)) { - /* VM updates are only interesting - * for other VM updates and moves. 
+ /* VM updates only sync with moves but not with user + * command submissions or KFD eviction fences */ - if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && - (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && - ((owner == AMDGPU_FENCE_OWNER_VM) != - (fence_owner == AMDGPU_FENCE_OWNER_VM))) + if (owner == AMDGPU_FENCE_OWNER_VM && + fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) continue; /* Ignore fence from the same owner and explicit one as @@ -239,7 +255,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, continue; } - r = amdgpu_sync_fence(adev, sync, f, false); + r = amdgpu_sync_fence(sync, f, false); if (r) break; } @@ -340,7 +356,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) hash_for_each_safe(source->fences, i, tmp, e, node) { f = e->fence; if (!dma_fence_is_signaled(f)) { - r = amdgpu_sync_fence(NULL, clone, f, e->explicit); + r = amdgpu_sync_fence(clone, f, e->explicit); if (r) return r; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index b5f1778a2319..d62c2b81d92b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -40,8 +40,9 @@ struct amdgpu_sync { }; void amdgpu_sync_create(struct amdgpu_sync *sync); -int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f, bool explicit); +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + bool explicit); +int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, @@ -49,7 +50,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, + bool *explicit); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2616e2eafdeb..dee446278417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -41,6 +41,7 @@ #include <linux/swap.h> #include <linux/swiotlb.h> #include <linux/dma-buf.h> +#include <linux/sizes.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> @@ -1522,11 +1523,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, struct dma_fence *f; int i; - /* Don't evict VM page tables while they are busy, otherwise we can't - * cleanly handle page faults. - */ if (bo->type == ttm_bo_type_kernel && - !dma_resv_test_signaled_rcu(bo->base.resv, true)) + !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo))) return false; /* If bo is a KFD BO, check if the bo belongs to the current process.
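/*
 * Editor-added example, not part of this commit: the relaxed owner rule
 * above, written out as a predicate (ignoring the later same-owner and
 * explicit-sync checks). During a page-table update only ownerless fences
 * -- buffer moves -- are waited on; user submissions and KFD eviction
 * fences are skipped.
 */
static bool example_vm_update_must_wait(void *owner, void *fence_owner)
{
	return owner != AMDGPU_FENCE_OWNER_VM ||
	       fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED;
}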
@@ -1717,12 +1715,17 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL); ctx->c2p_bo = NULL; - amdgpu_bo_free_kernel(&ctx->p2c_bo, NULL, NULL); - ctx->p2c_bo = NULL; - return 0; } +static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size) +{ + if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) ) + vram_size -= SZ_1M; + + return ALIGN(vram_size, SZ_1M); +} + /** * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training * @@ -1741,7 +1744,7 @@ static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) return 0; } - ctx->c2p_train_data_offset = adev->fw_vram_usage.mem_train_fb_loc; + ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size); ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; @@ -1751,17 +1754,6 @@ static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) ctx->c2p_train_data_offset); ret = amdgpu_bo_create_kernel_at(adev, - ctx->p2c_train_data_offset, - ctx->train_data_size, - AMDGPU_GEM_DOMAIN_VRAM, - &ctx->p2c_bo, - NULL); - if (ret) { - DRM_ERROR("alloc p2c_bo failed(%d)!\n", ret); - goto Err_out; - } - - ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, AMDGPU_GEM_DOMAIN_VRAM, @@ -1769,15 +1761,12 @@ static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) NULL); if (ret) { DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); - goto Err_out; + amdgpu_ttm_training_reserve_vram_fini(adev); + return ret; } ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; return 0; - -Err_out: - amdgpu_ttm_training_reserve_vram_fini(adev); - return ret; } /** @@ -1990,11 +1979,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) if (enable) { struct amdgpu_ring *ring; - struct drm_sched_rq *rq; + struct drm_gpu_scheduler *sched; ring = adev->mman.buffer_funcs_ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL); + sched = &ring->sched; + r = drm_sched_entity_init(&adev->mman.entity, + DRM_SCHED_PRIORITY_KERNEL, &sched, + 1, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 833fc4b68940..9ef312428231 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -447,6 +447,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr = NULL; const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL; + const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL; if (NULL == ucode->fw) return 0; @@ -460,6 +461,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, header = (const struct common_firmware_header *)ucode->fw->data; cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data; + dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP || (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && @@ -470,7 +472,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && 
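/*
 * Editor-added worked example, not part of this commit: for
 * amdgpu_ttm_training_get_c2p_offset() above, assume a VRAM size that is
 * an exact multiple of 1 MB (the common case). The sub-1MB remainder is
 * then 0, which is below SZ_4K + 1, so one full megabyte is subtracted
 * and ALIGN() is a no-op: the c2p training buffer lands at the start of
 * the topmost whole megabyte of VRAM.
 *
 *	vram_size               = 8ULL * SZ_1G
 *	vram_size & (SZ_1M - 1) = 0          -> subtract SZ_1M
 *	ALIGN(8G - 1M, SZ_1M)   = 8G - 1M    -> c2p_train_data_offset
 */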
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM && ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM && - ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) { + ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV && + ucode->ucode_id != AMDGPU_UCODE_ID_DMCUB)) { ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + @@ -506,6 +509,12 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(dmcu_hdr->intv_offset_bytes)), ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCUB) { + ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes); + memcpy(ucode->kaddr, + (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes)), + ucode->ucode_size); } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 914acecda5cf..b0e656409c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -251,6 +251,13 @@ struct dmcu_firmware_header_v1_0 { uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */ }; +/* version_major=1, version_minor=0 */ +struct dmcub_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t inst_const_bytes; /* size of instruction region, in bytes */ + uint32_t bss_data_bytes; /* size of bss/data region, in bytes */ +}; + /* header is fixed size */ union amdgpu_firmware_header { struct common_firmware_header common; @@ -268,6 +275,7 @@ union amdgpu_firmware_header { struct sdma_firmware_header_v1_1 sdma_v1_1; struct gpu_info_firmware_header_v1_0 gpu_info; struct dmcu_firmware_header_v1_0 dmcu; + struct dmcub_firmware_header_v1_0 dmcub; uint8_t raw[0x100]; }; @@ -307,6 +315,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_DMCU_INTV, AMDGPU_UCODE_ID_VCN0_RAM, AMDGPU_UCODE_ID_VCN1_RAM, + AMDGPU_UCODE_ID_DMCUB, AMDGPU_UCODE_ID_MAXIMUM, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index d4fb9cf27e21..f4d40855147b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -95,13 +95,6 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - /* When “Full RAS” is enabled, the per-IP interrupt sources should - * be disabled and the driver should only look for the aggregated - * interrupt via sync flood - */ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return AMDGPU_RAS_SUCCESS; - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); if (adev->umc.funcs && adev->umc.funcs->query_ras_error_count) @@ -113,6 +106,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, err_data->err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, sizeof(struct eeprom_table_record), GFP_KERNEL); + /* still call query_ras_error_address to clear error status * even NOMEM error is encountered */ @@ -132,7 +126,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, err_data->err_addr_cnt)) DRM_WARN("Failed to add ras bad page!\n"); - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev); } kfree(err_data->err_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
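/*
 * Editor-added example, not part of this commit: the new DMCUB header
 * above carries two region sizes; a loader could derive the total
 * firmware footprint like this (hypothetical helper, fields as defined
 * in amdgpu_ucode.h above):
 */
static u32 example_dmcub_fw_size(const struct dmcub_firmware_header_v1_0 *hdr)
{
	/* instruction region plus bss/data region, both little-endian */
	return le32_to_cpu(hdr->inst_const_bytes) +
	       le32_to_cpu(hdr->bss_data_bytes);
}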
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 3283032a78e5..a615a1eb750b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -21,38 +21,6 @@ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ -/* implement 64 bits REG operations via 32 bits interface */ -#define RREG64_UMC(reg) (RREG32(reg) | \ - ((uint64_t)RREG32((reg) + 1) << 32)) -#define WREG64_UMC(reg, v) \ - do { \ - WREG32((reg), lower_32_bits(v)); \ - WREG32((reg) + 1, upper_32_bits(v)); \ - } while (0) - -/* - * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data, - * uint32_t umc_reg_offset, uint32_t channel_index) - */ -#define amdgpu_umc_for_each_channel(func) \ - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \ - uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \ - for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \ - /* enable the index mode to query error count per channel */ \ - adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \ - for (channel_inst = 0; \ - channel_inst < adev->umc.channel_inst_num; \ - channel_inst++) { \ - /* calc the register offset according to channel instance */ \ - umc_reg_offset = adev->umc.channel_offs * channel_inst; \ - /* get channel index of interleaved memory */ \ - channel_index = adev->umc.channel_idx_tbl[ \ - umc_inst * adev->umc.channel_inst_num + channel_inst]; \ - (func)(adev, err_data, umc_reg_offset, channel_index); \ - } \ - } \ - adev->umc.funcs->disable_umc_index_mode(adev); - struct amdgpu_umc_funcs { void (*err_cnt_init)(struct amdgpu_device *adev); int (*ras_late_init)(struct amdgpu_device *adev); @@ -60,9 +28,6 @@ struct amdgpu_umc_funcs { void *ras_error_status); void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); - void (*enable_umc_index_mode)(struct amdgpu_device *adev, - uint32_t umc_instance); - void (*disable_umc_index_mode)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e324bfe6c58f..a92f3b18e657 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -330,12 +330,13 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) int amdgpu_uvd_entity_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct drm_sched_rq *rq; + struct drm_gpu_scheduler *sched; int r; ring = &adev->uvd.inst[0].ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL); + sched = &ring->sched; + r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); if (r) { DRM_ERROR("Failed setting up UVD kernel entity.\n"); return r; @@ -349,6 +350,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) unsigned size; void *ptr; int i, j; + bool in_ras_intr = amdgpu_ras_intr_triggered(); cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -376,13 +378,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) return -ENOMEM; /* re-write 0 since err_event_athub will corrupt VCPU buffer */ - if (amdgpu_ras_intr_triggered()) { - DRM_WARN("UVD VCPU state may be lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + if (in_ras_intr) memset(adev->uvd.inst[j].saved_bo, 0, size); - } else { + else memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); - } } + + if (in_ras_intr) + DRM_WARN("UVD VCPU state may be lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); + return 0; } diff --git
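/*
 * Editor-added example, not part of this commit: the
 * drm_sched_entity_init() rework, visible in the UVD hunk above and the
 * TTM/VCE hunks nearby, follows one pattern -- callers now pass an array
 * of scheduler pointers plus an explicit priority instead of pre-resolved
 * run-queue pointers. Generic sketch:
 */
static int example_entity_init(struct amdgpu_ring *ring,
			       struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = &ring->sched;

	/* one scheduler, normal priority, no guilty pointer */
	return drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_NORMAL,
				     &sched, 1, NULL);
}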
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 46b590af2fd2..59ddba137946 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -240,12 +240,13 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) int amdgpu_vce_entity_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct drm_sched_rq *rq; + struct drm_gpu_scheduler *sched; int r; ring = &adev->vce.ring[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL); + sched = &ring->sched; + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; @@ -651,7 +652,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, if ((addr + (uint64_t)size) > (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", + DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n", addr, lo, hi); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 9d870444d7d6..f96464e2c157 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -28,19 +28,10 @@ #include <linux/module.h> #include <linux/pci.h> -#include <drm/drm.h> - #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_vcn.h" #include "soc15d.h" -#include "soc15_common.h" - -#include "vcn/vcn_1_0_offset.h" -#include "vcn/vcn_1_0_sh_mask.h" - -/* 1 second timeout */ -#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) /* Firmware Names */ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" @@ -84,6 +75,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case CHIP_ARCTURUS: fw_name = FIRMWARE_ARCTURUS; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case CHIP_RENOIR: fw_name = FIRMWARE_RENOIR; @@ -174,15 +168,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); return r; } - } - if (adev->vcn.indirect_sram) { - r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r); - return r; + if (adev->vcn.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); + return r; + } } } @@ -195,15 +189,14 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.indirect_sram) { - amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); - } - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo, + &adev->vcn.inst[j].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + } kvfree(adev->vcn.inst[j].saved_bo); amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, @@ -214,8 +207,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < 
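/*
 * Editor-added note, not part of this commit: in indirect DPG mode each
 * queued register write consumes two dwords (offset, then value; see the
 * WREG32_SOC15_DPG_MODE_2_0 macro further below), so the per-instance
 * SRAM BO allocated above -- 64 * 2 * 4 bytes -- holds at most 64 writes
 * before it must be flushed. Illustrative sizing only:
 */
#define EXAMPLE_DPG_SRAM_MAX_WRITES	64
#define EXAMPLE_DPG_SRAM_BYTES		(EXAMPLE_DPG_SRAM_MAX_WRITES * 2 * 4)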
adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); - - amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); } release_firmware(adev->vcn.fw); @@ -296,6 +287,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); } @@ -308,26 +300,17 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, j, &new_state); } - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); fences += fence[j]; } if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); - if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); - else - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); } else { schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } @@ -340,11 +323,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); - if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); - else - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); } if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -360,17 +340,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) new_state.fw_based = VCN_DPG_STATE__PAUSE; - else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } } @@ -520,9 +493,14 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) { + struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; long r; + /* temporarily disable ib test for sriov */ + if (amdgpu_sriov_vf(adev)) + return 0; + r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); if (r) goto error; @@ -678,10 +656,15 @@ err: int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { + struct amdgpu_device *adev = ring->adev; struct dma_fence *fence = NULL; struct amdgpu_bo *bo = NULL; long r; + /* temporarily disable ib test for sriov */ + if (amdgpu_sriov_vf(adev)) + return 0; + r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &bo, NULL, NULL); @@ -708,108 +691,3 @@ error: amdgpu_bo_unref(&bo); return r; } - -int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t tmp = 0; - unsigned i; - int r; - - 
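/*
 * Editor-added example, not part of this commit: pause_dpg_mode() now
 * takes the VCN instance index, so DPG pause state is driven per
 * instance, and the JPEG pause handling is gone from this path. Sketch
 * of a caller:
 */
static void example_pause_vcn_inst(struct amdgpu_device *adev, int inst_idx)
{
	struct dpg_pause_state new_state = {
		.fw_based = VCN_DPG_STATE__PAUSE,
	};

	adev->vcn.pause_dpg_mode(adev, inst_idx, &new_state);
}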
WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); - r = amdgpu_ring_alloc(ring, 3); - if (r) - return r; - - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0)); - amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_commit(ring); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); - if (tmp == 0xDEADBEEF) - break; - udelay(1); - } - - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - - return r; -} - -static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_job *job; - struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - const unsigned ib_size_dw = 16; - int i, r; - - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); - if (r) - return r; - - ib = &job->ibs[0]; - - ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); - ib->ptr[1] = 0xDEADBEEF; - for (i = 2; i < 16; i += 2) { - ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); - ib->ptr[i+1] = 0; - } - ib->length_dw = 16; - - r = amdgpu_job_submit_direct(job, ring, &f); - if (r) - goto err; - - if (fence) - *fence = dma_fence_get(f); - dma_fence_put(f); - - return 0; - -err: - amdgpu_job_free(job); - return r; -} - -int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t tmp = 0; - unsigned i; - struct dma_fence *fence = NULL; - long r = 0; - - r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); - if (r) - goto error; - - r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - r = -ETIMEDOUT; - goto error; - } else if (r < 0) { - goto error; - } else { - r = 0; - } - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); - if (tmp == 0xDEADBEEF) - break; - udelay(1); - } - - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - - dma_fence_put(fence); -error: - return r; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index dface275c81a..d6deb0eb1e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -31,6 +31,7 @@ #define AMDGPU_VCN_MAX_ENC_RINGS 3 #define AMDGPU_MAX_VCN_INSTANCES 2 +#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES #define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) #define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) @@ -56,33 +57,41 @@ #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ - ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + +/* 1 second timeout */ +#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) + +#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ UVD_DPG_LMA_CTL__MASK_EN_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \ + RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define 
WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ +#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \ do { \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ } while (0) -#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \ +#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \ ({ \ uint32_t internal_reg_offset, addr; \ bool video_range, aon_range; \ \ - addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ addr <<= 2; \ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \ @@ -100,27 +109,27 @@ internal_reg_offset >>= 2; \ }) -#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \ - ({ \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \ +#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \ - do { \ - if (!indirect) { \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - } else { \ - *adev->vcn.dpg_sram_curr_addr++ = offset; \ - *adev->vcn.dpg_sram_curr_addr++ = value; \ - } \ +#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ + } \ } while (0) enum engine_status_constants { @@ -158,7 +167,6 @@ struct amdgpu_vcn_reg{ unsigned ib_size; unsigned gp_scratch8; unsigned scratch9; - unsigned jpeg_pitch; }; struct amdgpu_vcn_inst { @@ -168,9 +176,12 @@ struct amdgpu_vcn_inst { void *saved_bo; struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; - struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; struct amdgpu_vcn_reg external; + struct amdgpu_bo *dpg_sram_bo; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; }; struct amdgpu_vcn { @@ -182,18 +193,18 @@ struct amdgpu_vcn { struct dpg_pause_state pause_state; bool 
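/*
 * Editor-added example, not part of this commit: usage sketch for the
 * instance-aware 2.0 DPG write macro above. With indirect == 0 the value
 * goes straight through the UVD_DPG_LMA window of that instance; with
 * indirect == 1 the (offset, value) pair is queued into the instance's
 * DPG SRAM for later processing. mask_en == 0 means an unmasked write.
 */
static void example_dpg_write(struct amdgpu_device *adev, int inst_idx,
			      uint32_t offset, uint32_t value, bool indirect)
{
	WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, 0, indirect);
}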
indirect_sram; - struct amdgpu_bo *dpg_sram_bo; - void *dpg_sram_cpu_addr; - uint64_t dpg_sram_gpu_addr; - uint32_t *dpg_sram_curr_addr; uint8_t num_vcn_inst; - struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; - struct amdgpu_vcn_reg internal; + struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + struct amdgpu_vcn_reg internal; + struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS]; + struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES]; + uint32_t num_vcn_enc_sched; + uint32_t num_vcn_dec_sched; unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); @@ -209,7 +220,4 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); -int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring); -int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index e32ae906d797..adc813cde8e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -45,98 +45,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->pg_flags = 0; } -uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) -{ - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *ring = &kiq->ring; - - BUG_ON(!ring->funcs->emit_rreg); - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_rreg(ring, reg); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for gpu reset case because this way may - * block gpu_recover() routine forever, e.g. this virt_kiq_rreg - * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will - * never return if we keep waiting in virt_kiq_rreg, which cause - * gpu_recover() hang there. - * - * also don't wait anymore for IRQ context - * */ - if (r < 1 && (adev->in_gpu_reset || in_interrupt())) - goto failed_kiq_read; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq_read; - - return adev->wb.wb[adev->virt.reg_val_offs]; - -failed_kiq_read: - pr_err("failed to read reg:%x\n", reg); - return ~0; -} - -void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) -{ - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *ring = &kiq->ring; - - BUG_ON(!ring->funcs->emit_wreg); - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for gpu reset case because this way may - * block gpu_recover() routine forever, e.g. 
this virt_kiq_rreg - * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will - * never return if we keep waiting in virt_kiq_rreg, which cause - * gpu_recover() hang there. - * - * also don't wait anymore for IRQ context - * */ - if (r < 1 && (adev->in_gpu_reset || in_interrupt())) - goto failed_kiq_write; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq_write; - - return; - -failed_kiq_write: - pr_err("failed to write reg:%x\n", reg); -} - void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask) @@ -379,54 +287,3 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) } } } - -static uint32_t parse_clk(char *buf, bool min) -{ - char *ptr = buf; - uint32_t clk = 0; - - do { - ptr = strchr(ptr, ':'); - if (!ptr) - break; - ptr+=2; - if (kstrtou32(ptr, 10, &clk)) - return 0; - } while (!min); - - return clk * 100; -} - -uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest) -{ - char *buf = NULL; - uint32_t clk = 0; - - buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); - clk = parse_clk(buf, lowest); - - kfree(buf); - - return clk; -} - -uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) -{ - char *buf = NULL; - uint32_t clk = 0; - - buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); - clk = parse_clk(buf, lowest); - - kfree(buf); - - return clk; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b0b2bdc750df..daaf909d009a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -57,8 +57,6 @@ struct amdgpu_virt_ops { int (*reset_gpu)(struct amdgpu_device *adev); int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); - int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); - int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); }; /* @@ -85,8 +83,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, /* VRAM LOST by GIM */ AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, - /* HW PERF SIM in GIM */ - AMDGIM_FEATURE_HW_PERF_SIMULATION = (1 << 3), + /* PP ONE VF MODE in GIM */ + AMDGIM_FEATURE_PP_ONE_VF = (1 << 4), }; struct amd_sriov_msg_pf2vf_info_header { @@ -257,8 +255,6 @@ struct amdgpu_virt { struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; uint32_t gim_feature; - /* protect DPM events to GIM */ - struct mutex dpm_mutex; uint32_t reg_access_mode; }; @@ -286,13 +282,11 @@ static inline bool is_virtual_machine(void) #endif } -#define amdgim_is_hwperf(adev) \ - ((adev)->virt.gim_feature & AMDGIM_FEATURE_HW_PERF_SIMULATION) +#define amdgpu_sriov_is_pp_one_vf(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); -uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, uint32_t ref, uint32_t mask); @@ -306,6 
+300,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, unsigned int key, unsigned int chksum); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); -uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); -uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 598c24505c73..d16231d6a790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -83,6 +83,32 @@ struct amdgpu_prt_cb { }; /** + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. + */ +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(&vm->eviction_lock); + vm->saved_flags = memalloc_nofs_save(); +} + +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(&vm->eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} + +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(&vm->eviction_lock); +} + +/** * amdgpu_vm_level_shift - return the addr shift for each level * * @adev: amdgpu_device pointer @@ -562,8 +588,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, { entry->priority = 0; entry->tv.bo = &vm->root.base.bo->tbo; - /* One for the VM updates, one for TTM and one for the CS job */ - entry->tv.num_shared = 3; + /* One for TTM and one for the CS job */ + entry->tv.num_shared = 2; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } @@ -656,7 +682,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param) { struct amdgpu_vm_bo_base *bo_base, *tmp; - int r = 0; + int r; vm->bulk_moveable &= list_empty(&vm->evicted); @@ -665,7 +691,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = validate(param, bo); if (r) - break; + return r; if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -678,7 +704,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - return r; + amdgpu_vm_eviction_lock(vm); + vm->evicting = false; + amdgpu_vm_eviction_unlock(vm); + + return 0; } /** @@ -1555,15 +1585,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; + amdgpu_vm_eviction_lock(vm); + if (vm->evicting) { + r = -EBUSY; + goto error_unlock; + } + r = vm->update_funcs->prepare(¶ms, owner, exclusive); if (r) - return r; + goto error_unlock; r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) - return r; + goto error_unlock; + + r = vm->update_funcs->commit(¶ms, fence); - return vm->update_funcs->commit(¶ms, fence); +error_unlock: + amdgpu_vm_eviction_unlock(vm); + return r; } /** @@ -2500,6 +2540,41 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, } /** + * amdgpu_vm_evictable - check if we can evict a VM + * + * @bo: A page table of the VM. + * + * Check if it is possible to evict a VM. 
+ */ +bool amdgpu_vm_evictable(struct amdgpu_bo *bo) +{ + struct amdgpu_vm_bo_base *bo_base = bo->vm_bo; + + /* Page tables of a destroyed VM can go away immediately */ + if (!bo_base || !bo_base->vm) + return true; + + /* Don't evict VM page tables while they are busy */ + if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true)) + return false; + + /* Try to block ongoing updates */ + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) + return false; + + /* Don't evict VM page tables while they are updated */ + if (!dma_fence_is_signaled(bo_base->vm->last_direct) || + !dma_fence_is_signaled(bo_base->vm->last_delayed)) { + amdgpu_vm_eviction_unlock(bo_base->vm); + return false; + } + + bo_base->vm->evicting = true; + amdgpu_vm_eviction_unlock(bo_base->vm); + return true; +} + +/** * amdgpu_vm_bo_invalidate - mark the bo as invalid * * @adev: amdgpu_device pointer @@ -2661,8 +2736,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - return dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, - true, true, timeout); + timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, + true, true, timeout); + if (timeout <= 0) + return timeout; + + timeout = dma_fence_wait_timeout(vm->last_direct, true, timeout); + if (timeout <= 0) + return timeout; + + return dma_fence_wait_timeout(vm->last_delayed, true, timeout); } /** @@ -2696,18 +2779,22 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); + /* create scheduler entities for page table updates */ - r = drm_sched_entity_init(&vm->direct, adev->vm_manager.vm_pte_rqs, - adev->vm_manager.vm_pte_num_rqs, NULL); + r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); if (r) return r; - r = drm_sched_entity_init(&vm->delayed, adev->vm_manager.vm_pte_rqs, - adev->vm_manager.vm_pte_num_rqs, NULL); + r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); if (r) goto error_free_direct; vm->pte_support_ats = false; + vm->is_compute_context = false; if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2730,6 +2817,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, else vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; + vm->last_direct = dma_fence_get_stub(); + vm->last_delayed = dma_fence_get_stub(); + + mutex_init(&vm->eviction_lock); + vm->evicting = false; amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) @@ -2780,6 +2872,8 @@ error_free_root: vm->root.base.bo = NULL; error_free_delayed: + dma_fence_put(vm->last_direct); + dma_fence_put(vm->last_delayed); drm_sched_entity_destroy(&vm->delayed); error_free_direct: @@ -2893,6 +2987,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs = &amdgpu_vm_sdma_funcs; dma_fence_put(vm->last_update); vm->last_update = NULL; + vm->is_compute_context = true; if (vm->pasid) { unsigned long flags; @@ -2947,6 +3042,7 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } vm->pasid = 0; + vm->is_compute_context = false; } /** @@ -2978,6 +3074,11 @@ void 
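/*
 * Editor-added example, not part of this commit: the eviction handshake
 * introduced here, from the page-table updater's side -- take the
 * eviction lock (which also enters the NOFS allocation scope), bail out
 * with -EBUSY if TTM already marked the VM as being evicted, and only
 * then commit the update. TTM, in turn, sets vm->evicting only when the
 * trylock succeeds, i.e. when no update is in flight.
 */
static int example_update_guard(struct amdgpu_vm *vm)
{
	int r = 0;

	amdgpu_vm_eviction_lock(vm);
	if (vm->evicting)
		r = -EBUSY;	/* caller revalidates and retries */
	amdgpu_vm_eviction_unlock(vm);

	return r;
}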
amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pasid = 0; } + dma_fence_wait(vm->last_direct, false); + dma_fence_put(vm->last_direct); + dma_fence_wait(vm->last_delayed, false); + dma_fence_put(vm->last_delayed); + list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { amdgpu_vm_prt_fini(adev, vm); @@ -3194,11 +3295,20 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { + if (vm->is_compute_context) { + /* Intentionally setting invalid PTE flag + * combination to force a no-retry-fault + */ + flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | + AMDGPU_PTE_TF; + value = 0; + + } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE; + } else { /* Let the hw retry silently on the PTE */ value = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 4dbbe1b6b413..b4640ab38c95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include <drm/gpu_scheduler.h> #include <drm/drm_file.h> #include <drm/ttm/ttm_bo_driver.h> +#include <linux/sched/mm.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -239,6 +240,13 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; + /* Lock to prevent eviction while we are updating page tables + * use vm_eviction_lock/unlock(vm) + */ + struct mutex eviction_lock; + bool evicting; + unsigned int saved_flags; + /* BOs who needs a validation */ struct list_head evicted; @@ -266,6 +274,10 @@ struct amdgpu_vm { struct drm_sched_entity direct; struct drm_sched_entity delayed; + /* Last submission to the scheduler entities */ + struct dma_fence *last_direct; + struct dma_fence *last_delayed; + unsigned int pasid; /* dedicated to vm */ struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; @@ -298,6 +310,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* mark whether can do the bulk move */ bool bulk_moveable; + /* Flag to indicate if VM is used for compute */ + bool is_compute_context; }; struct amdgpu_vm_manager { @@ -317,8 +331,8 @@ struct amdgpu_vm_manager { u64 vram_base_offset; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; - struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS]; - unsigned vm_pte_num_rqs; + struct drm_gpu_scheduler *vm_pte_scheds[AMDGPU_MAX_RINGS]; + unsigned vm_pte_num_scheds; struct amdgpu_ring *page_fault; /* partial resident texture handling */ @@ -376,6 +390,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); +bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 832db59f441e..19b7f80758f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -71,7 +71,7 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, p->num_dw_left = ndw; /* Wait 
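/*
 * Editor-added example, not part of this commit: with root-PD fencing
 * replaced by vm->last_direct/vm->last_delayed, "is the VM idle?" now
 * also means waiting on those two fences, as amdgpu_vm_wait_idle() above
 * shows. Condensed sketch of the fence part:
 */
static long example_wait_pt_updates(struct amdgpu_vm *vm, long timeout)
{
	timeout = dma_fence_wait_timeout(vm->last_direct, true, timeout);
	if (timeout <= 0)
		return timeout;

	return dma_fence_wait_timeout(vm->last_delayed, true, timeout);
}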
for moves to be completed */ - r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); + r = amdgpu_sync_fence(&p->job->sync, exclusive, false); if (r) return r; @@ -95,11 +95,10 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { - struct amdgpu_bo *root = p->vm->root.base.bo; struct amdgpu_ib *ib = p->job->ibs; struct drm_sched_entity *entity; + struct dma_fence *f, *tmp; struct amdgpu_ring *ring; - struct dma_fence *f; int r; entity = p->direct ? &p->vm->direct : &p->vm->delayed; @@ -112,7 +111,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, if (r) goto error; - amdgpu_bo_fence(root, f, true); + tmp = dma_fence_get(f); + if (p->direct) + swap(p->vm->last_direct, tmp); + else + swap(p->vm->last_delayed, tmp); + dma_fence_put(tmp); + if (fence && !p->direct) swap(*fence, f); dma_fence_put(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 61d13d8b7b20..a97af422575a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -146,16 +146,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); if (fica_out != 0x1f) pr_err("xGMI error counters not enabled!\n"); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in); if ((fica_out & 0xffff) == 2) error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); - adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); return snprintf(buf, PAGE_SIZE, "%d\n", error_count); } @@ -261,6 +261,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo INIT_LIST_HEAD(&tmp->device_list); mutex_init(&tmp->hive_lock); mutex_init(&tmp->reset_lock); + task_barrier_init(&tmp->tb); if (lock) mutex_lock(&tmp->hive_lock); @@ -290,13 +291,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); - if (is_support_sw_smu_xgmi(adev)) - ret = smu_set_xgmi_pstate(&adev->smu, pstate); - else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_xgmi_pstate) - ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, - pstate); - + ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate); if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", @@ -408,6 +403,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) top_info->num_nodes = count; hive->number_devices = count; + task_barrier_add_task(&hive->tb); + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { /* update node list for other device in the hive */ @@ -470,6 +467,7 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) mutex_destroy(&hive->hive_lock); mutex_destroy(&hive->reset_lock); } else { + task_barrier_rem_task(&hive->tb); amdgpu_xgmi_sysfs_rem_dev_info(adev, hive); mutex_unlock(&hive->hive_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index bbf504ff7051..74011fbc2251 100644 --- 
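/*
 * Editor-added example, not part of this commit: lifecycle of the new
 * per-hive task barrier, matching the calls added in the XGMI hunks
 * above; hive-wide operations can use it to rendezvous all GPUs of an
 * XGMI hive. The three calls below belong to different code paths.
 */
static void example_hive_barrier(struct amdgpu_hive_info *hive)
{
	task_barrier_init(&hive->tb);		/* once, when the hive is created */
	task_barrier_add_task(&hive->tb);	/* each device joining the hive */
	task_barrier_rem_task(&hive->tb);	/* each device leaving the hive */
}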
a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -22,6 +22,7 @@ #ifndef __AMDGPU_XGMI_H__ #define __AMDGPU_XGMI_H__ +#include <drm/task_barrier.h> #include "amdgpu_psp.h" struct amdgpu_hive_info { @@ -33,6 +34,7 @@ struct amdgpu_hive_info { struct device_attribute dev_attr; struct amdgpu_device *adev; int pstate; /*0 -- low , 1 -- high , -1 unknown*/ + struct task_barrier tb; }; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c index d9cc746af5e6..847ca9b3ce4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -74,9 +74,9 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, case CHIP_VEGA20: case CHIP_RAVEN: athub_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); athub_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index ceb9aa4df0e7..921a69abda55 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -77,9 +77,9 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI14: case CHIP_NAVI12: athub_v2_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); athub_v2_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 6858cde9fc5d..ea702a64f807 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -361,7 +361,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder, struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - struct amdgpu_connector_atom_dig *dig_connector; int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE; u16 dp_bridge = amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector); u8 tmp; @@ -369,8 +368,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder, if (!amdgpu_connector->con_priv) return panel_mode; - dig_connector = amdgpu_connector->con_priv; - if (dp_bridge != ENCODER_OBJECT_ID_NONE) { /* DP bridge chips */ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux, @@ -713,7 +710,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig; struct amdgpu_connector *amdgpu_connector; struct amdgpu_connector_atom_dig *dig_connector; struct amdgpu_atombios_dp_link_train_info dp_info; @@ -721,7 +717,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder, if (!amdgpu_encoder->enc_priv) return; - dig = amdgpu_encoder->enc_priv; amdgpu_connector = to_amdgpu_connector(connector); if (!amdgpu_connector->con_priv) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index 980c363b1a0a..b4cc7c55fa16 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ 
-76,11 +76,6 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan, } args.lpI2CDataOut = cpu_to_le16(out); } else { - if (num > ATOM_MAX_HW_I2C_READ) { - DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num); - r = -EINVAL; - goto done; - } args.ucRegIndex = 0; args.lpI2CDataOut = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 1befdee9f0f1..006f21ef7ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1310,6 +1310,17 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) return r; } +static bool cik_asic_supports_baco(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + return amdgpu_dpm_is_baco_supported(adev); + default: + return false; + } +} + static enum amd_reset_method cik_asic_reset_method(struct amdgpu_device *adev) { @@ -1349,7 +1360,7 @@ static int cik_asic_reset(struct amdgpu_device *adev) if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - r = smu7_asic_baco_reset(adev); + r = amdgpu_dpm_baco_reset(adev); } else { r = cik_asic_pci_config_reset(adev); } @@ -1927,6 +1938,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_pcie_usage = &cik_get_pcie_usage, .need_reset_on_init = &cik_need_reset_on_init, .get_pcie_replay_count = &cik_get_pcie_replay_count, + .supports_baco = &cik_asic_supports_baco, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index 9870bf27870e..f91ab4c246b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,7 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c45304f1047c..580d3f93d670 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, u32 extra_bits = vmid & 0xf; /* IB packet must end on a 8 DW boundary */ - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1372,16 +1372,14 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + 
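/*
 * Editor-added worked example, not part of this commit: the SDMA padding
 * rewrite above is a pure bit trick -- for an 8-dword boundary,
 * (-len) & 7 equals (8 - (len & 7)) % 8 for every len:
 *
 *	len = 13:  (-13) & 7 = 3,  (8 - (13 & 7)) % 8 = 3
 *	len = 16:  (-16) & 7 = 0,  (8 - (16 & 7)) % 8 = 0
 *
 * The emit_ib change is the same identity shifted by the packet offset:
 * (12 - (wptr & 7)) % 8 == (4 - wptr) & 7.
 */
static u32 example_pad_count(u32 length_dw)
{
	return (-length_dw) & 7;	/* dwords to the next 8-DW boundary */
}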
&adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version cik_sdma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index d6221298b477..d6aca1c08068 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -31,6 +31,9 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; static void df_v1_7_sw_init(struct amdgpu_device *adev) { + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; } static void df_v1_7_sw_fini(struct amdgpu_device *adev) @@ -66,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); return df_v1_7_channel_number[fb_channel_number]; } @@ -77,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, u32 tmp; /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); @@ -92,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 4043ebcea5de..5a1bd8ed1a6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -27,6 +27,9 @@ #include "df/df_3_6_offset.h" #include "df/df_3_6_sh_mask.h" +#define DF_3_6_SMN_REG_INST_DIST 0x8 +#define DF_3_6_INST_CNT 8 + static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 32, 0, 0, 0, 2, 4, 8}; @@ -183,6 +186,61 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +/* same as perfmon_wreg but return status on write value check */ +static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t lo_val, + uint32_t hi_addr, uint32_t hi_val) +{ + unsigned long flags, address, data; + uint32_t lo_val_rb, hi_val_rb; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + WREG32(data, lo_val); + WREG32(address, hi_addr); + WREG32(data, hi_val); + + WREG32(address, lo_addr); + lo_val_rb = RREG32(data); + WREG32(address, hi_addr); + hi_val_rb = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + if (!(lo_val == lo_val_rb && hi_val == hi_val_rb)) + return -EBUSY; + + return 0; +} + + +/* + * retry arming counters every 100 usecs within 1 millisecond interval. + * if retry fails after time out, return error.
+ */ +#define ARM_RETRY_USEC_TIMEOUT 1000 +#define ARM_RETRY_USEC_INTERVAL 100 +static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t lo_val, + uint32_t hi_addr, uint32_t hi_val) +{ + int countdown = ARM_RETRY_USEC_TIMEOUT; + + while (countdown) { + + if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val, + hi_addr, hi_val)) + break; + + countdown -= ARM_RETRY_USEC_INTERVAL; + udelay(ARM_RETRY_USEC_INTERVAL); + } + + return countdown > 0 ? 0 : -ETIME; +} + /* get the number of df counters available */ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, struct device_attribute *attr, @@ -207,6 +265,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, /* device attr for available perfmon counters */ static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); +static void df_v3_6_query_hashes(struct amdgpu_device *adev) +{ + u32 tmp; + + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + /* encoding for hash-enabled on Arcturus */ + if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) { + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl); + adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl64K); + adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl2M); + adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl1G); + } +} + /* init perfmons */ static void df_v3_6_sw_init(struct amdgpu_device *adev) { @@ -218,6 +302,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) adev->df_perfmon_config_assign_mask[i] = 0; + + df_v3_6_query_hashes(adev); } static void df_v3_6_sw_fini(struct amdgpu_device *adev) @@ -256,7 +342,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number)) fb_channel_number = 0; @@ -270,7 +356,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) { /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable) { tmp = RREG32_SOC15(DF, 0, @@ -289,7 +375,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } } @@ -334,20 +420,20 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, switch (target_cntr) { case 0: - *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4; break; case 1: - *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5; break; case 2: - *lo_base_addr = is_ctrl ? 
smnPerfMonCtlLo2 : smnPerfMonCtrLo2; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6; break; case 3: - *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3; - *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3; + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7; break; }
@@ -422,6 +508,44 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, return -ENOSPC; } +#define DEFERRED_ARM_MASK (1 << 31) +static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev, + uint64_t config, bool is_deferred) +{ + int target_cntr; + + target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); + + if (target_cntr < 0) + return -EINVAL; + + if (is_deferred) + adev->df_perfmon_config_assign_mask[target_cntr] |= + DEFERRED_ARM_MASK; + else + adev->df_perfmon_config_assign_mask[target_cntr] &= + ~DEFERRED_ARM_MASK; + + return 0; +} + +static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev, + uint64_t config) +{ + int target_cntr; + + target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); + + /* + * we never get target_cntr < 0 since this function is only called in + * pmc_count for now, but we should check anyway. + */ + return (target_cntr >= 0 && + (adev->df_perfmon_config_assign_mask[target_cntr] + & DEFERRED_ARM_MASK)); + +} + /* release performance counter */ static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, uint64_t config)
@@ -451,29 +575,33 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, int is_enable) { uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - int ret = 0; + int err = 0, ret = 0; switch (adev->asic_type) { case CHIP_VEGA20: + if (is_enable) + return df_v3_6_pmc_add_cntr(adev, config); df_v3_6_reset_perfmon_cntr(adev, config); - if (is_enable) { - ret = df_v3_6_pmc_add_cntr(adev, config); - } else { - ret = df_v3_6_pmc_get_ctrl_settings(adev, + ret = df_v3_6_pmc_get_ctrl_settings(adev, config, &lo_base_addr, &hi_base_addr, &lo_val, &hi_val); - if (ret) - return ret; + if (ret) + return ret; + + err = df_v3_6_perfmon_arm_with_retry(adev, + lo_base_addr, + lo_val, + hi_base_addr, + hi_val); - df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val, - hi_base_addr, hi_val); - } + if (err) + ret = df_v3_6_pmc_set_deferred(adev, config, true); break; default:
@@ -501,7 +629,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, if (ret) return ret; - df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); + df_v3_6_reset_perfmon_cntr(adev, config); if (is_disable) df_v3_6_pmc_release_cntr(adev, config);
@@ -518,18 +646,29 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, uint64_t config, uint64_t *count) { - uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint32_t lo_base_addr, hi_base_addr, lo_val = 0, hi_val = 0; *count = 0; switch (adev->asic_type) { case CHIP_VEGA20: - df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, &hi_base_addr); if ((lo_base_addr == 0) || (hi_base_addr == 0)) return; + /* rearm the counter or throw away count value on failure */ + if (df_v3_6_pmc_is_deferred(adev, config)) { + int rearm_err = df_v3_6_perfmon_arm_with_status(adev, + lo_base_addr, lo_val, + hi_base_addr, hi_val); + + if (rearm_err) + return; + + df_v3_6_pmc_set_deferred(adev, config, false); + } + df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
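The deferred-arm bookkeeping above repurposes bit 31 of each df_perfmon_config_assign_mask slot: pmc_start() with is_enable set only reserves the slot, the arming pass arms with retries and marks the slot deferred instead of failing, and pmc_get_count() re-arms once and reports 0 until the arm sticks. A minimal sketch of just the flag handling (hypothetical user-space code; 1ULL is used below to sidestep the sign-extension an int-typed 1 << 31 picks up when widened to the u64 mask word):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define DEFERRED_ARM_MASK (1ULL << 31)

/* stands in for adev->df_perfmon_config_assign_mask */
static uint64_t assign_mask[4];

static void set_deferred(int cntr, bool on)
{
	if (on)
		assign_mask[cntr] |= DEFERRED_ARM_MASK;
	else
		assign_mask[cntr] &= ~DEFERRED_ARM_MASK;
}

static bool is_deferred(int cntr)
{
	return assign_mask[cntr] & DEFERRED_ARM_MASK;
}

int main(void)
{
	set_deferred(2, true);	/* arm timed out in pmc_start */
	assert(is_deferred(2));
	set_deferred(2, false);	/* re-arm succeeded in pmc_get_count */
	assert(!is_deferred(2));
	return 0;
}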
hi_base_addr, &hi_val); @@ -542,12 +681,63 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, config, lo_base_addr, hi_base_addr, lo_val, hi_val); break; - default: break; } } +static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev, + uint32_t df_inst) +{ + uint32_t base_addr_reg_val = 0; + uint64_t base_addr = 0; + + base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 + + df_inst * DF_3_6_SMN_REG_INST_DIST); + + if (REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + AddrRngVal) == 0) { + DRM_WARN("address range not valid"); + return 0; + } + + base_addr = REG_GET_FIELD(base_addr_reg_val, + DF_CS_UMC_AON0_DramBaseAddress0, + DramBaseAddr); + + return base_addr << 28; +} + +static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev) +{ + uint32_t xgmi_node_id = 0; + uint32_t df_inst_id = 0; + + /* Walk through DF dst nodes to find current XGMI node */ + for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) { + + xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 + + df_inst_id * DF_3_6_SMN_REG_INST_DIST); + xgmi_node_id = REG_GET_FIELD(xgmi_node_id, + DF_CS_UMC_AON0_DramLimitAddress0, + DstFabricID); + + /* TODO: establish reason dest fabric id is offset by 7 */ + xgmi_node_id = xgmi_node_id >> 7; + + if (adev->gmc.xgmi.physical_node_id == xgmi_node_id) + break; + } + + if (df_inst_id == DF_3_6_INST_CNT) { + DRM_WARN("cant match df dst id with gpu node"); + return 0; + } + + return df_inst_id; +} + const struct amdgpu_df_funcs df_v3_6_funcs = { .sw_init = df_v3_6_sw_init, .sw_fini = df_v3_6_sw_fini, @@ -561,5 +751,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .pmc_stop = df_v3_6_pmc_stop, .pmc_get_count = df_v3_6_pmc_get_count, .get_fica = df_v3_6_get_fica, - .set_fica = df_v3_6_set_fica + .set_fica = df_v3_6_set_fica, + .get_dram_base_addr = df_v3_6_get_dram_base_addr, + .get_df_inst_id = df_v3_6_get_df_inst_id }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ba9e53a1abc3..1785fdad6ecb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -50,9 +51,6 @@ * Navi10 has two graphic rings to share each graphic pipe. * 1. Primary ring * 2. Async ring - * - * In bring-up phase, it just used primary ring so set gfx ring number as 1 at - * first. 
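df_v3_6_get_dram_base_addr() above recovers a physical DRAM base from DramBaseAddress0: the AddrRngVal bit gates validity, and the DramBaseAddr field is shifted left by 28, i.e. it counts 256 MB segments. A hypothetical standalone decode (the exact bit positions here are an illustration; the driver reads the fields through REG_GET_FIELD with the generated masks):

#include <stdint.h>
#include <stdio.h>

/* illustrative layout: bit 0 = AddrRngVal, bits 31:12 = DramBaseAddr */
#define ADDR_RNG_VAL(reg)	((reg) & 0x1)
#define DRAM_BASE_ADDR(reg)	((reg) >> 12)

static uint64_t decode_dram_base(uint32_t reg)
{
	if (!ADDR_RNG_VAL(reg))
		return 0;	/* range not populated */
	/* base field counts 256 MB (1 << 28) units */
	return (uint64_t)DRAM_BASE_ADDR(reg) << 28;
}

int main(void)
{
	/* a base field of 0x4 decodes to 4 * 256 MB; prints 0x40000000 */
	printf("0x%llx\n",
	       (unsigned long long)decode_dram_base((0x4u << 12) | 0x1));
	return 0;
}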
*/ #define GFX10_NUM_GFX_RINGS 2 #define GFX10_MEC_HPD_SIZE 2048 @@ -123,7 +121,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -171,7 +169,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -348,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 2, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) @@ -474,18 +486,10 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) else udelay(1); } - if (i < adev->usec_timeout) { - if (amdgpu_emu_mode == 1) - DRM_INFO("ring test on %d succeeded in %d msecs\n", - ring->idx, i); - else - DRM_INFO("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + amdgpu_gfx_scratch_free(adev, scratch); return r; @@ -535,14 +539,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); @@ -591,8 +591,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) } if (adev->gfx.cp_fw_write_wait == false) - 
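The ring-test hunks above strip the per-ring DRM_INFO/DRM_ERROR chatter and return a bare error code (-ETIMEDOUT on timeout); logging and the ring->sched.ready bookkeeping move to the shared amdgpu_ring_test_helper(). Roughly, paraphrased from amdgpu_ring.c at this point in the tree (not part of this patch):

int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);
	else
		DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
			      ring->name);

	/* a failed test leaves the scheduler marked not-ready */
	ring->sched.ready = !r;

	return r;
}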
DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ - GRBM requires 1-cycle delay in cp firmware\n"); + DRM_WARN_ONCE("CP firmware version too old, please update!"); } @@ -617,11 +616,29 @@ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); } +static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) +{ + bool ret = false; + + switch (adev->pdev->revision) { + case 0xc2: + case 0xc3: + ret = true; + break; + default: + ret = false; + break; + } + + return ret ; +} + static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_NAVI10: - adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; default: break; @@ -805,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - + if (info->fw) { + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } if (adev->gfx.rlc.is_rlc_v2_1 && adev->gfx.rlc.save_restore_list_cntl_size_bytes && adev->gfx.rlc.save_restore_list_gpm_size_bytes && @@ -1948,7 +1966,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4; rlc_toc++; - }; + } return 0; } @@ -3319,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -3591,23 +3612,16 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - DRM_INFO("gfx %d ring me %d pipe %d q %d\n", - i, ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } } for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - ring->sched.ready = true; - DRM_INFO("compute ring %d mec %d pipe %d q %d\n", - i, ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_helper(ring); if (r) - ring->sched.ready = false; + return r; } return 0; @@ -4215,7 +4229,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: @@ -4241,7 +4255,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: gfx_v10_0_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -4723,6 +4737,7 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -4731,9 +4746,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 7f0a63628c43..31f44d05e606 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1576,7 +1576,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev) static void gfx_v6_0_constants_init(struct amdgpu_device *adev) { u32 gb_addr_config = 0; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 sx_debug_1; u32 hdp_host_path_cntl; u32 tmp; @@ -1678,7 +1678,6 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d92e92e5d50b..8f20a5dd44fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4258,7 +4258,7 @@ static int gfx_v7_0_late_init(void *handle) static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; @@ -4335,7 +4335,6 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) break; } - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 52a647d7022d..fa245973de12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1677,7 +1677,7 @@ fail: static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; int ret; @@ -1817,7 +1817,6 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) break; } - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; @@ -4559,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. 
+ */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -6447,6 +6449,7 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -6455,9 +6458,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 97105a5bb246..90f64b8bc358 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,6 +48,8 @@ #include "amdgpu_ras.h" +#include "gfx_v9_4.h" + #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -131,18 +133,6 @@ MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 -struct ras_gfx_subblock_reg { - const char *name; - uint32_t hwip; - uint32_t inst; - uint32_t seg; - uint32_t reg_offset; - uint32_t sec_count_mask; - uint32_t sec_count_shift; - uint32_t ded_count_mask; - uint32_t ded_count_shift; -}; - enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -748,9 +738,138 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); +static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev); static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_0_kiq_set_resources, + .kiq_map_queues = gfx_v9_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, + .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 2, +}; + +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; +} + static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -991,8 +1110,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) (adev->gfx.mec_feature_version < 46) || (adev->gfx.pfp_fw_version < 0x000000b7) || (adev->gfx.pfp_feature_version < 46)) - DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ - GRBM requires 1-cycle delay in cp firmware\n"); + DRM_WARN_ONCE("CP firmware version too old, please update!"); switch (adev->asic_type) { case CHIP_VEGA10: @@ -1044,18 +1162,54 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) } } +struct amdgpu_gfxoff_quirk { + u16 chip_vendor; + u16 chip_device; + u16 subsys_vendor; + u16 subsys_device; + u8 revision; +}; + +static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { + /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, + { 0, 0, 0, 0, 0 }, +}; + +static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) +{ + const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; + + while (p && p->chip_device != 0) { + if (pdev->vendor == p->chip_vendor && + pdev->device == p->chip_device && + pdev->subsystem_vendor == p->subsys_vendor && + pdev->subsystem_device == p->subsys_device && + pdev->revision == p->revision) { + return true; + } + ++p; + } + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { + if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: break; case CHIP_RAVEN: - if (!(adev->rev_id >= 0x8 || - adev->pdev->device == 0x15d8) && - (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */ - !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) && + 
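gfx_v9_0 now exposes the same KIQ packet writers through kiq_pm4_funcs as gfx10, including the new two-DWORD INVALIDATE_TLBS packet; the *_size fields tell callers how many DWORDs to reserve on the KIQ ring before invoking a hook. A sketch of a PASID TLB flush built on these hooks (simplified; fence and serialization handling are omitted and the helper name is illustrative):

static int kiq_flush_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
			       uint32_t flush_type, bool all_hub)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;
	int r;

	/* reserve exactly the space the packet writer will consume */
	r = amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
	if (r)
		return r;

	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
	amdgpu_ring_commit(ring);

	return 0;
}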
((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || + (adev->gfx.rlc_fw_version == 53815) || + (adev->gfx.rlc_feature_version < 1) || + !adev->gfx.rlc.is_rlc_v2_1)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (adev->pm.pp_feature & PP_GFXOFF_MASK) @@ -1834,6 +1988,17 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .query_ras_error_count = &gfx_v9_0_query_ras_error_count }; +static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_0_select_se_sh, + .read_wave_data = &gfx_v9_0_read_wave_data, + .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, + .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, + .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .ras_error_inject = &gfx_v9_4_ras_error_inject, + .query_ras_error_count = &gfx_v9_4_query_ras_error_count +}; + static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -1885,6 +2050,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_ARCTURUS: + adev->gfx.funcs = &gfx_v9_4_gfx_funcs; adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -2275,6 +2441,22 @@ static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) } } +static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) +{ + uint32_t tmp; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); + tmp = REG_SET_FIELD(tmp, SQ_CONFIG, + DISABLE_BARRIER_WAITCNT, 1); + WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); + break; + default: + break; + }; +} + static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -2320,6 +2502,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) gfx_v9_0_init_compute_vmid(adev); gfx_v9_0_init_gds_vmid(adev); + gfx_v9_0_init_sq_config(adev); } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -3119,74 +3302,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues. 
If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3323,8 +3438,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. 
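+ * (the kiq queue itself is activated by programming the CP_HQD_*
+ * registers directly from this mqd rather than through a map_queues
+ * packet, which is why it still needs cp_hqd_active = 1)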
+ */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -3593,7 +3711,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3650,6 +3768,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) +{ + u32 tmp; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, + adev->df.hash_status.hash_64k); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, + adev->df.hash_status.hash_2m); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, + adev->df.hash_status.hash_1g); + WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); +} + static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { if (adev->asic_type != CHIP_ARCTURUS) @@ -3667,6 +3802,8 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); + gfx_v9_0_init_tcp_config(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3678,36 +3815,6 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ disable failed\n"); - - return r; -} - static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3719,7 +3826,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3936,33 +4043,61 @@ static const u32 sgpr_init_compute_shader[] = 0xbe800080, 0xbf810000, }; +/* When below register arrays changed, please update gpr_reg_size, + and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, + to cover all gfx9 ASICs */ static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, 
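gfx_v9_0_kcq_resume() and hw_fini() above now go through the shared amdgpu_gfx_enable_kcq()/amdgpu_gfx_disable_kcq() helpers instead of the open-coded PM4 sequences this patch deletes. The enable path is essentially the removed code rebuilt on the kiq_pm4_funcs hooks (a sketch with error handling trimmed, not the verbatim helper):

static int enable_kcq_sketch(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* same bitmap walk as the deleted gfx_v9_0_kiq_kcq_enable() */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i)
		if (test_bit(i, adev->gfx.mec.queue_bitmap))
			queue_mask |= 1ull << i;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
			      adev->gfx.num_compute_rings +
			      kiq->pmf->set_resources_size);
	if (r)
		return r;

	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

	return amdgpu_ring_test_helper(kiq_ring);
}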
mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static const struct soc15_reg_entry sgpr1_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, }; -static const struct soc15_reg_entry sgpr_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, +static const struct soc15_reg_entry sgpr2_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, }; -static const struct soc15_reg_entry sec_ded_counter_registers[] = { +static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, @@ -3996,6 +4131,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, + { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -4050,10 +4186,16 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; struct amdgpu_ib ib; struct dma_fence *f = NULL; - int r, i, j, k; + int r, i; unsigned total_size, vgpr_offset, sgpr_offset; u64 gpu_addr; + int compute_dim_x = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se; + int sgpr_work_group_size = 5; + int gpr_reg_size = compute_dim_x / 16 + 6; + /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return 0; @@ -4063,9 +4205,11 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return 0; total_size = - ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ + total_size += + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ total_size += - ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ total_size = ALIGN(total_size, 256); vgpr_offset = total_size; total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); @@ -4092,7 +4236,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* VGPR */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) - PACKET3_SET_SH_REG_START; @@ -4108,7 +4252,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR1 */ + /* write the register state for the compute dispatch */ + for (i = 0; i < gpr_reg_size; i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + 
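The dispatch sizing above is derived from the CU topology instead of the old hard-coded 128: compute_dim_x is the total CU count (SE count x CUs per SH x SHs per SE), and gpr_reg_size = compute_dim_x / 16 + 6 selects the six leading registers of each init array plus one COMPUTE_STATIC_THREAD_MGMT_SE entry per 16 CUs. The two SGPR passes also split the per-SE CU mask (0x000000ff vs 0x0000ff00) so together they touch all sixteen CUs. Worked numbers, with the CU configurations stated as assumptions for the usual gfx9 parts:

#include <assert.h>

int main(void)
{
	int vega20_cus = 4 * 16 * 1;	/* 4 SEs, 16 CUs/SH, 1 SH/SE */
	int arcturus_cus = 8 * 16 * 1;	/* 8 SEs, 16 CUs/SH, 1 SH/SE */

	/* 6 base registers + the SE0..SE3 thread-management entries */
	assert(vega20_cus / 16 + 6 == 10);
	/* 6 base registers + SE0..SE7, i.e. the whole array */
	assert(arcturus_cus / 16 + 6 == 14);
	return 0;
}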
ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4118,13 +4290,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); - /* SGPR */ + /* SGPR2 */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); - ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) - PACKET3_SET_SH_REG_START; - ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; } /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; @@ -4136,7 +4308,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4160,18 +4332,17 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) goto fail; } - /* read back registers to clear the counters */ - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { - for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { - for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0x0, k); - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); - } - } + switch (adev->asic_type) + { + case CHIP_VEGA20: + gfx_v9_0_clear_ras_edc_counter(adev); + break; + case CHIP_ARCTURUS: + gfx_v9_4_clear_ras_edc_counter(adev); + break; + default: + break; } - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); - mutex_unlock(&adev->grbm_idx_mutex); fail: amdgpu_ib_free(adev, &ib, NULL); @@ -4189,6 +4360,7 @@ static int gfx_v9_0_early_init(void *handle) else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); @@ -4202,10 +4374,6 @@ static int gfx_v9_0_ecc_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_gfx_ras_late_init(adev); - if (r) - return r; - r = gfx_v9_0_do_edc_gds_workarounds(adev); if (r) return r; @@ -4215,6 +4383,10 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; + r = amdgpu_gfx_ras_late_init(adev); + if (r) + return r; + return 0; } @@ -4533,7 +4705,7 @@ static int gfx_v9_0_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_PG_STATE_GATE) ? 
true : false; + bool enable = (state == AMD_PG_STATE_GATE); switch (adev->asic_type) { case CHIP_RAVEN: @@ -4595,7 +4767,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, case CHIP_ARCTURUS: case CHIP_RENOIR: gfx_v9_0_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -4612,12 +4784,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags = 0; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -4626,18 +4798,18 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; if (adev->asic_type != CHIP_ARCTURUS) { /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; @@ -5108,6 +5280,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ @@ -5116,9 +5289,9 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) amdgpu_ring_write(ring, reg); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + - adev->virt.reg_val_offs * 4)); + kiq->reg_val_offs * 4)); } static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, @@ -5440,7 +5613,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, } -static const struct ras_gfx_subblock_reg ras_subblock_regs[] = { +static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) @@ -5888,7 +6061,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, int ret; struct ta_ras_trigger_error_input block_info = { 0 }; - if (adev->asic_type != CHIP_VEGA20) + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return -EINVAL; if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) @@ -6013,7 +6186,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, 
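Two related changes above: get_clockgating_state() reads the RLC/CP registers through RREG32_KIQ so the values arrive via the KIQ when direct MMIO access is not trustworthy (e.g. under SR-IOV), and ring_emit_rreg() lands the copied register in a per-KIQ write-back slot (kiq->reg_val_offs) instead of the old adev->virt.reg_val_offs. Roughly how the synchronous read path consumes that slot (a sketch; the real amdgpu_kiq_rreg() adds fencing, locking and timeout handling):

static uint32_t kiq_rreg_sketch(struct amdgpu_device *adev, uint32_t reg)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg);	/* COPY_DATA: reg -> wb slot */
	amdgpu_ring_commit(ring);

	/* ... wait for the KIQ fence emitted after the copy ... */

	return adev->wb.wb[kiq->reg_val_offs];
}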
mmATC_L2_CACHE_4K_EDC_INDEX, 255); WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); - for (i = 0; i < 16; i++) { + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); @@ -6032,7 +6205,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, } } - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); @@ -6053,7 +6226,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, } } - for (i = 0; i < 4; i++) { + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); @@ -6065,7 +6238,7 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, } } - for (i = 0; i < 32; i++) { + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); @@ -6092,36 +6265,36 @@ static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, return 0; } -static int __get_ras_error_count(const struct soc15_reg_entry *reg, +static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg, uint32_t se_id, uint32_t inst_id, uint32_t value, uint32_t *sec_count, uint32_t *ded_count) { uint32_t i; uint32_t sec_cnt, ded_cnt; - for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) { - if(ras_subblock_regs[i].reg_offset != reg->reg_offset || - ras_subblock_regs[i].seg != reg->seg || - ras_subblock_regs[i].inst != reg->inst) + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { + if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || + gfx_v9_0_ras_fields[i].seg != reg->seg || + gfx_v9_0_ras_fields[i].inst != reg->inst) continue; sec_cnt = (value & - ras_subblock_regs[i].sec_count_mask) >> - ras_subblock_regs[i].sec_count_shift; + gfx_v9_0_ras_fields[i].sec_count_mask) >> + gfx_v9_0_ras_fields[i].sec_count_shift; if (sec_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", - ras_subblock_regs[i].name, + gfx_v9_0_ras_fields[i].name, se_id, inst_id, sec_cnt); *sec_count += sec_cnt; } ded_cnt = (value & - ras_subblock_regs[i].ded_count_mask) >> - ras_subblock_regs[i].ded_count_shift; + gfx_v9_0_ras_fields[i].ded_count_mask) >> + gfx_v9_0_ras_fields[i].ded_count_shift; if (ded_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", - ras_subblock_regs[i].name, + gfx_v9_0_ras_fields[i].name, se_id, inst_id, ded_cnt); *ded_count += ded_cnt; @@ -6131,6 +6304,58 @@ static int __get_ras_error_count(const struct soc15_reg_entry *reg, return 0; } +static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev) +{ + int i, j, k; + + /* read back registers to clear the counters */ + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); + 
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); + RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); + RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); + } + + WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); +} + static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { @@ -6139,7 +6364,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, uint32_t i, j, k; uint32_t reg_value; - if (adev->asic_type != CHIP_VEGA20) + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return -EINVAL; err_data->ue_count = 0; @@ -6147,14 +6372,14 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { - for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { - for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { + for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { gfx_v9_0_select_se_sh(adev, j, 0, k); reg_value = - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) - __get_ras_error_count(&sec_ded_counter_registers[i], + gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i], j, k, reg_value, &sec_count, &ded_count); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c new file mode 100644 index 000000000000..f099f13d7f1e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -0,0 +1,978 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/kernel.h> + +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_atomfirmware.h" +#include "amdgpu_pm.h" + +#include "gc/gc_9_4_1_offset.h" +#include "gc/gc_9_4_1_sh_mask.h" +#include "soc15_common.h" + +#include "gfx_v9_4.h" +#include "amdgpu_ras.h" + +static const struct soc15_reg_entry gfx_v9_4_edc_counter_regs[] = { + /* CPC */ + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1 }, + /* DC */ + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1 }, + /* CPF */ + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1 }, + /* GDS */ + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1 }, + /* SPI */ + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1 }, + /* SQ */ + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16 }, + /* SQC */ + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6 }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), 0, 4, 6 }, + /* TA */ + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16 }, + /* TCA */ + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2 }, + /* TCC */ + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16 }, + /* TCI */ + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72 }, + /* TCP */ + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16 }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16 }, + /* TD */ + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16 }, + /* GCEA */ + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32 }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32 }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 1, 32 }, + /* RLC */ + { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), 0, 1, 1 }, + { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), 0, 1, 1 }, +}; + +static void gfx_v9_4_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + + WREG32_SOC15_RLC_SHADOW(GC, 0, 
mmGRBM_GFX_INDEX, data); +} + +static const struct soc15_ras_field_entry gfx_v9_4_ras_fields[] = { + /* CPC */ + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) }, + { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) }, + { "CPC_DC_CSINVOC_RAM_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) }, + { "CPC_DC_RESTORE_RAM_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) }, + { "CPC_DC_CSINVOC_RAM1_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1), + SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) }, + { "CPC_DC_RESTORE_RAM1_ME1", + SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1), + SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) }, + + /* CPF */ + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1), + SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) }, + { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), + SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) }, + + /* GDS */ + { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), + SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC), + SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) }, + { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, + { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) }, + { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE0_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE1_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE2_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, + { "GDS_ME1_PIPE3_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), + SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + 
SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, + + /* SPI */ + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) }, + { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) }, + { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) }, + { "SPI_WB_GRANT_61", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_DED_COUNT) }, + { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT), + SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) }, + + /* SQ */ + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), + SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) }, + + /* SQC */ + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + 
SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKA_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKA_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_HIT_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKA_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + DATA_BANKA_MISS_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKB_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + INST_BANKB_MISS_FIFO_DED_COUNT) }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_HIT_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKB_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, + DATA_BANKB_MISS_FIFO_DED_COUNT) }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, + + /* TA */ + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, 
TA_FS_DFIFO_DED_COUNT) }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_DED_COUNT) }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT), + SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) }, + + /* TCA */ + { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) }, + + /* TCC */ + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) }, + { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT), + 
SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) }, + { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) }, + + /* TCI */ + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) }, + + /* TCP */ + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) }, + { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0, 0 }, + { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, + + /* TD */ + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT), + SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) }, + + /* EA */ + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, + { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, + { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, + { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, 
GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0, 0 }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_DATAMEM_DED_COUNT) }, + { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) }, + { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 }, + { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) }, + { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) }, + { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) }, + { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) }, + { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) }, + { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) }, + { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) }, + { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) }, + { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT), + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) }, + { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), + SOC15_REG_FIELD(GCEA_EDC_CNT, MAM_AFMEM_SEC_COUNT), 0, 0 }, + { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0, + SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) }, + + /* RLC 
*/ + { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) }, + { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) }, + { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) }, + { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) }, + { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) }, + { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) }, + { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) }, + { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) }, + { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT), + SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) }, +}; + +static const char * const vml2_mems[] = { + "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_0_4K_MEM0", + "UTC_VML2_BANK_CACHE_0_4K_MEM1", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_1_4K_MEM0", + "UTC_VML2_BANK_CACHE_1_4K_MEM1", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 
+ "UTC_VML2_BANK_CACHE_2_4K_MEM0", + "UTC_VML2_BANK_CACHE_2_4K_MEM1", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", + "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", + "UTC_VML2_BANK_CACHE_3_4K_MEM0", + "UTC_VML2_BANK_CACHE_3_4K_MEM1", + "UTC_VML2_IFIFO_GROUP0", + "UTC_VML2_IFIFO_GROUP1", + "UTC_VML2_IFIFO_GROUP2", + "UTC_VML2_IFIFO_GROUP3", + "UTC_VML2_IFIFO_GROUP4", + "UTC_VML2_IFIFO_GROUP5", + "UTC_VML2_IFIFO_GROUP6", + "UTC_VML2_IFIFO_GROUP7", + "UTC_VML2_IFIFO_GROUP8", + "UTC_VML2_IFIFO_GROUP9", + "UTC_VML2_IFIFO_GROUP10", + "UTC_VML2_IFIFO_GROUP11", + "UTC_VML2_IFIFO_GROUP12", + "UTC_VML2_IFIFO_GROUP13", + "UTC_VML2_IFIFO_GROUP14", + "UTC_VML2_IFIFO_GROUP15", + "UTC_VML2_IFIFO_GROUP16", + "UTC_VML2_IFIFO_GROUP17", + "UTC_VML2_IFIFO_GROUP18", + "UTC_VML2_IFIFO_GROUP19", + "UTC_VML2_IFIFO_GROUP20", + "UTC_VML2_IFIFO_GROUP21", + "UTC_VML2_IFIFO_GROUP22", + "UTC_VML2_IFIFO_GROUP23", + "UTC_VML2_IFIFO_GROUP24", +}; + +static const char * const vml2_walker_mems[] = { + "UTC_VML2_CACHE_PDE0_MEM0", + "UTC_VML2_CACHE_PDE0_MEM1", + "UTC_VML2_CACHE_PDE1_MEM0", + "UTC_VML2_CACHE_PDE1_MEM1", + "UTC_VML2_CACHE_PDE2_MEM0", + "UTC_VML2_CACHE_PDE2_MEM1", + "UTC_VML2_RDIF_ARADDRS", + "UTC_VML2_RDIF_LOG_FIFO", + "UTC_VML2_QUEUE_REQ", + "UTC_VML2_QUEUE_RET", +}; + +static const char * const utcl2_router_mems[] = { + "UTCL2_ROUTER_GROUP0_VML2_REQ_FIFO0", + "UTCL2_ROUTER_GROUP1_VML2_REQ_FIFO1", + "UTCL2_ROUTER_GROUP2_VML2_REQ_FIFO2", + "UTCL2_ROUTER_GROUP3_VML2_REQ_FIFO3", + "UTCL2_ROUTER_GROUP4_VML2_REQ_FIFO4", + "UTCL2_ROUTER_GROUP5_VML2_REQ_FIFO5", + "UTCL2_ROUTER_GROUP6_VML2_REQ_FIFO6", + "UTCL2_ROUTER_GROUP7_VML2_REQ_FIFO7", + "UTCL2_ROUTER_GROUP8_VML2_REQ_FIFO8", + "UTCL2_ROUTER_GROUP9_VML2_REQ_FIFO9", + "UTCL2_ROUTER_GROUP10_VML2_REQ_FIFO10", + "UTCL2_ROUTER_GROUP11_VML2_REQ_FIFO11", + "UTCL2_ROUTER_GROUP12_VML2_REQ_FIFO12", + "UTCL2_ROUTER_GROUP13_VML2_REQ_FIFO13", + "UTCL2_ROUTER_GROUP14_VML2_REQ_FIFO14", + "UTCL2_ROUTER_GROUP15_VML2_REQ_FIFO15", + "UTCL2_ROUTER_GROUP16_VML2_REQ_FIFO16", + "UTCL2_ROUTER_GROUP17_VML2_REQ_FIFO17", + "UTCL2_ROUTER_GROUP18_VML2_REQ_FIFO18", + "UTCL2_ROUTER_GROUP19_VML2_REQ_FIFO19", + "UTCL2_ROUTER_GROUP20_VML2_REQ_FIFO20", + "UTCL2_ROUTER_GROUP21_VML2_REQ_FIFO21", + "UTCL2_ROUTER_GROUP22_VML2_REQ_FIFO22", + "UTCL2_ROUTER_GROUP23_VML2_REQ_FIFO23", + "UTCL2_ROUTER_GROUP24_VML2_REQ_FIFO24", +}; + +static const char * const atc_l2_cache_2m_mems[] = { + "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", + "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", +}; + +static const char * const atc_l2_cache_4k_mems[] = { + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", + 
"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", + "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", +}; + +static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev, + struct ras_err_data *err_data) +{ + uint32_t i, data; + uint32_t sec_count, ded_count; + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0); + + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0); + + for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + vml2_walker_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + vml2_walker_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) { + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i); + data = RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL); + + sec_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + utcl2_router_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + utcl2_router_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL); + + sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_2m_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + atc_l2_cache_2m_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { + WREG32_SOC15(GC, 0, 
mmATC_L2_CACHE_4K_DSM_INDEX, i); + data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL); + + sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL, + SEC_COUNT); + if (sec_count) { + DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, + atc_l2_cache_4k_mems[i], sec_count); + err_data->ce_count += sec_count; + } + + ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL, + DED_COUNT); + if (ded_count) { + DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, + atc_l2_cache_4k_mems[i], ded_count); + err_data->ue_count += ded_count; + } + } + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255); + WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); + + return 0; +} + +static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t se_id, uint32_t inst_id, + uint32_t value, uint32_t *sec_count, + uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_ras_fields); i++) { + if (gfx_v9_4_ras_fields[i].reg_offset != reg->reg_offset || + gfx_v9_4_ras_fields[i].seg != reg->seg || + gfx_v9_4_ras_fields[i].inst != reg->inst) + continue; + + sec_cnt = (value & gfx_v9_4_ras_fields[i].sec_count_mask) >> + gfx_v9_4_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", + gfx_v9_4_ras_fields[i].name, se_id, inst_id, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & gfx_v9_4_ras_fields[i].ded_count_mask) >> + gfx_v9_4_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", + gfx_v9_4_ras_fields[i].name, se_id, inst_id, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i, j, k; + uint32_t reg_value; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return -EINVAL; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance; + k++) { + gfx_v9_4_select_se_sh(adev, j, 0, k); + reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( + gfx_v9_4_edc_counter_regs[i])); + if (reg_value) + gfx_v9_4_ras_error_count( + &gfx_v9_4_edc_counter_regs[i], + j, k, reg_value, &sec_count, + &ded_count); + } + } + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; + + gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + gfx_v9_4_query_utc_edc_status(adev, err_data); + + return 0; +} + +void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev) +{ + int i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) { + for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance; + k++) { + gfx_v9_4_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET( + gfx_v9_4_edc_counter_regs[i])); + } + } + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + + WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255); + 
WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0);
+
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) {
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL);
+ }
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+}
+
+int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+ struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
+ int ret;
+ struct ta_ras_trigger_error_input block_info = { 0 };
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
+ block_info.sub_block_index = info->head.sub_block_index;
+ block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
+ block_info.address = info->address;
+ block_info.value = info->value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
new file mode 100644
index 000000000000..2e3f6f755ad4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_H__
+#define __GFX_V9_4_H__
+
+void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev);
+
+int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+
+int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if);
+
+#endif /* __GFX_V9_4_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index e91bd7945777..1a2f18b908fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -75,40 +75,45 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
- /* Program the system aperture low logical page number. */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-
- if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
- /*
- * Raven2 has a HW issue that it is unable to use the vram which
- * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
- * workaround that increases system aperture high address (add 1)
- * to get rid of the VM fault and hardware hang.
- */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max((adev->gmc.fb_end >> 18) + 0x1,
- adev->gmc.agp_end >> 18));
- else
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
-
- /* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
- (u32)(value >> 12));
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
- (u32)(value >> 44));
-
- /* Program "protection fault". */
- WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page_addr >> 12));
- WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page_addr >> 44));
-
- WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
- ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+ /*
+ * Raven2 has a HW issue that it is unable to use the
+ * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * So here is the workaround that increases system
+ * aperture high address (add 1) to get rid of the VM
+ * fault and hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, 0,
+ mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(
+ GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address.
*/
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ }
}
static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
@@ -264,7 +269,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {
/*
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
* VF copy registers so vbios post doesn't program them, for
@@ -280,10 +285,12 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
gfxhub_v1_0_init_gart_aperture_regs(adev);
gfxhub_v1_0_init_system_aperture_regs(adev);
gfxhub_v1_0_init_tlb_regs(adev);
- gfxhub_v1_0_init_cache_regs(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_init_cache_regs(adev);
gfxhub_v1_0_enable_system_domain(adev);
- gfxhub_v1_0_disable_identity_aperture(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_disable_identity_aperture(adev);
gfxhub_v1_0_setup_vmid_config(adev);
gfxhub_v1_0_program_invalidation(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f5725336a5f2..9775eca6fe43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
#include "navi10_enum.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "nbio_v2_3.h"
@@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
(!amdgpu_sriov_vf(adev)));
}
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
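The new gmc_v10_0_get_atc_vmid_pasid_mapping_info() helper above turns the sixteen consecutive ATC_VMID*_PASID_MAPPING registers into a reverse PASID-to-VMID lookup. As a reading aid, here is a minimal sketch of that lookup in isolation; find_vmid_for_pasid() is a hypothetical name for illustration only and is not part of this patch, which open-codes the same loop inside gmc_v10_0_flush_gpu_tlb_pasid() below.

/* Illustrative only, not part of the patch: return the VMID (1..15)
 * whose ATC mapping is valid and matches @pasid, or -1 if the PASID
 * is not currently bound to any VMID.
 */
static int find_vmid_for_pasid(struct amdgpu_device *adev, uint16_t pasid)
{
	uint16_t queried_pasid;
	int vmid;

	for (vmid = 1; vmid < 16; vmid++) {
		/* valid bit set and PASID field matches the target */
		if (gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
							      &queried_pasid) &&
		    queried_pasid == pasid)
			return vmid;
	}

	return -1;
}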
@@ -246,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
{
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ u32 tmp;
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
@@ -273,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
/*
* Issue a dummy read to wait for the ACK register to be cleared
@@ -380,6 +397,64 @@ error_alloc:
DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for (i = 0; i < adev->num_vmhubs; i++)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB_0, flush_type);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
@@ -531,6 +606,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
.map_mtype = gmc_v10_0_map_mtype,
@@ -564,22 +640,13 @@ static int gmc_v10_0_early_init(void *handle)
static int gmc_v10_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
- unsigned i;
-
- for(i = 0; i < adev->num_rings; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
- unsigned vmhub = ring->funcs->vmhub;
+ int r;
- ring->vm_inv_eng = vm_inv_eng[vmhub]++;
- dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
- ring->idx, ring->name, ring->vm_inv_eng,
- ring->funcs->vmhub);
- }
+ amdgpu_bo_late_init(adev);
- /* Engine 17 is used for GART flushes */
- for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
- BUG_ON(vm_inv_eng[i] > 17);
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
return
amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
@@ -731,6 +798,10 @@ static int gmc_v10_0_sw_init(void *handle)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
@@ -743,15 +814,6 @@ static int gmc_v10_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- /*
- * Reserve 8M stolen memory for navi10 like vega10
- * TODO: will check if it's really needed on asic.
- */
- if (amdgpu_emu_mode == 1)
- adev->gmc.stolen_size = 0;
- else
- adev->gmc.stolen_size = 9 * 1024 *1024;
-
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index f08e5330642d..9da9596a3638 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -381,7 +381,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
+ if (adev->flags & AMD_IS_APU &&
+ adev->gmc.real_vram_size > adev->gmc.aper_size) {
adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
@@ -418,6 +419,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1333,6 +1366,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
.set_prt = gmc_v7_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 6d96d40fbcb8..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
.set_prt = gmc_v8_0_set_prt,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index a5b68b5e452f..90216abf14a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"
@@ -207,6 +209,11 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
{
u32 bits, i, tmp, reg;
+ /* Devices newer than VEGA10/12 shall have these programming
+ sequences performed by PSP BL */
+ if (adev->asic_type >= CHIP_VEGA20)
+ return 0;
+
bits = 0x7f;
switch (state) {
@@ -393,8 +400,10 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.num_types = 1;
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
- adev->gmc.ecc_irq.num_types = 1;
- adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+ }
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
@@ -434,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
adev->pdev->device == 0x15d8)));
}
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
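gmc_v9_0 wires up the same ATC reverse lookup and, like gmc v10, prefers the KIQ for PASID invalidation when its scheduler is ready, falling back to the per-VMID register walk otherwise. A condensed sketch of the KIQ request-and-poll pattern used by the hunk below; kiq_invalidate_tlbs_sync() is a hypothetical wrapper for illustration only, with the SRIOV details and error paths of the real code simplified.

/* Illustrative only, not part of the patch: submit a TLB invalidate
 * on the KIQ and busy-wait for its polling fence, as the PASID flush
 * below does inline.
 */
static int kiq_invalidate_tlbs_sync(struct amdgpu_device *adev,
				    uint16_t pasid, uint32_t flush_type,
				    bool all_hub)
{
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	uint32_t seq;
	signed long r;

	spin_lock(&adev->gfx.kiq.ring_lock);
	/* room for the invalidate packet plus an 8-dword polling fence */
	amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);

	/* poll the fence; anything below 1 means it never signalled */
	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);

	return (r < 1) ? -ETIME : 0;
}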
@@ -455,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); const unsigned eng = 17; - u32 j, tmp; + u32 j, inv_req, tmp; struct amdgpu_vmhub *hub; BUG_ON(vmhub >= adev->num_vmhubs); hub = &adev->vmhub[vmhub]; - tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal @@ -472,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t req = hub->vm_inv_eng0_req + eng; uint32_t ack = hub->vm_inv_eng0_ack + eng; - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); return; } @@ -500,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared @@ -532,6 +553,68 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } +/** + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flushed + * @flush_type: type of the flush + * @all_hub: flush all hubs + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (adev->in_gpu_reset) + return -EIO; + + if (ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + /* 2 dwords flush + 8 dwords fence */ + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v9_0_flush_gpu_tlb(adev, vmid, + i, flush_type); + } else { + gmc_v9_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, flush_type); + } + break; + } + } + + return 0; +} + static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, .map_mtype = gmc_v9_0_map_mtype, @@ -715,7 +799,15 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; - adev->umc.channel_offs = 
UMC_V6_1_PER_CHANNEL_OFFSET; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; + adev->umc.funcs = &umc_v6_1_funcs; + break; + case CHIP_ARCTURUS: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; adev->umc.funcs = &umc_v6_1_funcs; break; @@ -730,6 +822,9 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) case CHIP_VEGA20: adev->mmhub.funcs = &mmhub_v1_0_funcs; break; + case CHIP_ARCTURUS: + adev->mmhub.funcs = &mmhub_v9_4_funcs; + break; default: break; } @@ -779,36 +874,6 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) } } -static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring; - unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = - {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, - GFXHUB_FREE_VM_INV_ENGS_BITMAP}; - unsigned i; - unsigned vmhub, inv_eng; - - for (i = 0; i < adev->num_rings; ++i) { - ring = adev->rings[i]; - vmhub = ring->funcs->vmhub; - - inv_eng = ffs(vm_inv_engs[vmhub]); - if (!inv_eng) { - dev_err(adev->dev, "no VM inv eng for ring %s\n", - ring->name); - return -EINVAL; - } - - ring->vm_inv_eng = inv_eng - 1; - vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); - - dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", - ring->name, ring->vm_inv_eng, ring->funcs->vmhub); - } - - return 0; -} - static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -817,7 +882,7 @@ static int gmc_v9_0_late_init(void *handle) if (!gmc_v9_0_keep_stolen_memory(adev)) amdgpu_bo_late_init(adev); - r = gmc_v9_0_allocate_vm_inv_eng(adev); + r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; /* Check if ecc is available */ @@ -825,11 +890,12 @@ static int gmc_v9_0_late_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA20: + case CHIP_ARCTURUS: r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); - if (adev->df_funcs->enable_ecc_force_par_wr_rmw) - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); + if (adev->df.funcs->enable_ecc_force_par_wr_rmw) + adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } else { DRM_INFO("ECC is active.\n"); } @@ -1034,7 +1100,7 @@ static int gmc_v9_0_sw_init(void *handle) else chansize = 128; - numchan = adev->df_funcs->get_hbm_channel_number(adev); + numchan = adev->df.funcs->get_hbm_channel_number(adev); adev->gmc.vram_width = numchan * chansize; } @@ -1100,11 +1166,13 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; - /* interrupt sent to DF. */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, - &adev->gmc.ecc_irq); - if (r) - return r; + if (!amdgpu_sriov_vf(adev)) { + /* interrupt sent to DF. 
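+		 * RAS/ECC events come in through the DF IH client; under
+		 * SR-IOV they are handled by the host, so the VF skips this
+		 * registration (mirroring the ecc_irq guard added to
+		 * gmc_v9_0_set_irq_funcs in this same patch).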
*/ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, + &adev->gmc.ecc_irq); + if (r) + return r; + } /* Set the internal MC address mask * This is the max address of the GPU's @@ -1290,12 +1358,13 @@ static int gmc_v9_0_hw_init(void *handle) else value = true; - gfxhub_v1_0_set_fault_enable_default(adev, value); - if (adev->asic_type == CHIP_ARCTURUS) - mmhub_v9_4_set_fault_enable_default(adev, value); - else - mmhub_v1_0_set_fault_enable_default(adev, value); - + if (!amdgpu_sriov_vf(adev)) { + gfxhub_v1_0_set_fault_enable_default(adev, value); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_set_fault_enable_default(adev, value); + else + mmhub_v1_0_set_fault_enable_default(adev, value); + } for (i = 0; i < adev->num_vmhubs; ++i) gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index 971c0840358f..e0585e8c6c1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -24,24 +24,6 @@ #ifndef __GMC_V9_0_H__ #define __GMC_V9_0_H__ - /* - * The latest engine allocation on gfx9 is: - * Engine 2, 3: firmware - * Engine 0, 1, 4~16: amdgpu ring, - * subject to change when ring number changes - * Engine 17: Gart flushes - */ -#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 -#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 - extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; - -/* amdgpu_amdkfd*.c */ -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t value); -void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t value); -void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, - uint32_t vmid, uint64_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c new file mode 100644 index 000000000000..0debfd9f428c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -0,0 +1,586 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "soc15.h" +#include "soc15d.h" +#include "vcn_v1_0.h" + +#include "vcn/vcn_1_0_offset.h" +#include "vcn/vcn_1_0_sh_mask.h" + +static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev); + +static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[(*ptr)++] = 0; + ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); + } else { + ring->ring[(*ptr)++] = reg_offset; + ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); + } + ring->ring[(*ptr)++] = val; +} + +static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) +{ + struct amdgpu_device *adev = ring->adev; + + uint32_t reg, reg_offset, val, mask, i; + + // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); + reg_offset = (reg << 2); + val = lower_32_bits(ring->gpu_addr); + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); + reg_offset = (reg << 2); + val = upper_32_bits(ring->gpu_addr); + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 3rd to 5th: issue MEM_READ commands + for (i = 0; i <= 2; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); + ring->ring[ptr++] = 0; + } + + // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x13; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 7th: program mmUVD_JRBC_RB_REF_DATA + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA); + reg_offset = (reg << 2); + val = 0x1; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x1; + mask = 0x1; + + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = 0x01400200; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = val; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[ptr++] = 0; + ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); + } else { + ring->ring[ptr++] = reg_offset; + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); + } + ring->ring[ptr++] = mask; + + //9th to 21st: insert no-op + for (i = 0; i <= 12; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ring->ring[ptr++] = 0; + } + + //22nd: reset mmUVD_JRBC_RB_RPTR + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR); + reg_offset = (reg << 2); + val = 0; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); + + 
//23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch + reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x12; + jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val); +} + +/** + * jpeg_v1_0_decode_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v1_0_decode_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v1_0_decode_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); +} + +/** + * jpeg_v1_0_decode_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * jpeg_v1_0_decode_ring_insert_end - insert an end command + * + * @ring: amdgpu_ring pointer + * + * Write an end command to the ring. + */ +static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * jpeg_v1_0_decode_ring_emit_fence - emit a fence & trap command + * + * @ring: amdgpu_ring pointer + * @addr: address to write the fence sequence number to + * @seq: sequence number to emit + * @flags: fence related flags + * + * Write a fence and a trap command to the ring.
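+ *
+ * A reading of the packet flow below: the sequence number goes out
+ * through UVD_JPEG_GPCOM_DATA0/1 and the fence address through the
+ * JRBC memory-write BARs, GPCOM_CMD 0x8 triggers the write, then a
+ * conditional read with UVD_JRBC_RB_REF_DATA == seq waits for the
+ * value to land in memory (0x01400200 in UVD_JRBC_RB_COND_RD_TIMER
+ * looks like a retry-timer encoding; that is an inference, it is not
+ * documented here), and the closing PACKETJ_TYPE7 packet raises the
+ * trap interrupt.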
+ */ +static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0xffffffff); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + /* emit trap */ + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. 
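+ *
+ * @job: job to retrieve the vmid from
+ * @flags: unused
+ *
+ * Rough sequence, per the body below: tag the JRBC and JPEG LMI with
+ * the job's vmid, program the IB base address and size, re-point the
+ * JRBC read BARs at the ring, then poll UVD_JRBC_STATUS against a
+ * reference value of 0x2 (mask 0x2) until the engine signals
+ * completion.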
+ */ +static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void jpeg_v1_0_decode_ring_emit_wreg(struct 
amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG decode TRAP\n"); + + switch (entry->src_id) { + case 126: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +/** + * jpeg_v1_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +int jpeg_v1_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_inst = 1; + + jpeg_v1_0_set_dec_ring_funcs(adev); + jpeg_v1_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v1_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + */ +int jpeg_v1_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq); + if (r) + return r; + + ring = &adev->jpeg.inst->ring_dec; + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch = + SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v1_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG free up sw allocation + */ +void jpeg_v1_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec); +} + +/** + * jpeg_v1_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +void jpeg_v1_0_start(struct amdgpu_device *adev, int mode) +{ + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + + if (mode == 0) { + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, 
mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + } + + /* initialize wptr */ + ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); + + /* copy patch commands to the jpeg ring */ + jpeg_v1_0_decode_ring_set_patch_ring(ring, + (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); +} + +static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .nop = PACKET0(0x81ff, 0), + .support_64bit_ptrs = false, + .no_user_fence = true, + .vmhub = AMDGPU_MMHUB_0, + .extra_dw = 64, + .get_rptr = jpeg_v1_0_decode_ring_get_rptr, + .get_wptr = jpeg_v1_0_decode_ring_get_wptr, + .set_wptr = jpeg_v1_0_decode_ring_set_wptr, + .emit_frame_size = + 6 + 6 + /* hdp invalidate / flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */ + 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */ + .emit_ib = jpeg_v1_0_decode_ring_emit_ib, + .emit_fence = jpeg_v1_0_decode_ring_emit_fence, + .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v1_0_decode_ring_nop, + .insert_start = jpeg_v1_0_decode_ring_insert_start, + .insert_end = jpeg_v1_0_decode_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = vcn_v1_0_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg, + .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs; + DRM_INFO("JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = { + .set = jpeg_v1_0_set_interrupt_state, + .process = jpeg_v1_0_process_interrupt, +}; + +static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h new file mode 100644 index 000000000000..bbf33a6a3972 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h @@ -0,0 +1,32 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __JPEG_V1_0_H__ +#define __JPEG_V1_0_H__ + +int jpeg_v1_0_early_init(void *handle); +int jpeg_v1_0_sw_init(void *handle); +void jpeg_v1_0_sw_fini(void *handle); +void jpeg_v1_0_start(struct amdgpu_device *adev, int mode); + +#endif /*__JPEG_V1_0_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c new file mode 100644 index 000000000000..ff2e6e1ccde7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -0,0 +1,827 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" + +#include "vcn/vcn_2_0_0_offset.h" +#include "vcn/vcn_2_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff +#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029 +#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a +#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb +#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf +#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9 +#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed +#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 +#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 +#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 + +static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state); + +/** + * jpeg_v2_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v2_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_inst = 1; + + jpeg_v2_0_set_dec_ring_funcs(adev); + jpeg_v2_0_set_irq_funcs(adev); + + return 0; 
+} + +/** + * jpeg_v2_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v2_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = &adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v2_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v2_0_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v2_0_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v2_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + int r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + r = amdgpu_ring_test_helper(ring); + if (!r) + DRM_INFO("JPEG decode initialized successfully.\n"); + + return r; +} + +/** + * jpeg_v2_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v2_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) + jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + return 0; +} + +/** + * jpeg_v2_0_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v2_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v2_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v2_0_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v2_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v2_0_hw_init(adev); + + return r; +} + +static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev) +{ + uint32_t data; + int r = 0; + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); + + SOC15_WAIT_ON_RREG(JPEG, 0, + mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + 
DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); + return r; + } + } + + /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */ + data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data); + + return 0; +} + +static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev) +{ + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + uint32_t data; + int r = 0; + + data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)); + data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; + data |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data); + + data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data); + + SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS, + (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); + return r; + } + } + + return 0; +} + +static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data); +} + +static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device* adev) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data); +} + +/** + * jpeg_v2_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + /* disable power gating */ + r = jpeg_v2_0_disable_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v2_0_disable_clock_gating(adev); + + WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 
0x00000002L)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v2_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * Stop the JPEG block + */ +static int jpeg_v2_0_stop(struct amdgpu_device *adev) +{ + int r; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable JPEG CGC */ + jpeg_v2_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v2_0_enable_power_gating(adev); + if (r) + return r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v2_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v2_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v2_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +/** + * jpeg_v2_0_dec_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * jpeg_v2_0_dec_ring_insert_end - insert an end command + * + * @ring: amdgpu_ring pointer + * + * Write an end command to the ring. + */ +void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * jpeg_v2_0_dec_ring_emit_fence - emit a fence & trap command + * + * @ring: amdgpu_ring pointer + * @addr: address to write the fence sequence number to + * @seq: sequence number to emit + * @flags: fence related flags + * + * Write a fence and a trap command to the ring.
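+ *
+ * Same packet flow as jpeg_v1_0_decode_ring_emit_fence, but registers
+ * are addressed via the *_INTERNAL_OFFSET defines at the top of this
+ * file rather than SOC15_REG_OFFSET(); a hedged reading is that the
+ * JPEG 2.0 JRBC resolves engine-internal offsets itself and reaches
+ * everything else through JRBC_DEC_EXTERNAL_REG_WRITE_ADDR (see the
+ * 0x10000..0x105ff special case in jpeg_v2_0_dec_ring_emit_wreg).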
+ */ +void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. + */ +void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + 
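+	/* Same poll pattern as the fence path: REF_DATA holds the expected
+	 * value and the TYPE3 packet stalls the engine until
+	 * (reg & mask) == val; 0x01400200 in COND_RD_TIMER reads like a
+	 * retry-timer encoding (an inference; the value is simply reused
+	 * everywhere the timer is armed).
+	 */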
uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); +} + +void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static bool jpeg_v2_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v2_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + + SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret); + + return ret; +} + +static int jpeg_v2_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE); + + if (enable) { + if (!jpeg_v2_0_is_idle(handle)) + return -EBUSY; + jpeg_v2_0_enable_clock_gating(adev); + } else { + jpeg_v2_0_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v2_0_stop(adev); + else + ret = jpeg_v2_0_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v2_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 
0; +} + +static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { + .name = "jpeg_v2_0", + .early_init = jpeg_v2_0_early_init, + .late_init = NULL, + .sw_init = jpeg_v2_0_sw_init, + .sw_fini = jpeg_v2_0_sw_fini, + .hw_init = jpeg_v2_0_hw_init, + .hw_fini = jpeg_v2_0_hw_fini, + .suspend = jpeg_v2_0_suspend, + .resume = jpeg_v2_0_resume, + .is_idle = jpeg_v2_0_is_idle, + .wait_for_idle = jpeg_v2_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v2_0_set_clockgating_state, + .set_powergating_state = jpeg_v2_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = jpeg_v2_0_dec_ring_get_rptr, + .get_wptr = jpeg_v2_0_dec_ring_get_wptr, + .set_wptr = jpeg_v2_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */ + 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */ + .emit_ib = jpeg_v2_0_dec_ring_emit_ib, + .emit_fence = jpeg_v2_0_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v2_0_dec_ring_nop, + .insert_start = jpeg_v2_0_dec_ring_insert_start, + .insert_end = jpeg_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs; + DRM_INFO("JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = { + .set = jpeg_v2_0_set_interrupt_state, + .process = jpeg_v2_0_process_interrupt, +}; + +static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &jpeg_v2_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h new file mode 100644 index 000000000000..15a344ed340f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -0,0 +1,42 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V2_0_H__ +#define __JPEG_V2_0_H__ + +void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); +void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); +void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); + +extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block; + +#endif /* __JPEG_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c new file mode 100644 index 000000000000..c6d046df4b70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -0,0 +1,641 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v2_0.h" + +#include "vcn/vcn_2_5_offset.h" +#include "vcn/vcn_2_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2 + +static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state); + +static int amdgpu_ih_clientid_jpeg[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +/** + * jpeg_v2_5_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v2_5_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) { + u32 harvest; + int i; + + adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS; + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->jpeg.harvest_config |= 1 << i; + } + + if (adev->jpeg.harvest_config == (AMDGPU_JPEG_HARVEST_JPEG0 | + AMDGPU_JPEG_HARVEST_JPEG1)) + return -ENOENT; + } else + adev->jpeg.num_jpeg_inst = 1; + + jpeg_v2_5_set_dec_ring_funcs(adev); + jpeg_v2_5_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v2_5_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v2_5_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq); + if (r) + return r; + } + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; + sprintf(ring->name, "jpeg_dec_%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, 0); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH); + } + + return 0; +} + +/** + * jpeg_v2_5_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v2_5_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v2_5_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v2_5_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + 
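+		/* Doorbell layout, inferred from the index math in sw_init
+		 * above: each VCN/JPEG instance owns an 8-doorbell stride on
+		 * top of (vcn_ring0_1 << 1), with the JPEG ring at slot 1 of
+		 * its instance; e.g. with vcn_ring0_1 = 0x10, instance 1's
+		 * JPEG ring doorbells at (0x10 << 1) + 1 + 8 = 0x29.
+		 */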
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + DRM_INFO("JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v2_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v2_5_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) + jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + } + + return 0; +} + +/** + * jpeg_v2_5_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v2_5_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v2_5_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v2_5_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v2_5_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v2_5_hw_init(adev); + + return r; +} + +static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~(1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT); + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data); + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL); + data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK + | JPEG_CGC_CTRL__JMCIF_MODE_MASK + | JPEG_CGC_CTRL__JRBBM_MODE_MASK); + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data); +} + +static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data); +} + +/** + * jpeg_v2_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = &adev->jpeg.inst[i].ring_dec; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, +
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v2_5_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR); + } + + return 0; +} + +/** + * jpeg_v2_5_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_5_stop(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v2_5_enable_clock_gating(adev, i); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + } + + return 0; +} + +/** + * jpeg_v2_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v2_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v2_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v2_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ret &= (((RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + } 
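+ /* idle only when every un-harvested instance reports RB_JOB_DONE; any busy JRBC clears ret */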
+ + return ret; +} + +static int jpeg_v2_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret); + if (ret) + return ret; + } + + return ret; +} + +static int jpeg_v2_5_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE); + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (enable) { + if (!jpeg_v2_5_is_idle(handle)) + return -EBUSY; + jpeg_v2_5_enable_clock_gating(adev, i); + } else { + jpeg_v2_5_disable_clock_gating(adev, i); + } + } + + return 0; +} + +static int jpeg_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v2_5_stop(adev); + else + ret = jpeg_v2_5_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { + .name = "jpeg_v2_5", + .early_init = jpeg_v2_5_early_init, + .late_init = NULL, + .sw_init = jpeg_v2_5_sw_init, + .sw_fini = jpeg_v2_5_sw_fini, + .hw_init = jpeg_v2_5_hw_init, + .hw_fini = jpeg_v2_5_hw_fini, + .suspend = jpeg_v2_5_suspend, + .resume = jpeg_v2_5_resume, + .is_idle = jpeg_v2_5_is_idle, + .wait_for_idle = jpeg_v2_5_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v2_5_set_clockgating_state, + .set_powergating_state = jpeg_v2_5_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = jpeg_v2_5_dec_ring_get_rptr, + .get_wptr = jpeg_v2_5_dec_ring_get_wptr, + .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */ + 18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */ + .emit_ib = jpeg_v2_0_dec_ring_emit_ib, + .emit_fence = jpeg_v2_0_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, +
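/* packet emitters are shared with jpeg_v2_0 since the JPEG command format is unchanged; only the rptr/wptr helpers above are instance-aware via ring->me */ +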
.test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v2_0_dec_ring_nop, + .insert_start = jpeg_v2_0_dec_ring_insert_start, + .insert_end = jpeg_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec.me = i; + DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i); + } +} + +static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = { + .set = jpeg_v2_5_set_interrupt_state, + .process = jpeg_v2_5_process_interrupt, +}; + +static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].irq.num_types = 1; + adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs; + } +} + +const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 2, + .minor = 5, + .rev = 0, + .funcs = &jpeg_v2_5_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h new file mode 100644 index 000000000000..2b4087c02620 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __JPEG_V2_5_H__ +#define __JPEG_V2_5_H__ + +extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block; + +#endif /* __JPEG_V2_5_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 28105e4af507..49a3a56ec017 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -27,17 +27,13 @@ #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" #include "mmhub/mmhub_1_0_default.h" -#include "mmhub/mmhub_9_4_0_offset.h" #include "vega10_enum.h" - +#include "soc15.h" #include "soc15_common.h" #define mmDAGB0_CNTL_MISC2_RV 0x008f #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 -#define EA_EDC_CNT_MASK 0x3 -#define EA_EDC_CNT_SHIFT 0x2 - u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) { u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); @@ -527,9 +523,9 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, case CHIP_RAVEN: case CHIP_RENOIR: mmhub_v1_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v1_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -564,59 +560,191 @@ void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) *flags |= AMD_CG_SUPPORT_MC_LS; } +static const struct soc15_ras_field_entry mmhub_v1_0_ras_fields[] = { + { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, 
mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), + SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + } +}; + +static const struct soc15_reg_entry mmhub_v1_0_edc_cnt_regs[] = { + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), 0, 0, 0}, + { 
SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), 0, 0, 0}, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), 0, 0, 0}, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), 0, 0, 0}, +}; + +static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t value, uint32_t *sec_count, uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) { + if (mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset) + continue; + + sec_cnt = (value & + mmhub_v1_0_ras_fields[i].sec_count_mask) >> + mmhub_v1_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("MMHUB SubBlock %s, SEC %d\n", + mmhub_v1_0_ras_fields[i].name, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & + mmhub_v1_0_ras_fields[i].ded_count_mask) >> + mmhub_v1_0_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("MMHUB SubBlock %s, DED %d\n", + mmhub_v1_0_ras_fields[i].name, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - int i; - uint32_t ea0_edc_cnt, ea0_edc_cnt2; - uint32_t ea1_edc_cnt, ea1_edc_cnt2; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - /* EDC CNT will be cleared automatically after read */ - ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20); - ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20); - ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20); - ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20); - - /* error count of each error type is recorded by 2 bits, - * ce and ue count in EDC_CNT - */ - for (i = 0; i < 5; i++) { - err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); - err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); - ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; - ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; - err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); - err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); - ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; - ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; - } - /* successive ue count in EDC_CNT */ - for (i = 0; i < 5; i++) { - err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); - err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); - ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; - ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i; + uint32_t reg_value; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++) { + reg_value = + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i])); + if (reg_value) + mmhub_v1_0_get_ras_error_count(&mmhub_v1_0_edc_cnt_regs[i], + reg_value, &sec_count, &ded_count); } - /* ce and ue count in EDC_CNT2 */ - for (i = 0; i < 3; i++) { - err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); - err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); - ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; - ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; - err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); - err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); - ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; - ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; - } - /* successive ue count in EDC_CNT2 */ - for (i = 0; i < 6; i++) { - err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); - err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); - ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; - ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; - } + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; } const struct amdgpu_mmhub_funcs
mmhub_v1_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index a7cb185d639a..bde189680521 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -427,9 +427,9 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI14: case CHIP_NAVI12: mmhub_v2_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 66efe2f7bd76..a5281df8d84f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -21,6 +21,7 @@ * */ #include "amdgpu.h" +#include "amdgpu_ras.h" #include "mmhub_v9_4.h" #include "mmhub/mmhub_9_4_1_offset.h" @@ -29,7 +30,7 @@ #include "athub/athub_1_0_offset.h" #include "athub/athub_1_0_sh_mask.h" #include "vega10_enum.h" - +#include "soc15.h" #include "soc15_common.h" #define MMHUB_NUM_INSTANCES 2 @@ -53,7 +54,7 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) return base; } -void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, +static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to @@ -79,7 +80,7 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, { uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base); + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -100,6 +101,16 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, (u32)(adev->gmc.gart_end >> 44)); } +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid, + page_table_base); +} + static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, int hubid) { @@ -117,45 +128,53 @@ static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, adev->gmc.agp_start >> 24); - /* Program the system aperture low logical page number. */ - WREG32_SOC15_OFFSET(MMHUB, 0, - mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET, - min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - WREG32_SOC15_OFFSET(MMHUB, 0, - mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET, - max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + if (!amdgpu_sriov_vf(adev)) { + /* Program the system aperture low logical page number. */ + WREG32_SOC15_OFFSET( + MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15_OFFSET( + MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); - /* Set default page address. 
*/ - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; - WREG32_SOC15_OFFSET(MMHUB, 0, + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15_OFFSET( + MMHUB, 0, mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, (u32)(value >> 12)); - WREG32_SOC15_OFFSET(MMHUB, 0, + WREG32_SOC15_OFFSET( + MMHUB, 0, mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, (u32)(value >> 44)); - /* Program "protection fault". */ - WREG32_SOC15_OFFSET(MMHUB, 0, - mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET, - (u32)(adev->dummy_page_addr >> 12)); - WREG32_SOC15_OFFSET(MMHUB, 0, - mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET, - (u32)((u64)adev->dummy_page_addr >> 44)); + /* Program "protection fault". */ + WREG32_SOC15_OFFSET( + MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15_OFFSET( + MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)((u64)adev->dummy_page_addr >> 44)); - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, - mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + tmp = RREG32_SOC15_OFFSET( + MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + tmp); + } } static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) @@ -313,7 +332,8 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) adev->vm_manager.block_size - 9); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, tmp); @@ -356,30 +376,16 @@ int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) int i; for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { - if (amdgpu_sriov_vf(adev)) { - /* - * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase - * they are VF copy registers so vbios post doesn't - * program them, for SRIOV driver need to program them - */ - WREG32_SOC15_OFFSET(MMHUB, 0, - mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE, - i * MMHUB_INSTANCE_REGISTER_OFFSET, - adev->gmc.vram_start >> 24); - WREG32_SOC15_OFFSET(MMHUB, 0, - mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP, - i * MMHUB_INSTANCE_REGISTER_OFFSET, - adev->gmc.vram_end >> 24); - } - /* GART Enable. 
*/ mmhub_v9_4_init_gart_aperture_regs(adev, i); mmhub_v9_4_init_system_aperture_regs(adev, i); mmhub_v9_4_init_tlb_regs(adev, i); - mmhub_v9_4_init_cache_regs(adev, i); + if (!amdgpu_sriov_vf(adev)) + mmhub_v9_4_init_cache_regs(adev, i); mmhub_v9_4_enable_system_domain(adev, i); - mmhub_v9_4_disable_identity_aperture(adev, i); + if (!amdgpu_sriov_vf(adev)) + mmhub_v9_4_disable_identity_aperture(adev, i); mmhub_v9_4_setup_vmid_config(adev, i); mmhub_v9_4_program_invalidation(adev, i); } @@ -619,9 +625,9 @@ int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_ARCTURUS: mmhub_v9_4_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v9_4_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -655,3 +661,942 @@ void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_LS; } + +static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = { + /* MMHUB Range 0 */ + { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, 
GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 1 */ + { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, 
mmMMEA1_EDC_CNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 2 */ + { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA2_WRET_TAGMEM",
SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 3 */ + { "MMEA3_DRAMRD_CMDMEM",
SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA3_MAM_D0MEM", 
SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 4 */ + { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, 
DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 5 */ + { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2,
GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 6 */ + { "MMEA6_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA6_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { 
"MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA6_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA6_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA6_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA6_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_DED_COUNT), + }, + + /* MMHUB Range 7*/ + { "MMEA7_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, 
DRAMWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_DED_COUNT), + }, + { "MMEA7_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_DED_COUNT), + }, + { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IORD_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_CMDMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_DATAMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), + 0, 0, + }, + { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT), + }, + { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT), + }, + { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IORD_CMDMEM_DED_COUNT), + }, + { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_CMDMEM_DED_COUNT), + }, + { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_DATAMEM_DED_COUNT), + }, + { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT), + }, + { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), + 0, 0, + SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT), + }, + { "MMEA7_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_DED_COUNT), + }, + { "MMEA7_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_DED_COUNT), + }, + { "MMEA7_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_SED_COUNT), 
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_DED_COUNT), + }, + { "MMEA7_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_SED_COUNT), + SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_DED_COUNT), + } +}; + +static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = { + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), 0, 0, 0 }, +}; + +static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg, + uint32_t value, uint32_t *sec_count, uint32_t *ded_count) +{ + uint32_t i; + uint32_t sec_cnt, ded_cnt; + + for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) { + if (mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset) + continue; + + sec_cnt = (value & + mmhub_v9_4_ras_fields[i].sec_count_mask) >> + mmhub_v9_4_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("MMHUB SubBlock %s, SEC %d\n", + mmhub_v9_4_ras_fields[i].name, + sec_cnt); + *sec_count += sec_cnt; + } + + ded_cnt = (value & + mmhub_v9_4_ras_fields[i].ded_count_mask) >> + mmhub_v9_4_ras_fields[i].ded_count_shift; + if (ded_cnt) { + DRM_INFO("MMHUB SubBlock %s, DED %d\n", + mmhub_v9_4_ras_fields[i].name, + ded_cnt); + *ded_count += ded_cnt; + } + } + + return 0; +} + +static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0, ded_count = 0; + uint32_t i; + uint32_t reg_value; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++) { + reg_value = + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i])); + if (reg_value) + mmhub_v9_4_get_ras_error_count(&mmhub_v9_4_edc_cnt_regs[i], + reg_value, &sec_count, &ded_count); + } + + err_data->ce_count += sec_count; + err_data->ue_count += ded_count; +} + +const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = { + .ras_late_init = amdgpu_mmhub_ras_late_init, + .query_ras_error_count = mmhub_v9_4_query_ras_error_count, +};
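Each row of the tables above carries the (mask, shift) pair that SOC15_REG_FIELD records for a sub-block's SEC/DED counter field, and the query loop above recovers each count with one AND plus one shift; rows with 0, 0 in a slot contribute nothing for that error class, which is why the PAGEMEM sub-blocks appear twice (SED-only via MMEA*_EDC_CNT, DED-only via MMEA*_EDC_CNT3). A minimal standalone sketch of that extraction, with made-up mask/shift values purely for illustration (the real ones come from the MMEA*_EDC_CNT* field definitions in the register headers):

#include <stdio.h>
#include <stdint.h>

/* illustrative stand-in for one soc15_ras_field_entry row */
struct ras_field {
	const char *name;
	uint32_t sec_mask, sec_shift;	/* SOC15_REG_FIELD(reg, FIELD_SEC_COUNT) pair */
	uint32_t ded_mask, ded_shift;	/* SOC15_REG_FIELD(reg, FIELD_DED_COUNT) pair */
};

int main(void)
{
	/* hypothetical layout: SEC count in bits [5:2], DED count in bits [9:6] */
	struct ras_field f = { "MMEA0_DRAMRD_CMDMEM", 0x0000003c, 2, 0x000003c0, 6 };
	uint32_t edc_cnt = 0x000000c8;	/* pretend value read from mmMMEA0_EDC_CNT */

	uint32_t sec = (edc_cnt & f.sec_mask) >> f.sec_shift;	/* -> 2 */
	uint32_t ded = (edc_cnt & f.ded_mask) >> f.ded_shift;	/* -> 3 */

	printf("%s: SEC %u, DED %u\n", f.name, sec, ded);
	return 0;
}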
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h index d435cfcec1a8..1b979773776c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -23,6 +23,8 @@ #ifndef __MMHUB_V9_4_H__ #define __MMHUB_V9_4_H__ +extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs; + u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev); int mmhub_v9_4_gart_enable(struct amdgpu_device *adev); void mmhub_v9_4_gart_disable(struct amdgpu_device *adev); @@ -32,5 +34,7 @@ void mmhub_v9_4_init(struct amdgpu_device *adev); int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h index 8af0bddf85e4..20958639b601 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h @@ -47,6 +47,18 @@ struct mmsch_v1_0_init_header { uint32_t uvd_table_size; }; +struct mmsch_vf_eng_init_header { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v1_1_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_vf_eng_init_header eng[2]; +}; + struct mmsch_v1_0_cmd_direct_reg_header { uint32_t reg_offset : 28; uint32_t command_type : 4; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index cc5bf595f9b1..5fd67e1cc2a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -158,82 +158,6 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_ai_mailbox_set_valid(adev, false); } -static int xgpu_ai_get_pp_clk(struct amdgpu_device *adev, u32 type, char *buf) -{ - int r = 0; - u32 req, val, size; - - if (!amdgim_is_hwperf(adev) || buf == NULL) - return -EBADRQC; - - switch(type) { - case PP_SCLK: - req = IDH_IRQ_GET_PP_SCLK; - break; - case PP_MCLK: - req = IDH_IRQ_GET_PP_MCLK; - break; - default: - return -EBADRQC; - } - - mutex_lock(&adev->virt.dpm_mutex); - - xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0); - - r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); - if (!r && adev->fw_vram_usage.va != NULL) { - val = RREG32_NO_KIQ( - SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW1)); - size = strnlen((((char *)adev->virt.fw_reserve.p_pf2vf) + - val), PAGE_SIZE); - - if (size < PAGE_SIZE) - strcpy(buf,((char *)adev->virt.fw_reserve.p_pf2vf + val)); - else - size = 0; - - r = size; - goto out; - } - - r = xgpu_ai_poll_msg(adev, IDH_FAIL); - if(r) - pr_info("%s DPM request failed", - (type == PP_SCLK)? 
"SCLK" : "MCLK"); - -out: - mutex_unlock(&adev->virt.dpm_mutex); - return r; -} - -static int xgpu_ai_force_dpm_level(struct amdgpu_device *adev, u32 level) -{ - int r = 0; - u32 req = IDH_IRQ_FORCE_DPM_LEVEL; - - if (!amdgim_is_hwperf(adev)) - return -EBADRQC; - - mutex_lock(&adev->virt.dpm_mutex); - xgpu_ai_mailbox_trans_msg(adev, req, level, 0, 0); - - r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); - if (!r) - goto out; - - r = xgpu_ai_poll_msg(adev, IDH_FAIL); - if (!r) - pr_info("DPM request failed"); - else - pr_info("Mailbox is broken"); - -out: - mutex_unlock(&adev->virt.dpm_mutex); - return r; -} - static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, enum idh_request req) { @@ -326,7 +250,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) */ locked = mutex_trylock(&adev->lock_reset); if (locked) - adev->in_gpu_reset = 1; + adev->in_gpu_reset = true; do { if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) @@ -338,7 +262,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) flr_done: if (locked) { - adev->in_gpu_reset = 0; + adev->in_gpu_reset = false; mutex_unlock(&adev->lock_reset); } @@ -455,6 +379,4 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .reset_gpu = xgpu_ai_request_reset, .wait_reset = NULL, .trans_msg = xgpu_ai_mailbox_trans_msg, - .get_pp_clk = xgpu_ai_get_pp_clk, - .force_dpm_level = xgpu_ai_force_dpm_level, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 077e91a33d62..37dbe0f2142f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -35,10 +35,6 @@ enum idh_request { IDH_REL_GPU_FINI_ACCESS, IDH_REQ_GPU_RESET_ACCESS, - IDH_IRQ_FORCE_DPM_LEVEL = 10, - IDH_IRQ_GET_PP_SCLK, - IDH_IRQ_GET_PP_MCLK, - IDH_LOG_VF_ERROR = 200, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 0d8767eb7a70..237fa5e16b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -252,7 +252,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) */ locked = mutex_trylock(&adev->lock_reset); if (locked) - adev->in_gpu_reset = 1; + adev->in_gpu_reset = true; do { if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) @@ -264,12 +264,16 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) flr_done: if (locked) { - adev->in_gpu_reset = 0; + adev->in_gpu_reset = false; mutex_unlock(&adev->lock_reset); } /* Trigger recovery for world switch failure if no TDR */ - if (amdgpu_device_should_recover_gpu(adev)) + if (amdgpu_device_should_recover_gpu(adev) + && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT || + adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || + adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || + adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) amdgpu_device_gpu_recover(adev, NULL); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 9af73567e716..cf557a428298 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -110,7 +110,6 @@ static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl static int navi10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih = &adev->irq.ih; - int ret = 0; u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken; u32 tmp; @@ -179,7 +178,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) /* enable interrupts */ navi10_ih_enable_interrupts(adev); - return ret; + 
return 0; } /** @@ -427,7 +426,7 @@ static int navi10_ih_set_clockgating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; navi10_ih_update_clockgating_state(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 0db458f9fafc..65eb378fa035 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -52,6 +52,9 @@ #define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL #define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L +static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); + static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -314,6 +317,7 @@ static void nbio_v7_4_init_registers(struct amdgpu_device *adev) static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) { uint32_t bif_doorbell_intr_cntl; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if); bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); if (REG_GET_FIELD(bif_doorbell_intr_cntl, @@ -324,7 +328,18 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device RAS_CNTLR_INTERRUPT_CLEAR, 1); WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); - amdgpu_ras_global_ras_isr(adev); + /* + * clear error status after ras_controller_intr according to + * hw team and count ue number for query + */ + nbio_v7_4_query_ras_error_count(adev, &obj->err_data); + + DRM_WARN("RAS controller interrupt triggered by NBIF error\n"); + + /* ras_controller_int is dedicated for nbif ras error, + * not the global interrupt for sync flood + */ + amdgpu_ras_reset_gpu(adev); } } @@ -441,10 +456,8 @@ static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT, &adev->nbio.ras_controller_irq); - if (r) - return r; - return 0; + return r; } static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev) @@ -461,16 +474,16 @@ static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *a r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, &adev->nbio.ras_err_event_athub_irq); - if (r) - return r; - return 0; + return r; } +#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030 + static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - uint32_t global_sts, central_sts, int_eoi; + uint32_t global_sts, central_sts, int_eoi, parity_sts; uint32_t corr, fatal, non_fatal; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -479,6 +492,7 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal); non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrNonFatal); + parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2); if (corr) err_data->ce_count++; @@ -490,6 +504,11 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, /* clear error status register */ WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts); + if (fatal) + /* clear parity fatal error indication field */ + WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2, + parity_sts); + if 
(REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS, BIFL_RasContller_Intr_Recv)) { /* clear interrupt status register */ diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 0ba66bef5746..2d1bebdf1603 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -53,6 +53,7 @@ #include "gfx_v10_0.h" #include "sdma_v5_0.h" #include "vcn_v2_0.h" +#include "jpeg_v2_0.h" #include "dce_virtual.h" #include "mes_v10_1.h" #include "mxgpu_nv.h" @@ -314,6 +315,16 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) return ret; } +static bool nv_asic_supports_baco(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (smu_baco_is_support(smu)) + return true; + else + return false; +} + static enum amd_reset_method nv_asic_reset_method(struct amdgpu_device *adev) { @@ -342,7 +353,12 @@ static int nv_asic_reset(struct amdgpu_device *adev) if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - ret = smu_baco_reset(smu); + ret = smu_baco_enter(smu); + if (ret) + return ret; + ret = smu_baco_exit(smu); + if (ret) + return ret; } else { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); @@ -462,7 +478,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -473,9 +489,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; @@ -485,7 +502,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -496,9 +513,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); break; default: return -EINVAL; @@ -617,6 +635,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .get_pcie_usage = &nv_get_pcie_usage, .need_reset_on_init = &nv_need_reset_on_init, .get_pcie_replay_count = &nv_get_pcie_replay_count, + .supports_baco = &nv_asic_supports_baco, }; static int 
nv_common_early_init(void *handle) @@ -656,10 +675,12 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB; adev->external_rev_id = adev->rev_id + 0x1; break; @@ -676,9 +697,11 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 20; break; @@ -697,10 +720,18 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | - AMD_CG_SUPPORT_VCN_MGCG; + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB; + /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * as a consequence, the rev_id and external_rev_id are wrong. + * workaround it by hardcoding rev_id to 0 (default value). + */ + if (amdgpu_sriov_vf(adev)) + adev->rev_id = 0; adev->external_rev_id = adev->rev_id + 0xa; break; default: @@ -919,13 +950,13 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: adev->nbio.funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); nv_update_hdp_mem_power_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); nv_update_hdp_clock_gating(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 74a9fe8e0cfb..36b65797434e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -242,6 +242,7 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */ GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */ GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ + GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index b345e69ba246..7539104175e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -230,54 +230,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp, return ret; } -static int psp_v10_0_cmd_submit(struct psp_context *psp, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) -{ - unsigned int psp_write_ptr_reg = 0; - struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; - struct psp_ring *ring = &psp->km_ring; - struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; - struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + - ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; - struct amdgpu_device *adev = psp->adev; - uint32_t ring_size_dw = ring->ring_size / 4; - uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; - - /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); - - /* Update KM RB frame pointer to new frame */ - if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring_buffer_start; - else - write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); - /* Check invalid write_frame ptr address */ - if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); - return -EINVAL; - } - - /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); - - /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); - write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); - write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); - write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); - write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); - - /* Update the write Pointer in DWORDs */ - psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); - - return 0; -} - static int psp_v10_0_sram_map(struct amdgpu_device *adev, unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, @@ -407,15 +359,30 @@ static int psp_v10_0_mode1_reset(struct psp_context *psp) return -EINVAL; } +static uint32_t psp_v10_0_ring_get_wptr(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + return RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); +} + +static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + static const struct psp_funcs psp_v10_0_funcs = { .init_microcode = psp_v10_0_init_microcode, .ring_init = psp_v10_0_ring_init, .ring_create = psp_v10_0_ring_create, .ring_stop = psp_v10_0_ring_stop, .ring_destroy = 
psp_v10_0_ring_destroy, - .cmd_submit = psp_v10_0_cmd_submit, .compare_sram_data = psp_v10_0_compare_sram_data, .mode1_reset = psp_v10_0_mode1_reset, + .ring_get_wptr = psp_v10_0_ring_get_wptr, + .ring_set_wptr = psp_v10_0_ring_set_wptr, }; void psp_v10_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index ffeaa2f5588d..0829188c1a5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -22,6 +22,7 @@ #include <linux/firmware.h> #include <linux/module.h> +#include <linux/vmalloc.h> #include "amdgpu.h" #include "amdgpu_psp.h" @@ -43,10 +44,13 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi10_ta.bin"); MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ta.bin"); MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_ta.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); @@ -186,6 +190,31 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); + adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); + adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + + adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version); + adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes); + adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr + + le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + } break; default: BUG(); @@ -208,6 +237,29 @@ out: return err; } +int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + int ret; + int retry_loop; + + for (retry_loop = 0; retry_loop < 10; retry_loop++) { + /* Wait for bootloader to signify that it is + ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, + 0x80000000, + false); + + if (ret == 0) + return 0; + } + + return ret; +} + static bool psp_v11_0_is_sos_alive(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -233,9 +285,7 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) return 0; } - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); if (ret) return ret; @@ -251,9 +301,7 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) WREG32_SOC15(MP0, 0, 
mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1*/ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); return ret; } @@ -273,9 +321,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) return 0; } - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); if (ret) return ret; @@ -294,8 +340,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); return ret; } @@ -312,9 +357,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) if (psp_v11_0_is_sos_alive(psp)) return 0; - /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_v11_0_wait_for_bootloader(psp); if (ret) return ret; @@ -519,63 +562,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp, return ret; } -static int psp_v11_0_cmd_submit(struct psp_context *psp, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) -{ - unsigned int psp_write_ptr_reg = 0; - struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem; - struct psp_ring *ring = &psp->km_ring; - struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; - struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + - ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; - struct amdgpu_device *adev = psp->adev; - uint32_t ring_size_dw = ring->ring_size / 4; - uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; - - /* KM (GPCOM) prepare write pointer */ - if (psp_v11_0_support_vmr_ring(psp)) - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); - else - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); - - /* Update KM RB frame pointer to new frame */ - /* write_frame ptr increments by size of rb_frame in bytes */ - /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ - if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring_buffer_start; - else - write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); - /* Check invalid write_frame ptr address */ - if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); - return -EINVAL; - } - - /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); - - /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); - write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); - write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); - write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); - write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); - - /* Update the write Pointer in DWORDs */ - psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % 
ring_size_dw; - if (psp_v11_0_support_vmr_ring(psp)) { - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); - } else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); - - return 0; -} - static int psp_v11_0_sram_map(struct amdgpu_device *adev, unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, @@ -986,10 +972,13 @@ Err_out: */ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) { - int ret; - uint32_t p2c_header[4]; struct psp_memory_training_context *ctx = &psp->mem_train_ctx; uint32_t *pcache = (uint32_t*)ctx->sys_cache; + struct amdgpu_device *adev = psp->adev; + uint32_t p2c_header[4]; + uint32_t sz; + void *buf; + int ret; if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { DRM_DEBUG("Memory training is not supported.\n"); @@ -1004,7 +993,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) return 0; } - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", pcache[0], pcache[1], pcache[2], pcache[3], p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); @@ -1041,11 +1030,38 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) DRM_DEBUG("Memory training ops:%x.\n", ops); if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* + * Long training will encroach a certain amount of bottom VRAM, + * saving the content of this bottom VRAM to system memory + * before training, and restoring it after training to avoid + * VRAM corruption. + */ + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + DRM_ERROR("failed to allocate system memory.\n"); + return -ENOMEM; + } + + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); if (ret) { DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); return ret; } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->nbio.funcs->hdp_flush(adev, NULL); + vfree(buf); } if (ops & PSP_MEM_TRAIN_SAVE) { @@ -1068,6 +1084,30 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) return 0; } +static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (psp_v11_0_support_vmr_ring(psp)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (psp_v11_0_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb, @@ -1077,7 +1117,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_create = psp_v11_0_ring_create, 
.ring_stop = psp_v11_0_ring_stop, .ring_destroy = psp_v11_0_ring_destroy, - .cmd_submit = psp_v11_0_cmd_submit, .compare_sram_data = psp_v11_0_compare_sram_data, .mode1_reset = psp_v11_0_mode1_reset, .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, @@ -1091,6 +1130,8 @@ static const struct psp_funcs psp_v11_0_funcs = { .mem_training_init = psp_v11_0_memory_training_init, .mem_training_fini = psp_v11_0_memory_training_fini, .mem_training = psp_v11_0_memory_training, + .ring_get_wptr = psp_v11_0_ring_get_wptr, + .ring_set_wptr = psp_v11_0_ring_set_wptr, }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 8f553f6f92d6..58d8b6d732e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -334,63 +334,6 @@ static int psp_v12_0_ring_destroy(struct psp_context *psp, return ret; } -static int psp_v12_0_cmd_submit(struct psp_context *psp, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) -{ - unsigned int psp_write_ptr_reg = 0; - struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem; - struct psp_ring *ring = &psp->km_ring; - struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; - struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + - ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; - struct amdgpu_device *adev = psp->adev; - uint32_t ring_size_dw = ring->ring_size / 4; - uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; - - /* KM (GPCOM) prepare write pointer */ - if (psp_v12_0_support_vmr_ring(psp)) - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); - else - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); - - /* Update KM RB frame pointer to new frame */ - /* write_frame ptr increments by size of rb_frame in bytes */ - /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ - if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring_buffer_start; - else - write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); - /* Check invalid write_frame ptr address */ - if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); - return -EINVAL; - } - - /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); - - /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); - write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); - write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); - write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); - write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); - - /* Update the write Pointer in DWORDs */ - psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - if (psp_v12_0_support_vmr_ring(psp)) { - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); - } else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); - - return 0; -} - static int psp_v12_0_sram_map(struct amdgpu_device *adev, unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, @@ -547,6 +490,30 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) return 0; } +static uint32_t 
psp_v12_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (psp_v12_0_support_vmr_ring(psp)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v12_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (psp_v12_0_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + static const struct psp_funcs psp_v12_0_funcs = { .init_microcode = psp_v12_0_init_microcode, .bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv, @@ -555,9 +522,10 @@ static const struct psp_funcs psp_v12_0_funcs = { .ring_create = psp_v12_0_ring_create, .ring_stop = psp_v12_0_ring_stop, .ring_destroy = psp_v12_0_ring_destroy, - .cmd_submit = psp_v12_0_cmd_submit, .compare_sram_data = psp_v12_0_compare_sram_data, .mode1_reset = psp_v12_0_mode1_reset, + .ring_get_wptr = psp_v12_0_ring_get_wptr, + .ring_set_wptr = psp_v12_0_ring_set_wptr, }; void psp_v12_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index fdc00938327b..735c43c7daab 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -179,7 +179,7 @@ static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver) * Double check if the latest four legacy versions. * If yes, it is still the right version. */ - for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) { + for (i = 0; i < ARRAY_SIZE(sos_old_versions); i++) { if (sos_old_versions[i] == adev->psp.sos_fw_version) return true; } @@ -410,65 +410,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp, return ret; } -static int psp_v3_1_cmd_submit(struct psp_context *psp, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, - int index) -{ - unsigned int psp_write_ptr_reg = 0; - struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; - struct psp_ring *ring = &psp->km_ring; - struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; - struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + - ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; - struct amdgpu_device *adev = psp->adev; - uint32_t ring_size_dw = ring->ring_size / 4; - uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; - - /* KM (GPCOM) prepare write pointer */ - if (psp_v3_1_support_vmr_ring(psp)) - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); - else - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); - - /* Update KM RB frame pointer to new frame */ - /* write_frame ptr increments by size of rb_frame in bytes */ - /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ - if ((psp_write_ptr_reg % ring_size_dw) == 0) - write_frame = ring_buffer_start; - else - write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); - /* Check invalid write_frame ptr address */ - if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); - return -EINVAL; - } - - /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct 
psp_gfx_rb_frame)); - - /* Update KM RB frame */ - write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); - write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); - write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); - write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); - write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); - - /* Update the write Pointer in DWORDs */ - psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - if (psp_v3_1_support_vmr_ring(psp)) { - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); - /* send interrupt to PSP for SRIOV ring write pointer update */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_CONSUME_CMD); - } else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); - - return 0; -} - static int psp_v3_1_sram_map(struct amdgpu_device *adev, unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, @@ -642,6 +583,31 @@ static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) return false; } +static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (psp_v3_1_support_vmr_ring(psp)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + return data; +} + +static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (psp_v3_1_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + /* send interrupt to PSP for SRIOV ring write pointer update */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, @@ -650,11 +616,12 @@ static const struct psp_funcs psp_v3_1_funcs = { .ring_create = psp_v3_1_ring_create, .ring_stop = psp_v3_1_ring_stop, .ring_destroy = psp_v3_1_ring_destroy, - .cmd_submit = psp_v3_1_cmd_submit, .compare_sram_data = psp_v3_1_compare_sram_data, .smu_reload_quirk = psp_v3_1_smu_reload_quirk, .mode1_reset = psp_v3_1_mode1_reset, .support_vmr_ring = psp_v3_1_support_vmr_ring, + .ring_get_wptr = psp_v3_1_ring_get_wptr, + .ring_set_wptr = psp_v3_1_ring_set_wptr, }; void psp_v3_1_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index a10175838013..7d509a40076f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* IB packet must end on a 8 DW boundary */ - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1260,16 +1260,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { static void 
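With the per-version cmd_submit bodies removed above, the only steps that still differ between PSP v3.1/v10/v11/v12 are how the ring write pointer is read and how it is published, which is exactly what the new ring_get_wptr/ring_set_wptr hooks expose. A hedged sketch of the shared submit path this enables (the generic function presumably lives in the common PSP code; its name here and the dropped bounds check are simplifications, and the frame bookkeeping is copied from the removed versions):

/* a sketch, not the actual common-code implementation */
#include "amdgpu.h"
#include "amdgpu_psp.h"

static int psp_cmd_submit_sketch(struct psp_context *psp,
				 uint64_t cmd_buf_mc_addr,
				 uint64_t fence_mc_addr, int index)
{
	struct psp_ring *ring = &psp->km_ring;
	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
	struct psp_gfx_rb_frame *write_frame;
	uint32_t ring_size_dw = ring->ring_size / 4;
	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
	/* version-specific read, e.g. C2PMSG_102 (VMR ring) vs C2PMSG_67 */
	uint32_t wptr = psp->funcs->ring_get_wptr(psp);

	/* the frame bookkeeping every version used to duplicate */
	if ((wptr % ring_size_dw) == 0)
		write_frame = ring_buffer_start;
	else
		write_frame = ring_buffer_start + (wptr / rb_frame_size_dw);

	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
	write_frame->fence_value = index;
	amdgpu_asic_flush_hdp(psp->adev, NULL);

	/* version-specific publish, incl. the SRIOV doorbell where needed */
	psp->funcs->ring_set_wptr(psp, (wptr + rb_frame_size_dw) % ring_size_dw);

	return 0;
}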
sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v2_4_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 5f4e2c616241..b6109a99fc43 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* IB packet must end on a 8 DW boundary */ - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1698,16 +1698,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v3_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4ef4d31f5231..e55884d204bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -82,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev); static const struct soc15_reg_golden golden_settings_sdma_4[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), @@ -254,7 +255,106 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) +}; + +static const struct soc15_ras_field_entry 
sdma_v4_0_ras_fields[] = { + { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED), + 0, 0, + }, + { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED), + 0, 0, + }, + { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED), + 0, 0, + }, + { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED), + 0, 0, + }, + { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + 
SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED), + 0, 0, + }, + { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED), + 0, 0, + }, }; static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, @@ -698,7 +798,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* IB packet must end on a 8 DW boundary */ - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -1579,7 +1679,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1686,6 +1786,7 @@ static int sdma_v4_0_early_init(void *handle) sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); sdma_v4_0_set_irq_funcs(adev); + sdma_v4_0_set_ras_funcs(adev); return 0; } @@ -1700,8 +1801,18 @@ static int sdma_v4_0_late_init(void *handle) struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; + int i; - return amdgpu_sdma_ras_late_init(adev, &ih_info); + /* read back edc counter registers to clear the counters */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) + RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); + } + + if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) + return adev->sdma.funcs->ras_late_init(adev, &ih_info); + else + return 0; } static int sdma_v4_0_sw_init(void *handle) @@ -1773,7 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - amdgpu_sdma_ras_fini(adev); + if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) + adev->sdma.funcs->ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -2064,9 +2176,9 @@ static int sdma_v4_0_set_clockgating_state(void *handle, case CHIP_ARCTURUS: case CHIP_RENOIR: sdma_v4_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); sdma_v4_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); break; default: break; } @@ -2409,10 +2521,73 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) sched = &adev->sdma.instance[i].page.sched; else sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = sched; + } + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; +} + +static void sdma_v4_0_get_ras_error_count(uint32_t value, + uint32_t instance, + uint32_t *sec_count) +{ + uint32_t i; + uint32_t sec_cnt; + + /* double bit (multiple bit) error detection is not supported */ + for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) { + /* the SDMA_EDC_COUNTER register in each sdma instance + * shares the same sed shift_mask */ + sec_cnt = (value & + sdma_v4_0_ras_fields[i].sec_count_mask) >> + sdma_v4_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("Detected %s in SDMA%d, SED %d\n", + sdma_v4_0_ras_fields[i].name, + instance, sec_cnt); + *sec_count += sec_cnt; + } + } +} + +static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0; + uint32_t reg_value = 0; + + reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER); + /* double bit error is not supported */ + if (reg_value) + sdma_v4_0_get_ras_error_count(reg_value, + instance, &sec_count); + /* err_data->ce_count should be initialized to 0 + * before calling into this function */ + err_data->ce_count += sec_count; + /* double bit errors are not supported, + * so set the ue count to 0 */ + err_data->ue_count = 0; + + return 0; +} + +static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { + .ras_late_init = amdgpu_sdma_ras_late_init, + .ras_fini = amdgpu_sdma_ras_fini, + .query_ras_error_count = sdma_v4_0_query_ras_error_count, +}; + +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + adev->sdma.funcs = &sdma_v4_0_ras_funcs; + break; + default: + break; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index f4ad2990f973..67b9830b7c7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); - /* IB packet must end on a 8 DW boundary */ - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + /* An IB packet must end on an 8 DW boundary--the next dword + * must be on an 8-dword boundary. Our IB packet below is 6 + * dwords long, so we add x NOPs such that, in + * modular arithmetic, + * wptr + 6 + x = 8k, k >= 0, which in C is + * (wptr + 6 + x) % 8 == 0. + * The expression below is a solution for x.
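The comment's arithmetic can be checked exhaustively. Both boundary rewrites in this series follow the same two's-complement identity: (10 - (wptr & 7)) % 8 equals (2 - wptr) & 7 for the 6-dword IB packet, and (8 - (len & 7)) % 8 equals (-len) & 7 for ring_pad_ib. A standalone verification, as an editor's sketch rather than driver code:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* NOP count before a 6-dword IB packet: new form vs. old form */
	for (uint32_t w = 0; w < 64; w++) {
		uint32_t x = (2 - w) & 7;        /* new expression */
		assert((w + 6 + x) % 8 == 0);    /* packet ends on an 8-DW boundary */
		assert(x == (10 - (w & 7)) % 8); /* agrees with the old expression */
	}

	/* IB padding: new mask form vs. old modulo form */
	for (uint32_t len = 0; len < 64; len++)
		assert(((-len) & 7) == (8 - (len & 7)) % 8);

	printf("both padding identities hold\n");
	return 0;
}

Both the old and the new forms are correct for all inputs; the masked form simply drops the modulo operation.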
+ */ + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); @@ -907,16 +914,9 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) udelay(1); } - if (i < adev->usec_timeout) { - if (amdgpu_emu_mode == 1) - DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i); - else - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + amdgpu_device_wb_free(adev, index); return r; @@ -981,13 +981,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -1086,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, } /** - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw - * + * sdma_v5_0_ring_pad_ib - pad the IB * @ib: indirect buffer to fill with padding * + * Pad the IB with NOPs to a boundary multiple of 8. */ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { @@ -1097,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib u32 pad_count; int i; - pad_count = (8 - (ib->length_dw & 0x7)) % 8; + pad_count = (-ib->length_dw) & 0x7; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = @@ -1528,9 +1525,9 @@ static int sdma_v5_0_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: sdma_v5_0_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); sdma_v5_0_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -1721,17 +1718,15 @@ static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = { static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f2d70a47a3af..4d415bfdb42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1197,6 +1197,11 @@ static int si_asic_reset(struct amdgpu_device *adev) return 0; } +static bool si_asic_supports_baco(struct amdgpu_device *adev) +{ + return false; +} + static enum amd_reset_method si_asic_reset_method(struct amdgpu_device *adev) { @@ -1425,6 +1430,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .get_pcie_usage = &si_get_pcie_usage, .need_reset_on_init = &si_need_reset_on_init, .get_pcie_replay_count = &si_get_pcie_replay_count, + .supports_baco = &si_asic_supports_baco, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index bdda8b4e03f0..42d5601b6bf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -648,7 +648,7 @@ static int si_dma_set_clockgating_state(void *handle, bool enable; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + enable = (state == AMD_CG_STATE_GATE); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -834,16 +834,14 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) { - struct drm_gpu_scheduler *sched; unsigned i; adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } const struct amdgpu_ip_block_version si_dma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index c44723c267c9..c902f26cf50d 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -234,7 +234,7 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ", (uint16_t)address, numbytes); - if (drm_debug & DRM_UT_DRIVER) { + if (drm_debug_enabled(DRM_UT_DRIVER)) { print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, 16, 1, data, numbytes, false); } @@ -388,7 +388,7 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :", (uint16_t)address, bytes_received); - if (drm_debug & DRM_UT_DRIVER) { + if (drm_debug_enabled(DRM_UT_DRIVER)) { print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, 16, 1, data, bytes_received, false); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8e1640bc07af..15f3424a1ff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -67,7 +67,9 @@ #include "vce_v4_0.h" #include "vcn_v1_0.h" #include "vcn_v2_0.h" +#include "jpeg_v2_0.h" #include "vcn_v2_5.h" +#include "jpeg_v2_5.h" #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" @@ -477,56 +479,18 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) return ret; } -static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - - *cap = smu_baco_is_support(smu); - return 0; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); - } -} - static int soc15_asic_baco_reset(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - dev_info(adev->dev, "GPU BACO reset\n"); - - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - - if (smu_baco_reset(smu)) - return -EIO; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if 
(pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } + ret = amdgpu_dpm_baco_reset(adev); + if (ret) + return ret; /* re-enable doorbell interrupt after BACO exit */ if (ras && ras->supported) @@ -535,21 +499,11 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return 0; } -static int soc15_mode2_reset(struct amdgpu_device *adev) -{ - if (is_support_sw_smu(adev)) - return smu_mode2_reset(&adev->smu); - if (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->asic_reset_mode_2) - return -ENOENT; - - return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); -} - static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; + bool baco_reset = false; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (adev->asic_type) { case CHIP_RAVEN: @@ -557,23 +511,21 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE2; case CHIP_VEGA10: case CHIP_VEGA12: - soc15_asic_get_baco_capability(adev, &baco_reset); + case CHIP_ARCTURUS: + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_reset); - else - baco_reset = false; - if (baco_reset) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + baco_reset = amdgpu_dpm_is_baco_supported(adev); - if (hive || (ras && ras->supported)) - baco_reset = false; - } + /* + * 1. PMFW version > 0x284300: all cases use baco + * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco + */ + if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400) + baco_reset = false; break; default: - baco_reset = false; break; } @@ -585,13 +537,17 @@ soc15_asic_reset_method(struct amdgpu_device *adev) static int soc15_asic_reset(struct amdgpu_device *adev) { + /* original raven doesn't have full asic reset */ + if (adev->pdev->device == 0x15dd && adev->rev_id < 0x8) + return 0; + switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: - return soc15_mode2_reset(adev); + return amdgpu_dpm_mode2_reset(adev); default: if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); @@ -599,6 +555,22 @@ static int soc15_asic_reset(struct amdgpu_device *adev) } } +static bool soc15_supports_baco(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_ARCTURUS: + return amdgpu_dpm_is_baco_supported(adev); + case CHIP_VEGA20: + if (adev->psp.sos_fw_version >= 0x80067) + return amdgpu_dpm_is_baco_supported(adev); + return false; + default: + return false; + } +} + /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, u32 cntl_reg, u32 status_reg) { @@ -709,9 +681,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) - adev->df_funcs = &df_v3_6_funcs; + adev->df.funcs = &df_v3_6_funcs; else - adev->df_funcs = &df_v1_7_funcs; + adev->df.funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); adev->nbio.funcs->detect_hw_virt(adev); @@ -746,11 +718,11 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if 
(!amdgpu_sriov_vf(adev)) { - if (is_support_sw_smu(adev)) + if (is_support_sw_smu(adev)) { + if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -798,11 +770,16 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + } else { amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + } + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block); break; case CHIP_RENOIR: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); @@ -810,8 +787,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); - if (is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) @@ -821,6 +797,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); break; default: return -EINVAL; @@ -999,6 +976,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &soc15_get_pcie_replay_count, + .supports_baco = &soc15_supports_baco, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -1007,6 +985,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .read_bios_from_rom = &soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, + .reset_method = &soc15_asic_reset_method, .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, @@ -1019,7 +998,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .get_pcie_usage = &vega20_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &soc15_get_pcie_replay_count, - .reset_method = &soc15_asic_reset_method + .supports_baco = &soc15_supports_baco, }; static int soc15_common_early_init(void *handle) @@ -1145,9 +1124,7 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_VCN | - AMD_PG_SUPPORT_VCN_DPG; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } else if (adev->pdev->device == 0x15d8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | @@ -1190,9 +1167,7 @@ static int 
soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_VCN | - AMD_PG_SUPPORT_VCN_DPG; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } break; case CHIP_ARCTURUS: @@ -1208,7 +1183,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | - AMD_CG_SUPPORT_IH_CG; + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x32; break; @@ -1229,12 +1206,14 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_DF_MGCG; adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x91; break; @@ -1272,7 +1251,7 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); - adev->df_funcs->sw_init(adev); + adev->df.funcs->sw_init(adev); return 0; } @@ -1282,7 +1261,7 @@ static int soc15_common_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_nbio_ras_fini(adev); - adev->df_funcs->sw_fini(adev); + adev->df.funcs->sw_fini(adev); return 0; } @@ -1492,38 +1471,38 @@ static int soc15_common_set_clockgating_state(void *handle, case CHIP_VEGA12: case CHIP_VEGA20: adev->nbio.funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); - adev->df_funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); + adev->df.funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); break; case CHIP_RAVEN: case CHIP_RENOIR: adev->nbio.funcs->update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; case CHIP_ARCTURUS: soc15_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -1561,7 +1540,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - adev->df_funcs->get_clockgating_state(adev, flags); + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 57af489a5de3..d0fb7a67c1a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -60,6 +60,18 @@ struct soc15_allowed_register_entry { bool grbm_indexed; }; +struct soc15_ras_field_entry { + const char *name; + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t sec_count_mask; + uint32_t sec_count_shift; + uint32_t ded_count_mask; + uint32_t ded_count_shift; +}; + #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 839f186e1182..19e870c79896 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -52,6 +52,7 @@ uint32_t old_ = 0; \ uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ uint32_t loop = adev->usec_timeout; \ + ret = 0; \ while ((tmp_ & (mask)) != (expected_value)) { \ if (old_ != tmp_) { \ loop = adev->usec_timeout; \ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 47c4b96b14d1..793bf70e64b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -28,19 +28,24 @@ #include "rsmu/rsmu_0_0_2_sh_mask.h" #include "umc/umc_6_1_1_offset.h" #include "umc/umc_6_1_1_sh_mask.h" +#include "umc/umc_6_1_2_offset.h" -#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 +#define UMC_6_INST_DIST 0x40000 /* * (addr / 256) * 8192, the higher 26 bits in ErrorAddr * is the index of 8KB block */ -#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) /* channel index is the index of 256B block */ #define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) /* offset in 256B block */ #define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) +#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++) +#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) +#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) + const uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { {2, 18, 11, 27}, {4, 20, 13, 29}, @@ -49,24 +54,10 @@ const uint32_t {9, 25, 0, 16}, {15, 31, 6, 22} }; -static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, - uint32_t umc_instance) +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) { - uint32_t rsmu_umc_index; - - rsmu_umc_index = RREG32_SOC15(RSMU, 0, - mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN, 1); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - 
RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_INSTANCE, umc_instance); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_WREN, 1 << umc_instance); - WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - rsmu_umc_index); } static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) @@ -75,15 +66,23 @@ static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) RSMU_UMC_INDEX_MODE_EN, 0); } -static uint32_t umc_v6_1_get_umc_inst(struct amdgpu_device *adev) +static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) { uint32_t rsmu_umc_index; rsmu_umc_index = RREG32_SOC15(RSMU, 0, - mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + return REG_GET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_INSTANCE); + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN); +} + +static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst; } static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, @@ -95,39 +94,50 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, uint64_t mc_umc_status; uint32_t mc_umc_status_addr; - ecc_err_cnt_sel_addr = - SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); - ecc_err_cnt_addr = - SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + } else { + /* UMC 6_1_1 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + } /* select the lower chip and check the error count */ - ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 0); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); - ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); *error_count += (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - UMC_V6_1_CE_CNT_INIT); /* clear the lower chip err count */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* select the higher chip and check the err counter */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 1); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); - ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); *error_count += (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - 
UMC_V6_1_CE_CNT_INIT); /* clear the higher chip err count */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ - mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) @@ -141,11 +151,18 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev uint64_t mc_umc_status; uint32_t mc_umc_status_addr; - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + } else { + /* UMC 6_1_1 registers */ + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + } /* check the MCUMC_STATUS */ - mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -155,49 +172,78 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev *error_count += 1; } -static void umc_v6_1_query_error_count(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint32_t umc_reg_offset, - uint32_t channel_index) -{ - umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, - &(err_data->ce_count)); - umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, - &(err_data->ue_count)); -} - static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - amdgpu_umc_for_each_channel(umc_v6_1_query_error_count); + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t channel_index) + uint32_t umc_reg_offset, + uint32_t ch_inst, + uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page; + uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; struct eeprom_table_record *err_rec; - - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + mc_umc_status_addr = 
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT); + } else { + /* UMC 6_1_1 registers */ + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); + } /* skip error address process if -ENOMEM */ if (!err_data->err_addr) { /* clear umc status */ - WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); return; } err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); @@ -218,57 +264,105 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; err_rec->cu = 0; err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_v6_1_get_umc_inst(adev); + err_rec->mcumc_id = umc_inst; err_data->err_addr_cnt++; } } /* clear umc status */ - WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); } static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_error_address(adev, + err_data, + umc_reg_offset, + ch_inst, + umc_inst); + } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t channel_index) + uint32_t umc_reg_offset) { uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; uint32_t ecc_err_cnt_addr; - ecc_err_cnt_sel_addr = - SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); - ecc_err_cnt_addr = - SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT); + } else { + /* UMC 6_1_1 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + } /* select the lower chip and check the error count */ - ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel 
= RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 0); /* set ce error interrupt type to APIC based interrupt */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrInt, 0x1); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); /* set error count to initial value */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* select the higher chip and check the err counter */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 1); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); } static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) { - void *ras_error_status = NULL; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); + } - amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel); + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { @@ -276,6 +370,4 @@ const struct amdgpu_umc_funcs umc_v6_1_funcs = { .ras_late_init = amdgpu_umc_ras_late_init, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, - .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, - .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h index dab9cbd292c5..0ce1d323cfdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -35,7 +35,8 @@ /* total channel instances in one umc block */ #define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM) /* UMC regiser per channel offset */ -#define UMC_V6_1_PER_CHANNEL_OFFSET 0x800 +#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20 0x800 +#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT 0x400 /* EccErrCnt max value */ #define UMC_V6_1_CE_CNT_MAX 0xffff diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 01e62fb8e6e0..0fa8aae2d78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -763,7 +763,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 217084d56ab8..e0aadcaf6c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1421,7 +1421,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 475ae68f38f5..217db187207c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -739,7 +739,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); int i; if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 683701cf7270..3fd102efb7af 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -887,7 +887,7 @@ static int vce_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); int i; if ((adev->asic_type == CHIP_POLARIS10) || diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index b4f84a820a44..1a24fadd30e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" +#include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" #include "soc15_common.h" @@ -36,21 +37,22 @@ #include "mmhub/mmhub_9_1_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" +#include "jpeg_v1_0.h" -#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x05ac -#define mmUVD_REG_XX_MASK_BASE_IDX 1 +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1 +#define mmUVD_REG_XX_MASK_1_0 0x05ac +#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); -static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); -static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); + +static void vcn_v1_0_idle_work_handler(struct work_struct *work); /** * vcn_v1_0_early_init - set function pointers @@ -68,9 +70,10 @@ static int vcn_v1_0_early_init(void *handle) vcn_v1_0_set_dec_ring_funcs(adev); vcn_v1_0_set_enc_ring_funcs(adev); - vcn_v1_0_set_jpeg_ring_funcs(adev); 
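Nearly all register traffic in these VCN/JPEG hunks goes through RREG32_SOC15/WREG32_SOC15, which resolve a register as a per-ASIC segment base from adev->reg_offset plus the register's fixed offset (this is what the _BASE_IDX defines above select). A toy model of that lookup, as an editor's sketch: the segment-base values are invented and the instance dimension is dropped for brevity.

#include <stdint.h>
#include <stdio.h>

/* hypothetical per-ASIC segment bases; the real driver fills
 * adev->reg_offset[hwip][instance][segment] at init time */
enum { UVD_HWIP = 1, HWIP_MAX = 2, SEG_MAX = 4 };

static const uint32_t reg_offset[HWIP_MAX][SEG_MAX] = {
	[UVD_HWIP] = { 0x0000, 0x7800, 0x8000, 0xc000 }, /* made-up values */
};

/* roughly what SOC15_REG_OFFSET(ip, inst, reg) expands to */
static uint32_t soc15_reg_offset(uint32_t hwip, uint32_t base_idx, uint32_t reg)
{
	return reg_offset[hwip][base_idx] + reg;
}

int main(void)
{
	/* e.g. mmUVD_REG_XX_MASK_1_0 = 0x05ac with BASE_IDX 1 */
	printf("dword offset = 0x%x\n", soc15_reg_offset(UVD_HWIP, 1, 0x05ac));
	return 0;
}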
vcn_v1_0_set_irq_funcs(adev); + jpeg_v1_0_early_init(handle); + return 0; } @@ -101,15 +104,13 @@ static int vcn_v1_0_sw_init(void *handle) return r; } - /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq); - if (r) - return r; - r = amdgpu_vcn_sw_init(adev); if (r) return r; + /* Override the work func */ + adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; @@ -149,17 +150,11 @@ static int vcn_v1_0_sw_init(void *handle) return r; } - ring = &adev->vcn.inst->ring_jpeg; - sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); - if (r) - return r; - adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; - adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch = - SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); - return 0; + r = jpeg_v1_0_sw_init(handle); + + return r; } /** @@ -178,6 +173,8 @@ static int vcn_v1_0_sw_fini(void *handle) if (r) return r; + jpeg_v1_0_sw_fini(handle); + r = amdgpu_vcn_sw_fini(adev); return r; @@ -207,7 +204,7 @@ static int vcn_v1_0_hw_init(void *handle) goto done; } - ring = &adev->vcn.inst->ring_jpeg; + ring = &adev->jpeg.inst->ring_dec; r = amdgpu_ring_test_helper(ring); if (r) goto done; @@ -838,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) vcn_v1_0_mc_resume_spg_mode(adev); - WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10); - WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, - RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3); + WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10); + WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0, + RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3); /* enable VCPU clock */ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); @@ -947,22 +944,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - ring = &adev->vcn.inst->ring_jpeg; - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - - /* initialize wptr */ - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - - /* copy patch commands to the jpeg ring */ - vcn_v1_0_jpeg_ring_set_patch_ring(ring, - (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + jpeg_v1_0_start(adev, 0); return 0; } @@ -1106,13 +1088,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - /* initialize JPEG wptr */ - ring = &adev->vcn.inst->ring_jpeg; - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - - /* copy patch commands to the jpeg ring */ - vcn_v1_0_jpeg_ring_set_patch_ring(ring, - (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + jpeg_v1_0_start(adev, 1); return 0; } @@ -1223,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device 
*adev) } static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { int ret_code; uint32_t reg_data = 0; @@ -1316,7 +1292,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.inst->ring_jpeg; + ring = &adev->jpeg.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | @@ -1370,7 +1346,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ @@ -1716,389 +1692,6 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } - -/** - * vcn_v1_0_jpeg_ring_get_rptr - get read pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware read pointer - */ -static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); -} - -/** - * vcn_v1_0_jpeg_ring_get_wptr - get write pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware write pointer - */ -static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); -} - -/** - * vcn_v1_0_jpeg_ring_set_wptr - set write pointer - * - * @ring: amdgpu_ring pointer - * - * Commits the write pointer to the hardware - */ -static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); -} - -/** - * vcn_v1_0_jpeg_ring_insert_start - insert a start command - * - * @ring: amdgpu_ring pointer - * - * Write a start command to the ring. - */ -static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x80010000); -} - -/** - * vcn_v1_0_jpeg_ring_insert_end - insert a end command - * - * @ring: amdgpu_ring pointer - * - * Write a end command to the ring. - */ -static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x00010000); -} - -/** - * vcn_v1_0_jpeg_ring_emit_fence - emit an fence & trap command - * - * @ring: amdgpu_ring pointer - * @fence: fence to emit - * - * Write a fence and a trap command to the ring. 
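The JPEG ring helpers removed below all follow one pattern: push a PACKETJ header dword naming a register, then the payload dword, onto the ring. A compact model of that write pattern, as an editor's sketch; the packetj() encoding and the register numbers are simplified stand-ins, not the real PACKETJ layout:

#include <stdint.h>
#include <stdio.h>

#define RING_DW 64 /* power of two, like the real ring sizes */

struct toy_ring {
	uint32_t buf[RING_DW];
	uint32_t wptr;
};

static void ring_write(struct toy_ring *r, uint32_t dw)
{
	r->buf[r->wptr++ & (RING_DW - 1)] = dw;
}

/* simplified stand-in for PACKETJ(reg, ..., type) */
static uint32_t packetj(uint32_t reg, uint32_t type)
{
	return (type << 28) | (reg & 0x0003ffff);
}

static void emit_fence(struct toy_ring *r, uint64_t addr, uint32_t seq)
{
	ring_write(r, packetj(0x4029, 0)); ring_write(r, seq);                    /* GPCOM_DATA0 */
	ring_write(r, packetj(0x401a, 0)); ring_write(r, (uint32_t)addr);         /* BAR_LOW */
	ring_write(r, packetj(0x401b, 0)); ring_write(r, (uint32_t)(addr >> 32)); /* BAR_HIGH */
	ring_write(r, packetj(0x4028, 0)); ring_write(r, 0x8);                    /* fence command */
	ring_write(r, packetj(0x0000, 7)); ring_write(r, 0);                      /* TYPE7 = trap */
}

int main(void)
{
	struct toy_ring r = { {0}, 0 };

	emit_fence(&r, 0x123456789ULL, 42);
	printf("fence+trap emitted in %u dwords\n", r.wptr);
	return 0;
}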
- */ -static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) -{ - struct amdgpu_device *adev = ring->adev; - - WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x8); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - amdgpu_ring_write(ring, - PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); - amdgpu_ring_write(ring, 0xffffffff); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x3fbc); - - amdgpu_ring_write(ring, - PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x1); - - /* emit trap */ - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); - amdgpu_ring_write(ring, 0); -} - -/** - * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer - * - * @ring: amdgpu_ring pointer - * @ib: indirect buffer to execute - * - * Write ring commands to execute the indirect buffer. 
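The emit_reg_wait helper removed a little further on makes the engine itself poll a register until (value & mask) == ref, bounded by the JRBC_RB_COND_RD_TIMER value it programs first. A CPU-side analogue of that wait, as an editor's sketch with a faked register read:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* fake MMIO read: the "register" counts up until the engine is ready */
static uint32_t fake_status;
static uint32_t read32(void)
{
	return ++fake_status;
}

/* same condition the packet encodes: loop until (value & mask) == ref,
 * with a retry budget standing in for the programmed timer */
static bool wait_reg(uint32_t ref, uint32_t mask, unsigned int budget)
{
	while (budget--) {
		if ((read32() & mask) == ref)
			return true;
	}
	return false;
}

int main(void)
{
	printf("wait %s\n", wait_reg(0x2, 0x3, 100) ? "matched" : "timed out");
	return 0;
}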
- */ -static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmid = AMDGPU_JOB_GET_VMID(job); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, ib->length_dw); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, - PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x2); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); - amdgpu_ring_write(ring, 0x2); -} - -static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, val); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, - PACKETJ(0, 0, 0, PACKETJ_TYPE3)); - } - amdgpu_ring_write(ring, mask); -} - -static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) -{ - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t data0, data1, mask; - - pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - - /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; - data1 = lower_32_bits(pd_addr); - mask = 0xffffffff; - vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); -} - -static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t 
reg, uint32_t val) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, - PACKETJ(0, 0, 0, PACKETJ_TYPE0)); - } - amdgpu_ring_write(ring, val); -} - -static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) -{ - int i; - - WARN_ON(ring->wptr % 2 || count % 2); - - for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } -} - -static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) -{ - struct amdgpu_device *adev = ring->adev; - ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - ring->ring[(*ptr)++] = 0; - ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); - } else { - ring->ring[(*ptr)++] = reg_offset; - ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); - } - ring->ring[(*ptr)++] = val; -} - -static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) -{ - struct amdgpu_device *adev = ring->adev; - - uint32_t reg, reg_offset, val, mask, i; - - // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); - reg_offset = (reg << 2); - val = lower_32_bits(ring->gpu_addr); - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); - reg_offset = (reg << 2); - val = upper_32_bits(ring->gpu_addr); - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 3rd to 5th: issue MEM_READ commands - for (i = 0; i <= 2; i++) { - ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); - ring->ring[ptr++] = 0; - } - - // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); - reg_offset = (reg << 2); - val = 0x13; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 7th: program mmUVD_JRBC_RB_REF_DATA - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA); - reg_offset = (reg << 2); - val = 0x1; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); - reg_offset = (reg << 2); - val = 0x1; - mask = 0x1; - - ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); - ring->ring[ptr++] = 0x01400200; - ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); - ring->ring[ptr++] = val; - ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); - if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || - ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { - ring->ring[ptr++] = 0; - ring->ring[ptr++] = 
PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); - } else { - ring->ring[ptr++] = reg_offset; - ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); - } - ring->ring[ptr++] = mask; - - //9th to 21st: insert no-op - for (i = 0; i <= 12; i++) { - ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); - ring->ring[ptr++] = 0; - } - - //22nd: reset mmUVD_JRBC_RB_RPTR - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR); - reg_offset = (reg << 2); - val = 0; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); - - //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch - reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); - reg_offset = (reg << 2); - val = 0x12; - vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); -} - static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -2123,9 +1716,6 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, case 120: amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; - case 126: - amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); - break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -2174,6 +1764,86 @@ static int vcn_v1_0_set_powergating_state(void *handle, return ret; } +static void vcn_v1_0_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vcn.idle_work.work); + unsigned int fences = 0, i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + + adev->vcn.pause_dpg_mode(adev, 0, &new_state); + } + + fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec); + fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec); + + if (fences == 0) { + amdgpu_gfx_off_ctrl(adev, true); + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); + } else { + schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + } +} + +void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (set_clocks) { + amdgpu_gfx_off_ctrl(adev, false); + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + unsigned int fences = 0, i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]); + + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + + 
adev->vcn.pause_dpg_mode(adev, 0, &new_state); + } +} + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -2220,7 +1890,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .insert_start = vcn_v1_0_dec_ring_insert_start, .insert_end = vcn_v1_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, + .begin_use = vcn_v1_0_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, @@ -2252,48 +1922,13 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v1_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, + .begin_use = vcn_v1_0_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_JPEG, - .align_mask = 0xf, - .nop = PACKET0(0x81ff, 0), - .support_64bit_ptrs = false, - .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, - .extra_dw = 64, - .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, - .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, - .set_wptr = vcn_v1_0_jpeg_ring_set_wptr, - .emit_frame_size = - 6 + 6 + /* hdp invalidate / flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + - 8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */ - 26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */ - 6, - .emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */ - .emit_ib = vcn_v1_0_jpeg_ring_emit_ib, - .emit_fence = vcn_v1_0_jpeg_ring_emit_fence, - .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_jpeg_ring_test_ring, - .test_ib = amdgpu_vcn_jpeg_ring_test_ib, - .insert_nop = vcn_v1_0_jpeg_ring_nop, - .insert_start = vcn_v1_0_jpeg_ring_insert_start, - .insert_end = vcn_v1_0_jpeg_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg, - .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; @@ -2310,12 +1945,6 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) DRM_INFO("VCN encode is enabled in VM mode\n"); } -static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) -{ - adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; - DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); -} - static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { .set = vcn_v1_0_set_interrupt_state, .process = vcn_v1_0_process_interrupt, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h index 2a497a7a4840..f67d7391fc21 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h @@ -24,6 +24,8 @@ #ifndef __VCN_V1_0_H__ #define __VCN_V1_0_H__ +void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring); + extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block; #endif
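Editor's note (not part of the patch): the vcn_v1_0_ring_begin_use() hook wired into the ring-funcs tables above replaces the generic amdgpu_vcn_ring_begin_use(), because VCN 1.0 power management now has to account for the separate JPEG 1.0 block as well. The stand-alone sketch below models the underlying policy; all names in it are invented stand-ins, not the amdgpu API. Any pending fence keeps the owning engine's DPG state paused, and the block is power gated only once every ring has drained.

#include <stdbool.h>
#include <stdio.h>

enum dpg { DPG_UNPAUSE, DPG_PAUSE };

struct pause_state { enum dpg fw_based, jpeg; };

/* Mirrors the decision in vcn_v1_0_idle_work_handler(): encode fences
 * drive the fw_based engine, JPEG fences drive the jpeg engine, and the
 * block is power gated only when no ring has work in flight. */
static bool vcn_idle_check(unsigned dec, unsigned enc, unsigned jpeg,
			   struct pause_state *st)
{
	st->fw_based = enc ? DPG_PAUSE : DPG_UNPAUSE;
	st->jpeg = jpeg ? DPG_PAUSE : DPG_UNPAUSE;
	return dec + enc + jpeg == 0; /* true: schedule power gating */
}

int main(void)
{
	struct pause_state st;
	printf("gate=%d\n", vcn_idle_check(0, 0, 0, &st));   /* fully idle */
	printf("gate=%d jpeg=%d\n", vcn_idle_check(0, 0, 3, &st), st.jpeg);
	return 0;
}

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 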
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 38f787a560cb..4f7216788f11 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -47,39 +47,13 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 -#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff -#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029 -#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a -#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b -#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea -#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb -#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf -#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1 -#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 -#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9 -#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 -#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec -#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed -#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 -#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 -#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 -#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f - -#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 - -#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x026c -#define mmUVD_REG_XX_MASK_BASE_IDX 1 - static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); -static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); /** * vcn_v2_0_early_init - set function pointers @@ -97,7 +71,6 @@ static int vcn_v2_0_early_init(void *handle) vcn_v2_0_set_dec_ring_funcs(adev); vcn_v2_0_set_enc_ring_funcs(adev); - vcn_v2_0_set_jpeg_ring_funcs(adev); vcn_v2_0_set_irq_funcs(adev); return 0; @@ -132,12 +105,6 @@ static int vcn_v2_0_sw_init(void *handle) return r; } - /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq); - if (r) - return r; - r = amdgpu_vcn_sw_init(adev); if (r) return r; @@ -194,19 +161,8 @@ static int vcn_v2_0_sw_init(void *handle) return r; } - ring = &adev->vcn.inst->ring_jpeg; - ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); - if (r) - return r; - adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; - adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); - return 0; } @@ -258,11 +214,6 @@ static int vcn_v2_0_hw_init(void *handle) goto done; } - ring = &adev->vcn.inst->ring_jpeg; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; - done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", @@ -296,9 +247,6 @@ static int vcn_v2_0_hw_fini(void *handle) ring->sched.ready = false; } - ring = &adev->vcn.inst->ring_jpeg; - ring->sched.ready = false; - return 
0; } @@ -393,7 +341,6 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); } static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) @@ -404,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( 
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } @@ -631,146 +578,23 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__SCPU_MODE_MASK); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } /** - * jpeg_v2_0_start - start JPEG block - * - * @adev: amdgpu_device pointer - * - * Setup and start the JPEG block - */ -static int jpeg_v2_0_start(struct amdgpu_device *adev) -{ - struct 
amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg; - uint32_t tmp; - int r = 0; - - /* disable power gating */ - tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp); - - SOC15_WAIT_ON_RREG(VCN, 0, - mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); - - if (r) { - DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); - return r; - } - - /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */ - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); - - /* JPEG disable CGC */ - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); - - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); - tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK - | JPEG_CGC_GATE__JPEG2_DEC_MASK - | JPEG_CGC_GATE__JPEG_ENC_MASK - | JPEG_CGC_GATE__JMCIF_MASK - | JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - - return 0; -} - -/** - * jpeg_v2_0_stop - stop JPEG block - * - * @adev: amdgpu_device pointer - * - * stop the JPEG block - */ -static int jpeg_v2_0_stop(struct amdgpu_device *adev) -{ - uint32_t tmp; - int r = 0; - - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - /* enable JPEG CGC */ - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); - - - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); - tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK - |JPEG_CGC_GATE__JPEG2_DEC_MASK - |JPEG_CGC_GATE__JPEG_ENC_MASK - |JPEG_CGC_GATE__JMCIF_MASK - |JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); - - /* enable power gating */ - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)); - tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; - tmp |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); - - tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp); - - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, - (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); - - if (r) { - DRM_ERROR("amdgpu: JPEG 
enable power gating failed\n"); - return r; - } - - return r; -} - -/** * vcn_v2_0_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer @@ -930,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr; + adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr; /* enable clock gating */ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); @@ -939,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interrupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ @@ -955,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | @@ -984,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) vcn_v2_0_mc_resume_dpg_mode(adev, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* release VCPU reset to boot */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); /* enable LMI MC and UMC channels */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL2), 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
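/*
 * Editor's aside (not part of the patch): in indirect mode the
 * WREG32_SOC15_DPG_MODE_2_0() calls in this sequence do not touch the
 * registers at all; each one appends an (offset, value) pair to a
 * per-instance SRAM image that the PSP replays later, which is why the
 * sequence finishes by handing psp_update_vcn_sram() the byte count
 * (dpg_sram_curr_addr - dpg_sram_cpu_addr). A stand-alone model of that
 * staging buffer, with invented names:
 */
#include <stddef.h>
#include <stdint.h>

struct dpg_sram {
	uint32_t buf[256]; /* CPU-visible staging image */
	size_t curr;       /* write cursor, in dwords */
};

/* Stage one register write instead of issuing it immediately. */
static void stage_wreg(struct dpg_sram *s, uint32_t offset, uint32_t value)
{
	s->buf[s->curr++] = offset;
	s->buf[s->curr++] = value;
}

/* Byte count handed to the firmware once staging is done, mirroring the
 * pointer arithmetic passed to psp_update_vcn_sram() below. */
static uint32_t staged_bytes(const struct dpg_sram *s)
{
	return (uint32_t)(s->curr * sizeof(uint32_t));
}

int main(void)
{
	struct dpg_sram s = { .curr = 0 };
	stage_wreg(&s, 0x1fd, 0x10); /* invented offset; value as written to mmUVD_REG_XX_MASK above */
	return (int)staged_bytes(&s); /* 8 bytes staged */
}
/* end of aside */
/* enable master 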
interrupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr - - (uintptr_t)adev->vcn.dpg_sram_cpu_addr)); + psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); @@ -1052,12 +876,8 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); - if (r) - return r; - goto jpeg; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); vcn_v2_0_disable_static_power_gating(adev); @@ -1209,10 +1029,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); -jpeg: - r = jpeg_v2_0_start(adev); - - return r; + return 0; } static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) @@ -1231,9 +1048,6 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); - tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); - tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); @@ -1252,10 +1066,6 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) uint32_t tmp; int r; - r = jpeg_v2_0_stop(adev); - if (r) - return r; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v2_0_stop_dpg_mode(adev); if (r) @@ -1320,7 +1130,7 @@ power_off: } static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { struct amdgpu_ring *ring; uint32_t reg_data = 0; @@ -1403,7 +1213,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ @@ -1781,272 +1591,6 @@ void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_ amdgpu_ring_write(ring, val); } -/** - * vcn_v2_0_jpeg_ring_get_rptr - get read pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware read pointer - */ -static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); -} - -/** - * vcn_v2_0_jpeg_ring_get_wptr - get write pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware write pointer - */ -static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring->use_doorbell) - return adev->wb.wb[ring->wptr_offs]; - else - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); -} - -/** - * vcn_v2_0_jpeg_ring_set_wptr - set write pointer - * - * @ring: amdgpu_ring pointer - * - * Commits the write pointer to the hardware - */ -static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring->use_doorbell) { - adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); - } else { - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); - } -} - -/** - * vcn_v2_0_jpeg_ring_insert_start - insert a start command - * - * @ring: amdgpu_ring pointer - * - * Write a start command to the ring. - */ -void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x80010000); -} - -/** - * vcn_v2_0_jpeg_ring_insert_end - insert an end command - * - * @ring: amdgpu_ring pointer - * - * Write an end command to the ring. - */ -void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x68e04); - - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x00010000); -} -
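/*
 * Editor's aside (not part of the patch): the removed JPEG helpers above
 * follow the stock amdgpu doorbell pattern -- the write pointer is first
 * mirrored into a CPU-visible writeback slot and then "rung" through the
 * doorbell aperture, falling back to a direct register write when
 * doorbells are unavailable. A condensed stand-alone model, with invented
 * names:
 */
#include <stdint.h>
#include <stdio.h>

struct model_ring {
	uint64_t wptr;               /* software copy of the write pointer */
	uint32_t *wb_slot;           /* CPU-visible writeback mirror */
	volatile uint32_t *doorbell; /* doorbell register in the MMIO BAR */
	int use_doorbell;
};

/* Mirrors vcn_v2_0_jpeg_ring_set_wptr(): publish to memory first, then
 * ring the doorbell so the engine fetches the new pointer. */
static void ring_commit_wptr(struct model_ring *ring)
{
	if (ring->use_doorbell) {
		*ring->wb_slot = (uint32_t)ring->wptr;
		*ring->doorbell = (uint32_t)ring->wptr;
	}
	/* else: a direct WREG32-style register write in the real driver */
}

int main(void)
{
	uint32_t wb = 0, db = 0;
	struct model_ring r = { .wptr = 42, .wb_slot = &wb,
				.doorbell = &db, .use_doorbell = 1 };
	ring_commit_wptr(&r);
	printf("wb=%u db=%u\n", wb, db); /* both report 42 */
	return 0;
}
/* end of aside */
-/** - * vcn_v2_0_jpeg_ring_emit_fence - emit a fence & trap command - * - * @ring: amdgpu_ring pointer - * @fence: fence to emit - * - * Write a fence and a trap command to the ring. 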
- */ -void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) -{ - WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, seq); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x8); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x3fbc); - - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x1); - - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); - amdgpu_ring_write(ring, 0); -} - -/** - * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer - * - * @ring: amdgpu_ring pointer - * @ib: indirect buffer to execute - * - * Write ring commands to execute the indirect buffer. - */ -void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) -{ - unsigned vmid = AMDGPU_JOB_GET_VMID(job); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, ib->length_dw); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); - - amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); - amdgpu_ring_write(ring, 0); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x2); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); - amdgpu_ring_write(ring, 0x2); -} - -void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val, uint32_t mask) -{ - 
uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x01400200); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, val); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE3)); - } - amdgpu_ring_write(ring, mask); -} - -void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) -{ - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t data0, data1, mask; - - pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); - - /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; - data1 = lower_32_bits(pd_addr); - mask = 0xffffffff; - vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); -} - -void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) -{ - uint32_t reg_offset = (reg << 2); - - amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, - PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); - } else { - amdgpu_ring_write(ring, reg_offset); - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - } - amdgpu_ring_write(ring, val); -} - -void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) -{ - int i; - - WARN_ON(ring->wptr % 2 || count % 2); - - for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } -} - static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -2071,9 +1615,6 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; - case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); - break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -2083,7 +1624,7 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) +int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t tmp = 0; @@ -2219,36 +1760,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_JPEG, - .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = vcn_v2_0_jpeg_ring_get_rptr, - .get_wptr = vcn_v2_0_jpeg_ring_get_wptr, - .set_wptr = vcn_v2_0_jpeg_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + - 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ - 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ - 8 + 16, - .emit_ib_size = 22, /* 
vcn_v2_0_jpeg_ring_emit_ib */ - .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, - .emit_fence = vcn_v2_0_jpeg_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_jpeg_ring_test_ring, - .test_ib = amdgpu_vcn_jpeg_ring_test_ib, - .insert_nop = vcn_v2_0_jpeg_ring_nop, - .insert_start = vcn_v2_0_jpeg_ring_insert_start, - .insert_end = vcn_v2_0_jpeg_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; @@ -2265,12 +1776,6 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) DRM_INFO("VCN encode is enabled in VM mode\n"); } -static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) -{ - adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; - DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); -} - static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { .set = vcn_v2_0_set_interrupt_state, .process = vcn_v2_0_process_interrupt, @@ -2278,7 +1783,7 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1; adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h index 8467292f32e5..6c9de1882428 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h @@ -37,6 +37,7 @@ extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +extern int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring); extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, @@ -49,19 +50,6 @@ extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr); extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); -extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring); -extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring); -extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags); -extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, - struct amdgpu_ib *ib, uint32_t flags); -extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val, uint32_t mask); -extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr); -extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); -extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count); - extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block; #endif /* __VCN_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 93edf9193a7b..70fae7977f8f 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -29,6 +29,7 @@ #include "soc15.h" #include "soc15d.h" #include "vcn_v2_0.h" +#include "mmsch_v1_0.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -47,16 +48,16 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c -#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f - -#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 +#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); -static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -91,11 +92,16 @@ static int vcn_v2_5_early_init(void *handle) } else adev->vcn.num_vcn_inst = 1; - adev->vcn.num_enc_rings = 2; + if (amdgpu_sriov_vf(adev)) { + adev->vcn.num_vcn_inst = 2; + adev->vcn.harvest_config = 0; + adev->vcn.num_enc_rings = 1; + } else { + adev->vcn.num_enc_rings = 2; + } vcn_v2_5_set_dec_ring_funcs(adev); vcn_v2_5_set_enc_ring_funcs(adev); - vcn_v2_5_set_jpeg_ring_funcs(adev); vcn_v2_5_set_irq_funcs(adev); return 0; @@ -130,12 +136,6 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; } - - /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], - VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq); - if (r) - return r; } r = amdgpu_vcn_sw_init(adev); @@ -184,12 +184,11 @@ static int vcn_v2_5_sw_init(void *handle) adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); - adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); - ring = &adev->vcn.inst[j].ring_dec; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; + + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + (amdgpu_sriov_vf(adev) ? 2*j : 8*j); sprintf(ring->name, "vcn_dec_%d", j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); if (r) @@ -198,22 +197,26 @@ static int vcn_v2_5_sw_init(void *handle) for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst[j].ring_enc[i]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; + + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + (amdgpu_sriov_vf(adev) ? 
(1 + i + 2*j) : (2 + i + 8*j)); + sprintf(ring->name, "vcn_enc_%d.%d", j, i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); if (r) return r; } + } - ring = &adev->vcn.inst[j].ring_jpeg; - ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; - sprintf(ring->name, "vcn_jpeg_%d", j); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + return 0; } @@ -229,6 +232,9 @@ static int vcn_v2_5_sw_fini(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -249,35 +255,44 @@ static int vcn_v2_5_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j, r; + int i, j, r = 0; + + if (amdgpu_sriov_vf(adev)) + r = vcn_v2_5_sriov_start(adev); for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; - ring = &adev->vcn.inst[j].ring_dec; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ring->doorbell_index, j); + if (amdgpu_sriov_vf(adev)) { + adev->vcn.inst[j].ring_enc[0].sched.ready = true; + adev->vcn.inst[j].ring_enc[1].sched.ready = false; + adev->vcn.inst[j].ring_enc[2].sched.ready = false; + adev->vcn.inst[j].ring_dec.sched.ready = true; + } else { - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + ring = &adev->vcn.inst[j].ring_dec; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, j); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[j].ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) goto done; - } - ring = &adev->vcn.inst[j].ring_jpeg; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + } } + done: if (!r) - DRM_INFO("VCN decode and encode initialized successfully.\n"); + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); return r; } @@ -300,7 +315,9 @@ static int vcn_v2_5_hw_fini(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); ring->sched.ready = false; @@ -309,9 +326,6 @@ static int vcn_v2_5_hw_fini(void *handle) ring = &adev->vcn.inst[i].ring_enc[j]; ring->sched.ready = false; } - - ring = &adev->vcn.inst[i].ring_jpeg; - ring->sched.ready = false; } return 0; @@ -378,9 +392,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); 
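/*
 * Editor's aside (not part of the patch): note the per-instance indexing
 * introduced here -- each Arcturus VCN instance now reads its firmware TMR
 * address from its own ucode slot (AMDGPU_UCODE_ID_VCN + i), and the
 * sw_init hunk earlier assigns doorbells with an instance stride of 8 on
 * bare metal but a packed stride of 2 under SR-IOV. A stand-alone model of
 * that doorbell arithmetic, with invented names:
 */
#include <stdbool.h>
#include <stdio.h>

/* base is (adev->doorbell_index.vcn.vcn_ring0_1 << 1) in the driver. */
static unsigned vcn25_dec_doorbell(unsigned base, unsigned inst, bool sriov)
{
	return base + (sriov ? 2 * inst : 8 * inst);
}

static unsigned vcn25_enc_doorbell(unsigned base, unsigned inst,
				   unsigned ring, bool sriov)
{
	return base + (sriov ? (1 + ring + 2 * inst)
			     : (2 + ring + 8 * inst));
}

int main(void)
{
	/* bare metal, instance 1: dec at base+8, enc ring 0 at base+10 */
	printf("%u %u\n", vcn25_dec_doorbell(0, 1, false),
	       vcn25_enc_doorbell(0, 1, 0, false));
	/* SR-IOV, instance 1: dec at base+2, enc ring 0 at base+3 */
	printf("%u %u\n", vcn25_dec_doorbell(0, 1, true),
	       vcn25_enc_doorbell(0, 1, 0, true));
	return 0;
}
/* end of aside */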
WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; } else { @@ -412,6 +426,99 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) } } +static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, 
SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + /** * vcn_v2_5_disable_clock_gating - disable VCN clock gating * @@ -530,6 +637,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) } } +static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + /** * vcn_v2_5_enable_clock_gating - enable VCN clock gating * @@ -592,111 +747,134 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) } } -/** - * jpeg_v2_5_start - start JPEG block - * - * @adev: amdgpu_device pointer - * - * Setup and start the JPEG block - */ -static int jpeg_v2_5_start(struct amdgpu_device *adev) +static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { struct amdgpu_ring *ring; - uint32_t tmp; - 
@@ -592,111 +747,134 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * jpeg_v2_5_start - start JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * Setup and start the JPEG block
- */
-static int jpeg_v2_5_start(struct amdgpu_device *adev)
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
 {
 	struct amdgpu_ring *ring;
-	uint32_t tmp;
-	int i;
-
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
-		ring = &adev->vcn.inst[i].ring_jpeg;
-		/* disable anti hang mechanism */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0,
-			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
-		/* JPEG disable CGC */
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
-		tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
-		tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
-		tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
-		tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
-			| JPEG_CGC_GATE__JPEG2_DEC_MASK
-			| JPEG_CGC_GATE__JMCIF_MASK
-			| JPEG_CGC_GATE__JRBBM_MASK);
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
-		tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
-			| JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
-			| JPEG_CGC_CTRL__JMCIF_MODE_MASK
-			| JPEG_CGC_CTRL__JRBBM_MODE_MASK);
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
-		/* MJPEG global tiling registers */
-		WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
-			adev->gfx.config.gb_addr_config);
-		WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
-			adev->gfx.config.gb_addr_config);
-
-		/* enable JMI channel */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0,
-			~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
-		/* enable System Interrupt for JRBC */
-		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN),
-			JPEG_SYS_INT_EN__DJRBC_MASK,
-			~JPEG_SYS_INT_EN__DJRBC_MASK);
-
-		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
-		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
-			lower_32_bits(ring->gpu_addr));
-		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
-			upper_32_bits(ring->gpu_addr));
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
-		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
-		ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR);
-	}
-
-	return 0;
-}
-
-/**
- * jpeg_v2_5_stop - stop JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * stop the JPEG block
- */
-static int jpeg_v2_5_stop(struct amdgpu_device *adev)
-{
-	uint32_t tmp;
-	int i;
+	uint32_t rb_bufsz, tmp;
 
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
-		/* reset JMI */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL),
-			UVD_JMI_CNTL__SOFT_RESET_MASK,
-			~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
-		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
-		tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
-			|JPEG_CGC_GATE__JPEG2_DEC_MASK
-			|JPEG_CGC_GATE__JMCIF_MASK
-			|JPEG_CGC_GATE__JRBBM_MASK);
-		WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
-		/* enable anti hang mechanism */
-		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS),
-			UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
-			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-	}
+	/* disable register anti-hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1,
+		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+	/* enable dynamic power gating mode */
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS);
+	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+	WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+	if (indirect)
+		adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+			(uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+	/* enable clock gating */
+	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+	/* enable VCPU clock */
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* disable master interrupt */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+	/* setup mmUVD_LMI_CTRL */
+	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		UVD_LMI_CTRL__CRC_RESET_MASK |
+		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		0x00100000L);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_MPC_CNTL),
+		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_MPC_SET_MUXA0),
+		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_MPC_SET_MUXB0),
+		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_MPC_SET_MUX),
+		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+	vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+	/* enable LMI MC and UMC channels */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+	/* unblock VCPU register access */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* enable master interrupt */
+	WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+		UVD, 0, mmUVD_MASTINT_EN),
+		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+	if (indirect)
+		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+			(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+				(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+	ring = &adev->vcn.inst[inst_idx].ring_dec;
+	/* force RBC into idle state */
+	rb_bufsz = order_base_2(ring->ring_size);
+	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+	/* set the write pointer delay */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* set the wb address */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+		(upper_32_bits(ring->gpu_addr) >> 2));
+
+	/* program the RB_BASE for ring buffer */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+		lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+		upper_32_bits(ring->gpu_addr));
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+	WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0);
+
+	ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
+	WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+		lower_32_bits(ring->wptr));
 
 	return 0;
 }
@@ -713,6 +891,11 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+			r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+			continue;
+		}
+
 		/* disable register anti-hang mechanism */
 		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -722,6 +905,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
 		WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp);
 	}
 
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		return 0;
+
 	/*SW clock gating */
 	vcn_v2_5_disable_clock_gating(adev);
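The psp_update_vcn_sram() call in vcn_v2_5_start_dpg_mode() above derives the table size from pointer arithmetic between dpg_sram_curr_addr and dpg_sram_cpu_addr. A minimal standalone illustration of that byte-count computation (buffer size and entry count are hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t sram[64];              /* stand-in for dpg_sram_cpu_addr */
		uint32_t *curr = sram;          /* stand-in for dpg_sram_curr_addr */

		/* queue three offset/value pairs, as the indirect path does */
		for (int i = 0; i < 3; i++) {
			*curr++ = 0x1000 + i;               /* register offset */
			*curr++ = 0xdead0000u | i;          /* register value */
		}

		/* byte count handed to the PSP: end pointer minus start pointer */
		printf("%zu bytes\n",
		       (size_t)((uintptr_t)curr - (uintptr_t)sram));  /* 24 bytes */
		return 0;
	}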
@@ -874,23 +1060,250 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
 		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 		WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
 	}
-	r = jpeg_v2_5_start(adev);
 
-	return r;
+	return 0;
 }
 
-static int vcn_v2_5_stop(struct amdgpu_device *adev)
+static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
+		struct amdgpu_mm_table *table)
+{
+	uint32_t data = 0, loop = 0, size = 0;
+	uint64_t addr = table->gpu_addr;
+	struct mmsch_v1_1_init_header *header = NULL;
+
+	header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
+	size = header->total_size;
+
+	/*
+	 * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
+	 * memory descriptor location
+	 */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+	/* 2, update vmid of descriptor */
+	data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+	data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+	/* use domain0 for MM scheduler */
+	data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+
+	/* 3, notify mmsch about the size of this descriptor */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+	/* 4, set resp to zero */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+	/*
+	 * 5, kick off the initialization and wait until
+	 * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
+	 */
+	WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+	data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+	loop = 10;
+	while ((data & 0x10000002) != 0x10000002) {
+		udelay(100);
+		data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+		loop--;
+		if (!loop)
+			break;
+	}
+
+	if (!loop) {
+		dev_err(adev->dev,
+			"failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
+			data);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	uint32_t offset, size, tmp, i, rb_bufsz;
+	uint32_t table_size = 0;
+	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
+	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
+	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
+	struct mmsch_v1_0_cmd_end end = { { 0 } };
+	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+	struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
+
+	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+	end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+	header->version = MMSCH_VERSION;
+	header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
+	init_table += header->total_size;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		header->eng[i].table_offset = header->total_size;
+		header->eng[i].init_status = 0;
+		header->eng[i].table_size = 0;
+
+		table_size = 0;
+
+		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+		size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+		/* mc resume */
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+			offset = 0;
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
+		} else {
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				lower_32_bits(adev->vcn.inst[i].gpu_addr));
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i,
+					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				upper_32_bits(adev->vcn.inst[i].gpu_addr));
+			offset = size;
+			MMSCH_V1_0_INSERT_DIRECT_WT(
+				SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		}
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+			size);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+			0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+			AMDGPU_VCN_STACK_SIZE);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+				AMDGPU_VCN_STACK_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+				AMDGPU_VCN_STACK_SIZE));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+			0);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+			AMDGPU_VCN_CONTEXT_SIZE);
+
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		ring->wptr = 0;
+
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+			lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+			upper_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+			ring->ring_size / 4);
+
+		ring = &adev->vcn.inst[i].ring_dec;
+		ring->wptr = 0;
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+			lower_32_bits(ring->gpu_addr));
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i,
+				mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+			upper_32_bits(ring->gpu_addr));
+
+		/* force RBC into idle state */
+		rb_bufsz = order_base_2(ring->ring_size);
+		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+		MMSCH_V1_0_INSERT_DIRECT_WT(
+			SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+		/* add end packet */
+		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+		init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+
+		/* refine header */
+		header->eng[i].table_size = table_size;
+		header->total_size += table_size;
+	}
+
+	return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 {
+	int ret_code = 0;
 	uint32_t tmp;
-	int i, r;
 
-	r = jpeg_v2_5_stop(adev);
-	if (r)
-		return r;
+	/* Wait for power status to be 1 */
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+	/* wait for read ptr to be equal to write ptr */
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR);
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2);
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+	/* disable dynamic power gating mode */
+	WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0,
+		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+	return 0;
+}
+
+static int vcn_v2_5_stop(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
+	int i, r = 0;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
+		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+			r = vcn_v2_5_stop_dpg_mode(adev, i);
+			continue;
+		}
+
 		/* wait for vcn idle */
 		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
 		if (r)
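vcn_v2_5_mmsch_start() above implements a write-descriptor/kick/poll handshake: the host kicks the mailbox with 0x10000001 and then polls the response register, with a bounded retry count, until all bits of the mask 0x10000002 are set (the patch does not name the individual bits, so the sketch below treats it as an opaque "done" mask). A self-contained model of that polling idiom, with the hardware response simulated:

	#include <stdint.h>
	#include <stdio.h>

	#define MMSCH_DONE_BITS 0x10000002u   /* stand-in for the response mask */

	static int polls;
	static uint32_t read_resp(void)
	{
		/* simulate hardware that acknowledges on the third read */
		return (++polls >= 3) ? MMSCH_DONE_BITS : 0x10000001u;
	}

	static int mmsch_wait_done(void)
	{
		int loop = 10;
		uint32_t data = read_resp();

		while ((data & MMSCH_DONE_BITS) != MMSCH_DONE_BITS) {
			/* the real code delays 100us (udelay) between reads */
			data = read_resp();
			if (--loop == 0)
				return -1;   /* the driver returns -EBUSY here */
		}
		return 0;
	}

	int main(void)
	{
		printf("handshake %s after %d polls\n",
		       mmsch_wait_done() ? "failed" : "succeeded", polls);
		return 0;
	}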
@@ -946,6 +1359,67 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+		int inst_idx, struct dpg_pause_state *new_state)
+{
+	struct amdgpu_ring *ring;
+	uint32_t reg_data = 0;
+	int ret_code;
+
+	/* pause/unpause if state is changed */
+	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+		DRM_DEBUG("dpg pause state changed %d -> %d",
+			adev->vcn.pause_state.fw_based, new_state->fw_based);
+		reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+			ret_code = 0;
+			SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+			if (!ret_code) {
+				/* pause DPG */
+				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+				/* wait for ACK */
+				SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE,
+					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+				/* Restore */
+				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+				WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+					RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+				SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
+					0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+			}
+		} else {
+			/* unpause dpg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.pause_state.fw_based = new_state->fw_based;
+	}
+
+	return 0;
+}
+
 /**
  * vcn_v2_5_dec_ring_get_rptr - get read pointer
  *
@@ -988,6 +1462,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+		WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2,
+			lower_32_bits(ring->wptr) | 0x80000000);
+
 	if (ring->use_doorbell) {
 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
@@ -1013,7 +1491,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
 	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
 	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
 	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
-	.test_ring = amdgpu_vcn_dec_ring_test_ring,
+	.test_ring = vcn_v2_0_dec_ring_test_ring,
 	.test_ib = amdgpu_vcn_dec_ring_test_ib,
 	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
 	.insert_start = vcn_v2_0_dec_ring_insert_start,
@@ -1125,86 +1603,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
-/**
- * vcn_v2_5_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell)
-		return adev->wb.wb[ring->wptr_offs];
-	else
-		return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell) {
-		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
-		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
-	} else {
-		WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
-	}
-}
-
-static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = {
-	.type = AMDGPU_RING_TYPE_VCN_JPEG,
-	.align_mask = 0xf,
-	.vmhub = AMDGPU_MMHUB_1,
-	.get_rptr = vcn_v2_5_jpeg_ring_get_rptr,
-	.get_wptr = vcn_v2_5_jpeg_ring_get_wptr,
-	.set_wptr = vcn_v2_5_jpeg_ring_set_wptr,
-	.emit_frame_size =
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
-		8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
-		18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
-		8 + 16,
-	.emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
-	.emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
-	.emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
-	.emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
-	.test_ring = amdgpu_vcn_jpeg_ring_test_ring,
-	.test_ib = amdgpu_vcn_jpeg_ring_test_ib,
-	.insert_nop = vcn_v2_0_jpeg_ring_nop,
-	.insert_start = vcn_v2_0_jpeg_ring_insert_start,
-	.insert_end = vcn_v2_0_jpeg_ring_insert_end,
-	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = amdgpu_vcn_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
-	.emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
-	.emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
-	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
 {
 	int i;
@@ -1233,19 +1631,6 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
 	}
 }
 
-static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
-	int i;
-
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
-		adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
-		adev->vcn.inst[i].ring_jpeg.me = i;
-		DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i);
-	}
-}
-
 static bool vcn_v2_5_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1281,7 +1666,10 @@ static int vcn_v2_5_set_clockgating_state(void *handle,
 	enum amd_clockgating_state state)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+	bool enable = (state == AMD_CG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
 
 	if (enable) {
 		if (vcn_v2_5_is_idle(handle))
@@ -1300,6 +1688,9 @@ static int vcn_v2_5_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	if(state == adev->vcn.cur_state)
 		return 0;
@@ -1352,9 +1743,6 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
 	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
 		break;
-	case VCN_2_0__SRCID__JPEG_DECODE:
-		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg);
-		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
 			  entry->src_id, entry->src_data[0]);
@@ -1376,7 +1764,7 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
-		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2;
+		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
 		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
 	}
 }
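Reading vcn_v2_5_dec_ring_set_wptr() together with the pause path above: in DPG mode the decode write pointer is mirrored into mmUVD_SCRATCH2 with bit 31 ORed in (0x80000000), and the restore after a pause masks that bit off with 0x7FFFFFFF before writing mmUVD_RBC_RB_WPTR. The patch itself does not name the bit, so reading it as a validity flag is an inference; the encoding itself is just this round trip:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t wptr = 0x00000420;   /* hypothetical ring write pointer */

		/* set_wptr path: mirror the pointer and set bit 31 */
		uint32_t scratch2 = (wptr & 0x7FFFFFFFu) | 0x80000000u;

		/* pause-restore path: strip bit 31 before writing RBC_RB_WPTR */
		uint32_t restored = scratch2 & 0x7FFFFFFFu;

		assert(restored == wptr);
		return 0;
	}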
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 5cb7e231de5f..407c6093c2ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -234,16 +234,9 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff);
 
 	ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
-	ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
 	ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
-	if (adev->irq.ih.use_bus_addr) {
-		ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
-	} else {
-		ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_FBPA_ENABLE, 1);
-	}
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
 				   !!adev->irq.msi_enabled);
-
 	if (amdgpu_sriov_vf(adev)) {
 		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
 			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
@@ -253,10 +246,19 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
 	}
 
-	if ((adev->asic_type == CHIP_ARCTURUS
-		&& adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
-		|| adev->asic_type == CHIP_RENOIR)
+	if ((adev->asic_type == CHIP_ARCTURUS &&
+	     adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+	    adev->asic_type == CHIP_RENOIR) {
+		ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+		if (adev->irq.ih.use_bus_addr) {
+			ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+						   MC_SPACE_GPA_ENABLE, 1);
+		} else {
+			ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
						   MC_SPACE_FBPA_ENABLE, 1);
+		}
 		WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+	}
 
 	/* set the writeback address whether it's enabled or not */
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@@ -715,7 +717,7 @@ static int vega10_ih_set_clockgating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	vega10_ih_update_clockgating_state(adev,
-				state == AMD_CG_STATE_GATE ? true : false);
+				state == AMD_CG_STATE_GATE);
 
 	return 0;
 }
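The vega10_ih.c hunk above defers the IH_CHICKEN read-modify-write until the ASIC check passes, so other chips never touch the register. The REG_SET_FIELD idiom it relies on is a plain mask/shift update; an equivalent standalone sketch with a hypothetical one-bit field at bit 4 (the real field layout is not shown in this diff):

	#include <assert.h>
	#include <stdint.h>

	#define FIELD_SHIFT 4
	#define FIELD_MASK  (1u << FIELD_SHIFT)

	/* clear the field, then insert the new value, like REG_SET_FIELD */
	static uint32_t reg_set_field(uint32_t reg, uint32_t val)
	{
		return (reg & ~FIELD_MASK) | ((val << FIELD_SHIFT) & FIELD_MASK);
	}

	int main(void)
	{
		uint32_t ih_chicken = 0x00000081;            /* pretend RREG32 result */
		ih_chicken = reg_set_field(ih_chicken, 1);   /* enable the field */
		assert(ih_chicken == 0x00000091);
		return 0;
	}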
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index f1b171e30774..78b35901643b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -689,40 +689,6 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 	return -EINVAL;
 }
 
-int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
-{
-	void *pp_handle = adev->powerplay.pp_handle;
-	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-	if (!pp_funcs || !pp_funcs->get_asic_baco_capability) {
-		*cap = false;
-		return -ENOENT;
-	}
-
-	return pp_funcs->get_asic_baco_capability(pp_handle, cap);
-}
-
-int smu7_asic_baco_reset(struct amdgpu_device *adev)
-{
-	void *pp_handle = adev->powerplay.pp_handle;
-	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
-	if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
-		return -ENOENT;
-
-	/* enter BACO state */
-	if (pp_funcs->set_asic_baco_state(pp_handle, 1))
-		return -EIO;
-
-	/* exit BACO state */
-	if (pp_funcs->set_asic_baco_state(pp_handle, 0))
-		return -EIO;
-
-	dev_info(adev->dev, "GPU BACO reset\n");
-
-	return 0;
-}
-
 /**
  * vi_asic_pci_config_reset - soft reset GPU
  *
@@ -745,6 +711,21 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
 	return r;
 }
 
+static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_TOPAZ:
+		return amdgpu_dpm_is_baco_supported(adev);
+	default:
+		return false;
+	}
+}
+
 static enum amd_reset_method
 vi_asic_reset_method(struct amdgpu_device *adev)
 {
@@ -757,7 +738,7 @@ vi_asic_reset_method(struct amdgpu_device *adev)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_TOPAZ:
-		smu7_asic_get_baco_capability(adev, &baco_reset);
+		baco_reset = amdgpu_dpm_is_baco_supported(adev);
 		break;
 	default:
 		baco_reset = false;
@@ -786,7 +767,7 @@ static int vi_asic_reset(struct amdgpu_device *adev)
 	if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
 		if (!adev->in_suspend)
 			amdgpu_inc_vram_lost(adev);
-		r = smu7_asic_baco_reset(adev);
+		r = amdgpu_dpm_baco_reset(adev);
 	} else {
 		r = vi_asic_pci_config_reset(adev);
 	}
@@ -1119,6 +1100,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.get_pcie_usage = &vi_get_pcie_usage,
 	.need_reset_on_init = &vi_need_reset_on_init,
 	.get_pcie_replay_count = &vi_get_pcie_replay_count,
+	.supports_baco = &vi_asic_supports_baco,
 };
 
 #define CZ_REV_BRISTOL(rev) \
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 40d4174913a4..defb4aaf929a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -31,7 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev,
 int vi_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
-int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
-int smu7_asic_baco_reset(struct amdgpu_device *adev);
 
 #endif
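With the local smu7 BACO wrappers removed, capability checks and the reset itself both route through the shared amdgpu_dpm helpers, and the new .supports_baco callback exposes the per-ASIC answer through amdgpu_asic_funcs. A condensed, illustrative restatement of the resulting reset flow follows; this exact function does not exist in the driver, but every callee appears in the diff above.

	/* Sketch only: condenses vi_asic_reset() after this patch. */
	static int vi_asic_reset_flow_sketch(struct amdgpu_device *adev)
	{
		if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
			if (!adev->in_suspend)
				amdgpu_inc_vram_lost(adev);
			/* enter + exit BACO via the shared DPM helper */
			return amdgpu_dpm_baco_reset(adev);
		}
		/* fallback: classic PCI config reset */
		return vi_asic_pci_config_reset(adev);
	}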