Diffstat (limited to 'drivers/gpu/drm/amd')
292 files changed, 7004 insertions, 3757 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 7777d55275de..5fcd510f1abb 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -1,4 +1,33 @@ # SPDX-License-Identifier: MIT + +config DRM_AMDGPU + tristate "AMD GPU" + depends on DRM && PCI && MMU + select FW_LOADER + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER + select DRM_KMS_HELPER + select DRM_SCHED + select DRM_TTM + select DRM_TTM_HELPER + select POWER_SUPPLY + select HWMON + select BACKLIGHT_CLASS_DEVICE + select INTERVAL_TREE + select DRM_BUDDY + # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work + # ACPI_VIDEO's dependencies must also be selected. + select INPUT if ACPI + select ACPI_VIDEO if ACPI + # On x86 ACPI_VIDEO also needs ACPI_WMI + select X86_PLATFORM_DEVICES if ACPI && X86 + select ACPI_WMI if ACPI && X86 + help + Choose this option if you have a recent AMD Radeon graphics card. + + If M is selected, the module will be called amdgpu. + config DRM_AMDGPU_SI bool "Enable amdgpu support for SI parts" depends on DRM_AMDGPU diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 6ad39cf71bdd..798d0e9a60b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ - amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o + amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ + amdgpu_ring_mux.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -250,7 +251,7 @@ endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o include $(FULL_AMD_PATH)/pm/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2eca58220550..6b74df446694 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -82,7 +82,6 @@ #include "amdgpu_vce.h" #include "amdgpu_vcn.h" #include "amdgpu_jpeg.h" -#include "amdgpu_mn.h" #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" @@ -219,10 +218,12 @@ extern int amdgpu_use_xgmi_p2p; extern int sched_policy; extern bool debug_evictions; extern bool no_system_mem_limit; +extern int halt_if_hws_hang; #else static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; static const bool __maybe_unused debug_evictions; /* = false */ static const bool __maybe_unused no_system_mem_limit; +static const int __maybe_unused halt_if_hws_hang; #endif #ifdef CONFIG_HSA_AMD_P2P extern bool pcie_p2p; @@ -675,7 +676,7 @@ enum amd_hw_ip_block_type { MAX_HWIP }; -#define HWIP_MAX_INSTANCE 11 +#define HWIP_MAX_INSTANCE 28 #define HW_ID_MAX 300 #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv)) @@ -1063,6 +1064,7 @@ struct amdgpu_device { struct work_struct reset_work; bool job_hang; + bool dc_enabled; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1120,6 +1122,8 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct 
amdgpu_device *adev); +void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev); + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b14800ac179e..57b5e11446c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -847,7 +847,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; if (atif->notifications.brightness_change) { - if (amdgpu_device_has_dc_support(adev)) { + if (adev->dc_enabled) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5d9a34601a1a..f99d4873bf22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -195,7 +195,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) } adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, - adev_to_drm(adev), &gpu_resources); + &gpu_resources); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -673,7 +673,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err; } - ret = amdgpu_job_alloc(adev, 1, &job, NULL); + ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job); if (ret) goto err; @@ -760,9 +760,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) { - struct ras_err_data err_data = {0, 0, 0, NULL}; - - amdgpu_umc_poison_handler(adev, &err_data, reset); + amdgpu_umc_poison_handler(adev, reset); } bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 647220a8762d..f50e3ba4d7a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -353,7 +353,6 @@ int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); @@ -381,7 +380,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) } static inline -bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, +bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 81e3b528bbc9..e92b93557c13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -787,7 +787,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, for (se_idx = 0; se_idx < se_cnt; se_idx++) { for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { - gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS); /* @@ -820,7 +820,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 
0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); soc15_grbm_select(adev, 0, 0, 0, 0); unlock_spi_csq_mutexes(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1f76e27f1a35..8782916e64a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -29,6 +29,7 @@ #include "amdgpu_object.h" #include "amdgpu_gem.h" #include "amdgpu_vm.h" +#include "amdgpu_hmm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" #include <uapi/linux/kfd_ioctl.h> @@ -403,63 +404,15 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) { - struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT; - bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED; - uint32_t mapping_flags; - uint64_t pte_flags; - bool snoop = false; + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE | + AMDGPU_VM_MTYPE_DEFAULT; - mapping_flags = AMDGPU_VM_PAGE_READABLE; if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE) mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - switch (adev->asic_type) { - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) { - if (uncached) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else if (coherent) - mapping_flags |= AMDGPU_VM_MTYPE_CC; - else - mapping_flags |= AMDGPU_VM_MTYPE_RW; - if (adev->asic_type == CHIP_ALDEBARAN && - adev->gmc.xgmi.connected_to_cpu) - snoop = true; - } else { - if (uncached || coherent) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - if (amdgpu_xgmi_same_hive(adev, bo_adev)) - snoop = true; - } - } else { - if (uncached || coherent) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - snoop = true; - } - break; - default: - if (uncached || coherent) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - - if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - } - - pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); - pte_flags |= snoop ? 
AMDGPU_PTE_SNOOPED : 0; - - return pte_flags; + return amdgpu_gem_va_map_flags(adev, mapping_flags); } /** @@ -997,7 +950,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, goto out; } - ret = amdgpu_mn_register(bo, user_addr); + ret = amdgpu_hmm_register(bo, user_addr); if (ret) { pr_err("%s: Failed to register MMU notifier: %d\n", __func__, ret); @@ -1037,7 +990,7 @@ release_out: amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); unregister_out: if (ret) - amdgpu_mn_unregister(bo); + amdgpu_hmm_unregister(bo); out: mutex_unlock(&process_info->lock); return ret; @@ -1672,6 +1625,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } } + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) + alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) + alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); if (!*mem) { ret = -ENOMEM; @@ -1816,7 +1774,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_unlock(&process_info->lock); /* No more MMU notifiers */ - amdgpu_mn_unregister(mem->bo); + amdgpu_hmm_unregister(mem->bo); ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) @@ -1906,16 +1864,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( */ mutex_lock(&mem->process_info->lock); - /* Lock mmap-sem. If we find an invalid userptr BO, we can be - * sure that the MMU notifier is no longer running - * concurrently and the queues are actually stopped - */ - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - mmap_write_lock(current->mm); - is_invalid_userptr = atomic_read(&mem->invalid); - mmap_write_unlock(current->mm); - } - mutex_lock(&mem->lock); domain = mem->domain; @@ -2256,7 +2204,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, ret = drm_vma_node_allow(&obj->vma_node, drm_priv); if (ret) { - kfree(mem); + kfree(*mem); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index b81b77a9efa6..ac6fe0ae4609 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -101,39 +101,101 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev) } } +static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev, + struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes) +{ + u32 start_addr, fw_size, drv_size; + + start_addr = le32_to_cpu(fw_usage->start_address_in_kb); + fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); + drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb); + + DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n", + start_addr, + fw_size, + drv_size); + + if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == + (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { + /* Firmware request VRAM reservation for SR-IOV */ + adev->mman.fw_vram_usage_start_offset = (start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->mman.fw_vram_usage_size = fw_size << 10; + /* Use the default scratch size */ + *usage_bytes = 0; + } else { + *usage_bytes = drv_size << 10; + } + return 0; +} + +static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev, + struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes) +{ + u32 fw_start_addr, fw_size, drv_start_addr, drv_size; + + fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb); + fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); + + drv_start_addr 
= le32_to_cpu(fw_usage->driver_region0_start_address_in_kb); + drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb); + + DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n", + fw_start_addr, + fw_size, + drv_start_addr, + drv_size); + + if (amdgpu_sriov_vf(adev) && + ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { + /* Firmware request VRAM reservation for SR-IOV */ + adev->mman.fw_vram_usage_start_offset = (fw_start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->mman.fw_vram_usage_size = fw_size << 10; + } + + if (amdgpu_sriov_vf(adev) && + ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { + /* driver request VRAM reservation for SR-IOV */ + adev->mman.drv_vram_usage_start_offset = (drv_start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->mman.drv_vram_usage_size = drv_size << 10; + } + + *usage_bytes = 0; + return 0; +} + int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) { struct atom_context *ctx = adev->mode_info.atom_context; int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_usagebyfirmware); - struct vram_usagebyfirmware_v2_1 *firmware_usage; - uint32_t start_addr, size; - uint16_t data_offset; + struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1; + struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2; + u16 data_offset; + u8 frev, crev; int usage_bytes = 0; - if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { - firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); - DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n", - le32_to_cpu(firmware_usage->start_address_in_kb), - le16_to_cpu(firmware_usage->used_by_firmware_in_kb), - le16_to_cpu(firmware_usage->used_by_driver_in_kb)); - - start_addr = le32_to_cpu(firmware_usage->start_address_in_kb); - size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb); - - if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == - (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << - ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { - /* Firmware request VRAM reservation for SR-IOV */ - adev->mman.fw_vram_usage_start_offset = (start_addr & - (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; - adev->mman.fw_vram_usage_size = size << 10; - /* Use the default scratch size */ - usage_bytes = 0; - } else { - usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10; + if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) { + if (frev == 2 && crev == 1) { + fw_usage_v2_1 = + (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); + amdgpu_atomfirmware_allocate_fb_v2_1(adev, + fw_usage_v2_1, + &usage_bytes); + } else if (frev >= 2 && crev >= 2) { + fw_usage_v2_2 = + (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset); + amdgpu_atomfirmware_allocate_fb_v2_2(adev, + fw_usage_v2_2, + &usage_bytes); } } + ctx->scratch_size_bytes = 0; if (usage_bytes == 0) usage_bytes = 20 * 1024; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index e363f56c72af..30c28a69e847 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) if (!found) return false; + pci_dev_put(pdev); adev->bios = kmalloc(size, GFP_KERNEL); if (!adev->bios) { diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index cfb262911bfc..2ebbc6382a06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -26,7 +26,6 @@ #include <drm/display/drm_dp_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 365e3fb6a9e5..8516c814bc9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -294,12 +294,8 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } for (i = 0; i < p->gang_size; ++i) { - ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm); - if (ret) - goto free_all_kdata; - - ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i], - &fpriv->vm); + ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, + num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; } @@ -433,7 +429,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(&p->gang_leader->sync, fence); + r = amdgpu_sync_fence(&p->sync, fence); dma_fence_put(fence); if (r) return r; @@ -455,9 +451,20 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(&p->gang_leader->sync, fence); - dma_fence_put(fence); + r = amdgpu_sync_fence(&p->sync, fence); + if (r) + goto error; + /* + * When we have an explicit dependency it might be necessary to insert a + * pipeline sync to make sure that all caches etc are flushed and the + * next job actually sees the results from the previous one. + */ + if (fence->context == p->gang_leader->base.entity->fence_context) + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + +error: + dma_fence_put(fence); return r; } @@ -1106,7 +1113,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); if (r) return r; @@ -1117,7 +1124,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); if (r) return r; } @@ -1136,7 +1143,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); if (r) return r; } @@ -1149,7 +1156,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&job->sync, vm->last_update); + r = amdgpu_sync_fence(&p->sync, vm->last_update); if (r) return r; @@ -1181,7 +1188,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_job *leader = p->gang_leader; struct amdgpu_bo_list_entry *e; unsigned int i; int r; @@ -1193,17 +1199,14 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) sync_mode = amdgpu_bo_explicit_sync(bo) ? 
AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; - r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode, + r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode, &fpriv->vm); if (r) return r; } for (i = 0; i < p->gang_size; ++i) { - if (p->jobs[i] == leader) - continue; - - r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync); + r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]); if (r) return r; } @@ -1251,7 +1254,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, continue; fence = &p->jobs[i]->base.s_fence->scheduled; - r = amdgpu_sync_fence(&leader->sync, fence); + r = drm_sched_job_add_dependency(&leader->base, fence); if (r) goto error_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index f80adf9069ec..113f39510a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -76,6 +76,8 @@ struct amdgpu_cs_parser { unsigned num_post_deps; struct amdgpu_cs_post_dep *post_deps; + + struct amdgpu_sync sync; }; int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index de61a85c4b02..0f16d3c09309 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1969,7 +1969,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_ta_if_debugfs_init(adev); #if defined(CONFIG_DRM_AMD_DC) - if (amdgpu_device_has_dc_support(adev)) + if (adev->dc_enabled) dtn_debugfs_init(adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f1e9663b4051..cfa411c12072 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -37,6 +37,7 @@ #include <linux/pci-p2pdma.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include <linux/vgaarb.h> @@ -1568,7 +1569,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) * @pdev: pci dev pointer * @state: vga_switcheroo state * - * Callback for the switcheroo driver. Suspends or resumes the + * Callback for the switcheroo driver. Suspends or resumes * the asics before or after it is powered up using ACPI methods. 
*/ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, @@ -1915,6 +1916,16 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) } } +void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) { + adev->mode_info.num_crtc = 1; + adev->enable_virtual_display = true; + DRM_INFO("virtual_display:%d, num_crtc:%d\n", + adev->enable_virtual_display, adev->mode_info.num_crtc); + } +} + /** * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware * @@ -2397,7 +2408,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; /* right after GMC hw init, we create CSA */ - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp) { r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_CSA_SIZE); @@ -2462,6 +2473,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (WARN_ON(!hive)) { + r = -ENOENT; + goto init_failed; + } + if (!hive->reset_domain || !amdgpu_reset_get_reset_domain(hive->reset_domain)) { r = -ENOENT; @@ -3347,8 +3363,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) */ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev) || - adev->enable_virtual_display || + if (adev->enable_virtual_display || (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) return false; @@ -4171,21 +4186,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) r = amdgpu_device_ip_resume(adev); - /* no matter what r is, always need to properly release full GPU */ - if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_init_data_exchange(adev); - amdgpu_virt_release_full_gpu(adev, true); - } - if (r) { dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); - return r; + goto exit; } amdgpu_fence_driver_hw_init(adev); r = amdgpu_device_ip_late_init(adev); if (r) - return r; + goto exit; queue_delayed_work(system_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); @@ -4193,9 +4202,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) - return r; + goto exit; } +exit: + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_data_exchange(adev); + amdgpu_virt_release_full_gpu(adev, true); + } + + if (r) + return r; + /* Make sure IB tests flushed */ flush_delayed_work(&adev->delayed_init_work); @@ -4213,25 +4231,27 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) amdgpu_ras_resume(adev); - /* - * Most of the connector probing functions try to acquire runtime pm - * refs to ensure that the GPU is powered on when connector polling is - * performed. Since we're calling this from a runtime PM callback, - * trying to acquire rpm refs will cause us to deadlock. - * - * Since we're guaranteed to be holding the rpm lock, it's safe to - * temporarily disable the rpm helpers so this doesn't deadlock us. - */ + if (adev->mode_info.num_crtc) { + /* + * Most of the connector probing functions try to acquire runtime pm + * refs to ensure that the GPU is powered on when connector polling is + * performed. Since we're calling this from a runtime PM callback, + * trying to acquire rpm refs will cause us to deadlock. 
+ * + * Since we're guaranteed to be holding the rpm lock, it's safe to + * temporarily disable the rpm helpers so this doesn't deadlock us. + */ #ifdef CONFIG_PM - dev->dev->power.disable_depth++; + dev->dev->power.disable_depth++; #endif - if (!amdgpu_device_has_dc_support(adev)) - drm_helper_hpd_irq_event(dev); - else - drm_kms_helper_hotplug_event(dev); + if (!adev->dc_enabled) + drm_helper_hpd_irq_event(dev); + else + drm_kms_helper_hotplug_event(dev); #ifdef CONFIG_PM - dev->dev->power.disable_depth--; + dev->dev->power.disable_depth--; #endif + } adev->in_suspend = false; if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) @@ -4580,6 +4600,10 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) if (amdgpu_gpu_recovery == 0) goto disabled; + /* Skip soft reset check in fatal error mode */ + if (!amdgpu_ras_is_poison_mode_supported(adev)) + return true; + if (!amdgpu_device_ip_check_soft_reset(adev)) { dev_info(adev->dev,"Timeout, but no hardware hang detected.\n"); return false; @@ -5027,6 +5051,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) pm_runtime_enable(&(p->dev)); pm_runtime_resume(&(p->dev)); } + + pci_dev_put(p); } static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) @@ -5065,6 +5091,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) if (expires < ktime_get_mono_fast_ns()) { dev_warn(adev->dev, "failed to suspend display audio\n"); + pci_dev_put(p); /* TODO: abort the succeeding gpu reset? */ return -ETIMEDOUT; } @@ -5072,97 +5099,10 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) pm_runtime_disable(&(p->dev)); + pci_dev_put(p); return 0; } -static void amdgpu_device_recheck_guilty_jobs( - struct amdgpu_device *adev, struct list_head *device_list_handle, - struct amdgpu_reset_context *reset_context) -{ - int i, r = 0; - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - int ret = 0; - struct drm_sched_job *s_job; - - if (!ring || !ring->sched.thread) - continue; - - s_job = list_first_entry_or_null(&ring->sched.pending_list, - struct drm_sched_job, list); - if (s_job == NULL) - continue; - - /* clear job's guilty and depend the folowing step to decide the real one */ - drm_sched_reset_karma(s_job); - drm_sched_resubmit_jobs_ext(&ring->sched, 1); - - if (!s_job->s_fence->parent) { - DRM_WARN("Failed to get a HW fence for job!"); - continue; - } - - ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); - if (ret == 0) { /* timeout */ - DRM_ERROR("Found the real bad job! 
ring:%s, job_id:%llx\n", - ring->sched.name, s_job->id); - - - amdgpu_fence_driver_isr_toggle(adev, true); - - /* Clear this failed job from fence array */ - amdgpu_fence_driver_clear_job_fences(ring); - - amdgpu_fence_driver_isr_toggle(adev, false); - - /* Since the job won't signal and we go for - * another resubmit drop this parent pointer - */ - dma_fence_put(s_job->s_fence->parent); - s_job->s_fence->parent = NULL; - - /* set guilty */ - drm_sched_increase_karma(s_job); - amdgpu_reset_prepare_hwcontext(adev, reset_context); -retry: - /* do hw reset */ - if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_fini_data_exchange(adev); - r = amdgpu_device_reset_sriov(adev, false); - if (r) - adev->asic_reset_res = r; - } else { - clear_bit(AMDGPU_SKIP_HW_RESET, - &reset_context->flags); - r = amdgpu_do_asic_reset(device_list_handle, - reset_context); - if (r && r == -EAGAIN) - goto retry; - } - - /* - * add reset counter so that the following - * resubmitted job could flush vmid - */ - atomic_inc(&adev->gpu_reset_counter); - continue; - } - - /* got the hw fence, signal finished fence */ - atomic_dec(ring->sched.score); - dma_fence_get(&s_job->s_fence->finished); - dma_fence_signal(&s_job->s_fence->finished); - dma_fence_put(&s_job->s_fence->finished); - - /* remove node from list and free the job */ - spin_lock(&ring->sched.job_list_lock); - list_del_init(&s_job->list); - spin_unlock(&ring->sched.job_list_lock); - ring->sched.ops->free_job(s_job); - } -} - static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -5183,7 +5123,6 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) } - /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -5206,7 +5145,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int i, r = 0; bool need_emergency_restart = false; bool audio_suspended = false; - int tmp_vram_lost_counter; bool gpu_reset_for_dev_remove = false; gpu_reset_for_dev_remove = @@ -5352,7 +5290,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ amdgpu_device_stop_pending_resets(tmp_adev); } - tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter)); /* Actual ASIC resets if needed.*/ /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { @@ -5377,29 +5314,13 @@ skip_hw_reset: /* Post ASIC reset for all devs .*/ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - /* - * Sometimes a later bad compute job can block a good gfx job as gfx - * and compute ring share internal GC HW mutually. We add an additional - * guilty jobs recheck step to find the real guilty job, it synchronously - * submits and pends for the first job being signaled. If it gets timeout, - * we identify it as a real guilty job. 
- */ - if (amdgpu_gpu_recovery == 2 && - !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter))) - amdgpu_device_recheck_guilty_jobs( - tmp_adev, device_list_handle, reset_context); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; if (!ring || !ring->sched.thread) continue; - /* No point to resubmit jobs if we didn't HW reset*/ - if (!tmp_adev->asic_reset_res && !job_signaled) - drm_sched_resubmit_jobs(&ring->sched); - - drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); + drm_sched_start(&ring->sched, true); } if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) @@ -5441,6 +5362,8 @@ skip_sched_resume: amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_unset_mp1_state(tmp_adev); + + amdgpu_ras_set_error_query_ready(tmp_adev, true); } recover_end: @@ -5852,8 +5775,6 @@ void amdgpu_pci_resume(struct pci_dev *pdev) if (!ring || !ring->sched.thread) continue; - - drm_sched_resubmit_jobs(&ring->sched); drm_sched_start(&ring->sched, true); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 3993e6134914..1bbd56029a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -305,8 +305,13 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) goto out; } - if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { - dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); + if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin) || amdgpu_discovery == 2) { + /* ignore the discovery binary from vram if discovery=2 in kernel module parameter */ + if (amdgpu_discovery == 2) + dev_info(adev->dev,"force read ip discovery binary from file"); + else + dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); + /* retry read ip discovery binary from file */ r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); if (r) { @@ -1507,6 +1512,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: @@ -1551,6 +1557,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: @@ -1636,6 +1643,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): @@ -1686,6 +1694,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; default: @@ -1697,9 +1706,17 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) return 0; } +#if defined(CONFIG_DRM_AMD_DC) +static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev) +{ + amdgpu_device_set_sriov_virtual_display(adev); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); +} +#endif + static int 
amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) { - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) { + if (adev->enable_virtual_display) { amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); return 0; } @@ -1727,7 +1744,10 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): case IP_VERSION(3, 2, 1): - amdgpu_device_ip_block_add(adev, &dm_ip_block); + if (amdgpu_sriov_vf(adev)) + amdgpu_discovery_set_sriov_display(adev); + else + amdgpu_device_ip_block_add(adev, &dm_ip_block); break; default: dev_err(adev->dev, @@ -1740,7 +1760,10 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): case IP_VERSION(12, 1, 0): - amdgpu_device_ip_block_add(adev, &dm_ip_block); + if (amdgpu_sriov_vf(adev)) + amdgpu_discovery_set_sriov_display(adev); + else + amdgpu_device_ip_block_add(adev, &dm_ip_block); break; default: dev_err(adev->dev, @@ -1785,6 +1808,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: @@ -1948,6 +1972,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2161,6 +2186,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(10, 3, 1): adev->family = AMDGPU_FAMILY_VGH; + adev->apu_flags |= AMD_APU_IS_VANGOGH; break; case IP_VERSION(10, 3, 3): adev->family = AMDGPU_FAMILY_YC; @@ -2177,6 +2203,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->family = AMDGPU_FAMILY_GC_11_0_0; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->family = AMDGPU_FAMILY_GC_11_0_1; break; default: @@ -2194,6 +2221,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 7): case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->flags |= AMD_IS_APU; break; default: @@ -2250,6 +2278,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg; break; case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 7, 1): adev->nbio.funcs = &nbio_v7_7_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 1a06b8d724f3..b22471b3bd63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -39,11 +39,46 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> +/** + * amdgpu_display_hotplug_work_func - work handler for display hotplug event + * + * @work: work struct pointer + * + * This is the hotplug event work handler (all ASICs). + * The work gets scheduled from the IRQ handler if there + * was a hotplug interrupt. It walks through the connector table + * and calls hotplug handler for each connector. 
After this, it sends + * a DRM hotplug event to alert userspace. + * + * This design approach is required in order to defer hotplug event handling + * from the IRQ handler to a work handler because hotplug handler has to use + * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may + * sleep). + */ +void amdgpu_display_hotplug_work_func(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + hotplug_work); + struct drm_device *dev = adev_to_drm(adev); + struct drm_mode_config *mode_config = &dev->mode_config; + struct drm_connector *connector; + struct drm_connector_list_iter iter; + + mutex_lock(&mode_config->mutex); + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) + amdgpu_connector_hotplug(connector); + drm_connector_list_iter_end(&iter); + mutex_unlock(&mode_config->mutex); + /* Just fire off a uevent and let userspace tell us what to do */ + drm_helper_hpd_irq_event(dev); +} + static int amdgpu_display_framebuffer_init(struct drm_device *dev, struct amdgpu_framebuffer *rfb, const struct drm_mode_fb_cmd2 *mode_cmd, @@ -514,7 +549,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, */ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && - amdgpu_device_asic_has_dc_support(adev->asic_type) && + adev->dc_enabled && adev->mode_info.gpu_vm_support) domain |= AMDGPU_GEM_DOMAIN_GTT; #endif @@ -1214,7 +1249,6 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, const struct drm_mode_config_funcs amdgpu_mode_funcs = { .fb_create = amdgpu_display_user_framebuffer_create, - .output_poll_changed = drm_fb_helper_output_poll_changed, }; static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = @@ -1281,7 +1315,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) "dither", amdgpu_dither_enum_list, sz); - if (amdgpu_device_has_dc_support(adev)) { + if (adev->dc_enabled) { adev->mode_info.abm_level_property = drm_property_create_range(adev_to_drm(adev), 0, "abm level", 0, 4); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 560352f7c317..9d19940f73c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -35,6 +35,7 @@ #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) +void amdgpu_display_hotplug_work_func(struct work_struct *work); void amdgpu_display_update_priority(struct amdgpu_device *adev); uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, uint64_t bo_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 7bd8e33b14be..271e30e34d93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -328,7 +328,9 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) if (dma_buf->ops == &amdgpu_dmabuf_ops) { struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv); - flags |= other->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC; + flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_UNCACHED); } ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE, diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bf2d50c8c92a..7383272c6a3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -25,6 +25,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_aperture.h> #include <drm/drm_drv.h> +#include <drm/drm_fbdev_generic.h> #include <drm/drm_gem.h> #include <drm/drm_vblank.h> #include <drm/drm_managed.h> @@ -230,17 +231,18 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); /** * DOC: gartsize (uint) - * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic). + * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing. + * The default is -1 (The size depends on asic). */ -MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)"); +MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)"); module_param_named(gartsize, amdgpu_gart_size, uint, 0600); /** * DOC: gttsize (int) - * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM, - * otherwise 3/4 RAM size). + * Restrict the size of GTT domain (for userspace use) in MiB for testing. + * The default is -1 (Use 1/2 RAM, minimum value is 3GB). */ -MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); +MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)"); module_param_named(gttsize, amdgpu_gtt_size, int, 0600); /** @@ -533,7 +535,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); * DOC: gpu_recovery (int) * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV). */ -MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)"); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); /** @@ -1924,9 +1926,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, - /* Van Gogh */ - {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, - /* Yellow Carp */ {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, @@ -2471,7 +2470,7 @@ static int amdgpu_runtime_idle_check_display(struct device *dev) if (ret) return ret; - if (amdgpu_device_has_dc_support(adev)) { + if (adev->dc_enabled) { struct drm_crtc *crtc; drm_for_each_crtc(crtc, drm_dev) { @@ -2572,6 +2571,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) amdgpu_device_baco_enter(drm_dev); } + dev_dbg(&pdev->dev, "asic/device is runtime suspended\n"); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c index 4d9eb0137f8c..7d2a908438e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -79,13 +79,15 @@ * That is, for an I2C EEPROM driver everything is controlled by * the "eeprom_addr". * + * See also top of amdgpu_ras_eeprom.c. + * * P.S. 
If you need to write, lock and read the Identification Page, * (M24M02-DR device only, which we do not use), change the "7" to * "0xF" in the macro below, and let the client set bit 20 to 1 in * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to * 1 to lock it permanently. */ -#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 7)) +#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF)) static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, u8 *eeprom_buf, u16 buf_size, bool read) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d0d99ed607dd..00444203220d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -55,6 +55,7 @@ struct amdgpu_fence { /* RB, DMA, etc. */ struct amdgpu_ring *ring; + ktime_t start_timestamp; }; static struct kmem_cache *amdgpu_fence_slab; @@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd } } + to_amdgpu_fence(fence)->start_timestamp = ktime_get(); + /* This function can't be called concurrently anyway, otherwise * emitting the fence would mess up the hardware ring buffer. */ @@ -407,6 +410,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) } /** + * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now + * @ring: ring the fence is associated with + * + * Find the earliest fence unsignaled until now, calculate the time delta + * between the time fence emitted and now. + */ +u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring) +{ + struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct dma_fence *fence; + uint32_t last_seq, sync_seq; + + last_seq = atomic_read(&ring->fence_drv.last_seq); + sync_seq = READ_ONCE(ring->fence_drv.sync_seq); + if (last_seq == sync_seq) + return 0; + + ++last_seq; + last_seq &= drv->num_fences_mask; + fence = drv->fences[last_seq]; + if (!fence) + return 0; + + return ktime_us_delta(ktime_get(), + to_amdgpu_fence(fence)->start_timestamp); +} + +/** + * amdgpu_fence_update_start_timestamp - update the timestamp of the fence + * @ring: ring the fence is associated with + * @seq: the fence seq number to update. + * @timestamp: the start timestamp to update. + * + * The function called at the time the fence and related ib is about to + * resubmit to gpu in MCBP scenario. Thus we do not consider race condition + * with amdgpu_fence_process to modify the same fence. + */ +void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp) +{ + struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct dma_fence *fence; + + seq &= drv->num_fences_mask; + fence = drv->fences[seq]; + if (!fence) + return; + + to_amdgpu_fence(fence)->start_timestamp = timestamp; +} + +/** * amdgpu_fence_driver_start_ring - make the fence driver * ready for use on the requested ring. 
* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index e325150879df..2c38ac7bc643 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -29,9 +29,10 @@ #include "amdgpu_fru_eeprom.h" #include "amdgpu_eeprom.h" -#define FRU_EEPROM_MADDR 0x60000 +#define FRU_EEPROM_MADDR_6 0x60000 +#define FRU_EEPROM_MADDR_8 0x80000 -static bool is_fru_eeprom_supported(struct amdgpu_device *adev) +static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) { /* Only server cards have the FRU EEPROM * TODO: See if we can figure this out dynamically instead of @@ -45,6 +46,11 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return false; + /* The default I2C EEPROM address of the FRU. + */ + if (fru_addr) + *fru_addr = FRU_EEPROM_MADDR_8; + /* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification, * we can use just the "DXXX" portion. If there were more models, we * could convert the 3 characters to a hex integer and use a switch @@ -57,21 +63,29 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) if (strnstr(atom_ctx->vbios_version, "D161", sizeof(atom_ctx->vbios_version)) || strnstr(atom_ctx->vbios_version, "D163", - sizeof(atom_ctx->vbios_version))) + sizeof(atom_ctx->vbios_version))) { + *fru_addr = FRU_EEPROM_MADDR_6; return true; - else + } else { return false; + } case CHIP_ALDEBARAN: - /* All Aldebaran SKUs have the FRU */ + /* All Aldebaran SKUs have an FRU */ + if (!strnstr(atom_ctx->vbios_version, "D673", + sizeof(atom_ctx->vbios_version))) + if (fru_addr) + *fru_addr = FRU_EEPROM_MADDR_6; return true; case CHIP_SIENNA_CICHLID: if (strnstr(atom_ctx->vbios_version, "D603", - sizeof(atom_ctx->vbios_version))) { + sizeof(atom_ctx->vbios_version))) { if (strnstr(atom_ctx->vbios_version, "D603GLXE", - sizeof(atom_ctx->vbios_version))) + sizeof(atom_ctx->vbios_version))) { return false; - else + } else { + *fru_addr = FRU_EEPROM_MADDR_6; return true; + } } else { return false; } @@ -80,41 +94,14 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) } } -static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, - unsigned char *buf, size_t buf_size) -{ - int ret; - u8 size; - - ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr, buf, 1); - if (ret < 1) { - DRM_WARN("FRU: Failed to get size field"); - return ret; - } - - /* The size returned by the i2c requires subtraction of 0xC0 since the - * size apparently always reports as 0xC0+actual size. - */ - size = buf[0] & 0x3F; - size = min_t(size_t, size, buf_size); - - ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr + 1, - buf, size); - if (ret < 1) { - DRM_WARN("FRU: Failed to get data field"); - return ret; - } - - return size; -} - int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { - unsigned char buf[AMDGPU_PRODUCT_NAME_LEN]; - u32 addrptr; + unsigned char buf[8], *pia; + u32 addr, fru_addr; int size, len; + u8 csum; - if (!is_fru_eeprom_supported(adev)) + if (!is_fru_eeprom_supported(adev, &fru_addr)) return 0; /* If algo exists, it means that the i2c_adapter's initialized */ @@ -123,88 +110,102 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) return -ENODEV; } - /* There's a lot of repetition here. This is due to the FRU having - * variable-length fields. 
To get the information, we have to find the - * size of each field, and then keep reading along and reading along - * until we get all of the data that we want. We use addrptr to track - * the address as we go - */ - - /* The first fields are all of size 1-byte, from 0-7 are offsets that - * contain information that isn't useful to us. - * Bytes 8-a are all 1-byte and refer to the size of the entire struct, - * and the language field, so just start from 0xb, manufacturer size - */ - addrptr = FRU_EEPROM_MADDR + 0xb; - size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); - if (size < 1) { - DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size); - return -EINVAL; + /* Read the IPMI Common header */ + len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf, + sizeof(buf)); + if (len != 8) { + DRM_ERROR("Couldn't read the IPMI Common Header: %d", len); + return len < 0 ? len : -EIO; } - /* Increment the addrptr by the size of the field, and 1 due to the - * size field being 1 byte. This pattern continues below. - */ - addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); - if (size < 1) { - DRM_ERROR("Failed to read FRU product name, ret:%d", size); - return -EINVAL; + if (buf[0] != 1) { + DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]); + return -EIO; } - len = size; - if (len >= AMDGPU_PRODUCT_NAME_LEN) { - DRM_WARN("FRU Product Name is larger than %d characters. This is likely a mistake", - AMDGPU_PRODUCT_NAME_LEN); - len = AMDGPU_PRODUCT_NAME_LEN - 1; - } - memcpy(adev->product_name, buf, len); - adev->product_name[len] = '\0'; - - addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); - if (size < 1) { - DRM_ERROR("Failed to read FRU product number, ret:%d", size); - return -EINVAL; + for (csum = 0; len > 0; len--) + csum += buf[len - 1]; + if (csum) { + DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum); + return -EIO; } - len = size; - /* Product number should only be 16 characters. Any more, - * and something could be wrong. Cap it at 16 to be safe - */ - if (len >= sizeof(adev->product_number)) { - DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake"); - len = sizeof(adev->product_number) - 1; - } - memcpy(adev->product_number, buf, len); - adev->product_number[len] = '\0'; + /* Get the offset to the Product Info Area (PIA). */ + addr = buf[4] * 8; + if (!addr) + return 0; - addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); + /* Get the absolute address to the PIA. */ + addr += fru_addr; - if (size < 1) { - DRM_ERROR("Failed to read FRU product version, ret:%d", size); - return -EINVAL; + /* Read the header of the PIA. */ + len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3); + if (len != 3) { + DRM_ERROR("Couldn't read the Product Info Area header: %d", len); + return len < 0 ? len : -EIO; } - addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); + if (buf[0] != 1) { + DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]); + return -EIO; + } - if (size < 1) { - DRM_ERROR("Failed to read FRU serial number, ret:%d", size); - return -EINVAL; + size = buf[1] * 8; + pia = kzalloc(size, GFP_KERNEL); + if (!pia) + return -ENOMEM; + + /* Read the whole PIA. */ + len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size); + if (len != size) { + kfree(pia); + DRM_ERROR("Couldn't read the Product Info Area: %d", len); + return len < 0 ? 
len : -EIO; } - len = size; - /* Serial number should only be 16 characters. Any more, - * and something could be wrong. Cap it at 16 to be safe - */ - if (len >= sizeof(adev->serial)) { - DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake"); - len = sizeof(adev->serial) - 1; + for (csum = 0; size > 0; size--) + csum += pia[size - 1]; + if (csum) { + DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); + return -EIO; } - memcpy(adev->serial, buf, len); - adev->serial[len] = '\0'; + /* Now extract useful information from the PIA. + * + * Skip the Manufacturer Name at [3] and go directly to + * the Product Name field. + */ + addr = 3 + 1 + (pia[3] & 0x3F); + if (addr + 1 >= len) + goto Out; + memcpy(adev->product_name, pia + addr + 1, + min_t(size_t, + sizeof(adev->product_name), + pia[addr] & 0x3F)); + adev->product_name[sizeof(adev->product_name) - 1] = '\0'; + + /* Go to the Product Part/Model Number field. */ + addr += 1 + (pia[addr] & 0x3F); + if (addr + 1 >= len) + goto Out; + memcpy(adev->product_number, pia + addr + 1, + min_t(size_t, + sizeof(adev->product_number), + pia[addr] & 0x3F)); + adev->product_number[sizeof(adev->product_number) - 1] = '\0'; + + /* Go to the Product Version field. */ + addr += 1 + (pia[addr] & 0x3F); + + /* Go to the Product Serial Number field. */ + addr += 1 + (pia[addr] & 0x3F); + if (addr + 1 >= len) + goto Out; + memcpy(adev->serial, pia + addr + 1, min_t(size_t, + sizeof(adev->serial), + pia[addr] & 0x3F)); + adev->serial[sizeof(adev->serial) - 1] = '\0'; +Out: + kfree(pia); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 91571b1324f2..62e98f1ad770 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -38,6 +38,7 @@ #include "amdgpu.h" #include "amdgpu_display.h" #include "amdgpu_dma_buf.h" +#include "amdgpu_hmm.h" #include "amdgpu_xgmi.h" static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; @@ -87,7 +88,7 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj) struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); if (robj) { - amdgpu_mn_unregister(robj); + amdgpu_hmm_unregister(robj); amdgpu_bo_unref(&robj); } } @@ -112,7 +113,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bp.resv = resv; bp.preferred_domain = initial_domain; bp.flags = flags; - bp.domain = initial_domain; + bp.domain = initial_domain | AMDGPU_GEM_DOMAIN_CPU; bp.bo_ptr_size = sizeof(struct amdgpu_bo); r = amdgpu_bo_create_user(adev, &bp, &ubo); @@ -331,20 +332,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, } initial_domain = (u32)(0xffffffff & args->in.domains); -retry: r = amdgpu_gem_object_create(adev, size, args->in.alignment, - initial_domain, - flags, ttm_bo_type_device, resv, &gobj); + initial_domain, flags, ttm_bo_type_device, + resv, &gobj); if (r && r != -ERESTARTSYS) { - if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { - flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - goto retry; - } - - if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { - initial_domain |= AMDGPU_GEM_DOMAIN_GTT; - goto retry; - } DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", size, initial_domain, args->in.alignment, r); } @@ -414,7 +405,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; - r = amdgpu_mn_register(bo, args->addr); + r = amdgpu_hmm_register(bo, args->addr); if (r) goto release_object; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9546adc8a76f..23692e5d4d13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -583,10 +583,14 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { /* If going to s2idle, no need to wait */ - if (adev->in_s0ix) - delay = GFX_OFF_NO_DELAY; - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 832b3807f1d6..b3df4787877e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -33,6 +33,7 @@ #include "amdgpu_imu.h" #include "soc15.h" #include "amdgpu_ras.h" +#include "amdgpu_ring_mux.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -352,6 +353,9 @@ struct amdgpu_gfx { struct amdgpu_gfx_ras *ras; bool is_poweron; + + struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; + struct amdgpu_ring_mux muxer; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 28612e56d0d4..02a4c93673ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -548,6 +548,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 1f3302aebeff..44367f03316f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -144,7 +144,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, node->base.start = node->mm_nodes[0].start; } else { node->mm_nodes[0].start = 0; - node->mm_nodes[0].size = node->base.num_pages; + node->mm_nodes[0].size = PFN_UP(node->base.size); node->base.start = AMDGPU_BO_INVALID_OFFSET; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index b86c0b8252a5..65715cb395d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -49,9 +49,12 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_hmm.h" + +#define MAX_WALK_BYTE (2UL << 30) /** - * amdgpu_mn_invalidate_gfx - callback to notify about mm change + * amdgpu_hmm_invalidate_gfx - callback to notify about mm change * * @mni: the range (mm) is about to update * @range: details on the invalidation @@ -60,9 +63,9 @@ * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. 
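The amdgpu_fru_eeprom.c rework just above stops hard-coding byte offsets and instead reads the IPMI Common Header and Product Info Area, checking each with a zero-sum checksum and treating the low six bits of every type/length byte as the field length. A compact sketch of those two building blocks; the helper names are illustrative and not part of the driver:

#include <linux/types.h>

/* All bytes of an IPMI header/area, including the trailing checksum
 * byte, must sum to zero modulo 256. */
static bool example_ipmi_area_ok(const u8 *area, size_t len)
{
	u8 csum = 0;

	while (len--)
		csum += area[len];

	return csum == 0;
}

/* Type/length byte: bits 7:6 encode the text type, bits 5:0 the length
 * of the field that follows. */
static size_t example_ipmi_field_len(u8 type_length)
{
	return type_length & 0x3F;
}

This is why the Product Name/Number/Serial copies above mask pia[addr] with 0x3F before using it as a length.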
*/ -static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) +static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -83,12 +86,12 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, return true; } -static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = { - .invalidate = amdgpu_mn_invalidate_gfx, +static const struct mmu_interval_notifier_ops amdgpu_hmm_gfx_ops = { + .invalidate = amdgpu_hmm_invalidate_gfx, }; /** - * amdgpu_mn_invalidate_hsa - callback to notify about mm change + * amdgpu_hmm_invalidate_hsa - callback to notify about mm change * * @mni: the range (mm) is about to update * @range: details on the invalidation @@ -97,9 +100,9 @@ static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = { * We temporarily evict the BO attached to this range. This necessitates * evicting all user-mode queues of the process. */ -static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) +static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -117,12 +120,12 @@ static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, return true; } -static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = { - .invalidate = amdgpu_mn_invalidate_hsa, +static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = { + .invalidate = amdgpu_hmm_invalidate_hsa, }; /** - * amdgpu_mn_register - register a BO for notifier updates + * amdgpu_hmm_register - register a BO for notifier updates * * @bo: amdgpu buffer object * @addr: userptr addr we should monitor @@ -130,25 +133,25 @@ static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = { * Registers a mmu_notifier for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ -int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) +int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) { if (bo->kfd_bo) return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, amdgpu_bo_size(bo), - &amdgpu_mn_hsa_ops); + &amdgpu_hmm_hsa_ops); return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, amdgpu_bo_size(bo), - &amdgpu_mn_gfx_ops); + &amdgpu_hmm_gfx_ops); } /** - * amdgpu_mn_unregister - unregister a BO for notifier updates + * amdgpu_hmm_unregister - unregister a BO for notifier updates * * @bo: amdgpu buffer object * * Remove any registration of mmu notifier updates from the buffer object. 
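The rename to amdgpu_hmm_register() keeps the existing split between KFD and gfx userptr BOs: each picks its own mmu_interval_notifier_ops. A condensed sketch of that registration path, using the upstream mmu_interval_notifier_insert() signature and the ops tables defined in the hunk above:

#include <linux/mmu_notifier.h>

/* Registers an interval notifier covering the userptr range of @bo.
 * current->mm is used because userptr BOs are created by the owning
 * process itself. */
static int example_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
{
	const struct mmu_interval_notifier_ops *ops =
		bo->kfd_bo ? &amdgpu_hmm_hsa_ops : &amdgpu_hmm_gfx_ops;

	return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
					    amdgpu_bo_size(bo), ops);
}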
*/ -void amdgpu_mn_unregister(struct amdgpu_bo *bo) +void amdgpu_hmm_unregister(struct amdgpu_bo *bo) { if (!bo->notifier.mm) return; @@ -157,12 +160,12 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) } int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, - struct mm_struct *mm, struct page **pages, - uint64_t start, uint64_t npages, - struct hmm_range **phmm_range, bool readonly, - bool mmap_locked, void *owner) + uint64_t start, uint64_t npages, bool readonly, + void *owner, struct page **pages, + struct hmm_range **phmm_range) { struct hmm_range *hmm_range; + unsigned long end; unsigned long timeout; unsigned long i; unsigned long *pfns; @@ -184,32 +187,42 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, hmm_range->default_flags |= HMM_PFN_REQ_WRITE; hmm_range->hmm_pfns = pfns; hmm_range->start = start; - hmm_range->end = start + npages * PAGE_SIZE; + end = start + npages * PAGE_SIZE; hmm_range->dev_private_owner = owner; - /* Assuming 512MB takes maxmium 1 second to fault page address */ - timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; - timeout = jiffies + msecs_to_jiffies(timeout); + do { + hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end); + + pr_debug("hmm range: start = 0x%lx, end = 0x%lx", + hmm_range->start, hmm_range->end); + + /* Assuming 512MB takes maxmium 1 second to fault page address */ + timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL); + timeout *= HMM_RANGE_DEFAULT_TIMEOUT; + timeout = jiffies + msecs_to_jiffies(timeout); retry: - hmm_range->notifier_seq = mmu_interval_read_begin(notifier); - - if (likely(!mmap_locked)) - mmap_read_lock(mm); - - r = hmm_range_fault(hmm_range); - - if (likely(!mmap_locked)) - mmap_read_unlock(mm); - if (unlikely(r)) { - /* - * FIXME: This timeout should encompass the retry from - * mmu_interval_read_retry() as well. - */ - if (r == -EBUSY && !time_after(jiffies, timeout)) - goto retry; - goto out_free_pfns; - } + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); + r = hmm_range_fault(hmm_range); + if (unlikely(r)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. 
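amdgpu_hmm_range_get_pages() now walks large userptr ranges in MAX_WALK_BYTE (2 GiB) pieces so a single hmm_range_fault() call stays bounded. A simplified sketch of that loop, assuming the caller holds mmap_read_lock() and has allocated hmm_pfns for the whole range; the per-chunk timeout and -EBUSY retry of the real code are trimmed to keep the shape visible:

static int example_chunked_fault(struct mmu_interval_notifier *notifier,
				 struct hmm_range *range,
				 u64 start, u64 npages)
{
	u64 end = start + npages * PAGE_SIZE;
	int r;

	range->start = start;
	do {
		range->end = min_t(u64, range->start + MAX_WALK_BYTE, end);

		range->notifier_seq = mmu_interval_read_begin(notifier);
		r = hmm_range_fault(range);	/* -EBUSY is retried in the real code */
		if (r)
			return r;

		if (range->end == end)
			break;

		/* Advance to the next 2 GiB window. */
		range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
		range->start = range->end;
		cond_resched();			/* the patch calls schedule() */
	} while (range->start < end);

	return 0;
}

After the loop the real code restores range->start and range->hmm_pfns so the caller still sees the full range.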
+ */ + if (r == -EBUSY && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } + + if (hmm_range->end == end) + break; + hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; + hmm_range->start = hmm_range->end; + schedule(); + } while (hmm_range->end < end); + + hmm_range->start = start; + hmm_range->hmm_pfns = pfns; /* * Due to default_flags, all pages are HMM_PFN_VALID or diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index 14a3c1864085..13ed94d3b01b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -31,23 +31,22 @@ #include <linux/interval_tree.h> int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, - struct mm_struct *mm, struct page **pages, - uint64_t start, uint64_t npages, - struct hmm_range **phmm_range, bool readonly, - bool mmap_locked, void *owner); + uint64_t start, uint64_t npages, bool readonly, + void *owner, struct page **pages, + struct hmm_range **phmm_range); int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); #if defined(CONFIG_HMM_MIRROR) -int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); -void amdgpu_mn_unregister(struct amdgpu_bo *bo); +int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr); +void amdgpu_hmm_unregister(struct amdgpu_bo *bo); #else -static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) +static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) { DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " "add CONFIG_ZONE_DEVICE=y in config file to fix this\n"); return -ENODEV; } -static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} +static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {} #endif #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 258cffe3c06a..bcccc348dbe2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -182,7 +182,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && - ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || + ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } } + amdgpu_ring_ib_begin(ring); if (job && ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); @@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) ring->funcs->emit_wave_limit(ring, false); + amdgpu_ring_ib_end(ring); amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 03d115d2b5ed..2a9a2593dc18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -170,26 +170,27 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies * @idle: resulting idle VMID + * @fence: fence to wait for if no id could be grabbed * * Try to find an idle VMID, if none is idle add a fence to wait to the sync * object. Returns -ENOMEM when we are out of memory. 
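A recurring change in this series is that the VMID code no longer fills an amdgpu_sync object; amdgpu_vmid_grab() returns at most one dma_fence through an out parameter, and the scheduler's prepare_job callback simply hands that fence back to be waited on. A sketch of the consumer side, mirroring the amdgpu_job.c hunk further below with the gang-submit handling omitted:

static struct dma_fence *example_prepare_job(struct amdgpu_ring *ring,
					     struct amdgpu_job *job)
{
	struct dma_fence *fence = NULL;
	int r;

	while (!fence && job->vm && !job->vmid) {
		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
		if (r)
			DRM_ERROR("Error getting VM ID (%d)\n", r);
	}

	/* NULL means there is nothing left to wait for; the job can run. */
	return fence;
}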
*/ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct amdgpu_vmid **idle) + struct amdgpu_vmid **idle, + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct dma_fence **fences; unsigned i; - int r; - if (!dma_fence_is_signaled(ring->vmid_wait)) - return amdgpu_sync_fence(sync, ring->vmid_wait); + if (!dma_fence_is_signaled(ring->vmid_wait)) { + *fence = dma_fence_get(ring->vmid_wait); + return 0; + } fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); if (!fences) @@ -228,10 +229,10 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_sync_fence(sync, &array->base); + *fence = dma_fence_get(&array->base); dma_fence_put(ring->vmid_wait); ring->vmid_wait = &array->base; - return r; + return 0; } kfree(fences); @@ -243,19 +244,17 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID * @id: resulting VMID + * @fence: fence to wait for if no id could be grabbed * * Try to assign a reserved VMID. */ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, struct amdgpu_job *job, - struct amdgpu_vmid **id) + struct amdgpu_vmid **id, + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -282,7 +281,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - return amdgpu_sync_fence(sync, tmp); + *fence = dma_fence_get(tmp); + return 0; } needs_flush = true; } @@ -290,7 +290,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence); + r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished); if (r) return r; @@ -304,19 +304,17 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID * @id: resulting VMID + * @fence: fence to wait for if no id could be grabbed * * Try to reuse a VMID for this submission. */ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, struct amdgpu_job *job, - struct amdgpu_vmid **id) + struct amdgpu_vmid **id, + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -352,7 +350,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Good, we can use this VMID. Remember this submission as * user of the VMID. 
*/ - r = amdgpu_sync_fence(&(*id)->active, fence); + r = amdgpu_sync_fence(&(*id)->active, + &job->base.s_fence->finished); if (r) return r; @@ -370,15 +369,13 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID + * @fence: fence to wait for if no id could be grabbed * * Allocate an id for the vm, adding fences to the sync obj as necessary. */ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job) + struct amdgpu_job *job, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -388,16 +385,16 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int r = 0; mutex_lock(&id_mgr->lock); - r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle); + r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence); if (r || !idle) goto error; if (vm->reserved_vmid[vmhub]) { - r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id); + r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; } else { - r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id); + r = amdgpu_vmid_grab_used(vm, ring, job, &id, fence); if (r) goto error; @@ -406,7 +403,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id = idle; /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(&id->active, fence); + r = amdgpu_sync_fence(&id->active, + &job->base.s_fence->finished); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 06c8a0034fa5..57efe61dceed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -84,8 +84,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub); int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job); + struct amdgpu_job *job, struct dma_fence **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, unsigned vmid); void amdgpu_vmid_reset_all(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 89011bae7588..a6aef488a822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -101,41 +101,6 @@ const char *soc15_ih_clientid_name[] = { }; /** - * amdgpu_hotplug_work_func - work handler for display hotplug event - * - * @work: work struct pointer - * - * This is the hotplug event work handler (all ASICs). - * The work gets scheduled from the IRQ handler if there - * was a hotplug interrupt. It walks through the connector table - * and calls hotplug handler for each connector. After this, it sends - * a DRM hotplug event to alert userspace. - * - * This design approach is required in order to defer hotplug event handling - * from the IRQ handler to a work handler because hotplug handler has to use - * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may - * sleep). 
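The deleted comment above spells out why hotplug handling is bounced from the interrupt handler into a work item: the connector code takes mutexes, which may sleep and therefore cannot be locked in hard-IRQ context. The pattern itself is unchanged and simply moves with the non-DC display code; a generic sketch with illustrative names:

#include <linux/interrupt.h>
#include <linux/workqueue.h>

struct example_dev {
	struct work_struct hotplug_work;
};

static void example_hotplug_work(struct work_struct *work)
{
	struct example_dev *edev =
		container_of(work, struct example_dev, hotplug_work);

	/* Process context: taking mutexes and sleeping is fine here. */
	(void)edev;
}

static irqreturn_t example_irq_handler(int irq, void *arg)
{
	struct example_dev *edev = arg;

	/* Hard-IRQ context: just schedule the work and return. */
	schedule_work(&edev->hotplug_work);

	return IRQ_HANDLED;
}

/* During init: INIT_WORK(&edev->hotplug_work, example_hotplug_work); */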
- */ -static void amdgpu_hotplug_work_func(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - hotplug_work); - struct drm_device *dev = adev_to_drm(adev); - struct drm_mode_config *mode_config = &dev->mode_config; - struct drm_connector *connector; - struct drm_connector_list_iter iter; - - mutex_lock(&mode_config->mutex); - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) - amdgpu_connector_hotplug(connector); - drm_connector_list_iter_end(&iter); - mutex_unlock(&mode_config->mutex); - /* Just fire off a uevent and let userspace tell us what to do */ - drm_helper_hpd_irq_event(dev); -} - -/** * amdgpu_irq_disable_all - disable *all* interrupts * * @adev: amdgpu device pointer @@ -317,21 +282,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev) } } - if (!amdgpu_device_has_dc_support(adev)) { - if (!adev->enable_virtual_display) - /* Disable vblank IRQs aggressively for power-saving */ - /* XXX: can this be enabled for DC? */ - adev_to_drm(adev)->vblank_disable_immediate = true; - - r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); - if (r) - return r; - - /* Pre-DCE11 */ - INIT_WORK(&adev->hotplug_work, - amdgpu_hotplug_work_func); - } - INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1); INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft); @@ -345,11 +295,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev) /* PCI devices require shared interrupts. */ r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name, adev_to_drm(adev)); - if (r) { - if (!amdgpu_device_has_dc_support(adev)) - flush_work(&adev->hotplug_work); + if (r) return r; - } adev->irq.installed = true; adev->irq.irq = irq; adev_to_drm(adev)->max_vblank_count = 0x00ffffff; @@ -366,9 +313,6 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev) adev->irq.installed = false; if (adev->irq.msi_enabled) pci_free_irq_vectors(adev->pdev); - - if (!amdgpu_device_has_dc_support(adev)) - flush_work(&adev->hotplug_work); } amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index adac650cf544..9e549923622b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -88,8 +88,9 @@ exit: return DRM_GPU_SCHED_STAT_NOMINAL; } -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job, struct amdgpu_vm *vm) +int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct drm_sched_entity *entity, void *owner, + unsigned int num_ibs, struct amdgpu_job **job) { if (num_ibs == 0) return -EINVAL; @@ -105,28 +106,34 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; - amdgpu_sync_create(&(*job)->sync); - amdgpu_sync_create(&(*job)->sched_sync); + amdgpu_sync_create(&(*job)->explicit_sync); (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET; - return 0; + if (!entity) + return 0; + + return drm_sched_job_init(&(*job)->base, entity, owner); } -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, - enum amdgpu_ib_pool_type pool_type, - struct amdgpu_job **job) +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, + struct drm_sched_entity *entity, void *owner, + size_t size, enum amdgpu_ib_pool_type 
pool_type, + struct amdgpu_job **job) { int r; - r = amdgpu_job_alloc(adev, 1, job, NULL); + r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job); if (r) return r; (*job)->num_ibs = 1; r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]); - if (r) + if (r) { + if (entity) + drm_sched_job_cleanup(&(*job)->base); kfree(*job); + } return r; } @@ -166,8 +173,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) drm_sched_job_cleanup(s_job); - amdgpu_sync_free(&job->sync); - amdgpu_sync_free(&job->sched_sync); + amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ if (!job->hw_fence.ops) @@ -194,9 +200,11 @@ void amdgpu_job_set_gang_leader(struct amdgpu_job *job, void amdgpu_job_free(struct amdgpu_job *job) { + if (job->base.entity) + drm_sched_job_cleanup(&job->base); + amdgpu_job_free_resources(job); - amdgpu_sync_free(&job->sync); - amdgpu_sync_free(&job->sched_sync); + amdgpu_sync_free(&job->explicit_sync); if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); @@ -206,25 +214,16 @@ void amdgpu_job_free(struct amdgpu_job *job) dma_fence_put(&job->hw_fence); } -int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, - void *owner, struct dma_fence **f) +struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) { - int r; - - if (!f) - return -EINVAL; - - r = drm_sched_job_init(&job->base, entity, owner); - if (r) - return r; + struct dma_fence *f; drm_sched_job_arm(&job->base); - - *f = dma_fence_get(&job->base.s_fence->finished); + f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base); - return 0; + return f; } int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, @@ -242,33 +241,22 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, return 0; } -static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, - struct drm_sched_entity *s_entity) +static struct dma_fence * +amdgpu_job_prepare_job(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) { struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched); struct amdgpu_job *job = to_amdgpu_job(sched_job); - struct amdgpu_vm *vm = job->vm; - struct dma_fence *fence; + struct dma_fence *fence = NULL; int r; - fence = amdgpu_sync_get_fence(&job->sync); - if (fence && drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(&job->sched_sync, fence); - if (r) - DRM_ERROR("Error adding fence (%d)\n", r); - } - if (!fence && job->gang_submit) fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); - while (fence == NULL && vm && !job->vmid) { - r = amdgpu_vmid_grab(vm, ring, &job->sync, - &job->base.s_fence->finished, - job); + while (!fence && job->vm && !job->vmid) { + r = amdgpu_vmid_grab(job->vm, ring, job, &fence); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - - fence = amdgpu_sync_get_fence(&job->sync); } return fence; @@ -285,8 +273,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; - BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); - trace_amdgpu_sched_run_job(job); /* Skip job if VRAM is lost and never resubmit gangs */ @@ -345,7 +331,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) } const struct drm_sched_backend_ops amdgpu_sched_ops = { - .dependency = amdgpu_job_dependency, + .prepare_job = 
amdgpu_job_prepare_job, .run_job = amdgpu_job_run, .timedout_job = amdgpu_job_timedout, .free_job = amdgpu_job_free_cb diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ab7b150e5d50..a372802ea4e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -47,8 +47,7 @@ enum amdgpu_ib_pool_type; struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; - struct amdgpu_sync sync; - struct amdgpu_sync sched_sync; + struct amdgpu_sync explicit_sync; struct dma_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; @@ -78,18 +77,20 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job) return to_amdgpu_ring(job->base.entity->rq->sched); } -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job, struct amdgpu_vm *vm); -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, - enum amdgpu_ib_pool_type pool, struct amdgpu_job **job); +int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct drm_sched_entity *entity, void *owner, + unsigned int num_ibs, struct amdgpu_job **job); +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, + struct drm_sched_entity *entity, void *owner, + size_t size, enum amdgpu_ib_pool_type pool_type, + struct amdgpu_job **job); void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, struct amdgpu_bo *gws, struct amdgpu_bo *oa); void amdgpu_job_free_resources(struct amdgpu_job *job); void amdgpu_job_set_gang_leader(struct amdgpu_job *job, struct amdgpu_job *leader); void amdgpu_job_free(struct amdgpu_job *job); -int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, - void *owner, struct dma_fence **f); +struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job); int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 518eb0e40d32..6f81ed4fb0d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -150,14 +150,15 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, const unsigned ib_size_dw = 16; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, + PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -234,3 +235,20 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, return 0; } + +void jpeg_set_ras_funcs(struct amdgpu_device *adev) +{ + if (!adev->jpeg.ras) + return; + + amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); + + strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); + adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; + adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->jpeg.ras->ras_block.ras_late_init) + adev->jpeg.ras->ras_block.ras_late_init = 
amdgpu_ras_block_late_init; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 635dca59a70a..e8ca3e32ad52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -72,5 +72,6 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +void jpeg_set_ras_funcs(struct amdgpu_device *adev); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4e42dcb1950f..7aa7e52ca784 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> +#include <drm/drm_fb_helper.h> #include "amdgpu_uvd.h" #include "amdgpu_vce.h" #include "atom.h" @@ -430,7 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->uvd.harvest_config & (1 << i)) + if (adev->vcn.harvest_config & (1 << i)) continue; if (adev->vcn.inst[i].ring_dec.sched.ready) @@ -442,7 +443,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->uvd.harvest_config & (1 << i)) + if (adev->vcn.harvest_config & (1 << i)) continue; for (j = 0; j < adev->vcn.num_enc_rings; j++) @@ -796,7 +797,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags = 0; if (adev->flags & AMD_IS_APU) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) + if (amdgpu_mcbp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; if (amdgpu_is_tmz(adev)) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; @@ -1172,7 +1173,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp) { uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, @@ -1236,7 +1237,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL) amdgpu_vce_free_handles(adev, file_priv); - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); amdgpu_vm_bo_del(adev, fpriv->csa_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index ad980f4b66e1..97c05d08a551 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -91,14 +91,12 @@ struct amdgpu_mes { struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES]; uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES]; uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES]; - uint32_t ucode_fw_version[AMDGPU_MAX_MES_PIPES]; uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES]; /* mes ucode data */ struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES]; uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES]; uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES]; - uint32_t data_fw_version[AMDGPU_MAX_MES_PIPES]; uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES]; /* eop gpu obj */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 37322550d750..8a39300b1a84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -36,7 +36,6 @@ #include <drm/drm_encoder.h> #include <drm/drm_fixed.h> #include <drm/drm_crtc_helper.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_probe_helper.h> #include <linux/i2c.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2e8f6cd7a729..919bbea2e3ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -542,6 +542,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, /* GWS and OA don't need any alignment. */ page_align = bp->byte_align; size <<= PAGE_SHIFT; + } else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) { /* Both size and alignment must be a multiple of 4. */ page_align = ALIGN(bp->byte_align, 4); @@ -580,11 +581,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; bo->tbo.bdev = &adev->mman.bdev; - if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | - AMDGPU_GEM_DOMAIN_GDS)) - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - else - amdgpu_bo_placement_from_domain(bo, bp->domain); + amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) bo->tbo.priority = 1; @@ -776,7 +773,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); + r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7978307e1d6d..7a2fc920739b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -52,6 +52,32 @@ static int psp_load_smu_fw(struct psp_context *psp); static int psp_rap_terminate(struct psp_context *psp); static int psp_securedisplay_terminate(struct psp_context *psp); +static int psp_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + /* * Due to DF Cstate management centralized to PMFW, the firmware * loading sequence will be updated as below: @@ -139,6 +165,7 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -486,26 +513,22 @@ static int psp_sw_fini(void *handle) struct psp_gfx_cmd_resp *cmd = psp->cmd; psp_memory_training_fini(psp); - if (psp->sos_fw) { - release_firmware(psp->sos_fw); - psp->sos_fw = NULL; - } - if (psp->asd_fw) { - release_firmware(psp->asd_fw); - psp->asd_fw = NULL; - } - if (psp->ta_fw) { - release_firmware(psp->ta_fw); - psp->ta_fw = NULL; - } - if (psp->cap_fw) { - release_firmware(psp->cap_fw); - psp->cap_fw = NULL; - } - if (psp->toc_fw) { - 
release_firmware(psp->toc_fw); - psp->toc_fw = NULL; - } + + release_firmware(psp->sos_fw); + psp->sos_fw = NULL; + + release_firmware(psp->asd_fw); + psp->asd_fw = NULL; + + release_firmware(psp->ta_fw); + psp->ta_fw = NULL; + + release_firmware(psp->cap_fw); + psp->cap_fw = NULL; + + release_firmware(psp->toc_fw); + psp->toc_fw = NULL; + if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) psp_sysfs_fini(adev); @@ -835,7 +858,7 @@ static int psp_tmr_unload(struct psp_context *psp) struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_unload_cmd_buf(psp, cmd); - dev_info(psp->adev->dev, "free PSP TMR buffer\n"); + dev_dbg(psp->adev->dev, "free PSP TMR buffer\n"); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -992,6 +1015,8 @@ int psp_ta_unload(struct psp_context *psp, struct ta_context *context) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + release_psp_cmd_buf(psp); return ret; @@ -1073,42 +1098,6 @@ int psp_ta_init_shared_buf(struct psp_context *psp, &mem_ctx->shared_buf); } -static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - struct ta_context *context) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = context->session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - - cmd->cmd.cmd_invoke_cmd.buf.num_desc = 1; - cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size; - cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size; - cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo = - lower_32_bits(context->mem_context.shared_mc_addr); - cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi = - upper_32_bits(context->mem_context.shared_mc_addr); -} - -int psp_ta_invoke_indirect(struct psp_context *psp, - uint32_t ta_cmd_id, - struct ta_context *context) -{ - int ret; - struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - - psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr); - - context->resp_status = cmd->resp.status; - - release_psp_cmd_buf(psp); - - return ret; -} - static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) @@ -1551,7 +1540,7 @@ int psp_ras_terminate(struct psp_context *psp) return ret; } -static int psp_ras_initialize(struct psp_context *psp) +int psp_ras_initialize(struct psp_context *psp) { int ret; uint32_t boot_cfg = 0xFF; @@ -1614,7 +1603,7 @@ static int psp_ras_initialize(struct psp_context *psp) psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE; psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA; - if (!psp->ras_context.context.initialized) { + if (!psp->ras_context.context.mem_context.shared_buf) { ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context); if (ret) return ret; @@ -1635,7 +1624,9 @@ static int psp_ras_initialize(struct psp_context *psp) else { if (ras_cmd->ras_status) dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); - amdgpu_ras_fini(psp->adev); + + /* fail to load RAS TA */ + psp->ras_context.context.initialized = false; } return ret; @@ -1942,10 +1933,15 @@ static int psp_securedisplay_initialize(struct psp_context *psp) } else return ret; + mutex_lock(&psp->securedisplay_context.mutex); + 
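The psp_sw_fini() simplification a few hunks above works because release_firmware() is a no-op when passed NULL, so each blob can be released and cleared unconditionally. A minimal sketch of that teardown idiom:

#include <linux/firmware.h>

/* release_firmware(NULL) does nothing, so no if-guard is needed even
 * when the firmware was never loaded. */
static void example_release_fw(const struct firmware **fw)
{
	release_firmware(*fw);
	*fw = NULL;
}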
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__QUERY_TA); ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA); + + mutex_unlock(&psp->securedisplay_context.mutex); + if (ret) { psp_securedisplay_terminate(psp); /* free securedisplay shared memory */ @@ -1994,12 +1990,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC) return -EINVAL; - mutex_lock(&psp->securedisplay_context.mutex); - ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context); - mutex_unlock(&psp->securedisplay_context.mutex); - return ret; } /* SECUREDISPLAY end */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 58ce3ebb446c..cf4f60c66122 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -118,7 +118,6 @@ struct psp_funcs int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); - int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_stop)(struct psp_context *psp, @@ -136,6 +135,12 @@ struct psp_funcs int (*vbflash_stat)(struct psp_context *psp); }; +struct ta_funcs { + int (*fn_ta_initialize)(struct psp_context *psp); + int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id); + int (*fn_ta_terminate)(struct psp_context *psp); +}; + #define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 struct psp_xgmi_node_info { uint64_t node_id; @@ -309,6 +314,7 @@ struct psp_context struct psp_gfx_cmd_resp *cmd; const struct psp_funcs *funcs; + const struct ta_funcs *ta_funcs; /* firmware buffer */ struct amdgpu_bo *fw_pri_bo; @@ -389,7 +395,6 @@ struct amdgpu_psp_funcs { }; -#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) @@ -463,9 +468,6 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context); int psp_ta_invoke(struct psp_context *psp, uint32_t ta_cmd_id, struct ta_context *context); -int psp_ta_invoke_indirect(struct psp_context *psp, - uint32_t ta_cmd_id, - struct ta_context *context); int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta); int psp_xgmi_terminate(struct psp_context *psp); @@ -479,7 +481,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, int psp_xgmi_set_topology_info(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology); - +int psp_ras_initialize(struct psp_context *psp); int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0988e00612e5..468a67b302d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -41,30 +41,46 @@ static uint32_t get_bin_version(const uint8_t *bin) return hdr->ucode_version; } -static void prep_ta_mem_context(struct psp_context *psp, - struct ta_context *context, +static int prep_ta_mem_context(struct ta_mem_context *mem_context, uint8_t 
*shared_buf, uint32_t shared_buf_len) { - context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len); - psp_ta_init_shared_buf(psp, &context->mem_context); + if (mem_context->shared_mem_size < shared_buf_len) + return -EINVAL; + memset(mem_context->shared_buf, 0, mem_context->shared_mem_size); + memcpy((void *)mem_context->shared_buf, shared_buf, shared_buf_len); - memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len); + return 0; } static bool is_ta_type_valid(enum ta_type_id ta_type) { - bool ret = false; + switch (ta_type) { + case TA_TYPE_RAS: + return true; + default: + return false; + } +} + +static const struct ta_funcs ras_ta_funcs = { + .fn_ta_initialize = psp_ras_initialize, + .fn_ta_invoke = psp_ras_invoke, + .fn_ta_terminate = psp_ras_terminate +}; +static void set_ta_context_funcs(struct psp_context *psp, + enum ta_type_id ta_type, + struct ta_context **pcontext) +{ switch (ta_type) { case TA_TYPE_RAS: - ret = true; + *pcontext = &psp->ras_context.context; + psp->ta_funcs = &ras_ta_funcs; break; default: break; } - - return ret; } static const struct file_operations ta_load_debugfs_fops = { @@ -85,8 +101,7 @@ static const struct file_operations ta_invoke_debugfs_fops = { .owner = THIS_MODULE }; - -/** +/* * DOC: AMDGPU TA debugfs interfaces * * Three debugfs interfaces can be opened by a program to @@ -111,15 +126,18 @@ static const struct file_operations ta_invoke_debugfs_fops = { * * - For TA invoke debugfs interface: * Transmit buffer: + * - TA type (4bytes) * - TA ID (4bytes) * - TA CMD ID (4bytes) - * - TA shard buf length (4bytes) + * - TA shard buf length + * (4bytes, value not beyond TA shared memory size) * - TA shared buf * Receive buffer: * - TA shared buf * * - For TA unload debugfs interface: * Transmit buffer: + * - TA type (4bytes) * - TA ID (4bytes) */ @@ -131,59 +149,92 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t uint32_t copy_pos = 0; int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; - struct psp_context *psp = &adev->psp; - struct ta_context context = {0}; + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context *context = NULL; if (!buf) return -EINVAL; ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); if (ret || (!is_ta_type_valid(ta_type))) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); if (!ta_bin) - ret = -ENOMEM; + return -ENOMEM; if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) { ret = -EFAULT; goto err_free_bin; } - ret = psp_ras_terminate(psp); - if (ret) { - dev_err(adev->dev, "Failed to unload embedded RAS TA\n"); + /* Set TA context and functions */ + set_ta_context_funcs(psp, ta_type, &context); + + if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) { + dev_err(adev->dev, "Unsupported function to terminate TA\n"); + ret = -EOPNOTSUPP; goto err_free_bin; } - context.ta_type = ta_type; - context.ta_load_type = GFX_CMD_ID_LOAD_TA; - context.bin_desc.fw_version = get_bin_version(ta_bin); - context.bin_desc.size_bytes = ta_bin_len; - context.bin_desc.start_addr = ta_bin; + /* + * Allocate TA shared buf in case shared buf was freed + * due to loading TA failed before. 
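The DOC comment above defines the debugfs transmit-buffer layout for loading a TA: a 4-byte TA type, a 4-byte binary length, then the TA binary itself. A sketch of parsing that layout with a hypothetical helper; copy failures map to -EFAULT as in the updated handlers:

#include <linux/slab.h>
#include <linux/uaccess.h>

static int example_ta_load_parse(const char __user *buf, size_t len,
				 uint32_t *ta_type, uint32_t *ta_bin_len,
				 void **ta_bin)
{
	size_t pos = 0;

	if (len < 2 * sizeof(uint32_t))
		return -EINVAL;

	if (copy_from_user(ta_type, buf + pos, sizeof(uint32_t)))
		return -EFAULT;
	pos += sizeof(uint32_t);

	if (copy_from_user(ta_bin_len, buf + pos, sizeof(uint32_t)))
		return -EFAULT;
	pos += sizeof(uint32_t);

	*ta_bin = kzalloc(*ta_bin_len, GFP_KERNEL);
	if (!*ta_bin)
		return -ENOMEM;

	if (copy_from_user(*ta_bin, buf + pos, *ta_bin_len)) {
		kfree(*ta_bin);
		return -EFAULT;
	}

	return 0;
}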
+ */ + if (!context->mem_context.shared_buf) { + ret = psp_ta_init_shared_buf(psp, &context->mem_context); + if (ret) { + ret = -ENOMEM; + goto err_free_bin; + } + } + + ret = psp_fn_ta_terminate(psp); + if (ret || context->resp_status) { + dev_err(adev->dev, + "Failed to unload embedded TA (%d) and status (0x%X)\n", + ret, context->resp_status); + if (!ret) + ret = -EINVAL; + goto err_free_ta_shared_buf; + } + + /* Prepare TA context for TA initialization */ + context->ta_type = ta_type; + context->bin_desc.fw_version = get_bin_version(ta_bin); + context->bin_desc.size_bytes = ta_bin_len; + context->bin_desc.start_addr = ta_bin; - ret = psp_ta_load(psp, &context); + if (!psp->ta_funcs->fn_ta_initialize) { + dev_err(adev->dev, "Unsupported function to initialize TA\n"); + ret = -EOPNOTSUPP; + goto err_free_ta_shared_buf; + } - if (ret || context.resp_status) { - dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n", - ret, context.resp_status); + ret = psp_fn_ta_initialize(psp); + if (ret || context->resp_status) { + dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n", + ret, context->resp_status); if (!ret) ret = -EINVAL; - goto err_free_bin; + goto err_free_ta_shared_buf; } - context.initialized = true; - if (copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t))) + if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t))) ret = -EFAULT; +err_free_ta_shared_buf: + /* Only free TA shared buf when returns error code */ + if (ret && context->mem_context.shared_buf) + psp_ta_free_shared_buf(&context->mem_context); err_free_bin: kfree(ta_bin); @@ -192,58 +243,85 @@ err_free_bin: static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) { - uint32_t ta_id = 0; - int ret = 0; + uint32_t ta_type = 0; + uint32_t ta_id = 0; + uint32_t copy_pos = 0; + int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; - struct psp_context *psp = &adev->psp; - struct ta_context context = {0}; + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context *context = NULL; if (!buf) return -EINVAL; - ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t)); + ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); + if (ret || (!is_ta_type_valid(ta_type))) + return -EFAULT; + + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; - context.session_id = ta_id; + set_ta_context_funcs(psp, ta_type, &context); + context->session_id = ta_id; - ret = psp_ta_unload(psp, &context); - if (!ret) - context.initialized = false; + if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) { + dev_err(adev->dev, "Unsupported function to terminate TA\n"); + return -EOPNOTSUPP; + } + + ret = psp_fn_ta_terminate(psp); + if (ret || context->resp_status) { + dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n", + ret, context->resp_status); + if (!ret) + ret = -EINVAL; + } + + if (context->mem_context.shared_buf) + psp_ta_free_shared_buf(&context->mem_context); return ret; } static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) { + uint32_t ta_type = 0; uint32_t ta_id = 0; uint32_t cmd_id = 0; uint32_t shared_buf_len = 0; - uint8_t *shared_buf = NULL; + uint8_t *shared_buf = NULL; uint32_t copy_pos = 0; int ret = 0; - 
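The new ta_funcs table and the psp_fn_ta_*() macros let the debugfs code stay TA-agnostic: it looks up the context and function table by TA type and then calls through the macros. A sketch of that dispatch, assuming it sits in the same file as the static set_ta_context_funcs(); only TA_TYPE_RAS is wired up in this series:

static int example_invoke_by_type(struct psp_context *psp,
				  enum ta_type_id ta_type, uint32_t cmd_id)
{
	struct ta_context *context = NULL;

	set_ta_context_funcs(psp, ta_type, &context);
	if (!context || !psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke)
		return -EOPNOTSUPP;

	return psp_fn_ta_invoke(psp, cmd_id);
}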
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; - struct psp_context *psp = &adev->psp; - struct ta_context context = {0}; + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context *context = NULL; if (!buf) return -EINVAL; + ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EFAULT; + copy_pos += sizeof(uint32_t); + ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); shared_buf = kzalloc(shared_buf_len, GFP_KERNEL); @@ -254,26 +332,39 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size goto err_free_shared_buf; } - context.session_id = ta_id; + set_ta_context_funcs(psp, ta_type, &context); + + if (!context->initialized) { + dev_err(adev->dev, "TA is not initialized\n"); + ret = -EINVAL; + goto err_free_shared_buf; + } + + if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) { + dev_err(adev->dev, "Unsupported function to invoke TA\n"); + ret = -EOPNOTSUPP; + goto err_free_shared_buf; + } - prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len); + context->session_id = ta_id; - ret = psp_ta_invoke_indirect(psp, cmd_id, &context); + ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); + if (ret) + goto err_free_shared_buf; - if (ret || context.resp_status) { - dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n", - ret, context.resp_status); - if (!ret) + ret = psp_fn_ta_invoke(psp, cmd_id); + if (ret || context->resp_status) { + dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n", + ret, context->resp_status); + if (!ret) { ret = -EINVAL; - goto err_free_ta_shared_buf; + goto err_free_shared_buf; + } } - if (copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len)) + if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; -err_free_ta_shared_buf: - psp_ta_free_shared_buf(&context.mem_context); - err_free_shared_buf: kfree(shared_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h index cfc1542f63ef..14cd1c81c3e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h @@ -24,6 +24,11 @@ #ifndef __AMDGPU_PSP_TA_H__ #define __AMDGPU_PSP_TA_H__ +/* Calling set_ta_context_funcs is required before using the following macros */ +#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp))) +#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id))) +#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp))) + void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a4b47e1bd111..ad490c1e2f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, struct amdgpu_ras *con = 
container_of(attr, struct amdgpu_ras, features_attr); - return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); + return sysfs_emit(buf, "feature mask: 0x%x\n", con->features); } static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) @@ -1561,7 +1561,6 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * { bool poison_stat = false; struct amdgpu_device *adev = obj->adev; - struct ras_err_data err_data = {0, 0, 0, NULL}; struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, obj->head.block, 0); @@ -1584,7 +1583,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } if (!adev->gmc.xgmi.connected_to_cpu) - amdgpu_umc_poison_handler(adev, &err_data, false); + amdgpu_umc_poison_handler(adev, false); if (block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -1949,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; - clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + /* Perform full reset in fatal error mode */ + if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + else + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } @@ -2344,7 +2348,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); - if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) || + adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0)) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); else @@ -2848,7 +2853,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, struct amdgpu_device *adev = NULL; uint32_t gpu_id = 0; uint32_t umc_inst = 0, ch_inst = 0; - struct ras_err_data err_data = {0, 0, 0, NULL}; /* * If the error was generated in UMC_V2, which belongs to GPU UMCs, @@ -2887,31 +2891,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d", umc_inst, ch_inst); - err_data.err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); - if (!err_data.err_addr) { - dev_warn(adev->dev, - "Failed to alloc memory for umc error record in mca notifier!\n"); + if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst)) + return NOTIFY_OK; + else return NOTIFY_DONE; - } - - /* - * Translate UMC channel address to Physical address - */ - if (adev->umc.ras && - adev->umc.ras->convert_ras_error_address) - adev->umc.ras->convert_ras_error_address(adev, - &err_data, m->addr, ch_inst, umc_inst); - - if (amdgpu_bad_page_threshold != 0) { - amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); - amdgpu_ras_save_bad_pages(adev); - } - - kfree(err_data.err_addr); - return NOTIFY_OK; } static struct notifier_block amdgpu_bad_page_nb = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 84c241b9a2a1..2d9f3f4cd79e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -33,12 +33,29 @@ #include "amdgpu_reset.h" -#define 
EEPROM_I2C_MADDR_VEGA20 0x0 -#define EEPROM_I2C_MADDR_ARCTURUS 0x40000 -#define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 -#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 -#define EEPROM_I2C_MADDR_ALDEBARAN 0x0 -#define EEPROM_I2C_MADDR_SMU_13_0_0 (0x54UL << 16) +/* These are memory addresses as would be seen by one or more EEPROM + * chips strung on the I2C bus, usually by manipulating pins 1-3 of a + * set of EEPROM devices. They form a continuous memory space. + * + * The I2C device address includes the device type identifier, 1010b, + * which is a reserved value and indicates that this is an I2C EEPROM + * device. It also includes the top 3 bits of the 19 bit EEPROM memory + * address, namely bits 18, 17, and 16. This makes up the 7 bit + * address sent on the I2C bus with bit 0 being the direction bit, + * which is not represented here, and sent by the hardware directly. + * + * For instance, + * 50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0. + * 54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h. + * 56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h. + * Depending on the size of the I2C EEPROM device(s), bits 18:16 may + * address memory in a device or a device on the I2C bus, depending on + * the status of pins 1-3. See top of amdgpu_eeprom.c. + * + * The RAS table lives either at address 0 or address 40000h of EEPROM. + */ +#define EEPROM_I2C_MADDR_0 0x0 +#define EEPROM_I2C_MADDR_4 0x40000 /* * The 2 macros bellow represent the actual size in bytes that @@ -90,6 +107,16 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { + if (adev->asic_type == CHIP_IP_DISCOVERY) { + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + return true; + default: + return false; + } + } + return adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS || adev->asic_type == CHIP_SIENNA_CICHLID || @@ -107,16 +134,30 @@ static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, if (strnstr(atom_ctx->vbios_version, "D342", sizeof(atom_ctx->vbios_version))) - control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS_D342; + control->i2c_address = EEPROM_I2C_MADDR_0; else - control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS; + control->i2c_address = EEPROM_I2C_MADDR_4; return true; } +static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev, + struct amdgpu_ras_eeprom_control *control) +{ + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; + default: + return false; + } +} + static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, struct amdgpu_ras_eeprom_control *control) { + struct atom_context *atom_ctx = adev->mode_info.atom_context; u8 i2c_addr; if (!control) @@ -139,27 +180,34 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_VEGA20: - control->i2c_address = EEPROM_I2C_MADDR_VEGA20; + control->i2c_address = EEPROM_I2C_MADDR_0; break; case CHIP_ARCTURUS: return __get_eeprom_i2c_addr_arct(adev, control); case CHIP_SIENNA_CICHLID: - control->i2c_address = EEPROM_I2C_MADDR_SIENNA_CICHLID; + control->i2c_address = EEPROM_I2C_MADDR_0; break; case CHIP_ALDEBARAN: - control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN; + if (strnstr(atom_ctx->vbios_version, "D673", + sizeof(atom_ctx->vbios_version))) + control->i2c_address = EEPROM_I2C_MADDR_4; + else + control->i2c_address = EEPROM_I2C_MADDR_0; break; + case 
CHIP_IP_DISCOVERY: + return __get_eeprom_i2c_addr_ip_discovery(adev, control); + default: return false; } switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): - control->i2c_address = EEPROM_I2C_MADDR_SMU_13_0_0; + control->i2c_address = EEPROM_I2C_MADDR_4; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 6546552e596c..5c4f93ee0c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -62,7 +62,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, if (!res) goto fallback; - BUG_ON(start + size > res->num_pages << PAGE_SHIFT); + BUG_ON(start + size > res->size); cur->mem_type = res->mem_type; @@ -110,7 +110,7 @@ fallback: cur->size = size; cur->remaining = size; cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); + WARN_ON(res && start + size > res->size); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d3558c34d406..dc474b809604 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring) return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop); } + +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_begin(ring); +} + +void amdgpu_ring_ib_end(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_end(ring); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 82c178a9033a..f752c7ae7f60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -39,6 +39,7 @@ struct amdgpu_vm; #define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_HWIP_RINGS 8 #define AMDGPU_MAX_GFX_RINGS 2 +#define AMDGPU_MAX_SW_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 @@ -59,6 +60,7 @@ enum amdgpu_ring_priority_level { #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) +#define AMDGPU_FENCE_FLAG_EXEC (1 << 3) #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) @@ -143,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); + void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop); +u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring); +void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, + ktime_t timestamp); + /* * Rings. 
*/ @@ -279,6 +286,10 @@ struct amdgpu_ring { bool is_mes_queue; uint32_t hw_queue_id; struct amdgpu_mes_ctx_data *mes_ctx; + + bool is_sw_ring; + unsigned int entry_index; + }; #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) @@ -307,6 +318,9 @@ struct amdgpu_ring { #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); +void amdgpu_ring_ib_end(struct amdgpu_ring *ring); + void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c new file mode 100644 index 000000000000..62079f0e3ee8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -0,0 +1,514 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/slab.h> +#include <drm/drm_print.h> + +#include "amdgpu_ring_mux.h" +#include "amdgpu_ring.h" +#include "amdgpu.h" + +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2) +#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000 + +static const struct ring_info { + unsigned int hw_pio; + const char *ring_name; +} sw_ring_info[] = { + { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"}, + { AMDGPU_RING_PRIO_2, "gfx_high"}, +}; + +static struct kmem_cache *amdgpu_mux_chunk_slab; + +static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux, + struct amdgpu_ring *ring) +{ + return ring->entry_index < mux->ring_entry_size ? 
+ &mux->ring_entry[ring->entry_index] : NULL; +} + +/* copy packages on sw ring range[begin, end) */ +static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, + struct amdgpu_ring *ring, + u64 s_start, u64 s_end) +{ + u64 start, end; + struct amdgpu_ring *real_ring = mux->real_ring; + + start = s_start & ring->buf_mask; + end = s_end & ring->buf_mask; + + if (start == end) { + DRM_ERROR("no more data copied from sw ring\n"); + return; + } + if (start > end) { + amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start); + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], + (ring->ring_size >> 2) - start); + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end); + } else { + amdgpu_ring_alloc(real_ring, end - start); + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start); + } +} + +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_mux_entry *e = NULL; + struct amdgpu_mux_chunk *chunk; + uint32_t seq, last_seq; + int i; + + /*find low priority entries:*/ + if (!mux->s_resubmit) + return; + + for (i = 0; i < mux->num_ring_entries; i++) { + if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { + e = &mux->ring_entry[i]; + break; + } + } + + if (!e) { + DRM_ERROR("%s no low priority ring found\n", __func__); + return; + } + + last_seq = atomic_read(&e->ring->fence_drv.last_seq); + seq = mux->seqno_to_resubmit; + if (last_seq < seq) { + /*resubmit all the fences between (last_seq, seq]*/ + list_for_each_entry(chunk, &e->list, entry) { + if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) { + amdgpu_fence_update_start_timestamp(e->ring, + chunk->sync_seq, + ktime_get()); + amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring, + chunk->start, + chunk->end); + mux->wptr_resubmit = chunk->end; + amdgpu_ring_commit(mux->real_ring); + } + } + } + + del_timer(&mux->resubmit_timer); + mux->s_resubmit = false; +} + +static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux) +{ + mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT); +} + +static void amdgpu_mux_resubmit_fallback(struct timer_list *t) +{ + struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer); + + if (!spin_trylock(&mux->lock)) { + amdgpu_ring_mux_schedule_resubmit(mux); + DRM_ERROR("reschedule resubmit\n"); + return; + } + amdgpu_mux_resubmit_chunks(mux); + spin_unlock(&mux->lock); +} + +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, + unsigned int entry_size) +{ + mux->real_ring = ring; + mux->num_ring_entries = 0; + + mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL); + if (!mux->ring_entry) + return -ENOMEM; + + mux->ring_entry_size = entry_size; + mux->s_resubmit = false; + + amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk", + sizeof(struct amdgpu_mux_chunk), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!amdgpu_mux_chunk_slab) { + DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); + return -ENOMEM; + } + + spin_lock_init(&mux->lock); + timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0); + + return 0; +} + +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk, *chunk2; + int i; + + for (i = 0; i < mux->num_ring_entries; i++) { + e = &mux->ring_entry[i]; + list_for_each_entry_safe(chunk, chunk2, &e->list, entry) { + list_del(&chunk->entry); + kmem_cache_free(amdgpu_mux_chunk_slab, chunk); + } 
+ } + kmem_cache_destroy(amdgpu_mux_chunk_slab); + kfree(mux->ring_entry); + mux->ring_entry = NULL; + mux->num_ring_entries = 0; + mux->ring_entry_size = 0; +} + +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + + if (mux->num_ring_entries >= mux->ring_entry_size) { + DRM_ERROR("add sw ring exceeding max entry size\n"); + return -ENOENT; + } + + e = &mux->ring_entry[mux->num_ring_entries]; + ring->entry_index = mux->num_ring_entries; + e->ring = ring; + + INIT_LIST_HEAD(&e->list); + mux->num_ring_entries += 1; + return 0; +} + +void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr) +{ + struct amdgpu_mux_entry *e; + + spin_lock(&mux->lock); + + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) + amdgpu_mux_resubmit_chunks(mux); + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry for sw ring\n"); + spin_unlock(&mux->lock); + return; + } + + /* We could skip this set wptr as preemption in process. */ + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) { + spin_unlock(&mux->lock); + return; + } + + e->sw_cptr = e->sw_wptr; + /* Update cptr if the package already copied in resubmit functions */ + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit) + e->sw_cptr = mux->wptr_resubmit; + e->sw_wptr = wptr; + e->start_ptr_in_hw_ring = mux->real_ring->wptr; + + /* Skip copying for the packages already resubmitted.*/ + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) { + amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr); + e->end_ptr_in_hw_ring = mux->real_ring->wptr; + amdgpu_ring_commit(mux->real_ring); + } else { + e->end_ptr_in_hw_ring = mux->real_ring->wptr; + } + spin_unlock(&mux->lock); +} + +u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry for sw ring\n"); + return 0; + } + + return e->sw_wptr; +} + +/** + * amdgpu_ring_mux_get_rptr - get the readptr of the software ring + * @mux: the multiplexer the software rings attach to + * @ring: the software ring of which we calculate the readptr + * + * The return value of the readptr is not precise while the other rings could + * write data onto the real ring buffer.After overwriting on the real ring, we + * can not decide if our packages have been excuted or not read yet. However, + * this function is only called by the tools such as umr to collect the latest + * packages for the hang analysis. We assume the hang happens near our latest + * submit. Thus we could use the following logic to give the clue: + * If the readptr is between start and end, then we return the copy pointer + * plus the distance from start to readptr. If the readptr is before start, we + * return the copy pointer. Lastly, if the readptr is past end, we return the + * write pointer. 
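A concrete case makes the mapping described in the comment above easier to follow. The sketch below is illustrative only and is not part of the patch: it re-derives the software read pointer for a wrapped hardware ring using the same arithmetic as amdgpu_ring_mux_get_rptr(), with invented ring sizes and pointer values.

/* Standalone model of the rptr estimation above; all values are assumptions. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t hw_mask = 0x3ff, hw_size_dw = 0x400; /* 1024-dword real ring (assumed) */
    const uint64_t sw_mask = 0xff;                       /* 256-dword software ring (assumed) */
    uint64_t start = 0x3f0, end = 0x010;                 /* copied window, wrapped around the ring */
    uint64_t readp = 0x005;                              /* hardware read pointer inside the window */
    uint64_t sw_cptr = 0x20, sw_wptr = 0x60, sw_rptr;

    start &= hw_mask;
    end &= hw_mask;
    if (start > end) {                  /* unwrap so the window is contiguous */
        if (readp <= end)
            readp += hw_size_dw;        /* 0x005 -> 0x405 */
        end += hw_size_dw;              /* 0x010 -> 0x410 */
    }

    if (start <= readp && readp <= end)
        sw_rptr = (sw_cptr + (readp - start)) & sw_mask; /* 0x20 + 0x15 = 0x35 */
    else if (readp < start)
        sw_rptr = sw_cptr;              /* nothing of this window consumed yet */
    else
        sw_rptr = sw_wptr;              /* window fully consumed */

    printf("estimated sw_rptr = 0x%llx\n", (unsigned long long)sw_rptr);
    return 0;
}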
+ */ +u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + u64 readp, offset, start, end; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("no sw entry found!\n"); + return 0; + } + + readp = amdgpu_ring_get_rptr(mux->real_ring); + + start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask; + end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask; + if (start > end) { + if (readp <= end) + readp += mux->real_ring->ring_size >> 2; + end += mux->real_ring->ring_size >> 2; + } + + if (start <= readp && readp <= end) { + offset = readp - start; + e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask; + } else if (readp < start) { + e->sw_rptr = e->sw_cptr; + } else { + /* end < readptr */ + e->sw_rptr = e->sw_wptr; + } + + return e->sw_rptr; +} + +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + return amdgpu_ring_mux_get_rptr(mux, ring); +} + +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + return amdgpu_ring_mux_get_wptr(mux, ring); +} + +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr); +} + +/* Override insert_nop to prevent emitting nops to the software rings */ +void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + WARN_ON(!ring->is_sw_ring); +} + +const char *amdgpu_sw_ring_name(int idx) +{ + return idx < ARRAY_SIZE(sw_ring_info) ? + sw_ring_info[idx].ring_name : NULL; +} + +unsigned int amdgpu_sw_ring_priority(int idx) +{ + return idx < ARRAY_SIZE(sw_ring_info) ? + sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT; +} + +/*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/ +static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_ring *ring; + int i, need_preempt; + + need_preempt = 0; + for (i = 0; i < mux->num_ring_entries; i++) { + ring = mux->ring_entry[i].ring; + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && + amdgpu_fence_count_emitted(ring) > 0) + return 0; + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && + amdgpu_fence_last_unsignaled_time_us(ring) > + AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US) + need_preempt = 1; + } + return need_preempt && !mux->s_resubmit; +} + +/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. 
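The scan above decides when the high-priority software ring should preempt work already running on the real ring. As a rough standalone model (illustrative only, with invented types; the real code walks struct amdgpu_ring and the fence driver): preemption is requested only when no high-priority fences are already outstanding, some low-priority fence has been unsignaled longer than the 10 ms threshold, and no resubmission is still pending.

/* Illustrative model of the decision in amdgpu_mcbp_scan(); types are invented. */
#include <stdbool.h>
#include <stdint.h>

#define MODEL_UNSIGNALED_THRESHOLD_US 10000 /* mirrors AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US */

struct model_ring {
    bool     high_prio;            /* hw_prio above AMDGPU_RING_PRIO_DEFAULT */
    unsigned fences_emitted;       /* emitted but not yet signaled fences    */
    uint64_t last_unsignaled_us;   /* age of the oldest unsignaled fence     */
};

/* Returns true when the mux should trigger mid-command-buffer preemption. */
static bool model_mcbp_scan(const struct model_ring *rings, int count, bool resubmit_pending)
{
    bool need_preempt = false;
    int i;

    for (i = 0; i < count; i++) {
        /* high-priority work already in flight: a preemption was already issued */
        if (rings[i].high_prio && rings[i].fences_emitted > 0)
            return false;
        /* low-priority work stuck past the threshold is a candidate for preemption */
        if (!rings[i].high_prio &&
            rings[i].last_unsignaled_us > MODEL_UNSIGNALED_THRESHOLD_US)
            need_preempt = true;
    }
    return need_preempt && !resubmit_pending;
}

int main(void)
{
    struct model_ring rings[] = {
        { .high_prio = true,  .fences_emitted = 0, .last_unsignaled_us = 0     },
        { .high_prio = false, .fences_emitted = 2, .last_unsignaled_us = 25000 },
    };
    return model_mcbp_scan(rings, 2, false) ? 0 : 1;
}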
*/ +static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux) +{ + int r; + + spin_lock(&mux->lock); + mux->pending_trailing_fence_signaled = true; + r = amdgpu_ring_preempt_ib(mux->real_ring); + spin_unlock(&mux->lock); + return r; +} + +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) { + if (amdgpu_mcbp_scan(mux) > 0) + amdgpu_mcbp_trigger_preempt(mux); + return; + } + + amdgpu_ring_mux_start_ib(mux, ring); +} + +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + + WARN_ON(!ring->is_sw_ring); + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + return; + amdgpu_ring_mux_end_ib(mux, ring); +} + +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk; + + spin_lock(&mux->lock); + amdgpu_mux_resubmit_chunks(mux); + spin_unlock(&mux->lock); + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL); + if (!chunk) { + DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n"); + return; + } + + chunk->start = ring->wptr; + list_add_tail(&chunk->entry, &e->list); +} + +static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + uint32_t last_seq = 0; + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk, *tmp; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + last_seq = atomic_read(&ring->fence_drv.last_seq); + + list_for_each_entry_safe(chunk, tmp, &e->list, entry) { + if (chunk->sync_seq <= last_seq) { + list_del(&chunk->entry); + kmem_cache_free(amdgpu_mux_chunk_slab, chunk); + } + } +} + +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry); + if (!chunk) { + DRM_ERROR("cannot find chunk!\n"); + return; + } + + chunk->end = ring->wptr; + chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq); + + scan_and_remove_signaled_chunk(mux, ring); +} + +bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_ring *ring = NULL; + int i; + + if (!mux->pending_trailing_fence_signaled) + return false; + + if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr)) + return false; + + for (i = 0; i < mux->num_ring_entries; i++) { + e = &mux->ring_entry[i]; + if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { + ring = e->ring; + break; + } + } + + if (!ring) { + DRM_ERROR("cannot find low priority ring\n"); + return false; + } + + amdgpu_fence_process(ring); + if (amdgpu_fence_count_emitted(ring) > 0) { + mux->s_resubmit = true; + mux->seqno_to_resubmit = ring->fence_drv.sync_seq; + amdgpu_ring_mux_schedule_resubmit(mux); + } + + mux->pending_trailing_fence_signaled = false; + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h new file mode 100644 index 000000000000..4be45fc14954 --- 
/dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h @@ -0,0 +1,103 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_RING_MUX__ +#define __AMDGPU_RING_MUX__ + +#include <linux/timer.h> +#include <linux/spinlock.h> +#include "amdgpu_ring.h" + +struct amdgpu_ring; + +/** + * struct amdgpu_mux_entry - the entry recording software rings copying information. + * @ring: the pointer to the software ring. + * @start_ptr_in_hw_ring: last start location copied to in the hardware ring. + * @end_ptr_in_hw_ring: last end location copied to in the hardware ring. + * @sw_cptr: the position of the copy pointer in the sw ring. + * @sw_rptr: the read pointer in software ring. + * @sw_wptr: the write pointer in software ring. + * @list: list head for amdgpu_mux_chunk + */ +struct amdgpu_mux_entry { + struct amdgpu_ring *ring; + u64 start_ptr_in_hw_ring; + u64 end_ptr_in_hw_ring; + u64 sw_cptr; + u64 sw_rptr; + u64 sw_wptr; + struct list_head list; +}; + +struct amdgpu_ring_mux { + struct amdgpu_ring *real_ring; + + struct amdgpu_mux_entry *ring_entry; + unsigned int num_ring_entries; + unsigned int ring_entry_size; + /*the lock for copy data from different software rings*/ + spinlock_t lock; + bool s_resubmit; + uint32_t seqno_to_resubmit; + u64 wptr_resubmit; + struct timer_list resubmit_timer; + + bool pending_trailing_fence_signaled; +}; + +/** + * struct amdgpu_mux_chunk - save the location of indirect buffer's package on softare rings. + * @entry: the list entry. + * @sync_seq: the fence seqno related with the saved IB. + * @start:- start location on the software ring. + * @end:- end location on the software ring. 
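Taken together with the amdgpu_ring_ib_begin()/amdgpu_ring_ib_end() hooks added to amdgpu_ring.c above, these structures are driven roughly as outlined below. This is an illustrative sketch, not part of the patch; the real_ring and sw_ring_* variables are placeholders, and the actual wiring of the mux into the GFX IP code is outside the hunks shown here.

/* Illustrative call flow, assuming one real GFX ring plus two software rings: */
amdgpu_ring_mux_init(&adev->gfx.muxer, real_ring, AMDGPU_MAX_SW_GFX_RINGS);
amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &sw_ring_low);   /* "gfx_low"  */
amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &sw_ring_high);  /* "gfx_high" */

/* Per submission on a software ring (reached via amdgpu_ring_ib_begin/_end):
 *   amdgpu_sw_ring_ib_begin() -> low prio:  amdgpu_ring_mux_start_ib() opens a chunk
 *                                high prio: amdgpu_mcbp_scan() and, if needed,
 *                                           amdgpu_mcbp_trigger_preempt()
 *   ...packets are written to the software ring...
 *   amdgpu_sw_ring_ib_end()   -> low prio only: amdgpu_ring_mux_end_ib() closes the
 *                                chunk and records fence_drv.sync_seq for resubmission
 * Copying onto the real ring happens when the software ring's write pointer is
 * committed, i.e. from amdgpu_ring_mux_set_wptr(). */

amdgpu_ring_mux_fini(&adev->gfx.muxer);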
+ */ +struct amdgpu_mux_chunk { + struct list_head entry; + uint32_t sync_seq; + u64 start; + u64 end; +}; + +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, + unsigned int entry_size); +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr); +u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux); + +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); +void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); +const char *amdgpu_sw_ring_name(int idx); +unsigned int amdgpu_sw_ring_priority(int idx); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c index cc7597a15fe9..2c1d82fc4c34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c @@ -121,6 +121,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u switch (op) { case 1: + mutex_lock(&psp->securedisplay_context.mutex); psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__QUERY_TA); ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA); @@ -131,8 +132,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u else psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status); } + mutex_unlock(&psp->securedisplay_context.mutex); break; case 2: + mutex_lock(&psp->securedisplay_context.mutex); psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_id; @@ -146,6 +149,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status); } } + mutex_unlock(&psp->securedisplay_context.mutex); break; default: dev_err(adev->dev, "Invalid input: %s\n", str); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 090e66a1b284..bac7976975bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -259,6 +259,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return 0; } +/* Free the entry back to the slab */ +static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) +{ + hash_del(&e->node); + dma_fence_put(e->fence); + kmem_cache_free(amdgpu_sync_slab, e); +} + /** * amdgpu_sync_peek_fence - get the next fence not signaled yet * @@ -280,9 +288,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (dma_fence_is_signaled(f)) { - hash_del(&e->node); - dma_fence_put(f); - 
kmem_cache_free(amdgpu_sync_slab, e); + amdgpu_sync_entry_free(e); continue; } if (ring && s_fence) { @@ -355,15 +361,42 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) if (r) return r; } else { - hash_del(&e->node); - dma_fence_put(f); - kmem_cache_free(amdgpu_sync_slab, e); + amdgpu_sync_entry_free(e); } } return 0; } +/** + * amdgpu_sync_push_to_job - push fences into job + * @sync: sync object to get the fences from + * @job: job to push the fences into + * + * Add all unsignaled fences from sync to job. + */ +int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + struct dma_fence *f; + int i, r; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + f = e->fence; + if (dma_fence_is_signaled(f)) { + amdgpu_sync_entry_free(e); + continue; + } + + dma_fence_get(f); + r = drm_sched_job_add_dependency(&job->base, f); + if (r) + return r; + } + return 0; +} + int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) { struct amdgpu_sync_entry *e; @@ -375,9 +408,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) if (r) return r; - hash_del(&e->node); - dma_fence_put(e->fence); - kmem_cache_free(amdgpu_sync_slab, e); + amdgpu_sync_entry_free(e); } return 0; @@ -396,11 +427,8 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) struct hlist_node *tmp; unsigned int i; - hash_for_each_safe(sync->fences, i, tmp, e, node) { - hash_del(&e->node); - dma_fence_put(e->fence); - kmem_cache_free(amdgpu_sync_slab, e); - } + hash_for_each_safe(sync->fences, i, tmp, e, node) + amdgpu_sync_entry_free(e); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 2d5c613cda10..cf1e9e858efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -30,6 +30,7 @@ struct dma_fence; struct dma_resv; struct amdgpu_device; struct amdgpu_ring; +struct amdgpu_job; enum amdgpu_sync_mode { AMDGPU_SYNC_ALWAYS, @@ -54,6 +55,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); +int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 5e6ddc7e101c..677ad2016976 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -127,7 +127,7 @@ TRACE_EVENT(amdgpu_bo_create, TP_fast_assign( __entry->bo = bo; - __entry->pages = bo->tbo.resource->num_pages; + __entry->pages = PFN_UP(bo->tbo.resource->size); __entry->type = bo->tbo.resource->mem_type; __entry->prefer = bo->preferred_domains; __entry->allow = bo->allowed_domains; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b64938ed8cb6..b4236572eae1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -58,6 +58,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" #include "amdgpu_ras.h" +#include "amdgpu_hmm.h" #include "amdgpu_atomfirmware.h" #include "amdgpu_res_cursor.h" #include "bif/bif_4_1_d.h" @@ -189,7 +190,6 @@ static int amdgpu_ttm_map_buffer(struct 
ttm_buffer_object *bo, struct amdgpu_device *adev = ring->adev; unsigned offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; - struct dma_fence *fence; struct amdgpu_job *job; void *cpu_addr; uint64_t flags; @@ -229,7 +229,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; @@ -269,18 +271,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, } } - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_free; - - dma_fence_put(fence); - - return r; - -error_free: - amdgpu_job_free(job); - return r; + dma_fence_put(amdgpu_job_submit(job)); + return 0; } /** @@ -381,7 +373,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, dst.offset = 0; r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, - new_mem->num_pages << PAGE_SHIFT, + new_mem->size, amdgpu_bo_encrypted(abo), bo->base.resv, &fence); if (r) @@ -424,7 +416,7 @@ error: static bool amdgpu_mem_visible(struct amdgpu_device *adev, struct ttm_resource *mem) { - u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT; + u64 mem_size = (u64)mem->size; struct amdgpu_res_cursor cursor; u64 end; @@ -571,7 +563,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT; + size_t bus_size = (size_t)mem->size; switch (mem->mem_type) { case TTM_PL_SYSTEM: @@ -691,9 +683,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, } readonly = amdgpu_ttm_tt_is_readonly(ttm); - r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, - ttm->num_pages, range, readonly, - true, NULL); + r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages, + readonly, NULL, pages, range); out_unlock: mmap_read_unlock(mm); if (r) @@ -1154,8 +1145,9 @@ int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, * @addr: The address in the current tasks VM space to use * @flags: Requirements of userptr object. * - * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages - * to current task + * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to + * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to + * initialize GPU VM for a KFD process. 
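The amdgpu_ttm_map_buffer() hunk above shows the conversion pattern this series applies throughout the driver (TTM, UVD, VCE, VCN, VM updates): the scheduler entity and fence owner move from submission time to amdgpu_job_alloc_with_ib(), and amdgpu_job_submit() now returns the job's fence directly instead of an error code plus an output parameter. Condensed to its shape (arguments abridged, IB construction elided; not compilable on its own):

/* Old pattern (removed above) */
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
                             AMDGPU_IB_POOL_DELAYED, &job);
/* ... build the IB ... */
r = amdgpu_job_submit(job, &adev->mman.entity,
                      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
    goto error_free;    /* had to amdgpu_job_free(job) by hand on failure */
dma_fence_put(fence);

/* New pattern (added above): entity and owner are bound at allocation,
 * and submission cannot fail, so it simply returns the fence. */
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
                             AMDGPU_FENCE_OWNER_UNDEFINED,
                             num_dw * 4 + num_bytes,
                             AMDGPU_IB_POOL_DELAYED, &job);
/* ... build the IB ... */
dma_fence_put(amdgpu_job_submit(job));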
*/ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, uint64_t addr, uint32_t flags) @@ -1394,7 +1386,8 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, } static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, - unsigned long offset, void *buf, int len, int write) + unsigned long offset, void *buf, + int len, int write) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -1418,26 +1411,27 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, memcpy(adev->mman.sdma_access_ptr, buf, len); num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job); + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4, AMDGPU_IB_POOL_DELAYED, + &job); if (r) goto out; amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm); - src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start; + src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + + src_mm.start; dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo); if (write) swap(src_addr, dst_addr); - amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false); + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, + PAGE_SIZE, false); amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) { - amdgpu_job_free(job); - goto out; - } + fence = amdgpu_job_submit(job); if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout)) r = -ETIMEDOUT; @@ -1537,6 +1531,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) NULL, &adev->mman.fw_vram_usage_va); } +/* + * Driver Reservation functions + */ +/** + * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram + * + * @adev: amdgpu_device pointer + * + * free drv reserved vram if it has been reserved. + */ +static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo, + NULL, + &adev->mman.drv_vram_usage_va); +} + /** * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw * @@ -1563,6 +1574,32 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) &adev->mman.fw_vram_usage_va); } +/** + * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from drv. + */ +static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) +{ + u64 vram_size = adev->gmc.visible_vram_size; + + adev->mman.drv_vram_usage_va = NULL; + adev->mman.drv_vram_usage_reserved_bo = NULL; + + if (adev->mman.drv_vram_usage_size == 0 || + adev->mman.drv_vram_usage_size > vram_size) + return 0; + + return amdgpu_bo_create_kernel_at(adev, + adev->mman.drv_vram_usage_start_offset, + adev->mman.drv_vram_usage_size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mman.drv_vram_usage_reserved_bo, + &adev->mman.drv_vram_usage_va); +} + /* * Memoy training reservation functions */ @@ -1731,6 +1768,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } /* + *The reserved vram for driver must be pinned to the specified + *place on the VRAM, so reserve it early. 
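The new drv_vram_usage_* path mirrors the existing firmware reservation: a component records the VRAM window it needs before amdgpu_ttm_init() runs, and the init code pins a kernel BO at exactly that offset. The fields below are real; the producer shown is a hypothetical sketch, as the code that fills them in is outside the hunks here (in this series the consumer is the SR-IOV data-exchange code further below).

/* Illustrative only: request a driver VRAM reservation before amdgpu_ttm_init(). */
adev->mman.drv_vram_usage_start_offset = offset_in_vram;  /* byte offset, hypothetical value  */
adev->mman.drv_vram_usage_size         = reserved_bytes;  /* 0 (the default) skips the reserve */

/* amdgpu_ttm_init() then calls amdgpu_ttm_drv_reserve_vram_init(), which pins
 * adev->mman.drv_vram_usage_reserved_bo at that range and maps it at
 * adev->mman.drv_vram_usage_va; amdgpu_ttm_fini() releases it again. */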
+ */ + r = amdgpu_ttm_drv_reserve_vram_init(adev); + if (r) + return r; + + /* * only NAVI10 and onwards ASIC support for IP discovery. * If IP discovery enabled, a block of memory should be * reserved for IP discovey. @@ -1855,6 +1900,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); amdgpu_ttm_fw_reserve_vram_fini(adev); + amdgpu_ttm_drv_reserve_vram_fini(adev); if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -1936,7 +1982,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, AMDGPU_IB_POOL_DELAYED; int r; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job); + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4, pool, job); if (r) return r; @@ -1946,17 +1994,11 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, adev->gart.bo); (*job)->vm_needs_flush = true; } - if (resv) { - r = amdgpu_sync_resv(adev, &(*job)->sync, resv, - AMDGPU_SYNC_ALWAYS, - AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - DRM_ERROR("sync failed (%d).\n", r); - amdgpu_job_free(*job); - return r; - } - } - return 0; + if (!resv) + return 0; + + return drm_sched_job_add_resv_dependencies(&(*job)->base, resv, + DMA_RESV_USAGE_BOOKKEEP); } int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, @@ -2001,8 +2043,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, if (direct_submit) r = amdgpu_job_submit_direct(job, ring, fence); else - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, fence); + *fence = amdgpu_job_submit(job); if (r) goto error_free; @@ -2047,16 +2088,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, fence); - if (r) - goto error_free; - + *fence = amdgpu_job_submit(job); return 0; - -error_free: - amdgpu_job_free(job); - return r; } int amdgpu_fill_buffer(struct amdgpu_bo *bo, @@ -2272,9 +2305,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, if (p->mapping != adev->mman.bdev.dev_mapping) return -EPERM; - ptr = kmap(p); + ptr = kmap_local_page(p); r = copy_to_user(buf, ptr + off, bytes); - kunmap(p); + kunmap_local(ptr); if (r) return -EFAULT; @@ -2323,9 +2356,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, if (p->mapping != adev->mman.bdev.dev_mapping) return -EPERM; - ptr = kmap(p); + ptr = kmap_local_page(p); r = copy_from_user(ptr + off, buf, bytes); - kunmap(p); + kunmap_local(ptr); if (r) return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index a37207011a69..b4d8ba2789f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -86,6 +86,12 @@ struct amdgpu_mman { struct amdgpu_bo *fw_vram_usage_reserved_bo; void *fw_vram_usage_va; + /* driver VRAM reservation */ + u64 drv_vram_usage_start_offset; + u64 drv_vram_usage_size; + struct amdgpu_bo *drv_vram_usage_reserved_bo; + void *drv_vram_usage_va; + /* PAGE_SIZE'd BO for process memory r/w over SDMA. 
*/ struct amdgpu_bo *sdma_access_bo; void *sdma_access_ptr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 1c36235b4539..552e06929229 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -125,6 +125,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, + PSP_FW_TYPE_MAX_INDEX, }; /* version_major=2, version_minor=0 */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index aad3c8b4c810..f76c19fc0392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -22,6 +22,59 @@ */ #include "amdgpu.h" +#include "umc_v6_7.h" + +static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) +{ + switch (adev->ip_versions[UMC_HWIP][0]) { + case IP_VERSION(6, 7, 0): + umc_v6_7_convert_error_address(adev, + err_data, err_addr, ch_inst, umc_inst); + break; + default: + dev_warn(adev->dev, + "UMC address to Physical address translation is not supported\n"); + return AMDGPU_RAS_FAIL; + } + + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) +{ + struct ras_err_data err_data = {0, 0, 0, NULL}; + int ret = AMDGPU_RAS_FAIL; + + err_data.err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, + "Failed to alloc memory for umc error record in MCA notifier!\n"); + return AMDGPU_RAS_FAIL; + } + + /* + * Translate UMC channel address to Physical address + */ + ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr, + ch_inst, umc_inst); + if (ret) + goto out; + + if (amdgpu_bad_page_threshold != 0) { + amdgpu_ras_add_bad_pages(adev, err_data.err_addr, + err_data.err_addr_cnt); + amdgpu_ras_save_bad_pages(adev); + } + +out: + kfree(err_data.err_addr); + return ret; +} static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, void *ras_error_status, @@ -112,23 +165,29 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - void *ras_error_status, - bool reset) +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) { - int ret; - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + int ret = AMDGPU_RAS_SUCCESS; - ret = - amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); + if (!adev->gmc.xgmi.connected_to_cpu) { + struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - if (ret == AMDGPU_RAS_SUCCESS && obj) { - obj->err_data.ue_count += err_data->ue_count; - obj->err_data.ce_count += err_data->ce_count; + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); + + if (ret == AMDGPU_RAS_SUCCESS && obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + } + } else if (reset) { + /* MCA poison handler is only responsible for GPU reset, + * let MCA notifier do page retirement. 
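With this split there are now two distinct routes into bad-page retirement, both visible in the hunks above; the outline below only summarizes those call chains, it is not new code.

/*
 * 1) In-band poison consumption (GPU memory not coherent with the CPU):
 *      amdgpu_ras_interrupt_poison_consumption_handler()
 *        -> amdgpu_umc_poison_handler(adev, false)
 *             -> amdgpu_umc_do_page_retirement()        query, retire, optionally reset
 *
 * 2) MCA notifier path, where the CPU's machine-check notifier reports the UMC error:
 *      amdgpu_bad_page_notifier()
 *        -> amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst)
 *             -> amdgpu_umc_convert_error_address()     translate the UMC channel address
 *             -> amdgpu_ras_add_bad_pages() / amdgpu_ras_save_bad_pages()
 *      On such systems amdgpu_umc_poison_handler(adev, true) only flags the KFD
 *      SRAM ECC state and requests the GPU reset, leaving retirement to the
 *      notifier above.
 */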
+ */ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_reset_gpu(adev); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index e46439274f3a..a6951160f13a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -51,9 +51,6 @@ struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); - void (*convert_ras_error_address)(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst); void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, @@ -86,9 +83,7 @@ struct amdgpu_umc { }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - void *ras_error_status, - bool reset); +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -101,4 +96,6 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry); +int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6eac649499d3..e00bb654e24b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1132,7 +1132,9 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, unsigned offset_idx = 0; unsigned offset[3] = { UVD_BASE_SI, 0, 0 }; - r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT : + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + 64, direct ? 
AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; @@ -1175,16 +1177,13 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err_free; } else { - r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv, - AMDGPU_SYNC_ALWAYS, - AMDGPU_FENCE_OWNER_UNDEFINED); + r = drm_sched_job_add_resv_dependencies(&job->base, + bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL); if (r) goto err_free; - r = amdgpu_job_submit(job, &adev->uvd.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); - if (r) - goto err_free; + f = amdgpu_job_submit(job); } amdgpu_bo_reserve(bo, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 02cb3a12dd76..b239e874f2d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -450,8 +450,10 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) return r; @@ -538,7 +540,9 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, struct dma_fence *f = NULL; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, + r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + ib_size_dw * 4, direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED, &job); if (r) @@ -570,8 +574,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, if (direct) r = amdgpu_job_submit_direct(job, ring, &f); else - r = amdgpu_job_submit(job, &ring->adev->vce.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); + f = amdgpu_job_submit(job); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ce64ca1c6e66..b1622ac9949f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -603,15 +603,16 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, struct dma_fence **fence) { + u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); struct amdgpu_device *adev = ring->adev; struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; - uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); int i, r; - r = amdgpu_job_alloc_with_ib(adev, 64, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + 64, AMDGPU_IB_POOL_DIRECT, + &job); if (r) goto err; @@ -790,8 +791,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, if (sq) ib_size_dw += 8; - r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) goto err; @@ -919,8 +921,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand if (sq) ib_size_dw += 8; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) return r; @@ -985,8 +988,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han if (sq) ib_size_dw += 8; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - 
AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) return r; @@ -1251,3 +1255,20 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, return 0; } + +void amdgpu_vcn_set_ras_funcs(struct amdgpu_device *adev) +{ + if (!adev->vcn.ras) + return; + + amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); + + strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); + adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; + adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->vcn.ras->ras_block.ras_late_init) + adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 253ea6b159df..dbb8d68a30c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -399,5 +399,6 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +void amdgpu_vcn_set_ras_funcs(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index c73abe54d974..15544f262ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -64,6 +64,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) ddev->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; + + /* enable mcbp for sriov asic_type before soc21 */ + amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 
1 : 0; + } void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, @@ -424,11 +428,17 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev, struct eeprom_table_record bp; uint64_t retired_page; uint32_t bp_idx, bp_cnt; + void *vram_usage_va = NULL; + + if (adev->mman.fw_vram_usage_va) + vram_usage_va = adev->mman.fw_vram_usage_va; + else + vram_usage_va = adev->mman.drv_vram_usage_va; if (bp_block_size) { bp_cnt = bp_block_size / sizeof(uint64_t); for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) { - retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va + + retired_page = *(uint64_t *)(vram_usage_va + bp_block_offset + bp_idx * sizeof(uint64_t)); bp.retired_page = retired_page; @@ -639,7 +649,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; - if (adev->mman.fw_vram_usage_va != NULL) { + if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { + DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); + } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { /* go through this logic in ip_init and reset to init workqueue*/ amdgpu_virt_exchange_data(adev); @@ -662,32 +674,40 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) uint32_t bp_block_size = 0; struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; - if (adev->mman.fw_vram_usage_va != NULL) { - - adev->virt.fw_reserve.p_pf2vf = - (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); - adev->virt.fw_reserve.p_vf2pf = - (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { + if (adev->mman.fw_vram_usage_va) { + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + adev->virt.fw_reserve.p_vf2pf = + (struct amd_sriov_msg_vf2pf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + } else if (adev->mman.drv_vram_usage_va) { + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + adev->virt.fw_reserve.p_vf2pf = + (struct amd_sriov_msg_vf2pf_info_header *) + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + } amdgpu_virt_read_pf2vf_data(adev); amdgpu_virt_write_vf2pf_data(adev); /* bad page handling for version 2 */ if (adev->virt.fw_reserve.p_pf2vf->version == 2) { - pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; + pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; - bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | - ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); - bp_block_size = pf2vf_v2->bp_block_size; + bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | + ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); + bp_block_size = pf2vf_v2->bp_block_size; - if (bp_block_size && !adev->virt.ras_init_done) - amdgpu_virt_init_ras_err_handler_data(adev); + if (bp_block_size && !adev->virt.ras_init_done) + amdgpu_virt_init_ras_err_handler_data(adev); - if (adev->virt.ras_init_done) - amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); - } + if 
(adev->virt.ras_init_done) + amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 49c4347d154c..2b9d806e23af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -75,6 +75,8 @@ struct amdgpu_vf_error_buffer { uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; +enum idh_request; + /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -84,7 +86,8 @@ struct amdgpu_virt_ops { int (*req_init_data)(struct amdgpu_device *adev); int (*reset_gpu)(struct amdgpu_device *adev); int (*wait_reset)(struct amdgpu_device *adev); - void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); + void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, + u32 data1, u32 data2, u32 data3); }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 500a1dc4fe02..53ff91fc6cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -498,7 +498,7 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; @@ -513,6 +513,10 @@ static int amdgpu_vkms_sw_init(void *handle) return r; } + r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); + if (r) + return r; + drm_kms_helper_poll_init(adev_to_drm(adev)); adev->mode_info.mode_config_initialized = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 003aa9e47085..c05cff979004 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -45,22 +45,43 @@ /** * DOC: GPUVM * - * GPUVM is similar to the legacy gart on older asics, however - * rather than there being a single global gart table - * for the entire GPU, there are multiple VM page tables active - * at any given time. The VM page tables can contain a mix - * vram pages and system memory pages and system memory pages + * GPUVM is the MMU functionality provided on the GPU. + * GPUVM is similar to the legacy GART on older asics, however + * rather than there being a single global GART table + * for the entire GPU, there can be multiple GPUVM page tables active + * at any given time. The GPUVM page tables can contain a mix + * VRAM pages and system pages (both memory and MMIO) and system pages * can be mapped as snooped (cached system pages) or unsnooped * (uncached system pages). - * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When executing a command buffer, - * the kernel tells the ring what VMID to use for that command + * + * Each active GPUVM has an ID associated with it and there is a page table + * linked with each VMID. When executing a command buffer, + * the kernel tells the engine what VMID to use for that command * buffer. VMIDs are allocated dynamically as commands are submitted. * The userspace drivers maintain their own address space and the kernel * sets up their pages tables accordingly when they submit their * command buffers and a VMID is assigned. - * Cayman/Trinity support up to 8 active VMs at any given time; - * SI supports 16. 
+ * The hardware supports up to 16 active GPUVMs at any given time. + * + * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending + * on the ASIC family. GPUVM supports RWX attributes on each page as well + * as other features such as encryption and caching attributes. + * + * VMID 0 is special. It is the GPUVM used for the kernel driver. In + * addition to an aperture managed by a page table, VMID 0 also has + * several other apertures. There is an aperture for direct access to VRAM + * and there is a legacy AGP aperture which just forwards accesses directly + * to the matching system physical addresses (or IOVAs when an IOMMU is + * present). These apertures provide direct access to these memories without + * incurring the overhead of a page table. VMID 0 is used by the kernel + * driver for tasks like memory management. + * + * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory. + * For user applications, each application can have their own unique GPUVM + * address space. The application manages the address space and the kernel + * driver manages the GPUVM page tables for each process. If an GPU client + * accesses an invalid page, it will generate a GPU page fault, similar to + * accessing an invalid page on a CPU. */ #define START(node) ((node)->start) @@ -541,6 +562,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; + amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); @@ -601,6 +623,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 69e105fa41f6..59cf64216fbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -47,6 +47,32 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) return r; } +/* Allocate a new job for @count PTE updates */ +static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, + unsigned int count) +{ + enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE + : AMDGPU_IB_POOL_DELAYED; + struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate + : &p->vm->delayed; + unsigned int ndw; + int r; + + /* estimate how many dw we need */ + ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; + if (p->pages_addr) + ndw += count * 2; + ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); + + r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM, + ndw * 4, pool, &p->job); + if (r) + return r; + + p->num_dw_left = ndw; + return 0; +} + /** * amdgpu_vm_sdma_prepare - prepare SDMA command submission * @@ -61,21 +87,22 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, struct dma_resv *resv, enum amdgpu_sync_mode sync_mode) { - enum amdgpu_ib_pool_type pool = p->immediate ? 
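The reworked DOC: GPUVM comment above describes each GPUVM as a per-VMID page table with 1-2 or 1-5 levels depending on the ASIC. As a purely illustrative aside (not amdgpu's actual layout, which varies with ASIC family and block size), the standalone C sketch below shows how a virtual address would be split into per-level page-table indices for a generic 4-level, 9-bits-per-level, 4 KiB-page layout:

#include <stdint.h>
#include <stdio.h>

#define PT_PAGE_SHIFT	12	/* 4 KiB pages (illustrative only) */
#define PT_INDEX_BITS	9	/* 512 entries per level (illustrative only) */
#define PT_LEVELS	4	/* root..leaf, e.g. PDB2/PDB1/PDB0/PTB */

/* idx[0] is the root-level index, idx[PT_LEVELS - 1] the leaf (PTB) index. */
static void pt_split_va(uint64_t va, unsigned int idx[PT_LEVELS])
{
	int level;

	va >>= PT_PAGE_SHIFT;
	for (level = PT_LEVELS - 1; level >= 0; level--) {
		idx[level] = va & ((1u << PT_INDEX_BITS) - 1);
		va >>= PT_INDEX_BITS;
	}
}

int main(void)
{
	unsigned int idx[PT_LEVELS];
	uint64_t va = 0x0000123456789000ULL;
	int i;

	pt_split_va(va, idx);
	for (i = 0; i < PT_LEVELS; i++)
		printf("level %d index: %u\n", i, idx[i]);
	return 0;
}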
AMDGPU_IB_POOL_IMMEDIATE - : AMDGPU_IB_POOL_DELAYED; - unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; + struct amdgpu_sync sync; int r; - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job); + r = amdgpu_vm_sdma_alloc_job(p, 0); if (r) return r; - p->num_dw_left = ndw; - if (!resv) return 0; - return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm); + amdgpu_sync_create(&sync); + r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm); + if (!r) + r = amdgpu_sync_push_to_job(&sync, p->job); + amdgpu_sync_free(&sync); + return r; } /** @@ -91,20 +118,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { struct amdgpu_ib *ib = p->job->ibs; - struct drm_sched_entity *entity; struct amdgpu_ring *ring; struct dma_fence *f; - int r; - entity = p->immediate ? &p->vm->immediate : &p->vm->delayed; - ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); + ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring, + sched); WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); WARN_ON(ib->length_dw > p->num_dw_left); - r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f); - if (r) - goto error; + f = amdgpu_job_submit(p->job); if (p->unlocked) { struct dma_fence *tmp = dma_fence_get(f); @@ -127,10 +150,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, } dma_fence_put(f); return 0; - -error: - amdgpu_job_free(p->job); - return r; } /** @@ -210,8 +229,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, uint64_t flags) { struct amdgpu_bo *bo = &vmbo->bo; - enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE - : AMDGPU_IB_POOL_DELAYED; struct dma_resv_iter cursor; unsigned int i, ndw, nptes; struct dma_fence *fence; @@ -221,7 +238,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, /* Wait for PD/PT moves to be completed */ dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL); dma_resv_for_each_fence_unlocked(&cursor, fence) { - r = amdgpu_sync_fence(&p->job->sync, fence); + r = drm_sched_job_add_dependency(&p->job->base, fence); if (r) { dma_resv_iter_end(&cursor); return r; @@ -238,19 +255,9 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (r) return r; - /* estimate how many dw we need */ - ndw = 32; - if (p->pages_addr) - ndw += count * 2; - ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); - ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); - - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, - &p->job); + r = amdgpu_vm_sdma_alloc_job(p, count); if (r) return r; - - p->num_dw_left = ndw; } if (!p->pages_addr) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 80dd1343594c..faa12146635c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -439,7 +439,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT; + remaining_size = (u64)vres->base.size; mutex_lock(&mgr->lock); while (remaining_size) { @@ -498,7 +498,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, LIST_HEAD(temp); trim_list = &vres->blocks; - original_size = (u64)vres->base.num_pages << PAGE_SHIFT; + original_size = (u64)vres->base.size; /* * If size value is rounded up to min_block_size, trim the last @@ -533,8 +533,8 
@@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, amdgpu_vram_mgr_block_size(block); start >>= PAGE_SHIFT; - if (start > vres->base.num_pages) - start -= vres->base.num_pages; + if (start > PFN_UP(vres->base.size)) + start -= PFN_UP(vres->base.size); else start = 0; vres->base.start = max(vres->base.start, start); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 47159e9a0884..4b9e7b050ccd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) if (ret) { dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n"); kobject_put(&hive->kobj); - kfree(hive); hive = NULL; goto pro_end; } @@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); ret = -ENOMEM; kobject_put(&hive->kobj); - kfree(hive); hive = NULL; goto pro_end; } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 6be9ac2b9c5b..18ae9433e463 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2081,8 +2081,11 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) } } record += fake_edid_record->ucFakeEDIDLength ? - fake_edid_record->ucFakeEDIDLength + 2 : - sizeof(ATOM_FAKE_EDID_PATCH_RECORD); + struct_size(fake_edid_record, + ucFakeEDIDString, + fake_edid_record->ucFakeEDIDLength) : + /* empty fake edid record must be 3 bytes long */ + sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 288fce7dc0ed..248f1a4e915f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2800,8 +2801,6 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2830,6 +2829,17 @@ static int dce_v10_0_sw_init(void *handle) if (r) return r; + /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? 
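The amdgpu_vram_mgr_new() hunk above replaces the page-count-based num_pages accounting with the byte-based base.size and converts back to pages with PFN_UP(). A small worked example of why the rounding-up conversion matters for sizes that are not page aligned (PAGE_SIZE/PAGE_SHIFT are hard-coded here only for the demo; PFN_UP mirrors the kernel macro's rounding):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
/* Same round-up-to-whole-pages behaviour as the kernel's PFN_UP(). */
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
	uint64_t size = (1ULL << 20) + 1;	/* 1 MiB plus one byte */

	printf("size >> PAGE_SHIFT = %llu pages (truncates)\n",
	       (unsigned long long)(size >> PAGE_SHIFT));	/* 256 */
	printf("PFN_UP(size)       = %llu pages (rounds up)\n",
	       (unsigned long long)PFN_UP(size));		/* 257 */
	return 0;
}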
*/ + adev_to_drm(adev)->vblank_disable_immediate = true; + + r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); + if (r) + return r; + + INIT_WORK(&adev->hotplug_work, + amdgpu_display_hotplug_work_func); + drm_kms_helper_poll_init(adev_to_drm(adev)); adev->mode_info.mode_config_initialized = true; @@ -2892,6 +2902,8 @@ static int dce_v10_0_hw_fini(void *handle) dce_v10_0_pageflip_interrupt_fini(adev); + flush_work(&adev->hotplug_work); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index cbe5250b31cb..cd9c19060d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2918,8 +2919,6 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2949,6 +2948,17 @@ static int dce_v11_0_sw_init(void *handle) if (r) return r; + /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? */ + adev_to_drm(adev)->vblank_disable_immediate = true; + + r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); + if (r) + return r; + + INIT_WORK(&adev->hotplug_work, + amdgpu_display_hotplug_work_func); + drm_kms_helper_poll_init(adev_to_drm(adev)); adev->mode_info.mode_config_initialized = true; @@ -3022,6 +3032,8 @@ static int dce_v11_0_hw_fini(void *handle) dce_v11_0_pageflip_interrupt_fini(adev); + flush_work(&adev->hotplug_work); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index b1c44fab074f..76323deecc58 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -23,6 +23,7 @@ #include <linux/pci.h> +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2675,7 +2676,6 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; r = amdgpu_display_modeset_create_props(adev); if (r) @@ -2706,6 +2706,18 @@ static int dce_v6_0_sw_init(void *handle) if (r) return r; + /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? 
*/ + adev_to_drm(adev)->vblank_disable_immediate = true; + + r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); + if (r) + return r; + + /* Pre-DCE11 */ + INIT_WORK(&adev->hotplug_work, + amdgpu_display_hotplug_work_func); + drm_kms_helper_poll_init(adev_to_drm(adev)); return r; @@ -2764,6 +2776,8 @@ static int dce_v6_0_hw_fini(void *handle) dce_v6_0_pageflip_interrupt_fini(adev); + flush_work(&adev->hotplug_work); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a22b45c92792..01cf3ab111cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2701,8 +2702,6 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2731,6 +2730,18 @@ static int dce_v8_0_sw_init(void *handle) if (r) return r; + /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? */ + adev_to_drm(adev)->vblank_disable_immediate = true; + + r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); + if (r) + return r; + + /* Pre-DCE11 */ + INIT_WORK(&adev->hotplug_work, + amdgpu_display_hotplug_work_func); + drm_kms_helper_poll_init(adev_to_drm(adev)); adev->mode_info.mode_config_initialized = true; @@ -2791,6 +2802,8 @@ static int dce_v8_0_hw_fini(void *handle) dce_v8_0_pageflip_interrupt_fini(adev); + flush_work(&adev->hotplug_work); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index af94ac580d3e..49d34c7bbf20 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4453,8 +4453,6 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - adev->gfx.funcs = &gfx_v10_0_gfx_funcs; - switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): @@ -6911,6 +6909,8 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(*mqd)); + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); amdgpu_ring_init_mqd(ring); @@ -7593,6 +7593,8 @@ static int gfx_v10_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.funcs = &gfx_v10_0_gfx_funcs; + switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): @@ -8489,7 +8491,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) @@ -8664,7 +8666,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, { uint32_t dw2 = 0; - if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev)) + if (amdgpu_mcbp) gfx_v10_0_ring_emit_ce_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? 
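The dce_v6_0/dce_v8_0/dce_v10_0/dce_v11_0 sw_init hunks above move hotplug-work and vblank initialization into the display IP blocks and pair them with a flush_work() in the matching hw_fini. A minimal kernel-style sketch of that init/teardown pairing, with all names hypothetical (amdgpu itself keeps the work item in struct amdgpu_device and uses amdgpu_display_hotplug_work_func):

#include <linux/workqueue.h>

/* Hypothetical container; stands in for struct amdgpu_device here. */
struct demo_display {
	struct work_struct hotplug_work;
};

static void demo_hotplug_work_func(struct work_struct *work)
{
	/* re-probe connectors and notify userspace; body elided */
}

static int demo_sw_init(struct demo_display *disp)
{
	/* register the handler before hotplug interrupts can fire */
	INIT_WORK(&disp->hotplug_work, demo_hotplug_work_func);
	return 0;
}

static int demo_hw_fini(struct demo_display *disp)
{
	/* make sure no queued hotplug work races with teardown */
	flush_work(&disp->hotplug_work);
	return 0;
}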
true : false); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0fecc5bf45bc..a56c6e106d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { @@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): soc15_program_register_sequence(adev, golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); @@ -843,7 +848,6 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) { - adev->gfx.funcs = &gfx_v11_0_gfx_funcs; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): @@ -856,6 +860,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1285,6 +1290,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1626,7 +1632,8 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + if (!amdgpu_sriov_vf(adev)) + WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v11_0_setup_rb(adev); gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); @@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) for (i = 0; i < adev->usec_timeout; i++) { cp_status = RREG32_SOC15(GC, 0, regCP_STAT); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1)) + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -4004,6 +4012,8 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(*mqd)); + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); amdgpu_ring_init_mqd(ring); @@ -4390,7 +4400,6 @@ static int gfx_v11_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -4409,15 +4418,14 @@ static int gfx_v11_0_hw_fini(void *handle) amdgpu_mes_kiq_hw_fini(adev); } - if (amdgpu_sriov_vf(adev)) { - gfx_v11_0_cp_gfx_enable(adev, false); - /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ - tmp = RREG32_SOC15(GC, 0, 
regRLC_CP_SCHEDULERS); - tmp &= 0xffffff00; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - + if (amdgpu_sriov_vf(adev)) + /* Remove the steps disabling CPG and clearing KIQ position, + * so that CP could perform IDLE-SAVE during switch. Those + * steps are necessary to avoid a DMAR error in gfx9 but it is + * not reproduced on gfx11. + */ return 0; - } + gfx_v11_0_cp_enable(adev, false); gfx_v11_0_enable_gui_idle_interrupt(adev, false); @@ -4656,6 +4664,8 @@ static int gfx_v11_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.funcs = &gfx_v11_0_gfx_funcs; + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); @@ -4673,6 +4683,26 @@ static int gfx_v11_0_early_init(void *handle) return 0; } +static int gfx_v11_0_ras_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if *gfx_common_if; + int ret; + + gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!gfx_common_if) + return -ENOMEM; + + gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; + + ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); + if (ret) + dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); + + kfree(gfx_common_if); + return 0; +} + static int gfx_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -4686,6 +4716,12 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { + r = gfx_v11_0_ras_late_init(handle); + if (r) + return r; + } + return 0; } @@ -5022,6 +5058,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5055,6 +5092,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, amdgpu_gfx_off_ctrl(adev, enable); break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): gfx_v11_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -5078,6 +5116,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5299,7 +5338,7 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) @@ -6060,6 +6099,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 7f0b18b0d4c4..d47135606e3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4643,6 +4643,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) memset((void *)mqd, 0, 
sizeof(struct vi_mqd_allocation)); ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0320be4a5fc6..f202b45c413c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -47,6 +47,7 @@ #include "amdgpu_ras.h" +#include "amdgpu_ring_mux.h" #include "gfx_v9_4.h" #include "gfx_v9_0.h" #include "gfx_v9_4_2.h" @@ -56,6 +57,7 @@ #include "asic_reg/gc/gc_9_0_default.h" #define GFX9_NUM_GFX_RINGS 1 +#define GFX9_NUM_SW_GFX_RINGS 2 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L @@ -753,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -826,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); if (action == PREEMPT_QUEUES_NO_UNMAP) { - amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); - amdgpu_ring_write(kiq_ring, seq); + amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } else { amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0); @@ -1564,7 +1567,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) mask = 1; cu_bitmap = 0; counter = 0; - gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (cu_info->bitmap[i][j] & mask) { @@ -1583,7 +1586,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = cu_bitmap; } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1605,7 +1608,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1654,7 +1657,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set 
mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1919,8 +1922,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) u32 gb_addr_config; int err; - adev->gfx.funcs = &gfx_v9_0_gfx_funcs; - switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): adev->gfx.config.max_hw_contexts = 8; @@ -2105,6 +2106,7 @@ static int gfx_v9_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned int hw_prio; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): @@ -2188,6 +2190,9 @@ static int gfx_v9_0_sw_init(void *handle) sprintf(ring->name, "gfx_%d", i); ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + + /* disable scheduler on the real ring */ + ring->no_scheduler = true; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2195,6 +2200,41 @@ static int gfx_v9_0_sw_init(void *handle) return r; } + /* set up the software rings */ + if (adev->gfx.num_gfx_rings) { + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { + ring = &adev->gfx.sw_gfx_ring[i]; + ring->ring_obj = NULL; + sprintf(ring->name, amdgpu_sw_ring_name(i)); + ring->use_doorbell = true; + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + ring->is_sw_ring = true; + hw_prio = amdgpu_sw_ring_priority(i); + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, + NULL); + if (r) + return r; + ring->wptr = 0; + } + + /* init the muxer and add software rings */ + r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], + GFX9_NUM_SW_GFX_RINGS); + if (r) { + DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); + return r; + } + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { + r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, + &adev->gfx.sw_gfx_ring[i]); + if (r) { + DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); + return r; + } + } + } + /* set up the compute queues - allocate horizontally across pipes */ ring_id = 0; for (i = 0; i < adev->gfx.mec.num_mec; ++i) { @@ -2245,6 +2285,12 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->gfx.num_gfx_rings) { + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); + amdgpu_ring_mux_fini(&adev->gfx.muxer); + } + for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -2324,13 +2370,13 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); data = gfx_v9_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -2467,14 +2513,14 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_0_select_se_sh(adev, i, 
j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; udelay(1); } if (k == adev->usec_timeout) { - gfx_v9_0_select_se_sh(adev, 0xffffffff, + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", @@ -2483,7 +2529,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3583,6 +3629,8 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v9_0_mqd_init(ring); @@ -4539,6 +4587,8 @@ static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.funcs = &gfx_v9_0_gfx_funcs; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) adev->gfx.num_gfx_rings = 0; @@ -5155,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { control |= INDIRECT_BUFFER_PRE_ENB(1); + if (flags & AMDGPU_IB_PREEMPTED) + control |= INDIRECT_BUFFER_PRE_RESUME(1); + if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) - gfx_v9_0_ring_emit_de_meta(ring); + gfx_v9_0_ring_emit_de_meta(ring, + (!amdgpu_sriov_vf(ring->adev) && + flags & AMDGPU_IB_PREEMPTED) ? + true : false); } amdgpu_ring_write(ring, header); @@ -5214,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; + uint32_t dw2 = 0; /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); - amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | - EOP_TC_NC_ACTION_EN) : - (EOP_TCL1_ACTION_EN | - EOP_TC_ACTION_EN | - EOP_TC_WB_ACTION_EN | - EOP_TC_MD_ACTION_EN)) | - EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + + if (writeback) { + dw2 = EOP_TC_NC_ACTION_EN; + } else { + dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | + EOP_TC_MD_ACTION_EN; + } + dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5); + if (exec) + dw2 |= EOP_EXEC; + + amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); /* @@ -5329,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } -static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) +static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) { + struct amdgpu_device *adev = ring->adev; struct v9_ce_ib_state ce_payload = {0}; - uint64_t csa_addr; + uint64_t offset, ce_payload_gpu_addr; + void *ce_payload_cpu_addr; int cnt; cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - csa_addr = amdgpu_csa_vaddr(ring->adev); + + if (ring->is_mes_queue) { + offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = + amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + ce_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + } else { + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + } amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | WRITE_DATA_DST_SEL(8) | WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0)); - amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); - amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); - amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); + amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); + + if (resume) + amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, + sizeof(ce_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&ce_payload, + sizeof(ce_payload) >> 2); +} + +static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + ring->trail_seq += 1; + amdgpu_ring_alloc(ring, 13); + gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); + /*reset the CP_VMID_PREEMPT after trailing fence*/ + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), + 0x0); + + /* assert IB preemption, emit the trailing fence */ + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, + ring->trail_fence_gpu_addr, + ring->trail_seq); + + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*ring->trail_fence_cpu_addr)) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); + } + + amdgpu_ring_commit(ring); + + /* deassert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; } -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) { + 
struct amdgpu_device *adev = ring->adev; struct v9_de_ib_state de_payload = {0}; - uint64_t csa_addr, gds_addr; + uint64_t offset, gds_addr, de_payload_gpu_addr; + void *de_payload_cpu_addr; int cnt; - csa_addr = amdgpu_csa_vaddr(ring->adev); - gds_addr = csa_addr + 4096; + if (ring->is_mes_queue) { + offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = + amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + de_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + + offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gds_backup) + + offsetof(struct v9_gfx_meta_data, de_payload); + gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + } else { + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); + } + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -5365,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) WRITE_DATA_DST_SEL(8) | WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0)); - amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); - amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); - amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); + amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); + + if (resume) + amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, + sizeof(de_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&de_payload, + sizeof(de_payload) >> 2); } static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, @@ -5383,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { uint32_t dw2 = 0; - if (amdgpu_sriov_vf(ring->adev)) - gfx_v9_0_ring_emit_ce_meta(ring); + gfx_v9_0_ring_emit_ce_meta(ring, + (!amdgpu_sriov_vf(ring->adev) && + flags & AMDGPU_IB_PREEMPTED) ? 
true : false); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { @@ -5710,7 +5882,12 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + if (adev->gfx.num_gfx_rings && + !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { + /* Fence signals are handled on the software rings*/ + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + } break; case 1: case 2: @@ -6482,7 +6659,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0x0, k); + amdgpu_gfx_select_se_sh(adev, j, 0x0, k); RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); } } @@ -6544,7 +6721,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0, k); + amdgpu_gfx_select_se_sh(adev, j, 0, k); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) @@ -6559,7 +6736,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += sec_count; err_data->ue_count += ded_count; - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); gfx_v9_0_query_utc_edc_status(adev, err_data); @@ -6707,6 +6884,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, + .preempt_ib = gfx_v9_0_ring_preempt_ib, + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, + .emit_wreg = gfx_v9_0_ring_emit_wreg, + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, + .emit_mem_sync = gfx_v9_0_emit_mem_sync, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .secure_submission_supported = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = amdgpu_sw_ring_get_rptr_gfx, + .get_wptr = amdgpu_sw_ring_get_wptr_gfx, + .set_wptr = amdgpu_sw_ring_set_wptr_gfx, + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + * the first COND_EXEC jump to the place just + * prior to this double SWITCH_BUFFER + */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2 + /* SWITCH_BUFFER */ + 7, /* gfx_v9_0_emit_mem_sync */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, + .emit_fence = gfx_v9_0_ring_emit_fence, + 
.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_sw_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_switch_buffer = gfx_v9_ring_emit_sb, + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, @@ -6792,6 +7025,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; + if (adev->gfx.num_gfx_rings) { + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; + } + for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; } @@ -6963,7 +7201,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); gfx_v9_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); @@ -6996,7 +7234,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 8cf53e039c11..3f8676d23a5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -397,6 +397,9 @@ static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 0); WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); + if (amdgpu_sriov_vf(adev)) + return; + /* Setup L2 cache */ WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c index 5d3fffd4929f..080ff11ca305 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -154,6 +154,9 @@ static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; + if (amdgpu_sriov_vf(adev)) + return; + /* Disable AGP. */ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0); WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, 0); @@ -354,18 +357,6 @@ static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev) static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev)) { - /* - * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are - * VF copy registers so vbios post doesn't program them, for - * SRIOV driver need to program them - */ - WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE, - adev->gmc.vram_start >> 24); - WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP, - adev->gmc.vram_end >> 24); - } - /* GART Enable. 
*/ gfxhub_v3_0_3_init_gart_aperture_regs(adev); gfxhub_v3_0_3_init_system_aperture_regs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f513e2c2e964..21e46817d82d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -371,7 +371,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * translation. Avoid this by doing the invalidation from the SDMA * itself. */ - r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, &job); if (r) goto error_alloc; @@ -380,10 +382,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, job->vm_needs_flush = true; job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; amdgpu_ring_pad_ib(ring, &job->ibs[0]); - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_submit; + fence = amdgpu_job_submit(job); mutex_unlock(&adev->mman.gtt_window_lock); @@ -392,9 +391,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; -error_submit: - amdgpu_job_free(job); - error_alloc: mutex_unlock(&adev->mman.gtt_window_lock); DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); @@ -612,6 +608,8 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t *flags) { + struct amdgpu_bo *bo = mapping->bo_va->base.bo; + *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -628,6 +626,11 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, *flags |= AMDGPU_PTE_SYSTEM; *flags &= ~AMDGPU_PTE_VALID; } + + if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_UNCACHED)) + *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | + AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); } static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 66dfb574cc7d..4326078689cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -503,6 +503,8 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t *flags) { + struct amdgpu_bo *bo = mapping->bo_va->base.bo; + *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -519,6 +521,11 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, *flags |= AMDGPU_PTE_SYSTEM; *flags &= ~AMDGPU_PTE_VALID; } + + if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_UNCACHED)) + *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | + AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); } static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev) @@ -551,7 +558,10 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.node_inst_num = adev->gmc.num_umc; adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; - adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; + if (adev->umc.node_inst_num == 4) + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl_ext0[0][0][0]; + else + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; adev->umc.ras = &umc_v8_10_ras; break; case IP_VERSION(8, 11, 0): @@ -749,6 
+759,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): adev->num_vmhubs = 2; /* * To fulfill 4-level page support, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 67ca16a8027c..50386eb2eec8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1113,6 +1113,74 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } } +static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; + bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; + bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; + unsigned int mtype; + bool snoop = false; + + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): + if (is_vram) { + if (bo_adev == adev) { + if (uncached) + mtype = MTYPE_UC; + else if (coherent) + mtype = MTYPE_CC; + else + mtype = MTYPE_RW; + /* FIXME: is this still needed? Or does + * amdgpu_ttm_tt_pde_flags already handle this? + */ + if (adev->ip_versions[GC_HWIP][0] == + IP_VERSION(9, 4, 2) && + adev->gmc.xgmi.connected_to_cpu) + snoop = true; + } else { + if (uncached || coherent) + mtype = MTYPE_UC; + else + mtype = MTYPE_NC; + if (mapping->bo_va->is_xgmi) + snoop = true; + } + } else { + if (uncached || coherent) + mtype = MTYPE_UC; + else + mtype = MTYPE_NC; + /* FIXME: is this still needed? Or does + * amdgpu_ttm_tt_pde_flags already handle this? + */ + snoop = true; + } + break; + default: + if (uncached || coherent) + mtype = MTYPE_UC; + else + mtype = MTYPE_NC; + + /* FIXME: is this still needed? Or does + * amdgpu_ttm_tt_pde_flags already handle this? + */ + if (!is_vram) + snoop = true; + } + + if (mtype != MTYPE_NC) + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(mtype); + *flags |= snoop ? 
AMDGPU_PTE_SNOOPED : 0; +} + static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t *flags) @@ -1128,14 +1196,9 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_VALID; } - if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) && - !(*flags & AMDGPU_PTE_SYSTEM) && - mapping->bo_va->is_xgmi) - *flags |= AMDGPU_PTE_SNOOPED; - - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) - *flags |= mapping->flags & AMDGPU_PTE_SNOOPED; + if (mapping->bo_va->base.bo) + gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo, + mapping, flags); } static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index f87d0f6ffc93..f2b743a93915 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -807,16 +807,5 @@ static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev) break; } - if (adev->jpeg.ras) { - amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); - - strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); - adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; - adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; - adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->jpeg.ras->ras_block.ras_late_init) - adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; - } + jpeg_set_ras_funcs(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 63b0d0b810ec..3beb731b2ce5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -27,6 +27,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v2_0.h" +#include "jpeg_v4_0.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -38,6 +39,7 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev); /** * jpeg_v4_0_early_init - set function pointers @@ -55,6 +57,7 @@ static int jpeg_v4_0_early_init(void *handle) jpeg_v4_0_set_dec_ring_funcs(adev); jpeg_v4_0_set_irq_funcs(adev); + jpeg_v4_0_set_ras_funcs(adev); return 0; } @@ -78,6 +81,18 @@ static int jpeg_v4_0_sw_init(void *handle) if (r) return r; + /* JPEG DJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq); + if (r) + return r; + + /* JPEG EJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq); + if (r) + return r; + r = amdgpu_jpeg_sw_init(adev); if (r) return r; @@ -167,6 +182,8 @@ static int jpeg_v4_0_hw_fini(void *handle) RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); + return 0; } @@ -524,6 +541,10 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, case VCN_4_0__SRCID__JPEG_DECODE: amdgpu_fence_process(&adev->jpeg.inst->ring_dec); break; + case VCN_4_0__SRCID_DJPEG0_POISON: + case VCN_4_0__SRCID_EJPEG0_POISON: + 
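The new gmc_v9_0_get_coherence_flags() above derives the PTE memory type (MTYPE) and snoop bit from the buffer object's placement and its AMDGPU_GEM_CREATE_COHERENT/UNCACHED flags. The standalone sketch below restates only the default (non 9.4.1/9.4.2) branch of that decision with simplified stand-in types, to make the precedence explicit; it is not the full per-ASIC table from the diff:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the hardware MTYPE encodings. */
enum demo_mtype { DEMO_MTYPE_NC, DEMO_MTYPE_UC };

struct demo_pte_policy {
	enum demo_mtype mtype;
	bool snoop;
};

/*
 * Default branch only: uncached or coherent BOs get MTYPE_UC, everything
 * else stays MTYPE_NC, and system-memory mappings are always snooped.
 */
static struct demo_pte_policy demo_pick_policy(bool uncached, bool coherent,
					       bool is_vram)
{
	struct demo_pte_policy p;

	p.mtype = (uncached || coherent) ? DEMO_MTYPE_UC : DEMO_MTYPE_NC;
	p.snoop = !is_vram;
	return p;
}

int main(void)
{
	struct demo_pte_policy p = demo_pick_policy(false, true, false);

	printf("mtype=%s snoop=%d\n",
	       p.mtype == DEMO_MTYPE_UC ? "UC" : "NC", p.snoop);
	return 0;
}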
amdgpu_jpeg_process_poison_irq(adev, source, entry); + break; default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -607,3 +628,63 @@ const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = { .rev = 0, .funcs = &jpeg_v4_0_ip_funcs, }; + +static uint32_t jpeg_v4_0_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_JPEG_V4_0_JPEG0: + reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); + break; + case AMDGPU_JPEG_V4_0_JPEG1: + reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool jpeg_v4_0_query_ras_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst = 0, sub = 0, poison_stat = 0; + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) + for (sub = 0; sub < AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK; sub++) + poison_stat += + jpeg_v4_0_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = { + .query_poison_status = jpeg_v4_0_query_ras_poison_status, +}; + +static struct amdgpu_jpeg_ras jpeg_v4_0_ras = { + .ras_block = { + .hw_ops = &jpeg_v4_0_ras_hw_ops, + }, +}; + +static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[JPEG_HWIP][0]) { + case IP_VERSION(4, 0, 0): + adev->jpeg.ras = &jpeg_v4_0_ras; + break; + default: + break; + } + + jpeg_set_ras_funcs(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h index f1ed6ccfedca..07d36c2abd6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h @@ -24,6 +24,13 @@ #ifndef __JPEG_V4_0_H__ #define __JPEG_V4_0_H__ +enum amdgpu_jpeg_v4_0_sub_block { + AMDGPU_JPEG_V4_0_JPEG0 = 0, + AMDGPU_JPEG_V4_0_JPEG1, + + AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block; #endif /* __JPEG_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 067d10073a56..614394118a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -121,6 +121,10 @@ static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r < 1) { DRM_ERROR("MES failed to response msg=%d\n", x_pkt->header.opcode); + + while (halt_if_hws_hang) + schedule(); + return -ETIMEDOUT; } @@ -415,10 +419,6 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev, mes_hdr = (const struct mes_firmware_header_v1_0 *) adev->mes.fw[pipe]->data; - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_version); - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_data_version); adev->mes.uc_start_addr[pipe] = le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f141fadd2d86..5459366f49ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -129,6 +129,10 @@ static 
int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r < 1) { DRM_ERROR("MES failed to response msg=%d\n", x_pkt->header.opcode); + + while (halt_if_hws_hang) + schedule(); + return -ETIMEDOUT; } @@ -384,6 +388,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_reset = 1; mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; return mes_v11_0_submit_pkt_and_poll_completion(mes, @@ -485,10 +490,6 @@ static int mes_v11_0_init_microcode(struct amdgpu_device *adev, mes_hdr = (const struct mes_firmware_header_v1_0 *) adev->mes.fw[pipe]->data; - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_version); - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_data_version); adev->mes.uc_start_addr[pipe] = le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); @@ -1253,7 +1254,9 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) if (adev->mes.ring.sched.ready) mes_v11_0_kiq_dequeue_sched(adev); - mes_v11_0_enable(adev, false); + if (!amdgpu_sriov_vf(adev)) + mes_v11_0_enable(adev, false); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 998b5d17b271..0e664d0cc8d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -319,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) tmp = mmMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp); } static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1b027d069ab4..4638ea7c2eec 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -243,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev) tmp = mmMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp); } static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index a1d26c4d80b8..16cc82215e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -275,7 +275,7 @@ static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index e8058edc1d10..6bdf2ef0298d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -269,7 +269,7 @@ static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + 
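/*
 * A minimal sketch of the register-block fix repeated across the mmhub_v2_x
 * and mmhub_v3_x files below: WREG32_SOC15() looks up the register's base
 * address in the per-IP offset table named by its first argument, so an
 * MMHUB register such as MMVM_L2_CNTL5 has to be written through the MMHUB
 * block rather than GC.
 */
	tmp = mmMMVM_L2_CNTL5_DEFAULT;
	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);	/* was addressed via GC before the fix */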
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 770be0a8f7ce..45465acaa943 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -268,7 +268,7 @@ static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index b3fba8dea63c..6853b93ac82e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -82,10 +82,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -100,10 +100,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = /* Sienna Cichlid */ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -125,10 +125,10 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + 
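/*
 * A minimal sketch of the video-codec capability fix applied throughout
 * nv.c, soc15.c and soc21.c in this change: codec_info_build() takes
 * (codec, max width, max height, level), and the legacy decode entries had
 * the maximum height mistyped as 4906 instead of 4096.
 */
	/* corrected entry; the same 4906 -> 4096 fix is repeated per table */
	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},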
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -149,7 +149,7 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode = /* Beige Goby*/ static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -166,7 +166,7 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = { /* Yellow Carp*/ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index ed2293686f0d..9de46fa8f46c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -126,32 +126,6 @@ out: return err; } -static int psp_v10_0_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct psp_ring *ring; - struct amdgpu_device *adev = psp->adev; - - ring = &psp->km_ring; - - ring->ring_type = ring_type; - - /* allocate 4k Page of Local Frame Buffer memory for ring */ - ring->ring_size = 0x1000; - ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); - if (ret) { - ring->ring_size = 0; - return ret; - } - - return 0; -} - static int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -245,7 +219,6 @@ static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value) static const struct psp_funcs psp_v10_0_funcs = { .init_microcode = psp_v10_0_init_microcode, - .ring_init = psp_v10_0_ring_init, .ring_create = psp_v10_0_ring_create, .ring_stop = psp_v10_0_ring_stop, .ring_destroy = psp_v10_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 9518b4394a6e..bd3e3e23a939 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -360,32 +360,6 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) return ret; } -static int psp_v11_0_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct psp_ring *ring; - struct amdgpu_device *adev = psp->adev; - - ring = &psp->km_ring; - - ring->ring_type = ring_type; - - /* allocate 4k Page of Local Frame Buffer memory for ring */ - ring->ring_size = 0x1000; - ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->firmware.rbuf, - 
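/*
 * A minimal sketch of what the per-IP ring_init callbacks deleted from the
 * psp_v*_0 files all did: allocate one 4K page of VRAM for the PSP kernel
 * ring.  The bodies were identical, so the .ring_init hook is dropped and
 * the allocation is presumably done once in the common PSP code; the shared
 * helper itself is not visible in these hunks, the names below follow the
 * removed code.
 */
	struct psp_ring *ring = &psp->km_ring;

	ring->ring_type = ring_type;
	ring->ring_size = 0x1000;	/* one 4K page of local frame buffer */
	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->firmware.rbuf,
				      &ring->ring_mem_mc_addr,
				      (void **)&ring->ring_mem);
	if (ret)
		ring->ring_size = 0;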
&ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); - if (ret) { - ring->ring_size = 0; - return ret; - } - - return 0; -} - static int psp_v11_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -779,7 +753,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .bootloader_load_spl = psp_v11_0_bootloader_load_spl, .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv, .bootloader_load_sos = psp_v11_0_bootloader_load_sos, - .ring_init = psp_v11_0_ring_init, .ring_create = psp_v11_0_ring_create, .ring_stop = psp_v11_0_ring_stop, .ring_destroy = psp_v11_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c index ff13e1beb49b..5697760a819b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -28,32 +28,6 @@ #include "mp/mp_11_0_8_offset.h" -static int psp_v11_0_8_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct psp_ring *ring; - struct amdgpu_device *adev = psp->adev; - - ring = &psp->km_ring; - - ring->ring_type = ring_type; - - /* allocate 4k Page of Local Frame Buffer memory for ring */ - ring->ring_size = 0x1000; - ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); - if (ret) { - ring->ring_size = 0; - return ret; - } - - return 0; -} - static int psp_v11_0_8_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -194,7 +168,6 @@ static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value) } static const struct psp_funcs psp_v11_0_8_funcs = { - .ring_init = psp_v11_0_8_ring_init, .ring_create = psp_v11_0_8_ring_create, .ring_stop = psp_v11_0_8_ring_stop, .ring_destroy = psp_v11_0_8_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 0b2ac418e4ac..8ed2281b6557 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -236,34 +236,6 @@ static void psp_v12_0_reroute_ih(struct psp_context *psp) 0x80000000, 0x8000FFFF, false); } -static int psp_v12_0_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct psp_ring *ring; - struct amdgpu_device *adev = psp->adev; - - psp_v12_0_reroute_ih(psp); - - ring = &psp->km_ring; - - ring->ring_type = ring_type; - - /* allocate 4k Page of Local Frame Buffer memory for ring */ - ring->ring_size = 0x1000; - ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); - if (ret) { - ring->ring_size = 0; - return ret; - } - - return 0; -} - static int psp_v12_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -272,6 +244,8 @@ static int psp_v12_0_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; + psp_v12_0_reroute_ih(psp); + if (amdgpu_sriov_vf(psp->adev)) { /* Write low address of the ring to C2PMSG_102 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); @@ -425,7 +399,6 @@ static const struct psp_funcs psp_v12_0_funcs = { .init_microcode = psp_v12_0_init_microcode, .bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv, .bootloader_load_sos = psp_v12_0_bootloader_load_sos, - .ring_init = psp_v12_0_ring_init, .ring_create = psp_v12_0_ring_create, .ring_stop = psp_v12_0_ring_stop, 
.ring_destroy = psp_v12_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 88f9b327183a..e6a26a7e5e5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -46,6 +46,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -102,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): + case IP_VERSION(13, 0, 11): err = psp_init_toc_microcode(psp, chip_name); if (err) return err; @@ -268,32 +271,6 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) return ret; } -static int psp_v13_0_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct psp_ring *ring; - struct amdgpu_device *adev = psp->adev; - - ring = &psp->km_ring; - - ring->ring_type = ring_type; - - /* allocate 4k Page of Local Frame Buffer memory for ring */ - ring->ring_size = 0x1000; - ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); - if (ret) { - ring->ring_size = 0; - return ret; - } - - return 0; -} - static int psp_v13_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -729,7 +706,6 @@ static const struct psp_funcs psp_v13_0_funcs = { .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, - .ring_init = psp_v13_0_ring_init, .ring_create = psp_v13_0_ring_create, .ring_stop = psp_v13_0_ring_stop, .ring_destroy = psp_v13_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c index 321089dfa7db..9d4e24e518e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c @@ -199,32 +199,6 @@ static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp) return ret; } -static int psp_v13_0_4_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct psp_ring *ring; - struct amdgpu_device *adev = psp->adev; - - ring = &psp->km_ring; - - ring->ring_type = ring_type; - - /* allocate 4k Page of Local Frame Buffer memory for ring */ - ring->ring_size = 0x1000; - ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); - if (ret) { - ring->ring_size = 0; - return ret; - } - - return 0; -} - static int psp_v13_0_4_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -373,7 +347,6 @@ static const struct psp_funcs psp_v13_0_4_funcs = { .bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv, .bootloader_load_sos = psp_v13_0_4_bootloader_load_sos, - .ring_init = psp_v13_0_4_ring_init, .ring_create = psp_v13_0_4_ring_create, .ring_stop = psp_v13_0_4_ring_stop, .ring_destroy = psp_v13_0_4_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 01f3bcc62a6c..157147c6c94e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -160,32 +160,6 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) return ret; } -static int psp_v3_1_ring_init(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct psp_ring *ring; - struct amdgpu_device *adev = psp->adev; - - ring = &psp->km_ring; - - ring->ring_type = ring_type; - - /* allocate 4k Page of Local Frame Buffer memory for ring */ - ring->ring_size = 0x1000; - ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); - if (ret) { - ring->ring_size = 0; - return ret; - } - - return 0; -} - static void psp_v3_1_reroute_ih(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -401,7 +375,6 @@ static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, .bootloader_load_sos = psp_v3_1_bootloader_load_sos, - .ring_init = psp_v3_1_ring_init, .ring_create = psp_v3_1_ring_create, .ring_stop = psp_v3_1_ring_stop, .ring_destroy = psp_v3_1_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index da3beb0bf2fa..049c26a45d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -455,6 +455,9 @@ static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable) sdma_v6_0_rlc_stop(adev); } + if (amdgpu_sriov_vf(adev)) + return; + for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 
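/*
 * A minimal sketch of the SR-IOV guard added to sdma_v6_0_enable() below:
 * when running as a virtual function the guest bails out before touching
 * SDMA0_F32_CNTL, presumably because the halt/enable bits are programmed by
 * the host in that configuration.
 */
	if (!enable)
		sdma_v6_0_rlc_stop(adev);

	if (amdgpu_sriov_vf(adev))
		return;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
		/* the write-back of f32_cntl follows in the original function */
	}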
0 : 1); @@ -1523,6 +1526,7 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v6_0_ring_get_rptr, .get_wptr = sdma_v6_0_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 4d5e718540aa..abca8b529721 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -112,14 +112,12 @@ static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, static void si_dma_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; u32 rb_cntl; unsigned i; amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; /* dma0 */ rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); rb_cntl &= ~DMA_RB_ENABLE; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e3b2b6b4f1a6..7cd17dda32ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -103,10 +103,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; @@ -120,10 +120,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, @@ -138,10 +138,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, - 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 799925d22fc8..2357ff39323f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -162,6 +162,7 @@ * 2 - Bypass */ #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) +#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) #define PACKET3_COPY_DATA 0x40 #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_COND_WRITE 0x45 @@ -184,6 +185,7 @@ #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ #define EOP_TC_NC_ACTION_EN (1 << 19) #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index e08044008186..7d5fdf450d0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -43,6 +43,7 @@ #include "soc15.h" #include "soc15_common.h" #include "soc21.h" +#include "mxgpu_nv.h" static const struct amd_ip_funcs soc21_common_ip_funcs; @@ -61,7 +62,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode = static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -322,8 +323,10 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -584,10 +587,6 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB | AMD_PG_SUPPORT_MMHUB; - if (amdgpu_sriov_vf(adev)) { - adev->cg_flags = 0; - adev->pg_flags = 0; - } adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update break; case IP_VERSION(11, 0, 2): @@ -645,28 +644,63 @@ static int soc21_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; - if (amdgpu_sriov_vf(adev)) { - /* hypervisor control CG and PG enablement */ - adev->cg_flags = 0; - adev->pg_flags = 0; - } adev->external_rev_id = adev->rev_id + 0x20; break; + case IP_VERSION(11, 0, 4): + adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + 
AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0x1; + break; + default: /* FIXME: not supported yet */ return -EINVAL; } + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev); + xgpu_nv_mailbox_set_irq_funcs(adev); + } + return 0; } static int soc21_common_late_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_get_irq(adev); + return 0; } static int soc21_common_sw_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_add_irq_id(adev); + return 0; } @@ -704,6 +738,9 @@ static int soc21_common_hw_fini(void *handle) /* disable the doorbell aperture */ soc21_enable_doorbell_aperture(adev, false); + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_put_irq(adev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 5d5d031c9e7d..72fd963f178b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -187,9 +187,9 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, } } -static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst) +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) { uint32_t channel_index; uint64_t soc_pa, retired_page, column; @@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = { .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, - .convert_ras_error_address = umc_v6_7_convert_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index fe41ed2f5945..105245d5b6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -71,5 +71,7 @@ extern const uint32_t umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; extern const uint32_t umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; - +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 91235df54e22..b7da4528cf0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -46,6 +46,16 @@ const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { }; const uint32_t + umc_v8_10_channel_idx_tbl_ext0[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM] = { + {{1, 5}, {7, 3}}, + {{14, 15}, {13, 12}}, + {{10, 11}, {9, 8}}, + {{6, 2}, {0, 4}} + }; + +const uint32_t umc_v8_10_channel_idx_tbl[] [UMC_V8_10_UMC_INSTANCE_NUM] 
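/*
 * A minimal sketch of the SR-IOV plumbing soc21.c gains in this change: the
 * per-ASIC "zero out cg/pg flags" blocks are replaced by a single
 * amdgpu_virt_init_setting() call in early init, and the xgpu_nv mailbox
 * interrupt is wired up and torn down across the IP-block lifecycle.
 */
	/* early_init: virtualization defaults plus mailbox IRQ callbacks */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_init_setting(adev);
		xgpu_nv_mailbox_set_irq_funcs(adev);
	}

	/* the matching hooks in the other stages */
	if (amdgpu_sriov_vf(adev))
		xgpu_nv_mailbox_add_irq_id(adev);	/* sw_init */
	if (amdgpu_sriov_vf(adev))
		xgpu_nv_mailbox_get_irq(adev);		/* late_init */
	if (amdgpu_sriov_vf(adev))
		xgpu_nv_mailbox_put_irq(adev);		/* hw_fini */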
[UMC_V8_10_CHANNEL_INSTANCE_NUM] = { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h index 849ede88e111..25eaf4af5fcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -66,5 +66,9 @@ extern const uint32_t [UMC_V8_10_UMC_INSTANCE_NUM] [UMC_V8_10_CHANNEL_INSTANCE_NUM]; +extern const uint32_t + umc_v8_10_channel_idx_tbl_ext0[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM]; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 375c440957dc..5fe872f4bea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -216,8 +216,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; @@ -280,8 +280,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e668b3baa8c6..e407be6cb63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -213,7 +213,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) * * Open up a stream for HW test */ -static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, +static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle, struct amdgpu_bo *bo, struct dma_fence **fence) { @@ -224,8 +224,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; @@ -276,7 +276,7 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, +static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle, struct amdgpu_bo *bo, struct dma_fence **fence) { @@ -287,8 +287,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 8a7006d62a87..ec87b00f2e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -770,6 +770,33 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) } } +static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx, + bool indirect) +{ + uint32_t tmp; + + if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0)) + return; + + tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | + 
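/*
 * A minimal sketch of the API change the uvd_v6_0/uvd_v7_0 hunks above adapt
 * to: amdgpu_job_alloc_with_ib() now takes two extra arguments ahead of the
 * IB size.  For these direct-submission ring tests both are passed as NULL;
 * the meaning of the new parameters is not visible in this hunk (elsewhere
 * in the series they appear to carry the submitting entity and owner).
 */
	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;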
VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL), + tmp, 0, indirect); + + tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN), + tmp, 0, indirect); + + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN), + tmp, 0, indirect); +} + static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; @@ -849,6 +876,8 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); + vcn_v2_6_enable_ras(adev, inst_idx, indirect); + /* unblock VCPU register access */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); @@ -2002,16 +2031,5 @@ static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev) break; } - if (adev->vcn.ras) { - amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); - - strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); - adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; - adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; - adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->vcn.ras->ras_block.ras_late_init) - adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; - } + amdgpu_vcn_set_ras_funcs(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index dcc49b01bd59..1e2b22299975 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -31,6 +31,7 @@ #include "soc15_hw_ip.h" #include "vcn_v2_0.h" #include "mmsch_v4_0.h" +#include "vcn_v4_0.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -64,6 +65,7 @@ static int vcn_v4_0_set_powergating_state(void *handle, static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring); +static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev); /** * vcn_v4_0_early_init - set function pointers @@ -84,6 +86,7 @@ static int vcn_v4_0_early_init(void *handle) vcn_v4_0_set_unified_ring_funcs(adev); vcn_v4_0_set_irq_funcs(adev); + vcn_v4_0_set_ras_funcs(adev); return 0; } @@ -125,6 +128,12 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + if (r) + return r; + ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) @@ -289,6 +298,7 @@ static int vcn_v4_0_hw_fini(void *handle) } } + amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); } return 0; @@ -852,6 +862,28 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst) return; } +static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx, + bool indirect) +{ + uint32_t tmp; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + return; + + tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK 
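/*
 * A minimal sketch of the RAS-enable helper pattern shared by
 * vcn_v2_6_enable_ras() and vcn_v4_0_enable_ras(): while the block is being
 * brought up in DPG mode, the poison/stall controls are programmed through
 * the indirect WREG32_SOC15_DPG_MODE() path.  The v4.0 variant gates on
 * amdgpu_ras_is_supported() as shown here, while the v2.6 variant checks the
 * UVD IP version instead.
 */
static void vcn_enable_ras_sketch(struct amdgpu_device *adev, int inst_idx,
				  bool indirect)
{
	uint32_t tmp;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
		return;

	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx,
			      SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
			      tmp, 0, indirect);

	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx,
			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
			      tmp, 0, indirect);
}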
| + VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), + tmp, 0, indirect); + + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), + tmp, 0, indirect); +} + /** * vcn_v4_0_start_dpg_mode - VCN start with dpg mode * @@ -940,6 +972,8 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); + vcn_v4_0_enable_ras(adev, inst_idx, indirect); + /* enable master interrupt */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_MASTINT_EN), @@ -1932,6 +1966,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; + case VCN_4_0__SRCID_UVD_POISON: + amdgpu_vcn_process_poison_irq(adev, source, entry); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -1994,3 +2031,60 @@ const struct amdgpu_ip_block_version vcn_v4_0_ip_block = .rev = 0, .funcs = &vcn_v4_0_ip_funcs, }; + +static uint32_t vcn_v4_0_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_VCN_V4_0_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v4_0_query_ras_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V4_0_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v4_0_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = { + .query_poison_status = vcn_v4_0_query_ras_poison_status, +}; + +static struct amdgpu_vcn_ras vcn_v4_0_ras = { + .ras_block = { + .hw_ops = &vcn_v4_0_ras_hw_ops, + }, +}; + +static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[VCN_HWIP][0]) { + case IP_VERSION(4, 0, 0): + adev->vcn.ras = &vcn_v4_0_ras; + break; + default: + break; + } + + amdgpu_vcn_set_ras_funcs(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h index 7c5c9d91bb52..7d3d11f40f27 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h @@ -24,6 +24,12 @@ #ifndef __VCN_V4_0_H__ #define __VCN_V4_0_H__ +enum amdgpu_vcn_v4_0_sub_block { + AMDGPU_VCN_V4_0_VCPU_VCODEC = 0, + + AMDGPU_VCN_V4_0_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version vcn_v4_0_ip_block; #endif /* __VCN_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 59dfca093155..1706081d054d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -267,7 +267,7 @@ static void vega20_ih_reroute_ih(struct amdgpu_device *adev) /* vega20 ih reroute will go through psp this * function is used for newer asics starting arcturus 
*/ - if (adev->asic_type >= CHIP_ARCTURUS) { + if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) { /* Reroute to IH ring 1 for VMC */ WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); @@ -308,7 +308,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) adev->nbio.funcs->ih_control(adev); - if (adev->asic_type == CHIP_ARCTURUS && + if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 2, 1)) && adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); if (adev->irq.ih.use_bus_addr) { @@ -321,7 +321,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) /* psp firmware won't program IH_CHICKEN for aldebaran * driver needs to program it properly according to * MC_SPACE type in IH_RB_CNTL */ - if (adev->asic_type == CHIP_ALDEBARAN) { + if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); if (adev->irq.ih.use_bus_addr) { ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index f6ffd7c96ff9..12ef782eb478 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -2111,6 +2111,8 @@ void vi_set_virt_ops(struct amdgpu_device *adev) int vi_set_ip_blocks(struct amdgpu_device *adev) { + amdgpu_device_set_sriov_virtual_display(adev); + switch (adev->asic_type) { case CHIP_TOPAZ: /* topaz has no DCE, UVD, VCE */ @@ -2130,7 +2132,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) @@ -2150,7 +2152,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); - if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 8bfdfd062ff6..3251f4783ba1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_gpu_processor_id( return current_id; } -/* Static table to describe GPU Cache information */ -struct kfd_gpu_cache_info { - uint32_t cache_size; - uint32_t cache_level; - uint32_t flags; - /* Indicates how many Compute Units share this cache - * within a SA. 
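/*
 * A minimal sketch of the check conversion done in vega20_ih.c above: chip
 * enum comparisons (CHIP_ARCTURUS / CHIP_ALDEBARAN) are replaced with tests
 * on the discovered OSSSYS IP version, which is what the rest of the driver
 * keys on.
 */
	/* old: if (adev->asic_type >= CHIP_ARCTURUS) */
	if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) {
		/* reroute VMC to IH ring 1 through the PSP-capable path */
	}

	/* old: if (adev->asic_type == CHIP_ALDEBARAN) */
	if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) {
		/* program IH_CHICKEN_ALDEBARAN according to the MC_SPACE type */
	}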
Value = 1 indicates the cache is not shared - */ - uint32_t num_cu_shared; -}; static struct kfd_gpu_cache_info kaveri_cache_info[] = { { @@ -891,6 +881,54 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { }, }; +static struct kfd_gpu_cache_info dummy_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 6, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 2048, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 6, + }, +}; + static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { @@ -1071,8 +1109,12 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, props->cachelines_per_tag = cache->lines_per_tag; props->cache_assoc = cache->associativity; props->cache_latency = cache->cache_latency; + memcpy(props->sibling_map, cache->sibling_map, - sizeof(props->sibling_map)); + CRAT_SIBLINGMAP_SIZE); + + /* set the sibling_map_size as 32 for CRAT from ACPI */ + props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) props->cache_type |= HSA_CACHE_TYPE_DATA; @@ -1083,7 +1125,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) props->cache_type |= HSA_CACHE_TYPE_HSACU; - dev->cache_count++; dev->node_props.caches_count++; list_add_tail(&props->list, &dev->cache_props); @@ -1291,125 +1332,6 @@ err: return ret; } -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ -static int fill_in_l1_pcache(struct crat_subtype_cache *pcache, - struct kfd_gpu_cache_info *pcache_info, - struct kfd_cu_info *cu_info, - int mem_available, - int cu_bitmask, - int cache_type, unsigned int cu_processor_id, - int cu_block) -{ - unsigned int cu_sibling_map_mask; - int first_active_cu; - - /* First check if enough memory is available */ - if (sizeof(struct crat_subtype_cache) > mem_available) - return -ENOMEM; - - cu_sibling_map_mask = cu_bitmask; - cu_sibling_map_mask >>= cu_block; - cu_sibling_map_mask &= - ((1 << pcache_info[cache_type].num_cu_shared) - 1); - first_active_cu = ffs(cu_sibling_map_mask); - - /* CU could be inactive. In case of shared cache find the first active - * CU. and incase of non-shared cache check if the CU is inactive. 
If - * inactive active skip it - */ - if (first_active_cu) { - memset(pcache, 0, sizeof(struct crat_subtype_cache)); - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; - pcache->length = sizeof(struct crat_subtype_cache); - pcache->flags = pcache_info[cache_type].flags; - pcache->processor_id_low = cu_processor_id - + (first_active_cu - 1); - pcache->cache_level = pcache_info[cache_type].cache_level; - pcache->cache_size = pcache_info[cache_type].cache_size; - - /* Sibling map is w.r.t processor_id_low, so shift out - * inactive CU - */ - cu_sibling_map_mask = - cu_sibling_map_mask >> (first_active_cu - 1); - - pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[1] = - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[2] = - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[3] = - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - return 0; - } - return 1; -} - -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ -static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache, - struct kfd_gpu_cache_info *pcache_info, - struct kfd_cu_info *cu_info, - int mem_available, - int cache_type, unsigned int cu_processor_id) -{ - unsigned int cu_sibling_map_mask; - int first_active_cu; - int i, j, k; - - /* First check if enough memory is available */ - if (sizeof(struct crat_subtype_cache) > mem_available) - return -ENOMEM; - - cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; - cu_sibling_map_mask &= - ((1 << pcache_info[cache_type].num_cu_shared) - 1); - first_active_cu = ffs(cu_sibling_map_mask); - - /* CU could be inactive. In case of shared cache find the first active - * CU. and incase of non-shared cache check if the CU is inactive. If - * inactive active skip it - */ - if (first_active_cu) { - memset(pcache, 0, sizeof(struct crat_subtype_cache)); - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; - pcache->length = sizeof(struct crat_subtype_cache); - pcache->flags = pcache_info[cache_type].flags; - pcache->processor_id_low = cu_processor_id - + (first_active_cu - 1); - pcache->cache_level = pcache_info[cache_type].cache_level; - pcache->cache_size = pcache_info[cache_type].cache_size; - - /* Sibling map is w.r.t processor_id_low, so shift out - * inactive CU - */ - cu_sibling_map_mask = - cu_sibling_map_mask >> (first_active_cu - 1); - k = 0; - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; - j++) { - pcache->sibling_map[k] = - (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[k+1] = - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[k+2] = - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[k+3] = - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - k += 4; - cu_sibling_map_mask = - cu_info->cu_bitmap[i % 4][j + i / 4]; - cu_sibling_map_mask &= ( - (1 << pcache_info[cache_type].num_cu_shared) - - 1); - } - } - return 0; - } - return 1; -} - -#define KFD_MAX_CACHE_TYPES 6 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, struct kfd_gpu_cache_info *pcache_info) @@ -1483,228 +1405,134 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, return i; } -/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info - * tables - * - * @kdev - [IN] GPU device - * @gpu_processor_id - [IN] GPU processor ID to which these caches - * associate - * @available_size - [IN] Amount of memory available in pcache - * @cu_info - [IN] Compute Unit info obtained 
from KGD - * @pcache - [OUT] memory into which cache data is to be filled in. - * @size_filled - [OUT] amount of data used up in pcache. - * @num_of_entries - [OUT] number of caches added - */ -static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, - int gpu_processor_id, - int available_size, - struct kfd_cu_info *cu_info, - struct crat_subtype_cache *pcache, - int *size_filled, - int *num_of_entries) +int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) { - struct kfd_gpu_cache_info *pcache_info; - struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; int num_of_cache_types = 0; - int i, j, k; - int ct = 0; - int mem_available = available_size; - unsigned int cu_processor_id; - int ret; - unsigned int num_cu_shared; switch (kdev->adev->asic_type) { case CHIP_KAVERI: - pcache_info = kaveri_cache_info; + *pcache_info = kaveri_cache_info; num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); break; case CHIP_HAWAII: - pcache_info = hawaii_cache_info; + *pcache_info = hawaii_cache_info; num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); break; case CHIP_CARRIZO: - pcache_info = carrizo_cache_info; + *pcache_info = carrizo_cache_info; num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); break; case CHIP_TONGA: - pcache_info = tonga_cache_info; + *pcache_info = tonga_cache_info; num_of_cache_types = ARRAY_SIZE(tonga_cache_info); break; case CHIP_FIJI: - pcache_info = fiji_cache_info; + *pcache_info = fiji_cache_info; num_of_cache_types = ARRAY_SIZE(fiji_cache_info); break; case CHIP_POLARIS10: - pcache_info = polaris10_cache_info; + *pcache_info = polaris10_cache_info; num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); break; case CHIP_POLARIS11: - pcache_info = polaris11_cache_info; + *pcache_info = polaris11_cache_info; num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; case CHIP_POLARIS12: - pcache_info = polaris12_cache_info; + *pcache_info = polaris12_cache_info; num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); break; case CHIP_VEGAM: - pcache_info = vegam_cache_info; + *pcache_info = vegam_cache_info; num_of_cache_types = ARRAY_SIZE(vegam_cache_info); break; default: switch (KFD_GC_VERSION(kdev)) { case IP_VERSION(9, 0, 1): - pcache_info = vega10_cache_info; + *pcache_info = vega10_cache_info; num_of_cache_types = ARRAY_SIZE(vega10_cache_info); break; case IP_VERSION(9, 2, 1): - pcache_info = vega12_cache_info; + *pcache_info = vega12_cache_info; num_of_cache_types = ARRAY_SIZE(vega12_cache_info); break; case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): - pcache_info = vega20_cache_info; + *pcache_info = vega20_cache_info; num_of_cache_types = ARRAY_SIZE(vega20_cache_info); break; case IP_VERSION(9, 4, 2): - pcache_info = aldebaran_cache_info; + *pcache_info = aldebaran_cache_info; num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); break; case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): - pcache_info = raven_cache_info; + *pcache_info = raven_cache_info; num_of_cache_types = ARRAY_SIZE(raven_cache_info); break; case IP_VERSION(9, 3, 0): - pcache_info = renoir_cache_info; + *pcache_info = renoir_cache_info; num_of_cache_types = ARRAY_SIZE(renoir_cache_info); break; case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): case IP_VERSION(10, 1, 4): - pcache_info = navi10_cache_info; + *pcache_info = navi10_cache_info; num_of_cache_types = ARRAY_SIZE(navi10_cache_info); break; case IP_VERSION(10, 1, 1): - pcache_info = navi14_cache_info; + *pcache_info = navi14_cache_info; num_of_cache_types = 
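/*
 * A minimal sketch (the caller side is an assumption, not shown in this
 * hunk) of how the new kfd_get_gpu_cache_info() is meant to be consumed:
 * instead of filling CRAT cache subtypes in place, it returns the number of
 * entries and hands back a pointer to the per-ASIC kfd_gpu_cache_info table,
 * leaving the actual cache-topology reporting to the caller.
 */
	struct kfd_gpu_cache_info *pcache_info = NULL;
	int num_of_cache_types, ct;

	num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
	for (ct = 0; ct < num_of_cache_types; ct++)
		pr_debug("cache level %u, size %uK, shared by %u CUs\n",
			 pcache_info[ct].cache_level,
			 pcache_info[ct].cache_size,
			 pcache_info[ct].num_cu_shared);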
ARRAY_SIZE(navi14_cache_info); break; case IP_VERSION(10, 3, 0): - pcache_info = sienna_cichlid_cache_info; + *pcache_info = sienna_cichlid_cache_info; num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); break; case IP_VERSION(10, 3, 2): - pcache_info = navy_flounder_cache_info; + *pcache_info = navy_flounder_cache_info; num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); break; case IP_VERSION(10, 3, 4): - pcache_info = dimgrey_cavefish_cache_info; + *pcache_info = dimgrey_cavefish_cache_info; num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); break; case IP_VERSION(10, 3, 1): - pcache_info = vangogh_cache_info; + *pcache_info = vangogh_cache_info; num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); break; case IP_VERSION(10, 3, 5): - pcache_info = beige_goby_cache_info; + *pcache_info = beige_goby_cache_info; num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); break; case IP_VERSION(10, 3, 3): - pcache_info = yellow_carp_cache_info; + *pcache_info = yellow_carp_cache_info; num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); break; case IP_VERSION(10, 3, 6): - pcache_info = gc_10_3_6_cache_info; + *pcache_info = gc_10_3_6_cache_info; num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info); break; case IP_VERSION(10, 3, 7): - pcache_info = gfx1037_cache_info; + *pcache_info = gfx1037_cache_info; num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info); break; case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - pcache_info = cache_info; + case IP_VERSION(11, 0, 4): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); break; default: - return -EINVAL; - } - } - - *size_filled = 0; - *num_of_entries = 0; - - /* For each type of cache listed in the kfd_gpu_cache_info table, - * go through all available Compute Units. - * The [i,j,k] loop will - * if kfd_gpu_cache_info.num_cu_shared = 1 - * will parse through all available CU - * If (kfd_gpu_cache_info.num_cu_shared != 1) - * then it will consider only one CU from - * the shared unit - */ - - for (ct = 0; ct < num_of_cache_types; ct++) { - cu_processor_id = gpu_processor_id; - if (pcache_info[ct].cache_level == 1) { - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { - for (k = 0; k < cu_info->num_cu_per_sh; - k += pcache_info[ct].num_cu_shared) { - ret = fill_in_l1_pcache(pcache, - pcache_info, - cu_info, - mem_available, - cu_info->cu_bitmap[i % 4][j + i / 4], - ct, - cu_processor_id, - k); - - if (ret < 0) + *pcache_info = dummy_cache_info; + num_of_cache_types = ARRAY_SIZE(dummy_cache_info); + pr_warn("dummy cache info is used temporarily and real cache info need update later.\n"); break; - - if (!ret) { - pcache++; - (*num_of_entries)++; - mem_available -= sizeof(*pcache); - (*size_filled) += sizeof(*pcache); - } - - /* Move to next CU block */ - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= - cu_info->num_cu_per_sh) ? 
- pcache_info[ct].num_cu_shared : - (cu_info->num_cu_per_sh - k); - cu_processor_id += num_cu_shared; } - } - } - } else { - ret = fill_in_l2_l3_pcache(pcache, - pcache_info, - cu_info, - mem_available, - ct, - cu_processor_id); - - if (ret < 0) - break; - - if (!ret) { - pcache++; - (*num_of_entries)++; - mem_available -= sizeof(*pcache); - (*size_filled) += sizeof(*pcache); - } - } } - - pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); - - return 0; + return num_of_cache_types; } static bool kfd_ignore_crat(void) @@ -2063,8 +1891,8 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) struct acpi_table_header *table_header = NULL; struct acpi_subtable_header *sub_header = NULL; unsigned long table_end, subtable_len; - u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 | - pci_dev_id(kdev->pdev); + u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 | + pci_dev_id(kdev->adev->pdev); u32 bdf; acpi_status status; struct acpi_srat_cpu_affinity *cpu; @@ -2139,7 +1967,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) numa_node = 0; if (numa_node != NUMA_NO_NODE) - set_dev_node(&kdev->pdev->dev, numa_node); + set_dev_node(&kdev->adev->pdev->dev, numa_node); } #endif @@ -2200,14 +2028,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->proximity_domain_from = proximity_domain; #ifdef CONFIG_ACPI_NUMA - if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) + if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) kfd_find_numa_node_in_srat(kdev); #endif #ifdef CONFIG_NUMA - if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) + if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) sub_type_hdr->proximity_domain_to = 0; else - sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node; + sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node; #else sub_type_hdr->proximity_domain_to = 0; #endif @@ -2263,8 +2091,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, struct kfd_cu_info cu_info; int avail_size = *size; uint32_t total_num_of_cu; - int num_of_cache_entries = 0; - int cache_mem_filled = 0; uint32_t nid = 0; int ret = 0; @@ -2365,31 +2191,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, crat_table->length += sizeof(struct crat_subtype_memory); crat_table->total_entries++; - /* TODO: Fill in cache information. This information is NOT readily - * available in KGD - */ - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length); - ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, - avail_size, - &cu_info, - (struct crat_subtype_cache *)sub_type_hdr, - &cache_mem_filled, - &num_of_cache_entries); - - if (ret < 0) - return ret; - - crat_table->length += cache_mem_filled; - crat_table->total_entries += num_of_cache_entries; - avail_size -= cache_mem_filled; - /* Fill in Subtype: IO_LINKS * Only direct links are added here which is Link from GPU to * its NUMA node. Indirect links are added by userspace. 
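With the hunk above, the virtual CRAT image no longer carries cache subtypes at all; the reworked kfd_get_gpu_cache_info() simply hands its caller the per-ASIC static table (or, for the GFX11 parts, fills the buffer the caller already points at) and returns the entry count. A small stand-alone C sketch of that double-pointer contract follows; it is illustration only, not code from the patch, and the cache sizes are invented:

#include <stdio.h>

struct cache_desc { unsigned int size_kib, level; };

/* Stand-in for a per-ASIC static table such as kaveri_cache_info. */
static struct cache_desc static_table[] = { { 16, 1 }, { 4096, 2 } };

/*
 * The caller points *info at its own scratch buffer first; most ASICs get
 * repointed at a static table, the newest ones get the scratch buffer
 * filled at runtime instead.  The return value is always the entry count.
 */
static int get_cache_info(int have_static_table, struct cache_desc **info)
{
	if (have_static_table) {
		*info = static_table;
		return sizeof(static_table) / sizeof(static_table[0]);
	}
	(*info)[0] = (struct cache_desc){ 32, 1 };	/* derived at runtime */
	return 1;
}

int main(void)
{
	struct cache_desc scratch[8], *info = scratch;
	int i, n = get_cache_info(1, &info);

	for (i = 0; i < n; i++)
		printf("L%u cache: %u KiB\n", info[i].level, info[i].size_kib);
	return 0;
}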
*/ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - cache_mem_filled); + sub_type_hdr->length); ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 482ba84a728d..8d1e8ba58dee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -29,11 +29,10 @@ #pragma pack(1) /* - * 4CC signature values for the CRAT and CDIT ACPI tables + * 4CC signature value for the CRAT ACPI table */ #define CRAT_SIGNATURE "CRAT" -#define CDIT_SIGNATURE "CDIT" /* * Component Resource Association Table (CRAT) @@ -292,31 +291,22 @@ struct crat_subtype_generic { uint32_t flags; }; -/* - * Component Locality Distance Information Table (CDIT) - */ -#define CDIT_OEMID_LENGTH 6 -#define CDIT_OEMTABLEID_LENGTH 8 - -struct cdit_header { - uint32_t signature; - uint32_t length; - uint8_t revision; - uint8_t checksum; - uint8_t oem_id[CDIT_OEMID_LENGTH]; - uint8_t oem_table_id[CDIT_OEMTABLEID_LENGTH]; - uint32_t oem_revision; - uint32_t creator_id; - uint32_t creator_revision; - uint32_t total_entries; - uint16_t num_domains; - uint8_t entry[1]; -}; - #pragma pack() struct kfd_dev; +/* Static table to describe GPU Cache information */ +struct kfd_gpu_cache_info { + uint32_t cache_size; + uint32_t cache_level; + uint32_t flags; + /* Indicates how many Compute Units share this cache + * within a SA. Value = 1 indicates the cache is not shared + */ + uint32_t num_cu_shared; +}; +int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info); + int kfd_create_crat_image_acpi(void **crat_image, size_t *size); void kfd_destroy_crat_image(void *crat_image); int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 65a1d4f9004b..b8936340742b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -153,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; break; default: @@ -227,7 +228,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) { struct kfd_dev *kfd = NULL; const struct kfd2kgd_calls *f2g = NULL; - struct pci_dev *pdev = adev->pdev; uint32_t gfx_target_version = 0; switch (adev->asic_type) { @@ -395,6 +395,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &gfx_v11_kfd2kgd; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): gfx_target_version = 110003; f2g = &gfx_v11_kfd2kgd; break; @@ -429,7 +430,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) kfd->adev = adev; kfd_device_info_init(kfd, vf, gfx_target_version); - kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; atomic_set(&kfd->compute_profile, 0); @@ -497,7 +497,10 @@ static int kfd_gws_init(struct kfd_dev *kfd) (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) && kfd->mec2_fw_version >= 0x30) || (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) - && kfd->mec2_fw_version >= 0x28)))) + && kfd->mec2_fw_version >= 0x28) || + (KFD_GC_VERSION(kfd) >= IP_VERSION(10, 3, 0) + && KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0) + && kfd->mec2_fw_version >= 0x6b)))) ret = 
amdgpu_amdkfd_alloc_gws(kfd->adev, kfd->adev->gds.gws_size, &kfd->gws); @@ -511,12 +514,10 @@ static void kfd_smi_init(struct kfd_dev *dev) } bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size, map_process_packet_size; - kfd->ddev = ddev; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, @@ -541,7 +542,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) { dev_info(kfd_device, "skipped device %x:%x, PCI rejects atomics %d<%d\n", - kfd->pdev->vendor, kfd->pdev->device, + kfd->adev->pdev->vendor, kfd->adev->pdev->device, kfd->mec_fw_version, kfd->device_info.no_atomic_fw_version); return false; @@ -650,8 +651,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_smi_init(kfd); kfd->init_complete = true; - dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, - kfd->pdev->device); + dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, + kfd->adev->pdev->device); pr_debug("Starting kfd with the following scheduling policy %d\n", kfd->dqm->sched_policy); @@ -676,7 +677,7 @@ alloc_gtt_mem_failure: amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", - kfd->pdev->vendor, kfd->pdev->device); + kfd->adev->pdev->vendor, kfd->adev->pdev->device); out: return kfd->init_complete; } @@ -789,7 +790,7 @@ int kgd2kfd_resume_iommu(struct kfd_dev *kfd) if (err) dev_err(kfd_device, "Failed to resume IOMMU for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); + kfd->adev->pdev->vendor, kfd->adev->pdev->device); return err; } @@ -801,7 +802,7 @@ static int kfd_resume(struct kfd_dev *kfd) if (err) dev_err(kfd_device, "Error starting queue manager for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); + kfd->adev->pdev->vendor, kfd->adev->pdev->device); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index fbd0afe4da42..ec1bf611624e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -49,7 +49,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd) return -ENODEV; iommu_info.flags = 0; - err = amd_iommu_device_info(kfd->pdev, &iommu_info); + err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info); if (err) return err; @@ -71,7 +71,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) return 0; iommu_info.flags = 0; - err = amd_iommu_device_info(kfd->pdev, &iommu_info); + err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info); if (err < 0) { dev_err(kfd_device, "error getting iommu info. 
is the iommu enabled?\n"); @@ -121,7 +121,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) return -EINVAL; } - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); + err = amd_iommu_bind_pasid(dev->adev->pdev, p->pasid, p->lead_thread); if (!err) pdd->bound = PDD_BOUND; @@ -139,7 +139,8 @@ void kfd_iommu_unbind_process(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) if (p->pdds[i]->bound == PDD_BOUND) - amd_iommu_unbind_pasid(p->pdds[i]->dev->pdev, p->pasid); + amd_iommu_unbind_pasid(p->pdds[i]->dev->adev->pdev, + p->pasid); } /* Callback for process shutdown invoked by the IOMMU driver */ @@ -222,7 +223,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd) continue; } - err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, + err = amd_iommu_bind_pasid(kfd->adev->pdev, p->pasid, p->lead_thread); if (err < 0) { pr_err("Unexpected pasid 0x%x binding failure\n", @@ -282,9 +283,9 @@ void kfd_iommu_suspend(struct kfd_dev *kfd) kfd_unbind_processes_from_device(kfd); - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL); + amd_iommu_free_device(kfd->adev->pdev); } /** kfd_iommu_resume - Restore IOMMU after resume @@ -302,20 +303,20 @@ int kfd_iommu_resume(struct kfd_dev *kfd) pasid_limit = kfd_get_pasid_limit(); - err = amd_iommu_init_device(kfd->pdev, pasid_limit); + err = amd_iommu_init_device(kfd->adev->pdev, pasid_limit); if (err) return -ENXIO; - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, + amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, iommu_invalid_ppr_cb); err = kfd_bind_processes_to_device(kfd); if (err) { - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL); + amd_iommu_free_device(kfd->adev->pdev); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 22b077ac9a19..10048ce16aea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -28,7 +28,6 @@ #include "amdgpu_sync.h" #include "amdgpu_object.h" #include "amdgpu_vm.h" -#include "amdgpu_mn.h" #include "amdgpu_res_cursor.h" #include "kfd_priv.h" #include "kfd_svm.h" @@ -65,8 +64,11 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = npages * 8; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_DELAYED, &job); + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_DELAYED, + &job); if (r) return r; @@ -89,18 +91,10 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, cpu_addr = &job->ibs[0].ptr[num_dw]; amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr); - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_free; - + fence = amdgpu_job_submit(job); dma_fence_put(fence); return r; - -error_free: - amdgpu_job_free(job); - return r; } /** @@ -529,8 +523,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc, for (addr = start; addr < end;) { unsigned long next; - vma = find_vma(mm, addr); - if (!vma || addr < vma->vm_start) + vma = vma_lookup(mm, addr); + if (!vma) break; next = min(vma->vm_end, end); @@ -798,8 +792,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, for (addr = start; addr < end;) { unsigned long next; - vma = find_vma(mm, addr); - if (!vma || addr < vma->vm_start) { + vma = vma_lookup(mm, addr); + if (!vma) { pr_debug("failed to find vma for prange %p\n", prange); r = -EFAULT; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h deleted file mode 100644 index f9cd28690151..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h +++ /dev/null @@ -1,291 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/* - * Copyright 2014-2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef KFD_PM4_HEADERS_DIQ_H_ -#define KFD_PM4_HEADERS_DIQ_H_ - -/*--------------------_INDIRECT_BUFFER-------------------- */ - -#ifndef _PM4__INDIRECT_BUFFER_DEFINED -#define _PM4__INDIRECT_BUFFER_DEFINED -enum _INDIRECT_BUFFER_cache_policy_enum { - cache_policy___indirect_buffer__lru = 0, - cache_policy___indirect_buffer__stream = 1, - cache_policy___indirect_buffer__bypass = 2 -}; - -enum { - IT_INDIRECT_BUFFER_PASID = 0x5C -}; - -struct pm4__indirect_buffer_pasid { - union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - unsigned int ordinal1; - }; - - union { - struct { - unsigned int reserved1:2; - unsigned int ib_base_lo:30; - } bitfields2; - unsigned int ordinal2; - }; - - union { - struct { - unsigned int ib_base_hi:16; - unsigned int reserved2:16; - } bitfields3; - unsigned int ordinal3; - }; - - union { - unsigned int control; - unsigned int ordinal4; - }; - - union { - struct { - unsigned int pasid:10; - unsigned int reserved4:22; - } bitfields5; - unsigned int ordinal5; - }; - -}; - -#endif - -/*--------------------_RELEASE_MEM-------------------- */ - -#ifndef _PM4__RELEASE_MEM_DEFINED -#define _PM4__RELEASE_MEM_DEFINED -enum _RELEASE_MEM_event_index_enum { - event_index___release_mem__end_of_pipe = 5, - event_index___release_mem__shader_done = 6 -}; - -enum _RELEASE_MEM_cache_policy_enum { - cache_policy___release_mem__lru = 0, - cache_policy___release_mem__stream = 1, - cache_policy___release_mem__bypass = 2 -}; - -enum _RELEASE_MEM_dst_sel_enum { - dst_sel___release_mem__memory_controller = 0, - dst_sel___release_mem__tc_l2 = 1, - dst_sel___release_mem__queue_write_pointer_register = 2, - dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3 -}; - -enum _RELEASE_MEM_int_sel_enum { - int_sel___release_mem__none = 0, - int_sel___release_mem__send_interrupt_only = 1, - int_sel___release_mem__send_interrupt_after_write_confirm = 2, - int_sel___release_mem__send_data_after_write_confirm = 3 -}; - -enum _RELEASE_MEM_data_sel_enum { - data_sel___release_mem__none = 0, - data_sel___release_mem__send_32_bit_low = 1, - data_sel___release_mem__send_64_bit_data = 2, - data_sel___release_mem__send_gpu_clock_counter = 3, - data_sel___release_mem__send_cp_perfcounter_hi_lo = 4, - data_sel___release_mem__store_gds_data_to_memory = 5 -}; - -struct pm4__release_mem { - union { - union PM4_MES_TYPE_3_HEADER header; /*header */ - unsigned int ordinal1; - }; - - union { - struct { - unsigned int event_type:6; - unsigned int reserved1:2; - enum _RELEASE_MEM_event_index_enum event_index:4; - unsigned int tcl1_vol_action_ena:1; - unsigned int tc_vol_action_ena:1; - unsigned int reserved2:1; - unsigned int tc_wb_action_ena:1; - unsigned int tcl1_action_ena:1; - unsigned int tc_action_ena:1; - unsigned int reserved3:6; - unsigned int atc:1; - enum _RELEASE_MEM_cache_policy_enum cache_policy:2; - unsigned int reserved4:5; - } bitfields2; - unsigned int ordinal2; - }; - - union { - struct { - unsigned int reserved5:16; - enum _RELEASE_MEM_dst_sel_enum dst_sel:2; - unsigned int reserved6:6; - enum _RELEASE_MEM_int_sel_enum int_sel:3; - unsigned int reserved7:2; - enum _RELEASE_MEM_data_sel_enum data_sel:3; - } bitfields3; - unsigned int ordinal3; - }; - - union { - struct { - unsigned int reserved8:2; - unsigned int address_lo_32b:30; - } bitfields4; - struct { - unsigned int reserved9:3; - unsigned int address_lo_64b:29; - } bitfields5; - unsigned int ordinal4; - }; - - unsigned int address_hi; - - unsigned int data_lo; - - unsigned int data_hi; - -}; -#endif - - 
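The packet definitions removed in this file all follow the same PM4 idiom: each dword of a packet is a union of named bitfields and a raw "ordinal", so command-writing code can fill individual fields by name and then emit the dwords verbatim. A tiny stand-alone illustration of that idiom is below; the field names and widths are invented for the example and the exact bit layout is compiler/ABI dependent:

#include <stdio.h>

/* One packet dword as a union of bitfields and the raw 32-bit ordinal. */
union example_dword {
	struct {
		unsigned int event_type:6;
		unsigned int reserved:24;
		unsigned int cache_policy:2;
	} bitfields;
	unsigned int ordinal;
};

int main(void)
{
	union example_dword dw = { .ordinal = 0 };

	dw.bitfields.event_type = 5;	/* fill fields by name ... */
	dw.bitfields.cache_policy = 1;
	printf("raw dword: 0x%08x\n", dw.ordinal);	/* ... emit the raw word */
	return 0;
}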
-/*--------------------_SET_CONFIG_REG-------------------- */ - -#ifndef _PM4__SET_CONFIG_REG_DEFINED -#define _PM4__SET_CONFIG_REG_DEFINED - -struct pm4__set_config_reg { - union { - union PM4_MES_TYPE_3_HEADER header; /*header */ - unsigned int ordinal1; - }; - - union { - struct { - unsigned int reg_offset:16; - unsigned int reserved1:7; - unsigned int vmid_shift:5; - unsigned int insert_vmid:1; - unsigned int reserved2:3; - } bitfields2; - unsigned int ordinal2; - }; - - unsigned int reg_data[1]; /*1..N of these fields */ - -}; -#endif - -/*--------------------_WAIT_REG_MEM-------------------- */ - -#ifndef _PM4__WAIT_REG_MEM_DEFINED -#define _PM4__WAIT_REG_MEM_DEFINED -enum _WAIT_REG_MEM_function_enum { - function___wait_reg_mem__always_pass = 0, - function___wait_reg_mem__less_than_ref_value = 1, - function___wait_reg_mem__less_than_equal_to_the_ref_value = 2, - function___wait_reg_mem__equal_to_the_reference_value = 3, - function___wait_reg_mem__not_equal_reference_value = 4, - function___wait_reg_mem__greater_than_or_equal_reference_value = 5, - function___wait_reg_mem__greater_than_reference_value = 6, - function___wait_reg_mem__reserved = 7 -}; - -enum _WAIT_REG_MEM_mem_space_enum { - mem_space___wait_reg_mem__register_space = 0, - mem_space___wait_reg_mem__memory_space = 1 -}; - -enum _WAIT_REG_MEM_operation_enum { - operation___wait_reg_mem__wait_reg_mem = 0, - operation___wait_reg_mem__wr_wait_wr_reg = 1 -}; - -struct pm4__wait_reg_mem { - union { - union PM4_MES_TYPE_3_HEADER header; /*header */ - unsigned int ordinal1; - }; - - union { - struct { - enum _WAIT_REG_MEM_function_enum function:3; - unsigned int reserved1:1; - enum _WAIT_REG_MEM_mem_space_enum mem_space:2; - enum _WAIT_REG_MEM_operation_enum operation:2; - unsigned int reserved2:24; - } bitfields2; - unsigned int ordinal2; - }; - - union { - struct { - unsigned int reserved3:2; - unsigned int memory_poll_addr_lo:30; - } bitfields3; - struct { - unsigned int register_poll_addr:16; - unsigned int reserved4:16; - } bitfields4; - struct { - unsigned int register_write_addr:16; - unsigned int reserved5:16; - } bitfields5; - unsigned int ordinal3; - }; - - union { - struct { - unsigned int poll_address_hi:16; - unsigned int reserved6:16; - } bitfields6; - struct { - unsigned int register_write_addr:16; - unsigned int reserved7:16; - } bitfields7; - unsigned int ordinal4; - }; - - unsigned int reference; - - unsigned int mask; - - union { - struct { - unsigned int poll_interval:16; - unsigned int reserved8:16; - } bitfields8; - unsigned int ordinal7; - }; - -}; -#endif - - -#endif /* KFD_PM4_HEADERS_DIQ_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bf610e3b683b..552c3ac85a13 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -254,8 +254,6 @@ struct kfd_dev { struct amdgpu_device *adev; struct kfd_device_info device_info; - struct pci_dev *pdev; - struct drm_device *ddev; unsigned int id; /* topology stub index */ @@ -1365,7 +1363,7 @@ void kfd_dec_compute_active(struct kfd_dev *dev); static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) { #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) - struct drm_device *ddev = kfd->ddev; + struct drm_device *ddev = adev_to_drm(kfd->adev); return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 951b63677248..a26257171ab7 
100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1050,8 +1050,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) * for auto suspend */ if (pdd->runtime_inuse) { - pm_runtime_mark_last_busy(pdd->dev->ddev->dev); - pm_runtime_put_autosuspend(pdd->dev->ddev->dev); + pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev); pdd->runtime_inuse = false; } @@ -1633,9 +1633,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, * pdd is destroyed. */ if (!pdd->runtime_inuse) { - err = pm_runtime_get_sync(dev->ddev->dev); + err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev); if (err < 0) { - pm_runtime_put_autosuspend(dev->ddev->dev); + pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev); return ERR_PTR(err); } } @@ -1655,8 +1655,8 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, out: /* balance runpm reference count and exit with error */ if (!pdd->runtime_inuse) { - pm_runtime_mark_last_busy(dev->ddev->dev); - pm_runtime_put_autosuspend(dev->ddev->dev); + pm_runtime_mark_last_busy(adev_to_drm(dev->adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev); } return ERR_PTR(err); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 64fdf63093a0..814f99888ab1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -26,7 +26,7 @@ #include "amdgpu_sync.h" #include "amdgpu_object.h" #include "amdgpu_vm.h" -#include "amdgpu_mn.h" +#include "amdgpu_hmm.h" #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "kfd_priv.h" @@ -259,7 +259,7 @@ void svm_range_free_dma_mappings(struct svm_range *prange) pr_debug("failed to find device idx %d\n", gpuidx); continue; } - dev = &pdd->dev->pdev->dev; + dev = &pdd->dev->adev->pdev->dev; svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); kvfree(dma_addr); prange->dma_addr[gpuidx] = NULL; @@ -1586,8 +1586,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm, unsigned long npages; bool readonly; - vma = find_vma(mm, addr); - if (!vma || addr < vma->vm_start) { + vma = vma_lookup(mm, addr); + if (!vma) { r = -EFAULT; goto unreserve_out; } @@ -1596,9 +1596,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm, next = min(vma->vm_end, end); npages = (next - addr) >> PAGE_SHIFT; WRITE_ONCE(p->svms.faulting_task, current); - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, - addr, npages, &hmm_range, - readonly, true, owner); + r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, + readonly, owner, NULL, + &hmm_range); WRITE_ONCE(p->svms.faulting_task, NULL); if (r) { pr_debug("failed %d to get svm range pages\n", r); @@ -2542,8 +2542,8 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, struct interval_tree_node *node; unsigned long start_limit, end_limit; - vma = find_vma(p->mm, addr << PAGE_SHIFT); - if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + vma = vma_lookup(p->mm, addr << PAGE_SHIFT); + if (!vma) { pr_debug("VMA does not exist in address [0x%llx]\n", addr); return -EFAULT; } @@ -2871,8 +2871,8 @@ retry_write_locked: /* __do_munmap removed VMA, return success as we are handling stale * retry fault. 
*/ - vma = find_vma(mm, addr << PAGE_SHIFT); - if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + vma = vma_lookup(mm, addr << PAGE_SHIFT); + if (!vma) { pr_debug("address 0x%llx VMA is removed\n", addr); r = 0; goto out_unlock_range; @@ -3152,9 +3152,8 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size) start <<= PAGE_SHIFT; end = start + (size << PAGE_SHIFT); do { - vma = find_vma(p->mm, start); - if (!vma || start < vma->vm_start || - (vma->vm_flags & device_vma)) + vma = vma_lookup(p->mm, start); + if (!vma || (vma->vm_flags & device_vma)) return -EFAULT; start = min(end, vma->vm_end); } while (start < end); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3f0a4a415907..bceb1a5b2518 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -115,7 +115,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu && top_dev->gpu->pdev == pdev) { + if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) { device = top_dev->gpu; break; } @@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, /* Making sure that the buffer is an empty string */ buffer[0] = 0; - cache = container_of(attr, struct kfd_cache_properties, attr); if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) return -EPERM; @@ -379,12 +378,13 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); - for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) + for (i = 0; i < cache->sibling_map_size; i++) for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) /* Check each bit */ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", - (cache->sibling_map[i] >> j) & 1); + (cache->sibling_map[i] >> j) & 1); /* Replace the last "," with end of line */ buffer[offs-1] = '\n'; @@ -1169,13 +1169,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) local_mem_size = gpu->local_mem_info.local_mem_size_private + gpu->local_mem_info.local_mem_size_public; - - buf[0] = gpu->pdev->devfn; - buf[1] = gpu->pdev->subsystem_vendor | - (gpu->pdev->subsystem_device << 16); - buf[2] = pci_domain_nr(gpu->pdev->bus); - buf[3] = gpu->pdev->device; - buf[4] = gpu->pdev->bus->number; + buf[0] = gpu->adev->pdev->devfn; + buf[1] = gpu->adev->pdev->subsystem_vendor | + (gpu->adev->pdev->subsystem_device << 16); + buf[2] = pci_domain_nr(gpu->adev->pdev->bus); + buf[3] = gpu->adev->pdev->device; + buf[4] = gpu->adev->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); buf[6] = upper_32_bits(local_mem_size); @@ -1198,7 +1197,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_iolink_properties *iolink; struct kfd_iolink_properties *p2plink; - down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) { /* Discrete GPUs need their own topology device list * entries. Don't assign them to CPU/APU nodes. 
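The following hunk adds fill_in_l1_pcache()/fill_in_l2_l3_pcache(), which build kfd_cache_properties entries directly instead of CRAT cache subtypes. The subtle part is the sibling-map arithmetic: take the per-SH CU bitmap, isolate the num_cu_shared CUs that share one cache instance, use ffs() to skip harvested CUs, and express the map relative to the first active CU. A small stand-alone model of that arithmetic is below; it is not code from the patch and the bitmap value is made up:

#include <stdio.h>
#include <stdint.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	uint32_t cu_bitmap = 0xE;	/* CU0 harvested, CU1..CU3 active (invented) */
	int cu_block = 0;		/* first block of CUs in this shader array  */
	int num_cu_shared = 2;		/* CUs sharing one L1 instance              */
	uint32_t mask;
	int first_active;

	/* Isolate the CUs of this block that share the cache. */
	mask = (cu_bitmap >> cu_block) & ((1u << num_cu_shared) - 1);

	first_active = ffs(mask);	/* 1-based; 0 means every CU is inactive */
	if (!first_active) {
		puts("no active CU in this block: no cache entry is created");
		return 0;
	}

	/* The sibling map is expressed relative to the first active CU. */
	mask >>= first_active - 1;
	printf("processor_id offset %d, sibling_map[0] = 0x%02x\n",
	       first_active - 1, mask & 0xFFu);
	return 0;
}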
@@ -1222,7 +1220,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) break; } } - up_write(&topology_lock); return out_dev; } @@ -1269,7 +1266,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, if (target_gpu_dev) { uint32_t cap; - pcie_capability_read_dword(target_gpu_dev->gpu->pdev, + pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, PCI_EXP_DEVCAP2, &cap); if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | @@ -1593,21 +1590,290 @@ out: return ret; } + +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, + struct kfd_gpu_cache_info *pcache_info, + struct kfd_cu_info *cu_info, + int cu_bitmask, + int cache_type, unsigned int cu_processor_id, + int cu_block) +{ + unsigned int cu_sibling_map_mask; + int first_active_cu; + struct kfd_cache_properties *pcache = NULL; + + cu_sibling_map_mask = cu_bitmask; + cu_sibling_map_mask >>= cu_block; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + first_active_cu = ffs(cu_sibling_map_mask); + + /* CU could be inactive. In case of shared cache find the first active + * CU. and incase of non-shared cache check if the CU is inactive. If + * inactive active skip it + */ + if (first_active_cu) { + pcache = kfd_alloc_struct(pcache); + if (!pcache) + return -ENOMEM; + + memset(pcache, 0, sizeof(struct kfd_cache_properties)); + pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); + pcache->cache_level = pcache_info[cache_type].cache_level; + pcache->cache_size = pcache_info[cache_type].cache_size; + + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_DATA; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_CPU; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_HSACU; + + /* Sibling map is w.r.t processor_id_low, so shift out + * inactive CU + */ + cu_sibling_map_mask = + cu_sibling_map_mask >> (first_active_cu - 1); + + pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[1] = + (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[2] = + (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[3] = + (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + + pcache->sibling_map_size = 4; + *props_ext = pcache; + + return 0; + } + return 1; +} + +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, + struct kfd_gpu_cache_info *pcache_info, + struct kfd_cu_info *cu_info, + int cache_type, unsigned int cu_processor_id) +{ + unsigned int cu_sibling_map_mask; + int first_active_cu; + int i, j, k; + struct kfd_cache_properties *pcache = NULL; + + cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + cu_sibling_map_mask &= + ((1 << pcache_info[cache_type].num_cu_shared) - 1); + first_active_cu = ffs(cu_sibling_map_mask); + + /* CU could be inactive. In case of shared cache find the first active + * CU. and incase of non-shared cache check if the CU is inactive. 
If + * inactive active skip it + */ + if (first_active_cu) { + pcache = kfd_alloc_struct(pcache); + if (!pcache) + return -ENOMEM; + + memset(pcache, 0, sizeof(struct kfd_cache_properties)); + pcache->processor_id_low = cu_processor_id + + (first_active_cu - 1); + pcache->cache_level = pcache_info[cache_type].cache_level; + pcache->cache_size = pcache_info[cache_type].cache_size; + + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_DATA; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_CPU; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_HSACU; + + /* Sibling map is w.r.t processor_id_low, so shift out + * inactive CU + */ + cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); + k = 0; + + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + k += 4; + + cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + } + } + pcache->sibling_map_size = k; + *props_ext = pcache; + return 0; + } + return 1; +} + +#define KFD_MAX_CACHE_TYPES 6 + +/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info + * tables + */ +static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) +{ + struct kfd_gpu_cache_info *pcache_info = NULL; + int i, j, k; + int ct = 0; + unsigned int cu_processor_id; + int ret; + unsigned int num_cu_shared; + struct kfd_cu_info cu_info; + struct kfd_cu_info *pcu_info; + int gpu_processor_id; + struct kfd_cache_properties *props_ext; + int num_of_entries = 0; + int num_of_cache_types = 0; + struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; + + amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); + pcu_info = &cu_info; + + gpu_processor_id = dev->node_props.simd_id_base; + + pcache_info = cache_info; + num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info); + if (!num_of_cache_types) { + pr_warn("no cache info found\n"); + return; + } + + /* For each type of cache listed in the kfd_gpu_cache_info table, + * go through all available Compute Units. 
+ * The [i,j,k] loop will + * if kfd_gpu_cache_info.num_cu_shared = 1 + * will parse through all available CU + * If (kfd_gpu_cache_info.num_cu_shared != 1) + * then it will consider only one CU from + * the shared unit + */ + for (ct = 0; ct < num_of_cache_types; ct++) { + cu_processor_id = gpu_processor_id; + if (pcache_info[ct].cache_level == 1) { + for (i = 0; i < pcu_info->num_shader_engines; i++) { + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { + + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, + pcu_info->cu_bitmap[i % 4][j + i / 4], ct, + cu_processor_id, k); + + if (ret < 0) + break; + + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } + + /* Move to next CU block */ + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= + pcu_info->num_cu_per_sh) ? + pcache_info[ct].num_cu_shared : + (pcu_info->num_cu_per_sh - k); + cu_processor_id += num_cu_shared; + } + } + } + } else { + ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, + pcu_info, ct, cu_processor_id); + + if (ret < 0) + break; + + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } + } + } + dev->node_props.caches_count += num_of_entries; + pr_debug("Added [%d] GPU cache entries\n", num_of_entries); +} + +static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id, + struct kfd_topology_device **dev) +{ + int proximity_domain = ++topology_crat_proximity_domain; + struct list_head temp_topology_device_list; + void *crat_image = NULL; + size_t image_size = 0; + int res; + + res = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_GPU, gpu, + proximity_domain); + if (res) { + pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + topology_crat_proximity_domain--; + goto err; + } + + INIT_LIST_HEAD(&temp_topology_device_list); + + res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (res) { + pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + topology_crat_proximity_domain--; + goto err; + } + + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + + *dev = kfd_assign_gpu(gpu); + if (WARN_ON(!*dev)) { + res = -ENODEV; + goto err; + } + + /* Fill the cache affinity information here for the GPUs + * using VCRAT + */ + kfd_fill_cache_non_crat_info(*dev, gpu); + + /* Update the SYSFS tree, since we added another topology + * device + */ + res = kfd_topology_update_sysfs(); + if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", + gpu_id, res); + +err: + kfd_destroy_crat_image(crat_image); + return res; +} + int kfd_topology_add_device(struct kfd_dev *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; struct kfd_cu_info cu_info; int res = 0; - struct list_head temp_topology_device_list; - void *crat_image = NULL; - size_t image_size = 0; - int proximity_domain; int i; const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; - INIT_LIST_HEAD(&temp_topology_device_list); - gpu_id = kfd_generate_gpu_id(gpu); pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); @@ -1617,50 +1883,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * CRAT to create a new topology device. 
Once created assign the gpu to * that topology device */ + down_write(&topology_lock); dev = kfd_assign_gpu(gpu); - if (!dev) { - down_write(&topology_lock); - proximity_domain = ++topology_crat_proximity_domain; - - res = kfd_create_crat_image_virtual(&crat_image, &image_size, - COMPUTE_UNIT_GPU, gpu, - proximity_domain); - if (res) { - pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", - gpu_id); - topology_crat_proximity_domain--; - return res; - } - res = kfd_parse_crat_table(crat_image, - &temp_topology_device_list, - proximity_domain); - if (res) { - pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", - gpu_id); - topology_crat_proximity_domain--; - goto err; - } - - kfd_topology_update_device_list(&temp_topology_device_list, - &topology_device_list); - - /* Update the SYSFS tree, since we added another topology - * device - */ - res = kfd_topology_update_sysfs(); - up_write(&topology_lock); - - if (!res) - sys_props.generation_count++; - else - pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", - gpu_id, res); - dev = kfd_assign_gpu(gpu); - if (WARN_ON(!dev)) { - res = -ENODEV; - goto err; - } - } + if (!dev) + res = kfd_topology_add_device_locked(gpu, gpu_id, &dev); + up_write(&topology_lock); + if (res) + return res; dev->gpu_id = gpu_id; gpu->id = gpu_id; @@ -1688,13 +1917,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) cu_info.num_shader_arrays_per_engine; dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; - dev->node_props.vendor_id = gpu->pdev->vendor; - dev->node_props.device_id = gpu->pdev->device; + dev->node_props.vendor_id = gpu->adev->pdev->vendor; + dev->node_props.device_id = gpu->adev->pdev->device; dev->node_props.capability |= ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); - dev->node_props.location_id = pci_dev_id(gpu->pdev); - dev->node_props.domain = pci_domain_nr(gpu->pdev->bus); + dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); + dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); dev->node_props.max_engine_clk_fcompute = amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); dev->node_props.max_engine_clk_ccompute = @@ -1783,11 +2012,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) kfd_debug_print_topology(); - if (!res) - kfd_notify_gpu_change(gpu_id, 1); -err: - kfd_destroy_crat_image(crat_image); - return res; + kfd_notify_gpu_change(gpu_id, 1); + + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 9f6c949186c1..fca30d00a9bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -80,6 +80,8 @@ struct kfd_mem_properties { struct attribute attr; }; +#define CACHE_SIBLINGMAP_SIZE 64 + struct kfd_cache_properties { struct list_head list; uint32_t processor_id_low; @@ -90,10 +92,11 @@ struct kfd_cache_properties { uint32_t cache_assoc; uint32_t cache_latency; uint32_t cache_type; - uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; + uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE]; struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; + uint32_t sibling_map_size; }; struct kfd_iolink_properties { @@ -128,7 +131,6 @@ struct kfd_topology_device { uint32_t proximity_domain; struct kfd_node_properties node_props; struct list_head mem_props; - uint32_t cache_count; struct list_head cache_props; struct list_head io_link_props; struct list_head p2p_link_props; diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig index f4f3d2665a6b..2efe93f74f84 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -7,7 +7,8 @@ config DRM_AMD_DC default y depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) + # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 + select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128 || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG)) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 512c32327eb1..77277d90b6e2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -82,7 +82,6 @@ #include <drm/drm_atomic_uapi.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_edid.h> #include <drm/drm_vblank.h> @@ -147,14 +146,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU); /* Number of bytes in PSP footer for firmware. */ #define PSP_FOOTER_BYTES 0x100 -/* - * DMUB Async to Sync Mechanism Status - */ -#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 -#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 -#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 -#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4 - /** * DOC: overview * @@ -1105,7 +1096,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) /* Initialize hardware. */ memset(&hw_params, 0, sizeof(hw_params)); hw_params.fb_base = adev->gmc.fb_start; - hw_params.fb_offset = adev->gmc.aper_base; + hw_params.fb_offset = adev->vm_manager.vram_base_offset; /* backdoor load firmware and trigger dmub running */ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) @@ -1227,7 +1218,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24; pa_config->system_aperture.fb_base = adev->gmc.fb_start; - pa_config->system_aperture.fb_offset = adev->gmc.aper_base; + pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset; pa_config->system_aperture.fb_top = adev->gmc.fb_end; pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12; @@ -1442,9 +1433,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) memset(&init_params, 0, sizeof(init_params)); #endif + mutex_init(&adev->dm.dpia_aux_lock); mutex_init(&adev->dm.dc_lock); mutex_init(&adev->dm.audio_lock); - spin_lock_init(&adev->dm.vblank_lock); if(amdgpu_dm_irq_init(adev)) { DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n"); @@ -1807,6 +1798,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) mutex_destroy(&adev->dm.audio_lock); mutex_destroy(&adev->dm.dc_lock); + mutex_destroy(&adev->dm.dpia_aux_lock); return; } @@ -2858,7 +2850,6 @@ const struct amdgpu_ip_block_version dm_ip_block = static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = { .fb_create = amdgpu_display_user_framebuffer_create, .get_format_info = amd_get_format_info, - .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = amdgpu_dm_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; @@ -3864,8 +3855,6 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) /* indicates support for immediate flip */ 
adev_to_drm(adev)->mode_config.async_page_flip = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; @@ -4640,6 +4629,7 @@ static int dm_early_init(void *handle) adev_to_drm(adev)->dev, &dev_attr_s3_debug); #endif + adev->dc_enabled = true; return 0; } @@ -4879,6 +4869,35 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, return 0; } +static inline void fill_dc_dirty_rect(struct drm_plane *plane, + struct rect *dirty_rect, int32_t x, + int32_t y, int32_t width, int32_t height, + int *i, bool ffu) +{ + if (*i > DC_MAX_DIRTY_RECTS) + return; + + if (*i == DC_MAX_DIRTY_RECTS) + goto out; + + dirty_rect->x = x; + dirty_rect->y = y; + dirty_rect->width = width; + dirty_rect->height = height; + + if (ffu) + drm_dbg(plane->dev, + "[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n", + plane->base.id, width, height); + else + drm_dbg(plane->dev, + "[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)", + plane->base.id, x, y, width, height); + +out: + (*i)++; +} + /** * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates * @@ -4899,10 +4918,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, * addition, certain use cases - such as cursor and multi-plane overlay (MPO) - * implicitly provide damage clips without any client support via the plane * bounds. - * - * Today, amdgpu_dm only supports the MPO and cursor usecase. - * - * TODO: Also enable for FB_DAMAGE_CLIPS */ static void fill_dc_dirty_rects(struct drm_plane *plane, struct drm_plane_state *old_plane_state, @@ -4913,12 +4928,11 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); struct rect *dirty_rects = flip_addrs->dirty_rects; uint32_t num_clips; + struct drm_mode_rect *clips; bool bb_changed; bool fb_changed; uint32_t i = 0; - flip_addrs->dirty_rect_count = 0; - /* * Cursor plane has it's own dirty rect update interface. See * dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data @@ -4926,20 +4940,20 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (plane->type == DRM_PLANE_TYPE_CURSOR) return; - /* - * Today, we only consider MPO use-case for PSR SU. If MPO not - * requested, and there is a plane update, do FFU. - */ + num_clips = drm_plane_get_damage_clips_count(new_plane_state); + clips = drm_plane_get_damage_clips(new_plane_state); + if (!dm_crtc_state->mpo_requested) { - dirty_rects[0].x = 0; - dirty_rects[0].y = 0; - dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay; - dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay; - flip_addrs->dirty_rect_count = 1; - DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n", - new_plane_state->plane->base.id, - dm_crtc_state->base.mode.crtc_hdisplay, - dm_crtc_state->base.mode.crtc_vdisplay); + if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS) + goto ffu; + + for (; flip_addrs->dirty_rect_count < num_clips; clips++) + fill_dc_dirty_rect(new_plane_state->plane, + &dirty_rects[i], clips->x1, + clips->y1, clips->x2 - clips->x1, + clips->y2 - clips->y1, + &flip_addrs->dirty_rect_count, + false); return; } @@ -4950,7 +4964,6 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, * If plane is moved or resized, also add old bounding box to dirty * rects. 
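The fill_dc_dirty_rect() helper added just above records a rectangle only while there is room but keeps bumping the counter, so the reworked fill_dc_dirty_rects() below can notice that the damage-clip list overflowed and fall back to a full-frame update. A condensed stand-alone model of that flow follows, converting DRM damage clips (x1/y1/x2/y2) into DC-style x/y/width/height rects; the clip values and the rect limit are invented for the example and the helper is simplified relative to the patch:

#include <stdio.h>

#define MAX_DIRTY_RECTS 3	/* illustrative limit, not DC's real value */

struct rect { int x, y, width, height; };

/* Record a rect only while there is room, but always bump the count so
 * the caller can tell that the damage-clip list overflowed. */
static void fill_rect(struct rect *rects, int x, int y, int w, int h, int *i)
{
	if (*i < MAX_DIRTY_RECTS)
		rects[*i] = (struct rect){ x, y, w, h };
	(*i)++;
}

int main(void)
{
	/* Pretend user space attached five FB_DAMAGE_CLIPS boxes (x1,y1,x2,y2). */
	int clips[][4] = { {0, 0, 10, 10}, {20, 0, 30, 10}, {0, 20, 10, 30},
			   {40, 40, 50, 50}, {60, 60, 70, 70} };
	struct rect rects[MAX_DIRTY_RECTS];
	int n, count = 0;

	for (n = 0; n < (int)(sizeof(clips) / sizeof(clips[0])); n++)
		fill_rect(rects, clips[n][0], clips[n][1],
			  clips[n][2] - clips[n][0],
			  clips[n][3] - clips[n][1], &count);

	if (count > MAX_DIRTY_RECTS)	/* too many clips: full-frame update */
		printf("overflow (%d clips): send one full-frame dirty rect\n", count);
	else
		printf("sent %d dirty rects\n", count);
	return 0;
}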
*/ - num_clips = drm_plane_get_damage_clips_count(new_plane_state); fb_changed = old_plane_state->fb->base.id != new_plane_state->fb->base.id; bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x || @@ -4958,36 +4971,51 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, old_plane_state->crtc_w != new_plane_state->crtc_w || old_plane_state->crtc_h != new_plane_state->crtc_h); - DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n", - new_plane_state->plane->base.id, - bb_changed, fb_changed, num_clips); + drm_dbg(plane->dev, + "[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n", + new_plane_state->plane->base.id, + bb_changed, fb_changed, num_clips); - if (num_clips || fb_changed || bb_changed) { - dirty_rects[i].x = new_plane_state->crtc_x; - dirty_rects[i].y = new_plane_state->crtc_y; - dirty_rects[i].width = new_plane_state->crtc_w; - dirty_rects[i].height = new_plane_state->crtc_h; - DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n", - new_plane_state->plane->base.id, - dirty_rects[i].x, dirty_rects[i].y, - dirty_rects[i].width, dirty_rects[i].height); - i += 1; - } - - /* Add old plane bounding-box if plane is moved or resized */ if (bb_changed) { - dirty_rects[i].x = old_plane_state->crtc_x; - dirty_rects[i].y = old_plane_state->crtc_y; - dirty_rects[i].width = old_plane_state->crtc_w; - dirty_rects[i].height = old_plane_state->crtc_h; - DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n", - old_plane_state->plane->base.id, - dirty_rects[i].x, dirty_rects[i].y, - dirty_rects[i].width, dirty_rects[i].height); - i += 1; - } + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], + new_plane_state->crtc_x, + new_plane_state->crtc_y, + new_plane_state->crtc_w, + new_plane_state->crtc_h, &i, false); + + /* Add old plane bounding-box if plane is moved or resized */ + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], + old_plane_state->crtc_x, + old_plane_state->crtc_y, + old_plane_state->crtc_w, + old_plane_state->crtc_h, &i, false); + } + + if (num_clips) { + for (; i < num_clips; clips++) + fill_dc_dirty_rect(new_plane_state->plane, + &dirty_rects[i], clips->x1, + clips->y1, clips->x2 - clips->x1, + clips->y2 - clips->y1, &i, false); + } else if (fb_changed && !bb_changed) { + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], + new_plane_state->crtc_x, + new_plane_state->crtc_y, + new_plane_state->crtc_w, + new_plane_state->crtc_h, &i, false); + } + + if (i > DC_MAX_DIRTY_RECTS) + goto ffu; flip_addrs->dirty_rect_count = i; + return; + +ffu: + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0, + dm_crtc_state->base.mode.crtc_hdisplay, + dm_crtc_state->base.mode.crtc_vdisplay, + &flip_addrs->dirty_rect_count, true); } static void update_stream_scaling_settings(const struct drm_display_mode *mode, @@ -5652,16 +5680,14 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, { struct drm_connector *drm_connector = &aconnector->base; uint32_t link_bandwidth_kbps; - uint32_t max_dsc_target_bpp_limit_override = 0; struct dc *dc = sink->ctx->dc; uint32_t max_supported_bw_in_kbps, timing_bw_in_kbps; uint32_t dsc_max_supported_bw_in_kbps; + uint32_t max_dsc_target_bpp_limit_override = + drm_connector->display_info.max_dsc_bpp; link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, dc_link_get_link_cap(aconnector->dc_link)); - if (stream->link && stream->link->local_sink) - max_dsc_target_bpp_limit_override = - 
stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit; /* Set DSC policy according to dsc_clock_en */ dc_dsc_policy_set_enable_dsc_when_not_needed( @@ -5734,7 +5760,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct drm_connector_state *con_state = dm_state ? &dm_state->base : NULL; struct dc_stream_state *stream = NULL; - struct drm_display_mode mode = *drm_mode; + struct drm_display_mode mode; struct drm_display_mode saved_mode; struct drm_display_mode *freesync_mode = NULL; bool native_mode_found = false; @@ -5742,12 +5768,14 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false; int mode_refresh; int preferred_refresh = 0; + enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif struct dc_sink *sink = NULL; + drm_mode_init(&mode, drm_mode); memset(&saved_mode, 0, sizeof(saved_mode)); if (aconnector == NULL) { @@ -5865,7 +5893,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) stream->use_vsc_sdp_for_colorimetry = true; } - mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space); + if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) + tf = TRANSFER_FUNC_GAMMA_22; + mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } @@ -6158,7 +6188,6 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) aconnector->base.name); aconnector->base.force = DRM_FORCE_OFF; - aconnector->base.override_edid = false; return; } @@ -6193,11 +6222,73 @@ static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector) link->verified_link_cap.link_rate = LINK_RATE_HIGH2; } - - aconnector->base.override_edid = true; create_eml_sink(aconnector); } +static enum dc_status dm_validate_stream_and_context(struct dc *dc, + struct dc_stream_state *stream) +{ + enum dc_status dc_result = DC_ERROR_UNEXPECTED; + struct dc_plane_state *dc_plane_state = NULL; + struct dc_state *dc_state = NULL; + + if (!stream) + goto cleanup; + + dc_plane_state = dc_create_plane_state(dc); + if (!dc_plane_state) + goto cleanup; + + dc_state = dc_create_state(dc); + if (!dc_state) + goto cleanup; + + /* populate stream to plane */ + dc_plane_state->src_rect.height = stream->src.height; + dc_plane_state->src_rect.width = stream->src.width; + dc_plane_state->dst_rect.height = stream->src.height; + dc_plane_state->dst_rect.width = stream->src.width; + dc_plane_state->clip_rect.height = stream->src.height; + dc_plane_state->clip_rect.width = stream->src.width; + dc_plane_state->plane_size.surface_pitch = ((stream->src.width + 255) / 256) * 256; + dc_plane_state->plane_size.surface_size.height = stream->src.height; + dc_plane_state->plane_size.surface_size.width = stream->src.width; + dc_plane_state->plane_size.chroma_size.height = stream->src.height; + dc_plane_state->plane_size.chroma_size.width = stream->src.width; + dc_plane_state->tiling_info.gfx9.swizzle = DC_SW_UNKNOWN; + dc_plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888; + dc_plane_state->tiling_info.gfx9.swizzle = DC_SW_UNKNOWN; + dc_plane_state->rotation = ROTATION_ANGLE_0; + dc_plane_state->is_tiling_rotated = false; + dc_plane_state->tiling_info.gfx8.array_mode = DC_ARRAY_LINEAR_GENERAL; + + dc_result = dc_validate_stream(dc, stream); + 
if (dc_result == DC_OK) + dc_result = dc_validate_plane(dc, dc_plane_state); + + if (dc_result == DC_OK) + dc_result = dc_add_stream_to_ctx(dc, dc_state, stream); + + if (dc_result == DC_OK && !dc_add_plane_to_context( + dc, + stream, + dc_plane_state, + dc_state)) + dc_result = DC_FAIL_ATTACH_SURFACES; + + if (dc_result == DC_OK) + dc_result = dc_validate_global_state(dc, dc_state, true); + +cleanup: + if (dc_state) + dc_release_state(dc_state); + + if (dc_plane_state) + dc_plane_state_release(dc_plane_state); + + return dc_result; +} + struct dc_stream_state * create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct drm_display_mode *drm_mode, @@ -6224,6 +6315,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream); + if (dc_result == DC_OK) + dc_result = dm_validate_stream_and_context(adev->dm.dc, stream); + if (dc_result != DC_OK) { DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d (%s)\n", drm_mode->hdisplay, @@ -7889,6 +7983,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, */ if (acrtc_state->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 && acrtc_attach->dm_irq_params.allow_psr_entry && +#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY + !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) && +#endif !acrtc_state->stream->link->psr_settings.psr_allow_active) amdgpu_dm_psr_enable(acrtc_state->stream); } else { @@ -8350,8 +8447,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) if (amdgpu_dm_crc_window_is_activated(crtc)) { spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); - acrtc->dm_irq_params.crc_window.update_win = true; - acrtc->dm_irq_params.crc_window.skip_frame_cnt = 2; + acrtc->dm_irq_params.window_param.update_win = true; + acrtc->dm_irq_params.window_param.skip_frame_cnt = 2; spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock); crc_rd_wrk->crtc = crtc; spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock); @@ -10139,91 +10236,92 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address, return value; } -static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, - struct dc_context *ctx, - uint8_t status_type, - uint32_t *operation_result) +int amdgpu_dm_process_dmub_aux_transfer_sync( + struct dc_context *ctx, + unsigned int link_index, + struct aux_payload *payload, + enum aux_return_code_type *operation_result) { struct amdgpu_device *adev = ctx->driver_context; - int return_status = -1; struct dmub_notification *p_notify = adev->dm.dmub_notify; + int ret = -1; - if (is_cmd_aux) { - if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { - return_status = p_notify->aux_reply.length; - *operation_result = p_notify->result; - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) { - *operation_result = AUX_RET_ERROR_TIMEOUT; - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) { - *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) { - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - } else { - *operation_result = AUX_RET_ERROR_UNKNOWN; + mutex_lock(&adev->dm.dpia_aux_lock); + if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) { + *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; + goto out; + } + + if 
(!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { + DRM_ERROR("wait_for_completion_timeout timeout!"); + *operation_result = AUX_RET_ERROR_TIMEOUT; + goto out; + } + + if (p_notify->result != AUX_RET_SUCCESS) { + /* + * Transient states before tunneling is enabled could + * lead to this error. We can ignore this for now. + */ + if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", + payload->address, payload->length, + p_notify->result); } - } else { - if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { - return_status = 0; - *operation_result = p_notify->sc_status; - } else { - *operation_result = SET_CONFIG_UNKNOWN_ERROR; + *operation_result = AUX_RET_ERROR_INVALID_REPLY; + goto out; + } + + + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; + if (!payload->write && p_notify->aux_reply.length && + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { + + if (payload->length != p_notify->aux_reply.length) { + DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", + p_notify->aux_reply.length, + payload->address, payload->length); + *operation_result = AUX_RET_ERROR_INVALID_REPLY; + goto out; } + + memcpy(payload->data, p_notify->aux_reply.data, + p_notify->aux_reply.length); } - return return_status; + /* success */ + ret = p_notify->aux_reply.length; + *operation_result = p_notify->result; +out: + mutex_unlock(&adev->dm.dpia_aux_lock); + return ret; } -int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx, - unsigned int link_index, void *cmd_payload, void *operation_result) +int amdgpu_dm_process_dmub_set_config_sync( + struct dc_context *ctx, + unsigned int link_index, + struct set_config_cmd_payload *payload, + enum set_config_status *operation_result) { struct amdgpu_device *adev = ctx->driver_context; - int ret = 0; + bool is_cmd_complete; + int ret; - if (is_cmd_aux) { - dc_process_dmub_aux_transfer_async(ctx->dc, - link_index, (struct aux_payload *)cmd_payload); - } else if (dc_process_dmub_set_config_async(ctx->dc, link_index, - (struct set_config_cmd_payload *)cmd_payload, - adev->dm.dmub_notify)) { - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, - (uint32_t *)operation_result); - } + mutex_lock(&adev->dm.dpia_aux_lock); + is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc, + link_index, payload, adev->dm.dmub_notify); - ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ); - if (ret == 0) { + if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { + ret = 0; + *operation_result = adev->dm.dmub_notify->sc_status; + } else { DRM_ERROR("wait_for_completion_timeout timeout!"); - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT, - (uint32_t *)operation_result); - } - - if (is_cmd_aux) { - if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) { - struct aux_payload *payload = (struct aux_payload *)cmd_payload; - - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; - if (!payload->write && adev->dm.dmub_notify->aux_reply.length && - payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) { - - if (payload->length != adev->dm.dmub_notify->aux_reply.length) { - DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n", - payload->address, payload->length, - adev->dm.dmub_notify->aux_reply.length); - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx, - 
DMUB_ASYNC_TO_SYNC_ACCESS_INVALID, - (uint32_t *)operation_result); - } - - memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data, - adev->dm.dmub_notify->aux_reply.length); - } - } + ret = -1; + *operation_result = SET_CONFIG_UNKNOWN_ERROR; } - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, - (uint32_t *)operation_result); + mutex_unlock(&adev->dm.dpia_aux_lock); + return ret; } /* @@ -10235,8 +10333,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context */ bool check_seamless_boot_capability(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_VANGOGH: + switch (adev->ip_versions[DCE_HWIP][0]) { + case IP_VERSION(3, 0, 1): if (!adev->mman.keep_stolen_vga_memory) return true; break; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 635c398fcefe..df3c25e32c65 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -59,7 +59,9 @@ #include "signal_types.h" #include "amdgpu_dm_crc.h" struct aux_payload; +struct set_config_cmd_payload; enum aux_return_code_type; +enum set_config_status; /* Forward declarations */ struct amdgpu_device; @@ -360,13 +362,6 @@ struct amdgpu_display_manager { struct mutex audio_lock; /** - * @vblank_lock: - * - * Guards access to deferred vblank work state. - */ - spinlock_t vblank_lock; - - /** * @audio_component: * * Used to notify ELD changes to sound driver. @@ -549,6 +544,13 @@ struct amdgpu_display_manager { * occurred on certain intel platform */ bool aux_hpd_discon_quirk; + + /** + * @dpia_aux_lock: + * + * Guards access to DPIA AUX + */ + struct mutex dpia_aux_lock; }; enum dsc_clock_force_state { @@ -792,9 +794,11 @@ void amdgpu_dm_update_connector_after_detect( extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; -int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, - struct dc_context *ctx, unsigned int link_index, - void *payload, void *operation_result); +int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index, + struct aux_payload *payload, enum aux_return_code_type *operation_result); + +int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index, + struct set_config_cmd_payload *payload, enum set_config_status *operation_result); bool check_seamless_boot_capability(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 8a441a22c46e..66df2394d7e4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -89,13 +89,13 @@ static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.crc_window.x_start = 0; - acrtc->dm_irq_params.crc_window.y_start = 0; - acrtc->dm_irq_params.crc_window.x_end = 0; - acrtc->dm_irq_params.crc_window.y_end = 0; - acrtc->dm_irq_params.crc_window.activated = false; - acrtc->dm_irq_params.crc_window.update_win = false; - acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0; + acrtc->dm_irq_params.window_param.x_start = 0; + acrtc->dm_irq_params.window_param.y_start = 0; + acrtc->dm_irq_params.window_param.x_end = 0; + acrtc->dm_irq_params.window_param.y_end = 0; + acrtc->dm_irq_params.window_param.activated 
= false; + acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; spin_unlock_irq(&drm_dev->event_lock); } @@ -123,6 +123,8 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) phy_id = crc_rd_wrk->phy_inst; spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock); + mutex_lock(&psp->securedisplay_context.mutex); + psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = @@ -133,6 +135,24 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status); } } + + mutex_unlock(&psp->securedisplay_context.mutex); +} + +static void +amdgpu_dm_forward_crc_window(struct work_struct *work) +{ + struct crc_fw_work *crc_fw_wrk; + struct amdgpu_display_manager *dm; + + crc_fw_wrk = container_of(work, struct crc_fw_work, forward_roi_work); + dm = crc_fw_wrk->dm; + + mutex_lock(&dm->dc_lock); + dc_stream_forward_crc_window(dm->dc, &crc_fw_wrk->rect, crc_fw_wrk->stream, crc_fw_wrk->is_stop_cmd); + mutex_unlock(&dm->dc_lock); + + kfree(crc_fw_wrk); } bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc) @@ -142,7 +162,7 @@ bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc) bool ret = false; spin_lock_irq(&drm_dev->event_lock); - ret = acrtc->dm_irq_params.crc_window.activated; + ret = acrtc->dm_irq_params.window_param.activated; spin_unlock_irq(&drm_dev->event_lock); return ret; @@ -187,9 +207,11 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, if (adev->dm.crc_rd_wrk) { flush_work(&adev->dm.crc_rd_wrk->notify_ta_work); spin_lock_irq(&adev->dm.crc_rd_wrk->crc_rd_work_lock); + if (adev->dm.crc_rd_wrk->crtc == crtc) { - dc_stream_stop_dmcu_crc_win_update(stream_state->ctx->dc, - dm_crtc_state->stream); + /* stop ROI update on this crtc */ + dc_stream_forward_crc_window(stream_state->ctx->dc, + NULL, stream_state, true); adev->dm.crc_rd_wrk->crtc = NULL; } spin_unlock_irq(&adev->dm.crc_rd_wrk->crc_rd_work_lock); @@ -439,14 +461,9 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) enum amdgpu_dm_pipe_crc_source cur_crc_src; struct amdgpu_crtc *acrtc = NULL; struct amdgpu_device *adev = NULL; - struct crc_rd_work *crc_rd_wrk = NULL; - struct crc_params *crc_window = NULL, tmp_window; + struct crc_rd_work *crc_rd_wrk; + struct crc_fw_work *crc_fw_wrk; unsigned long flags1, flags2; - struct crtc_position position; - uint32_t v_blank; - uint32_t v_back_porch; - uint32_t crc_window_latch_up_line; - struct dc_crtc_timing *timing_out; if (crtc == NULL) return; @@ -458,74 +475,54 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) spin_lock_irqsave(&drm_dev->event_lock, flags1); stream_state = acrtc->dm_irq_params.stream; cur_crc_src = acrtc->dm_irq_params.crc_src; - timing_out = &stream_state->timing; /* Early return if CRC capture is not enabled. 
*/ if (!amdgpu_dm_is_valid_crc_source(cur_crc_src)) goto cleanup; - if (dm_is_crc_source_crtc(cur_crc_src)) { - if (acrtc->dm_irq_params.crc_window.activated) { - if (acrtc->dm_irq_params.crc_window.update_win) { - if (acrtc->dm_irq_params.crc_window.skip_frame_cnt) { - acrtc->dm_irq_params.crc_window.skip_frame_cnt -= 1; - goto cleanup; - } - crc_window = &tmp_window; - - tmp_window.windowa_x_start = - acrtc->dm_irq_params.crc_window.x_start; - tmp_window.windowa_y_start = - acrtc->dm_irq_params.crc_window.y_start; - tmp_window.windowa_x_end = - acrtc->dm_irq_params.crc_window.x_end; - tmp_window.windowa_y_end = - acrtc->dm_irq_params.crc_window.y_end; - tmp_window.windowb_x_start = - acrtc->dm_irq_params.crc_window.x_start; - tmp_window.windowb_y_start = - acrtc->dm_irq_params.crc_window.y_start; - tmp_window.windowb_x_end = - acrtc->dm_irq_params.crc_window.x_end; - tmp_window.windowb_y_end = - acrtc->dm_irq_params.crc_window.y_end; - - dc_stream_forward_dmcu_crc_window(stream_state->ctx->dc, - stream_state, crc_window); - - acrtc->dm_irq_params.crc_window.update_win = false; - - dc_stream_get_crtc_position(stream_state->ctx->dc, &stream_state, 1, - &position.vertical_count, - &position.nominal_vcount); - - v_blank = timing_out->v_total - timing_out->v_border_top - - timing_out->v_addressable - timing_out->v_border_bottom; - - v_back_porch = v_blank - timing_out->v_front_porch - - timing_out->v_sync_width; - - crc_window_latch_up_line = v_back_porch + timing_out->v_sync_width; - - /* take 3 lines margin*/ - if ((position.vertical_count + 3) >= crc_window_latch_up_line) - acrtc->dm_irq_params.crc_window.skip_frame_cnt = 1; - else - acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0; - } else { - if (acrtc->dm_irq_params.crc_window.skip_frame_cnt == 0) { - if (adev->dm.crc_rd_wrk) { - crc_rd_wrk = adev->dm.crc_rd_wrk; - spin_lock_irqsave(&crc_rd_wrk->crc_rd_work_lock, flags2); - crc_rd_wrk->phy_inst = - stream_state->link->link_enc_hw_inst; - spin_unlock_irqrestore(&crc_rd_wrk->crc_rd_work_lock, flags2); - schedule_work(&crc_rd_wrk->notify_ta_work); - } - } else { - acrtc->dm_irq_params.crc_window.skip_frame_cnt -= 1; - } - } + if (!dm_is_crc_source_crtc(cur_crc_src)) + goto cleanup; + + if (!acrtc->dm_irq_params.window_param.activated) + goto cleanup; + + if (acrtc->dm_irq_params.window_param.update_win) { + if (acrtc->dm_irq_params.window_param.skip_frame_cnt) { + acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1; + goto cleanup; + } + + /* prepare work for dmub to update ROI */ + crc_fw_wrk = kzalloc(sizeof(*crc_fw_wrk), GFP_ATOMIC); + if (!crc_fw_wrk) + goto cleanup; + + INIT_WORK(&crc_fw_wrk->forward_roi_work, amdgpu_dm_forward_crc_window); + crc_fw_wrk->dm = &adev->dm; + crc_fw_wrk->stream = stream_state; + crc_fw_wrk->rect.x = acrtc->dm_irq_params.window_param.x_start; + crc_fw_wrk->rect.y = acrtc->dm_irq_params.window_param.y_start; + crc_fw_wrk->rect.width = acrtc->dm_irq_params.window_param.x_end - + acrtc->dm_irq_params.window_param.x_start; + crc_fw_wrk->rect.height = acrtc->dm_irq_params.window_param.y_end - + acrtc->dm_irq_params.window_param.y_start; + schedule_work(&crc_fw_wrk->forward_roi_work); + + acrtc->dm_irq_params.window_param.update_win = false; + acrtc->dm_irq_params.window_param.skip_frame_cnt = 1; + + } else { + if (acrtc->dm_irq_params.window_param.skip_frame_cnt) { + acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1; + goto cleanup; + } + + if (adev->dm.crc_rd_wrk) { + crc_rd_wrk = adev->dm.crc_rd_wrk; + spin_lock_irqsave(&crc_rd_wrk->crc_rd_work_lock, 
flags2); + crc_rd_wrk->phy_inst = stream_state->link->link_enc_hw_inst; + spin_unlock_irqrestore(&crc_rd_wrk->crc_rd_work_lock, flags2); + schedule_work(&crc_rd_wrk->notify_ta_work); } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index f07850db60a6..71bce608d751 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -40,7 +40,7 @@ enum amdgpu_dm_pipe_crc_source { }; #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY -struct crc_window_parm { +struct crc_window_param { uint16_t x_start; uint16_t y_start; uint16_t x_end; @@ -53,6 +53,7 @@ struct crc_window_parm { int skip_frame_cnt; }; +/* read_work for driver to call PSP to read */ struct crc_rd_work { struct work_struct notify_ta_work; /* To protect crc_rd_work carried fields*/ @@ -60,6 +61,15 @@ struct crc_rd_work { struct drm_crtc *crtc; uint8_t phy_inst; }; + +/* forward_work for driver to forward ROI to dmu */ +struct crc_fw_work { + struct work_struct forward_roi_work; + struct amdgpu_display_manager *dm; + struct dc_stream_state *stream; + struct rect rect; + bool is_stop_cmd; +}; #endif static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source source) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 64dd02970292..22125daf9dcf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -127,6 +127,9 @@ static void vblank_control_worker(struct work_struct *work) amdgpu_dm_psr_disable(vblank_work->stream); } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && !vblank_work->stream->link->psr_settings.psr_allow_active && +#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY + !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) && +#endif vblank_work->acrtc->dm_irq_params.allow_psr_entry) { amdgpu_dm_psr_enable(vblank_work->stream); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index ee242d9d8b06..461037a3dd75 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -38,6 +38,10 @@ #include "link_hwss.h" #include "dc/dc_dmub_srv.h" +#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY +#include "amdgpu_dm_psr.h" +#endif + struct dmub_debugfs_trace_header { uint32_t entry_count; uint32_t reserved[3]; @@ -299,6 +303,8 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, case LINK_RATE_HIGH2: case LINK_RATE_HIGH3: case LINK_RATE_UHBR10: + case LINK_RATE_UHBR13_5: + case LINK_RATE_UHBR20: break; default: valid_input = false; @@ -2633,6 +2639,25 @@ static int dp_mst_progress_status_show(struct seq_file *m, void *unused) return 0; } +/* + * Reports whether the connected display is a USB4 DPIA tunneled display + * Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link + */ +static int is_dpia_link_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct dc_link *link = aconnector->dc_link; + + if (connector->status != connector_status_connected) + return -ENODEV; + + seq_printf(m, "%s\n", (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? "yes" : + (link->ep_type == DISPLAY_ENDPOINT_PHY) ? 
"no" : "unknown"); + + return 0; +} + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2644,6 +2669,7 @@ DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); +DEFINE_SHOW_ATTRIBUTE(is_dpia_link); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2788,7 +2814,8 @@ static const struct { {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, {"is_mst_connector", &dp_is_mst_connector_fops}, - {"mst_progress_status", &dp_mst_progress_status_fops} + {"mst_progress_status", &dp_mst_progress_status_fops}, + {"is_dpia_link", &is_dpia_link_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP @@ -3079,8 +3106,8 @@ static int crc_win_x_start_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.crc_window.x_start = (uint16_t) val; - acrtc->dm_irq_params.crc_window.update_win = false; + acrtc->dm_irq_params.window_param.x_start = (uint16_t) val; + acrtc->dm_irq_params.window_param.update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3096,7 +3123,7 @@ static int crc_win_x_start_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.crc_window.x_start; + *val = acrtc->dm_irq_params.window_param.x_start; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3116,8 +3143,8 @@ static int crc_win_y_start_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.crc_window.y_start = (uint16_t) val; - acrtc->dm_irq_params.crc_window.update_win = false; + acrtc->dm_irq_params.window_param.y_start = (uint16_t) val; + acrtc->dm_irq_params.window_param.update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3133,7 +3160,7 @@ static int crc_win_y_start_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.crc_window.y_start; + *val = acrtc->dm_irq_params.window_param.y_start; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3152,8 +3179,8 @@ static int crc_win_x_end_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.crc_window.x_end = (uint16_t) val; - acrtc->dm_irq_params.crc_window.update_win = false; + acrtc->dm_irq_params.window_param.x_end = (uint16_t) val; + acrtc->dm_irq_params.window_param.update_win = false; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3169,7 +3196,7 @@ static int crc_win_x_end_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.crc_window.x_end; + *val = acrtc->dm_irq_params.window_param.x_end; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3188,8 +3215,8 @@ static int crc_win_y_end_set(void *data, u64 val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - acrtc->dm_irq_params.crc_window.y_end = (uint16_t) val; - acrtc->dm_irq_params.crc_window.update_win = false; + acrtc->dm_irq_params.window_param.y_end = (uint16_t) val; + acrtc->dm_irq_params.window_param.update_win = false; spin_unlock_irq(&drm_dev->event_lock); 
return 0; @@ -3205,7 +3232,7 @@ static int crc_win_y_end_get(void *data, u64 *val) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); spin_lock_irq(&drm_dev->event_lock); - *val = acrtc->dm_irq_params.crc_window.y_end; + *val = acrtc->dm_irq_params.window_param.y_end; spin_unlock_irq(&drm_dev->event_lock); return 0; @@ -3228,31 +3255,38 @@ static int crc_win_update_set(void *data, u64 val) return 0; if (val) { + new_acrtc = to_amdgpu_crtc(new_crtc); + mutex_lock(&adev->dm.dc_lock); + /* PSR may write to OTG CRC window control register, + * so close it before starting secure_display. + */ + amdgpu_dm_psr_disable(new_acrtc->dm_irq_params.stream); + spin_lock_irq(&adev_to_drm(adev)->event_lock); spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock); if (crc_rd_wrk->crtc) { old_crtc = crc_rd_wrk->crtc; old_acrtc = to_amdgpu_crtc(old_crtc); } - new_acrtc = to_amdgpu_crtc(new_crtc); if (old_crtc && old_crtc != new_crtc) { - old_acrtc->dm_irq_params.crc_window.activated = false; - old_acrtc->dm_irq_params.crc_window.update_win = false; - old_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0; + old_acrtc->dm_irq_params.window_param.activated = false; + old_acrtc->dm_irq_params.window_param.update_win = false; + old_acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; - new_acrtc->dm_irq_params.crc_window.activated = true; - new_acrtc->dm_irq_params.crc_window.update_win = true; - new_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0; + new_acrtc->dm_irq_params.window_param.activated = true; + new_acrtc->dm_irq_params.window_param.update_win = true; + new_acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; crc_rd_wrk->crtc = new_crtc; } else { - new_acrtc->dm_irq_params.crc_window.activated = true; - new_acrtc->dm_irq_params.crc_window.update_win = true; - new_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0; + new_acrtc->dm_irq_params.window_param.activated = true; + new_acrtc->dm_irq_params.window_param.update_win = true; + new_acrtc->dm_irq_params.window_param.skip_frame_cnt = 0; crc_rd_wrk->crtc = new_crtc; } spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock); spin_unlock_irq(&adev_to_drm(adev)->event_lock); + mutex_unlock(&adev->dm.dc_lock); } return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 6202e31c7e3a..a7fd98f57f94 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -495,7 +495,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) link->dp.mst_enabled = config->mst_enabled; link->dp.usb4_enabled = config->usb4_enabled; display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; - link->adjust.auth_delay = 3; + link->adjust.auth_delay = 0; link->adjust.hdcp1.disable = 0; conn_state = aconnector->base.state; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index f0b01c8dc4a6..6994c9a1ed85 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -42,39 +42,6 @@ #include "dm_helpers.h" #include "ddc_service_types.h" -struct monitor_patch_info { - unsigned int manufacturer_id; - unsigned int product_id; - void (*patch_func)(struct dc_edid_caps *edid_caps, unsigned int param); - unsigned int patch_param; -}; -static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param); - -static const struct monitor_patch_info 
monitor_patch_table[] = { -{0x6D1E, 0x5BBF, set_max_dsc_bpp_limit, 15}, -{0x6D1E, 0x5B9A, set_max_dsc_bpp_limit, 15}, -}; - -static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param) -{ - if (edid_caps) - edid_caps->panel_patch.max_dsc_target_bpp_limit = param; -} - -static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps) -{ - int i, ret = 0; - - for (i = 0; i < ARRAY_SIZE(monitor_patch_table); i++) - if ((edid_caps->manufacturer_id == monitor_patch_table[i].manufacturer_id) - && (edid_caps->product_id == monitor_patch_table[i].product_id)) { - monitor_patch_table[i].patch_func(edid_caps, monitor_patch_table[i].patch_param); - ret++; - } - - return ret; -} - /* dm_helpers_parse_edid_caps * * Parse edid caps @@ -149,8 +116,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( kfree(sads); kfree(sadb); - amdgpu_dm_patch_edid_caps(edid_caps); - return result; } @@ -852,9 +817,8 @@ int dm_helper_dmub_aux_transfer_sync( struct aux_payload *payload, enum aux_return_code_type *operation_result) { - return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx, - link->link_index, (void *)payload, - (void *)operation_result); + return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, + operation_result); } int dm_helpers_dmub_set_config_sync(struct dc_context *ctx, @@ -862,9 +826,8 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx, struct set_config_cmd_payload *payload, enum set_config_status *operation_result) { - return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx, - link->link_index, (void *)payload, - (void *)operation_result); + return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload, + operation_result); } void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks) @@ -1006,3 +969,11 @@ void dm_helpers_enable_periodic_detection(struct dc_context *ctx, bool enable) { /* TODO: add periodic detection implementation */ } + +void dm_helpers_dp_mst_update_branch_bandwidth( + struct dc_context *ctx, + struct dc_link *link) +{ + // TODO +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h index 79b5f9999fec..5c9303241aeb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h @@ -39,7 +39,7 @@ struct dm_irq_params { #ifdef CONFIG_DEBUG_FS enum amdgpu_dm_pipe_crc_source crc_src; #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY - struct crc_window_parm crc_window; + struct crc_window_param window_param; #endif #endif }; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6483ba266893..1edf7385f8d8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -642,15 +642,18 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p int count, int k) { + struct drm_connector *drm_connector; int i; for (i = 0; i < count; i++) { + drm_connector = &params[i].aconnector->base; + memset(&params[i].timing->dsc_cfg, 0, sizeof(params[i].timing->dsc_cfg)); if (vars[i + k].dsc_enabled && dc_dsc_compute_config( params[i].sink->ctx->dc->res_pool->dscs[0], &params[i].sink->dsc_caps.dsc_dec_caps, params[i].sink->ctx->dc->debug.dsc_min_slice_height_override, - params[i].sink->edid_caps.panel_patch.max_dsc_target_bpp_limit, + drm_connector->display_info.max_dsc_bpp, 0,
params[i].timing, &params[i].timing->dsc_cfg)) { @@ -692,12 +695,16 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn) struct dc_dsc_config dsc_config; u64 kbps; + struct drm_connector *drm_connector = &param.aconnector->base; + uint32_t max_dsc_target_bpp_limit_override = + drm_connector->display_info.max_dsc_bpp; + kbps = div_u64((u64)pbn * 994 * 8 * 54, 64); dc_dsc_compute_config( param.sink->ctx->dc->res_pool->dscs[0], &param.sink->dsc_caps.dsc_dec_caps, param.sink->ctx->dc->debug.dsc_min_slice_height_override, - param.sink->edid_caps.panel_patch.max_dsc_target_bpp_limit, + max_dsc_target_bpp_limit_override, (int) kbps, param.timing, &dsc_config); return dsc_config.bits_per_pixel; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index e6854f7270a6..3c50b3ff7954 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1600,6 +1600,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, supported_rotations); + if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) && + plane->type != DRM_PLANE_TYPE_CURSOR) + drm_plane_enable_fb_damage_clips(plane); + drm_plane_helper_add(plane, &dm_plane_helper_funcs); #ifdef CONFIG_DRM_AMD_DC_HDR diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index d3bc9dc21771..0f580ea37576 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -37,6 +37,7 @@ #include <drm/drm_framebuffer.h> #include <drm/drm_encoder.h> #include <drm/drm_atomic.h> +#include "dcn10/dcn10_optc.h" #include "dc/inc/core_types.h" @@ -662,6 +663,69 @@ TRACE_EVENT(dcn_fpu, ) ); +TRACE_EVENT(dcn_optc_lock_unlock_state, + TP_PROTO(const struct optc *optc_state, int instance, bool lock, const char *function, const int line), + TP_ARGS(optc_state, instance, lock, function, line), + + TP_STRUCT__entry( + __field(const char *, function) + __field(int, instance) + __field(bool, lock) + __field(int, line) + __field(int, opp_count) + __field(int, max_h_total) + __field(int, max_v_total) + __field(int, min_h_blank) + __field(int, min_h_sync_width) + __field(int, min_v_sync_width) + __field(int, min_v_blank) + __field(int, min_v_blank_interlace) + __field(int, vstartup_start) + __field(int, vupdate_offset) + __field(int, vupdate_width) + __field(int, vready_offset) + ), + TP_fast_assign( + __entry->function = function; + __entry->instance = instance; + __entry->lock = lock; + __entry->line = line; + __entry->opp_count = optc_state->opp_count; + __entry->max_h_total = optc_state->max_h_total; + __entry->max_v_total = optc_state->max_v_total; + __entry->min_h_blank = optc_state->min_h_blank; + __entry->min_h_sync_width = optc_state->min_h_sync_width; + __entry->min_v_sync_width = optc_state->min_v_sync_width; + __entry->min_v_blank = optc_state->min_v_blank; + __entry->min_v_blank_interlace = optc_state->min_v_blank_interlace; + __entry->vstartup_start = optc_state->vstartup_start; + __entry->vupdate_offset = optc_state->vupdate_offset; + __entry->vupdate_width = optc_state->vupdate_width; + __entry->vready_offset = optc_state->vready_offset; + ), + TP_printk("%s: %s()+%d: optc_instance=%d opp_count=%d max_h_total=%d max_v_total=%d " + "min_h_blank=%d min_h_sync_width=%d min_v_sync_width=%d min_v_blank=%d
" + "min_v_blank_interlace=%d vstartup_start=%d vupdate_offset=%d vupdate_width=%d " + "vready_offset=%d", + __entry->lock ? "Lock" : "Unlock", + __entry->function, + __entry->line, + __entry->instance, + __entry->opp_count, + __entry->max_h_total, + __entry->max_v_total, + __entry->min_h_blank, + __entry->min_h_sync_width, + __entry->min_v_sync_width, + __entry->min_v_blank, + __entry->min_v_blank_interlace, + __entry->vstartup_start, + __entry->vupdate_offset, + __entry->vupdate_width, + __entry->vready_offset + ) +); + #endif /* _AMDGPU_DM_TRACE_H_ */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c index ab0c6d191038..1743ca0a3641 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -31,6 +31,8 @@ #elif defined(CONFIG_PPC64) #include <asm/switch_to.h> #include <asm/cputable.h> +#elif defined(CONFIG_ARM64) +#include <asm/neon.h> #endif /** @@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line) preempt_disable(); enable_kernel_fp(); } +#elif defined(CONFIG_ARM64) + kernel_neon_begin(); #endif } @@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line) disable_kernel_fp(); preempt_enable(); } +#elif defined(CONFIG_ARM64) + kernel_neon_end(); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 9b8ea6e9a2b9..a1a00f432168 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -138,7 +138,9 @@ static uint8_t get_number_of_objects(struct bios_parser *bp, uint32_t offset) uint32_t object_table_offset = bp->object_info_tbl_offset + offset; - table = GET_IMAGE(ATOM_OBJECT_TABLE, object_table_offset); + table = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base, + object_table_offset, + struct_size(table, asObjects, 1))); if (!table) return 0; @@ -166,8 +168,9 @@ static struct graphics_object_id bios_parser_get_connector_id( uint32_t connector_table_offset = bp->object_info_tbl_offset + le16_to_cpu(bp->object_info_tbl.v1_1->usConnectorObjectTableOffset); - ATOM_OBJECT_TABLE *tbl = - GET_IMAGE(ATOM_OBJECT_TABLE, connector_table_offset); + ATOM_OBJECT_TABLE *tbl = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base, + connector_table_offset, + struct_size(tbl, asObjects, 1))); if (!tbl) { dm_error("Can't get connector table from atom bios.\n"); @@ -662,8 +665,9 @@ static enum bp_result get_ss_info_v3_1( if (!DATA_TABLES(ASIC_InternalSS_Info)) return BP_RESULT_UNSUPPORTED; - ss_table_header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V3, - DATA_TABLES(ASIC_InternalSS_Info)); + ss_table_header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base, + DATA_TABLES(ASIC_InternalSS_Info), + struct_size(ss_table_header_include, asSpreadSpectrum, 1))); table_size = (le16_to_cpu(ss_table_header_include->sHeader.usStructureSize) - sizeof(ATOM_COMMON_TABLE_HEADER)) @@ -1029,8 +1033,10 @@ static enum bp_result get_ss_info_from_internal_ss_info_tbl_V2_1( if (!DATA_TABLES(ASIC_InternalSS_Info)) return result; - header = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V2, - DATA_TABLES(ASIC_InternalSS_Info)); + header = ((ATOM_ASIC_INTERNAL_SS_INFO_V2 *) bios_get_image( + &bp->base, + DATA_TABLES(ASIC_InternalSS_Info), + struct_size(header, asSpreadSpectrum, 1))); memset(info, 0, sizeof(struct spread_spectrum_info)); @@ -1709,8 +1715,10 @@ static uint32_t 
get_ss_entry_number_from_internal_ss_info_tbl_v2_1( if (!DATA_TABLES(ASIC_InternalSS_Info)) return 0; - header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V2, - DATA_TABLES(ASIC_InternalSS_Info)); + header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V2 *) bios_get_image( + &bp->base, + DATA_TABLES(ASIC_InternalSS_Info), + struct_size(header_include, asSpreadSpectrum, 1))); size = (le16_to_cpu(header_include->sHeader.usStructureSize) - sizeof(ATOM_COMMON_TABLE_HEADER)) @@ -1746,8 +1754,9 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_V3_1( if (!DATA_TABLES(ASIC_InternalSS_Info)) return number; - header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V3, - DATA_TABLES(ASIC_InternalSS_Info)); + header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base, + DATA_TABLES(ASIC_InternalSS_Info), + struct_size(header_include, asSpreadSpectrum, 1))); size = (le16_to_cpu(header_include->sHeader.usStructureSize) - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3); @@ -1789,11 +1798,13 @@ static enum bp_result bios_parser_get_gpio_pin_info( if (!DATA_TABLES(GPIO_Pin_LUT)) return BP_RESULT_BADBIOSTABLE; - header = GET_IMAGE(ATOM_GPIO_PIN_LUT, DATA_TABLES(GPIO_Pin_LUT)); + header = ((ATOM_GPIO_PIN_LUT *) bios_get_image(&bp->base, + DATA_TABLES(GPIO_Pin_LUT), + struct_size(header, asGPIO_Pin, 1))); if (!header) return BP_RESULT_BADBIOSTABLE; - if (sizeof(ATOM_COMMON_TABLE_HEADER) + sizeof(ATOM_GPIO_PIN_LUT) + if (sizeof(ATOM_COMMON_TABLE_HEADER) + struct_size(header, asGPIO_Pin, 1) > le16_to_cpu(header->sHeader.usStructureSize)) return BP_RESULT_BADBIOSTABLE; @@ -1978,7 +1989,8 @@ static ATOM_OBJECT *get_bios_object(struct bios_parser *bp, offset += bp->object_info_tbl_offset; - tbl = GET_IMAGE(ATOM_OBJECT_TABLE, offset); + tbl = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base, offset, + struct_size(tbl, asObjects, 1))); if (!tbl) return NULL; @@ -2600,8 +2612,7 @@ static enum bp_result update_slot_layout_info( for (;;) { - record_header = (ATOM_COMMON_RECORD_HEADER *) - GET_IMAGE(ATOM_COMMON_RECORD_HEADER, record_offset); + record_header = GET_IMAGE(ATOM_COMMON_RECORD_HEADER, record_offset); if (record_header == NULL) { result = BP_RESULT_BADBIOSTABLE; break; @@ -2615,7 +2626,7 @@ static enum bp_result update_slot_layout_info( if (record_header->ucRecordType == ATOM_BRACKET_LAYOUT_RECORD_TYPE && - sizeof(ATOM_BRACKET_LAYOUT_RECORD) + struct_size(record, asConnInfo, 1) <= record_header->ucRecordSize) { record = (ATOM_BRACKET_LAYOUT_RECORD *) (record_header); @@ -2709,8 +2720,9 @@ static enum bp_result get_bracket_layout_record( genericTableOffset = bp->object_info_tbl_offset + bp->object_info_tbl.v1_3->usMiscObjectTableOffset; - object_table = (ATOM_OBJECT_TABLE *) - GET_IMAGE(ATOM_OBJECT_TABLE, genericTableOffset); + object_table = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base, + genericTableOffset, + struct_size(object_table, asObjects, 1))); if (!object_table) return BP_RESULT_FAILURE; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index e0c8d6f09bb4..074e70a5c458 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -462,6 +462,7 @@ static enum bp_result get_gpio_i2c_info( uint32_t count = 0; unsigned int table_index = 0; bool find_valid = false; + struct atom_gpio_pin_assignment *pin; if (!info) return BP_RESULT_BADINPUT; @@ -489,20 +490,17 @@ static enum bp_result get_gpio_i2c_info( - sizeof(struct 
atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + pin = (struct atom_gpio_pin_assignment *) header->gpio_pin; + for (table_index = 0; table_index < count; table_index++) { - if (((record->i2c_id & I2C_HW_CAP) == ( - header->gpio_pin[table_index].gpio_id & - I2C_HW_CAP)) && - ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) == - (header->gpio_pin[table_index].gpio_id & - I2C_HW_ENGINE_ID_MASK)) && - ((record->i2c_id & I2C_HW_LANE_MUX) == - (header->gpio_pin[table_index].gpio_id & - I2C_HW_LANE_MUX))) { + if (((record->i2c_id & I2C_HW_CAP) == (pin->gpio_id & I2C_HW_CAP)) && + ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) == (pin->gpio_id & I2C_HW_ENGINE_ID_MASK)) && + ((record->i2c_id & I2C_HW_LANE_MUX) == (pin->gpio_id & I2C_HW_LANE_MUX))) { /* still valid */ find_valid = true; break; } + pin = (struct atom_gpio_pin_assignment *)((uint8_t *)pin + sizeof(struct atom_gpio_pin_assignment)); } /* If we don't find the entry that we are looking for then diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h index 3e5df27aa96f..1ce19d875358 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h @@ -26,6 +26,8 @@ #ifndef DAL_DC_RN_CLK_MGR_VBIOS_SMU_H_ #define DAL_DC_RN_CLK_MGR_VBIOS_SMU_H_ +enum dcn_pwr_state; + int rn_vbios_smu_get_smu_version(struct clk_mgr_internal *clk_mgr); int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz); int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr); @@ -33,7 +35,7 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz); void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz); -void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, int display_count); +void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, enum dcn_pwr_state); void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable); void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr); int rn_vbios_smu_is_periodic_retraining_disabled(struct clk_mgr_internal *clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h index 8ea8ee57b39f..61bb1d86182e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT // This is a stripped-down version of the smu11_driver_if.h file for the relevant DAL interfaces. 
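/*
 * Illustrative sketch, not part of the patch: the bios_parser hunks above
 * replace fixed-size GET_IMAGE() lookups with bios_get_image() calls whose
 * length is computed by struct_size() from <linux/overflow.h>.
 * struct_size(p, member, n) evaluates, saturating at SIZE_MAX on overflow,
 * to the size of *p plus n elements of its trailing array, so the request
 * below is large enough for the object table header and its first entry.
 * The helper name is an assumption made for illustration only.
 */
static ATOM_OBJECT_TABLE *example_get_object_table(struct bios_parser *bp,
						   uint32_t offset)
{
	ATOM_OBJECT_TABLE *tbl;

	tbl = (ATOM_OBJECT_TABLE *)bios_get_image(&bp->base, offset,
						  struct_size(tbl, asObjects, 1));

	return tbl; /* NULL when the requested range is not in the BIOS image */
}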
#define SMU11_DRIVER_IF_VERSION 0x40 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index c1eaf571407a..1c0569b1dc8f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -609,8 +609,10 @@ static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk } bw_params->vram_type = bios_info->memory_type; - bw_params->num_channels = bios_info->ma_channel_number; + bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4; + //bw_params->dram_channel_width_bytes = dc->ctx->asic_id.vram_width; + bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4; for (i = 0; i < WM_SET_COUNT; i++) { bw_params->wm_table.entries[i].wm_inst = i; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c index 090b2c02aee1..0827c7df2855 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c @@ -333,8 +333,8 @@ void dcn31_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY)) support = DCN_ZSTATE_SUPPORT_DISALLOW; - - if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY) + if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY || + support == DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY) param = 1; else param = 0; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c index 2db595672a46..f47cfe6b42bd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c @@ -346,8 +346,6 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs if (!clk_mgr->smu_present) return; - // Arg[15:0] = 8/9/0 for Z8/Z9/disallow -> existing bits - // Arg[16] = Disallow Z9 -> new bit switch (support) { case DCN_ZSTATE_SUPPORT_ALLOW: @@ -366,6 +364,16 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs param = (1 << 10); break; + case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY: + msg_id = VBIOSSMC_MSG_AllowZstatesEntry; + param = (1 << 10) | (1 << 8); + break; + + case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY: + msg_id = VBIOSSMC_MSG_AllowZstatesEntry; + param = (1 << 8); + break; + default: //DCN_ZSTATE_SUPPORT_UNKNOWN msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = 0; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 893991a0eb97..07edd9777edf 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -458,19 +458,6 @@ static void dcn315_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, dcn315_smu_transfer_dpm_table_smu_2_dram(clk_mgr); } -static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) -{ - uint32_t max = 0; - int i; - - for (i = 0; i < num_clocks; ++i) { - if (clocks[i] > max) - max = clocks[i]; - } - - return max; -} - static void dcn315_clk_mgr_helper_populate_bw_params( struct clk_mgr_internal *clk_mgr, struct integrated_info *bios_info, @@ -478,29 +465,21 @@ static void dcn315_clk_mgr_helper_populate_bw_params( { int i; struct clk_bw_params *bw_params = 
clk_mgr->base.bw_params; - uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0; + uint32_t max_pstate = clock_table->NumDfPstatesEnabled - 1; struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; - /* Find highest fclk pstate */ - for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { - if (clock_table->DfPstateTable[i].FClk > max_fclk) { - max_fclk = clock_table->DfPstateTable[i].FClk; - max_pstate = i; - } - } - /* For 315 we want to base clock table on dcfclk, need at least one entry regardless of pmfw table */ for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { int j; - uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; - for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { - if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i] - && clock_table->DfPstateTable[j].FClk < min_fclk) { - min_fclk = clock_table->DfPstateTable[j].FClk; - min_pstate = j; - } + /* DF table is sorted with clocks decreasing */ + for (j = clock_table->NumDfPstatesEnabled - 2; j >= 0; j--) { + if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) + max_pstate = j; } + /* Max DCFCLK should match up with max pstate */ + if (i == clock_table->NumDcfClkLevelsEnabled - 1) + max_pstate = 0; /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) @@ -511,9 +490,9 @@ static void dcn315_clk_mgr_helper_populate_bw_params( bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; /* Now update clocks we do read */ - bw_params->clk_table.entries[i].fclk_mhz = min_fclk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; + bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[max_pstate].FClk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->SocVoltage[i]; bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; bw_params->clk_table.entries[i].dispclk_mhz = clock_table->DispClocks[i]; @@ -521,25 +500,16 @@ static void dcn315_clk_mgr_helper_populate_bw_params( bw_params->clk_table.entries[i].wck_ratio = 1; } - /* Make sure to include at least one entry and highest pstate */ - if (max_pstate != min_pstate || i == 0) { - bw_params->clk_table.entries[i].fclk_mhz = max_fclk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; - bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS); + /* Make sure to include at least one entry */ + if (i == 0) { + bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[0].FClk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[0].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[0].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[0]; bw_params->clk_table.entries[i].wck_ratio = 1; i++; } - bw_params->clk_table.num_entries = i--; - - /* Make sure all highest clocks are included*/ - bw_params->clk_table.entries[i].socclk_mhz = 
find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); - bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS); - bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS); - ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS)); - bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; - bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; - bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; + bw_params->clk_table.num_entries = i; /* Set any 0 clocks to max default setting. Not an issue for * power since we aren't doing switching in such case anyway @@ -565,6 +535,11 @@ static void dcn315_clk_mgr_helper_populate_bw_params( if (!bw_params->clk_table.entries[i].dtbclk_mhz) bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; } + + /* Make sure all highest default clocks are included*/ + ASSERT(bw_params->clk_table.entries[i-1].phyclk_mhz == def_max.phyclk_mhz); + ASSERT(bw_params->clk_table.entries[i-1].phyclk_d18_mhz == def_max.phyclk_d18_mhz); + ASSERT(bw_params->clk_table.entries[i-1].dtbclk_mhz == def_max.dtbclk_mhz); ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 187f5b27fdc8..3edc81e2d417 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -553,6 +553,7 @@ static void dcn316_clk_mgr_helper_populate_bw_params( bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; + bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 
8 : 4; for (i = 0; i < WM_SET_COUNT; i++) { bw_params->wm_table.entries[i].wm_inst = i; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 6f77d8e538ab..200fcec19186 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -233,41 +233,6 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) DC_FP_END(); } -static void dcn32_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr, - struct dc_state *context, - int ref_dtbclk_khz) -{ - struct dccg *dccg = clk_mgr->dccg; - uint32_t tg_mask = 0; - int i; - - for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - struct dtbclk_dto_params dto_params = {0}; - - /* use mask to program DTO once per tg */ - if (pipe_ctx->stream_res.tg && - !(tg_mask & (1 << pipe_ctx->stream_res.tg->inst))) { - tg_mask |= (1 << pipe_ctx->stream_res.tg->inst); - - dto_params.otg_inst = pipe_ctx->stream_res.tg->inst; - dto_params.ref_dtbclk_khz = ref_dtbclk_khz; - - if (is_dp_128b_132b_signal(pipe_ctx)) { - dto_params.pixclk_khz = pipe_ctx->stream->phy_pix_clk; - - if (pipe_ctx->stream_res.audio != NULL) - dto_params.req_audio_dtbclk_khz = 24000; - } - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - dto_params.is_hdmi = true; - - dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params); - //dccg->funcs->set_audio_dtbclk_dto(clk_mgr->dccg, &dto_params); - } - } -} - /* Since DPPCLK request to PMFW needs to be exact (due to DPP DTO programming), * update DPPCLK to be the exact frequency that will be set after the DPPCLK * divider is updated. This will prevent rounding issues that could cause DPP @@ -438,7 +403,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, } if (!new_clocks->dtbclk_en) { - new_clocks->ref_dtbclk_khz = 0; + new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; } /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ @@ -447,8 +412,6 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, /* DCCG requires KHz precision for DTBCLK */ clk_mgr_base->clks.ref_dtbclk_khz = dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); - - dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); } if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h index d30fbbdd1792..d3d5a8caccf8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT // This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces. #define SMU13_DRIVER_IF_VERSION 0x18 diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 997ab031f816..0cb8d1f934d1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -135,9 +135,7 @@ static const char DC_BUILD_ID[] = "production-build"; * one or two (in the pipe-split case). 
*/ -/******************************************************************************* - * Private functions - ******************************************************************************/ +/* Private functions */ static inline void elevate_update_type(enum surface_update_type *original, enum surface_update_type new) { @@ -401,9 +399,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, { int i; - if (memcmp(adjust, &stream->adjust, sizeof(struct dc_crtc_timing_adjust)) == 0) - return true; - stream->adjust.v_total_max = adjust->v_total_max; stream->adjust.v_total_mid = adjust->v_total_mid; stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num; @@ -424,18 +419,14 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, } /** - ***************************************************************************** - * Function: dc_stream_get_last_vrr_vtotal + * dc_stream_get_last_used_drr_vtotal - dc_stream_get_last_vrr_vtotal * - * @brief - * Looks up the pipe context of dc_stream_state and gets the - * last VTOTAL used by DRR (Dynamic Refresh Rate) + * @dc: [in] dc reference + * @stream: [in] Initial dc stream state + * @adjust: [in] Updated parameters for vertical_total_min and * - * @param [in] dc: dc reference - * @param [in] stream: Initial dc stream state - * @param [in] adjust: Updated parameters for vertical_total_min and - * vertical_total_max - ***************************************************************************** + * Looks up the pipe context of dc_stream_state and gets the last VTOTAL used + * by DRR (Dynamic Refresh Rate) */ bool dc_stream_get_last_used_drr_vtotal(struct dc *dc, struct dc_stream_state *stream, @@ -491,86 +482,79 @@ bool dc_stream_get_crtc_position(struct dc *dc, } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) -bool dc_stream_forward_dmcu_crc_window(struct dc *dc, struct dc_stream_state *stream, - struct crc_params *crc_window) +static inline void +dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv, + struct rect *rect, struct otg_phy_mux *mux_mapping, bool is_stop) { - int i; - struct dmcu *dmcu = dc->res_pool->dmcu; - struct pipe_ctx *pipe; - struct crc_region tmp_win, *crc_win; - struct otg_phy_mux mapping_tmp, *mux_mapping; - - /*crc window can't be null*/ - if (!crc_window) - return false; - - if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu))) { - crc_win = &tmp_win; - mux_mapping = &mapping_tmp; - /*set crc window*/ - tmp_win.x_start = crc_window->windowa_x_start; - tmp_win.y_start = crc_window->windowa_y_start; - tmp_win.x_end = crc_window->windowa_x_end; - tmp_win.y_end = crc_window->windowa_y_end; - - for (i = 0; i < MAX_PIPES; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe) - break; - } - - /* Stream not found */ - if (i == MAX_PIPES) - return false; - + union dmub_rb_cmd cmd = {0}; - /*set mux routing info*/ - mapping_tmp.phy_output_num = stream->link->link_enc_hw_inst; - mapping_tmp.otg_output_num = pipe->stream_res.tg->inst; + cmd.secure_display.roi_info.phy_id = mux_mapping->phy_output_num; + cmd.secure_display.roi_info.otg_id = mux_mapping->otg_output_num; - dmcu->funcs->forward_crc_window(dmcu, crc_win, mux_mapping); + if (is_stop) { + cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY; + cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_CRC_STOP_UPDATE; } else { - DC_LOG_DC("dmcu is not initialized"); - return false; + cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY; + cmd.secure_display.header.sub_type = 
DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY; + cmd.secure_display.roi_info.x_start = rect->x; + cmd.secure_display.roi_info.y_start = rect->y; + cmd.secure_display.roi_info.x_end = rect->x + rect->width; + cmd.secure_display.roi_info.y_end = rect->y + rect->height; } - return true; + dc_dmub_srv_cmd_queue(dmub_srv, &cmd); + dc_dmub_srv_cmd_execute(dmub_srv); } -bool dc_stream_stop_dmcu_crc_win_update(struct dc *dc, struct dc_stream_state *stream) +static inline void +dc_stream_forward_dmcu_crc_window(struct dmcu *dmcu, + struct rect *rect, struct otg_phy_mux *mux_mapping, bool is_stop) { - int i; - struct dmcu *dmcu = dc->res_pool->dmcu; - struct pipe_ctx *pipe; - struct otg_phy_mux mapping_tmp, *mux_mapping; + if (is_stop) + dmcu->funcs->stop_crc_win_update(dmcu, mux_mapping); + else + dmcu->funcs->forward_crc_window(dmcu, rect, mux_mapping); +} - if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu))) { - mux_mapping = &mapping_tmp; +bool +dc_stream_forward_crc_window(struct dc *dc, + struct rect *rect, struct dc_stream_state *stream, bool is_stop) +{ + struct dmcu *dmcu; + struct dc_dmub_srv *dmub_srv; + struct otg_phy_mux mux_mapping; + struct pipe_ctx *pipe; + int i; - for (i = 0; i < MAX_PIPES; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe) - break; - } + for (i = 0; i < MAX_PIPES; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe) + break; + } - /* Stream not found */ - if (i == MAX_PIPES) - return false; + /* Stream not found */ + if (i == MAX_PIPES) + return false; + mux_mapping.phy_output_num = stream->link->link_enc_hw_inst; + mux_mapping.otg_output_num = pipe->stream_res.tg->inst; - /*set mux routing info*/ - mapping_tmp.phy_output_num = stream->link->link_enc_hw_inst; - mapping_tmp.otg_output_num = pipe->stream_res.tg->inst; + dmcu = dc->res_pool->dmcu; + dmub_srv = dc->ctx->dmub_srv; - dmcu->funcs->stop_crc_win_update(dmcu, mux_mapping); - } else { - DC_LOG_DC("dmcu is not initialized"); + /* forward to dmub */ + if (dmub_srv) + dc_stream_forward_dmub_crc_window(dmub_srv, rect, &mux_mapping, is_stop); + /* forward to dmcu */ + else if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) + dc_stream_forward_dmcu_crc_window(dmcu, rect, &mux_mapping, is_stop); + else return false; - } return true; } -#endif +#endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */ /** * dc_stream_configure_crc() - Configure CRC capture for the given stream. @@ -1070,6 +1054,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) int i, j; struct dc_state *dangling_context = dc_create_state(dc); struct dc_state *current_ctx; + struct pipe_ctx *pipe; + struct timing_generator *tg; if (dangling_context == NULL) return; @@ -1112,6 +1098,18 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) } if (should_disable && old_stream) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + /* When disabling plane for a phantom pipe, we must turn on the + * phantom OTG so the disable programming gets the double buffer + * update. Otherwise the pipe will be left in a partially disabled + * state that can result in underflow or hang when enabling it + * again for different use. 
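The new dc_stream_forward_dmub_crc_window() helper above translates a struct rect (origin plus size) into the start/end coordinates the DMUB secure-display ROI command expects. A small sketch of just that translation, using assumed local struct definitions rather than the real dmub_rb_cmd layout:

#include <stdio.h>

struct rect { int x, y, width, height; };            /* origin + extent      */
struct roi  { int x_start, y_start, x_end, y_end; }; /* window edges         */

/* Translate a rectangle into the start/end form used by the CRC window
 * notify command: end coordinates are origin plus extent. */
static struct roi rect_to_roi(const struct rect *r)
{
        struct roi w = {
                .x_start = r->x,
                .y_start = r->y,
                .x_end   = r->x + r->width,
                .y_end   = r->y + r->height,
        };
        return w;
}

int main(void)
{
        struct rect r = { .x = 100, .y = 50, .width = 640, .height = 480 };
        struct roi w = rect_to_roi(&r);

        printf("ROI: (%d,%d) -> (%d,%d)\n", w.x_start, w.y_start, w.x_end, w.y_end);
        return 0;
}

The consolidated dc_stream_forward_crc_window() then routes the request to DMUB when dc->ctx->dmub_srv exists and only falls back to the legacy DMCU path when the DMCU is initialized.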
+ */ + if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (tg->funcs->enable_crtc) + tg->funcs->enable_crtc(tg); + } dc_rem_all_planes_for_stream(dc, old_stream, dangling_context); disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); @@ -1127,6 +1125,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) dc->hwss.interdependent_update_lock(dc, dc->current_state, false); dc->hwss.post_unlock_program_front_end(dc, dangling_context); } + /* We need to put the phantom OTG back into it's default (disabled) state or we + * can get corruption when transition from one SubVP config to a different one. + * The OTG is set to disable on falling edge of VUPDATE so the plane disable + * will still get it's double buffer update. + */ + if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (tg->funcs->disable_phantom_crtc) + tg->funcs->disable_phantom_crtc(tg); + } } } @@ -1219,9 +1226,7 @@ static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) PERF_TRACE(); } -/******************************************************************************* - * Public functions - ******************************************************************************/ +/* Public functions */ struct dc *dc_create(const struct dc_init_data *init_params) { @@ -1488,17 +1493,19 @@ static void program_timing_sync( } } -static bool context_changed( - struct dc *dc, - struct dc_state *context) +static bool streams_changed(struct dc *dc, + struct dc_stream_state *streams[], + uint8_t stream_count) { uint8_t i; - if (context->stream_count != dc->current_state->stream_count) + if (stream_count != dc->current_state->stream_count) return true; for (i = 0; i < dc->current_state->stream_count; i++) { - if (dc->current_state->streams[i] != context->streams[i]) + if (dc->current_state->streams[i] != streams[i]) + return true; + if (!streams[i]->link->link_state_valid) return true; } @@ -1549,6 +1556,9 @@ bool dc_validate_boot_timing(const struct dc *dc, if (tg_inst >= dc->res_pool->timing_generator_count) return false; + if (tg_inst != link->link_enc->preferred_engine) + return false; + tg = dc->res_pool->timing_generators[tg_inst]; if (!tg->funcs->get_hw_timing) @@ -1722,8 +1732,13 @@ void dc_z10_save_init(struct dc *dc) dc->hwss.z10_save_init(dc); } -/* - * Applies given context to HW and copy it into current context. +/** + * dc_commit_state_no_check - Apply context to the hardware + * + * @dc: DC object with the current status to be updated + * @context: New state that will become the current status at the end of this function + * + * Applies given context to the hardware and copy it into current context. * It's up to the user to release the src context afterwards. 
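context_changed() is replaced by streams_changed(), which compares a caller-supplied stream array against the current state and additionally treats an invalidated link state as a change, so a commit is not skipped just because the stream pointers are identical. A compressed userspace sketch of that comparison (the stream and link types are stand-ins, not the DC structures):

#include <stdbool.h>
#include <stdint.h>

struct link   { bool link_state_valid; };
struct stream { struct link *link; };

/* Report "changed" if the requested stream set differs from the current one
 * by count, by identity, or if any requested stream sits on a link whose
 * state is no longer valid and therefore must be reprogrammed. */
static bool streams_changed(struct stream *current[], uint8_t current_count,
                            struct stream *requested[], uint8_t requested_count)
{
        uint8_t i;

        if (requested_count != current_count)
                return true;

        for (i = 0; i < current_count; i++) {
                if (current[i] != requested[i])
                        return true;
                if (!requested[i]->link->link_state_valid)
                        return true;
        }

        return false;
}

int main(void)
{
        struct link l = { .link_state_valid = true };
        struct stream s = { .link = &l };
        struct stream *cur[] = { &s };
        struct stream *req[] = { &s };

        return streams_changed(cur, 1, req, 1) ? 1 : 0; /* 0: nothing changed */
}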
*/ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *context) @@ -1760,6 +1775,12 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c context->stream_count == 0) dc->hwss.prepare_bandwidth(dc, context); + /* When SubVP is active, all HW programming must be done while + * SubVP lock is acquired + */ + if (dc->hwss.subvp_pipe_control_lock) + dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use); + if (dc->debug.enable_double_buffered_dsc_pg_support) dc->hwss.update_dsc_pg(dc, context, false); @@ -1787,9 +1808,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe); } - if (dc->hwss.subvp_pipe_control_lock) - dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use); - result = dc->hwss.apply_ctx_to_hw(dc, context); if (result != DC_OK) { @@ -1888,12 +1906,108 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c return result; } +/** + * dc_commit_streams - Commit current stream state + * + * @dc: DC object with the commit state to be configured in the hardware + * @streams: Array with a list of stream state + * @stream_count: Total of streams + * + * Function responsible for commit streams change to the hardware. + * + * Return: + * Return DC_OK if everything work as expected, otherwise, return a dc_status + * code. + */ +enum dc_status dc_commit_streams(struct dc *dc, + struct dc_stream_state *streams[], + uint8_t stream_count) +{ + int i, j; + struct dc_state *context; + enum dc_status res = DC_OK; + struct dc_validation_set set[MAX_STREAMS] = {0}; + + if (dc->ctx->dce_environment == DCE_ENV_VIRTUAL_HW) + return res; + + if (!streams_changed(dc, streams, stream_count)) + return res; + + DC_LOG_DC("%s: %d streams\n", __func__, stream_count); + + for (i = 0; i < stream_count; i++) { + struct dc_stream_state *stream = streams[i]; + struct dc_stream_status *status = dc_stream_get_status(stream); + + dc_stream_log(dc, stream); + + set[i].stream = stream; + + if (status) { + set[i].plane_count = status->plane_count; + for (j = 0; j < status->plane_count; j++) + set[i].plane_states[j] = status->plane_states[j]; + } + } + + context = dc_create_state(dc); + if (!context) + goto context_alloc_fail; + + dc_resource_state_copy_construct_current(dc, context); + + res = dc_validate_with_context(dc, set, stream_count, context, false); + if (res != DC_OK) { + BREAK_TO_DEBUGGER(); + goto fail; + } + + res = dc_commit_state_no_check(dc, context); + + for (i = 0; i < stream_count; i++) { + for (j = 0; j < context->stream_count; j++) { + if (streams[i]->stream_id == context->streams[j]->stream_id) + streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst; + + if (dc_is_embedded_signal(streams[i]->signal)) { + struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]); + + if (dc->hwss.is_abm_supported) + status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]); + else + status->is_abm_supported = true; + } + } + } + +fail: + dc_release_state(context); + +context_alloc_fail: + + DC_LOG_DC("%s Finished.\n", __func__); + + return res; +} + +/* TODO: When the transition to the new commit sequence is done, remove this + * function in favor of dc_commit_streams. 
*/ bool dc_commit_state(struct dc *dc, struct dc_state *context) { enum dc_status result = DC_ERROR_UNEXPECTED; int i; - if (!context_changed(dc, context)) + /* TODO: Since change commit sequence can have a huge impact, + * we decided to only enable it for DCN3x. However, as soon as + * we get more confident about this change we'll need to enable + * the new sequence for all ASICs. */ + if (dc->ctx->dce_version >= DCN_VERSION_3_2) { + result = dc_commit_streams(dc, context->streams, context->stream_count); + return result == DC_OK; + } + + if (!streams_changed(dc, context->streams, context->stream_count)) return DC_OK; DC_LOG_DC("%s: %d streams\n", @@ -2950,7 +3064,7 @@ static bool update_planes_and_stream_state(struct dc *dc, * Ensures that we have enough pipes for newly added MPO planes */ if (dc->res_pool->funcs->remove_phantom_pipes) - dc->res_pool->funcs->remove_phantom_pipes(dc, context); + dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); /*remove old surfaces from context */ if (!dc_rem_all_planes_for_stream(dc, stream, context)) { @@ -2987,6 +3101,19 @@ static bool update_planes_and_stream_state(struct dc *dc, if (update_type == UPDATE_TYPE_FULL) { if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + /* For phantom pipes we remove and create a new set of phantom pipes + * for each full update (because we don't know if we'll need phantom + * pipes until after the first round of validation). However, if validation + * fails we need to keep the existing phantom pipes (because we don't update + * the dc->current_state). + * + * The phantom stream/plane refcount is decremented for validation because + * we assume it'll be removed (the free comes when the dc_state is freed), + * but if validation fails we have to increment back the refcount so it's + * consistent. + */ + if (dc->res_pool->funcs->retain_phantom_pipes) + dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state); BREAK_TO_DEBUGGER(); goto fail; } @@ -3297,22 +3424,6 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true); } - if (update_type != UPDATE_TYPE_FAST) { - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - - if ((new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) || - subvp_prev_use) { - // If old context or new context has phantom pipes, apply - // the phantom timings now. We can't change the phantom - // pipe configuration safely without driver acquiring - // the DMCUB lock first. - dc->hwss.apply_ctx_to_hw(dc, context); - break; - } - } - } - dc_dmub_update_dirty_rect(dc, surface_count, stream, srf_updates, context); if (update_type != UPDATE_TYPE_FAST) { @@ -3370,6 +3481,24 @@ static void commit_planes_for_stream(struct dc *dc, return; } + if (update_type != UPDATE_TYPE_FAST) { + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP && + pipe_ctx->stream && pipe_ctx->plane_state) { + /* Only update visual confirm for SUBVP here. + * The bar appears on all pipes, so we need to update the bar on all displays, + * so the information doesn't get stale. 
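The retain_phantom_pipes() call added to the validation-failure path is purely a refcount-balancing move, as the comment explains: validation drops the phantom references on the assumption they die with the new state, so a failed validation has to take them back to keep dc->current_state consistent. A toy sketch of that drop-early, re-take-on-failure pattern (not DC code, just the invariant):

#include <stdbool.h>
#include <stdio.h>

struct phantom { int refcount; };

static void retain(struct phantom *p)  { p->refcount++; }
static void release(struct phantom *p) { p->refcount--; }

/* Validation releases the phantom up front because success means the old
 * reference dies with the old state; on failure the reference is re-taken
 * so the caller still sees a balanced count. */
static bool validate_with_phantom(struct phantom *p, bool validation_passes)
{
        release(p);                 /* assume the phantom goes away */

        if (!validation_passes) {
                retain(p);          /* keep the current state consistent */
                return false;
        }
        return true;
}

int main(void)
{
        struct phantom p = { .refcount = 1 };

        validate_with_phantom(&p, false);
        printf("refcount after failed validation: %d\n", p.refcount); /* 1 */
        return 0;
}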
+ */ + struct mpcc_blnd_cfg blnd_cfg = { 0 }; + + dc->hwss.update_visual_confirm_color(dc, pipe_ctx, &blnd_cfg.black_color, + pipe_ctx->plane_res.hubp->inst); + } + } + } + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { for (i = 0; i < surface_count; i++) { struct dc_plane_state *plane_state = srf_updates[i].surface; @@ -3487,7 +3616,6 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.update_plane_addr(dc, pipe_ctx); } } - } if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { @@ -3524,6 +3652,44 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg); } + /* For phantom pipe OTG enable, it has to be done after any previous pipe + * that was in use has already been programmed at gotten its double buffer + * update for "disable". + */ + if (update_type != UPDATE_TYPE_FAST) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + /* If an active, non-phantom pipe is being transitioned into a phantom + * pipe, wait for the double buffer update to complete first before we do + * ANY phantom pipe programming. + */ + if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM && + old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { + old_pipe->stream_res.tg->funcs->wait_for_state( + old_pipe->stream_res.tg, + CRTC_STATE_VBLANK); + old_pipe->stream_res.tg->funcs->wait_for_state( + old_pipe->stream_res.tg, + CRTC_STATE_VACTIVE); + } + } + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + + if ((new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) || + subvp_prev_use) { + // If old context or new context has phantom pipes, apply + // the phantom timings now. We can't change the phantom + // pipe configuration safely without driver acquiring + // the DMCUB lock first. + dc->hwss.apply_ctx_to_hw(dc, context); + break; + } + } + } + if (update_type != UPDATE_TYPE_FAST) dc->hwss.post_unlock_program_front_end(dc, context); if (update_type != UPDATE_TYPE_FAST) @@ -3563,10 +3729,24 @@ static void commit_planes_for_stream(struct dc *dc, } } -/* Determines if the incoming context requires a applying transition state with unnecessary - * pipe splitting and ODM disabled, due to hardware limitations. In a case where - * the OPP associated with an MPCC might change due to plane additions, this function +/** + * could_mpcc_tree_change_for_active_pipes - Check if an OPP associated with MPCC might change + * + * @dc: Used to get the current state status + * @stream: Target stream, which we want to remove the attached planes + * @surface_count: Number of surface update + * @is_plane_addition: [in] Fill out with true if it is a plane addition case + * + * DCN32x and newer support a feature named Dynamic ODM which can conflict with + * the MPO if used simultaneously in some specific configurations (e.g., + * 4k@144). This function checks if the incoming context requires applying a + * transition state with unnecessary pipe splitting and ODM disabled to + * circumvent our hardware limitations to prevent this edge case. If the OPP + * associated with an MPCC might change due to plane additions, this function * returns true. + * + * Return: + * Return true if OPP and MPCC might change, otherwise, return false. 
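The ordering rule spelled out above, wait on the old OTG for VBLANK and then VACTIVE before any phantom pipe programming, is in effect how the code confirms the pending double-buffered disable has latched: crossing into VBLANK passes the update point, and returning to VACTIVE means the new frame is already scanning out. A tiny state-machine sketch of that two-step wait (the polling loop and state names are illustrative, not OTG register accesses):

#include <stdio.h>

enum crtc_state { CRTC_STATE_VACTIVE, CRTC_STATE_VBLANK };

/* Fake OTG that alternates between active video and vblank on each poll. */
struct otg { enum crtc_state state; };

static enum crtc_state otg_poll(struct otg *tg)
{
        tg->state = (tg->state == CRTC_STATE_VACTIVE) ? CRTC_STATE_VBLANK
                                                      : CRTC_STATE_VACTIVE;
        return tg->state;
}

static void wait_for_state(struct otg *tg, enum crtc_state target)
{
        while (otg_poll(tg) != target)
                ;       /* a real driver would bound this with a timeout */
}

int main(void)
{
        struct otg tg = { .state = CRTC_STATE_VACTIVE };

        /* VBLANK first: the pending double-buffered programming latches.
         * VACTIVE second: the latched configuration now drives the screen. */
        wait_for_state(&tg, CRTC_STATE_VBLANK);
        wait_for_state(&tg, CRTC_STATE_VACTIVE);
        puts("double buffer update completed");
        return 0;
}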
*/ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc, struct dc_stream_state *stream, @@ -3576,6 +3756,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc, struct dc_stream_status *cur_stream_status = stream_get_status(dc->current_state, stream); bool force_minimal_pipe_splitting = false; + bool subvp_active = false; uint32_t i; *is_plane_addition = false; @@ -3608,39 +3789,55 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc, } } - /* For SubVP pipe split case when adding MPO video - * we need to add a minimal transition. In this case - * there will be 2 streams (1 main stream, 1 phantom - * stream). - */ - if (cur_stream_status && - dc->current_state->stream_count == 2 && - stream->mall_stream_config.type == SUBVP_MAIN) { - bool is_pipe_split = false; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream && - (dc->current_state->res_ctx.pipe_ctx[i].bottom_pipe || - dc->current_state->res_ctx.pipe_ctx[i].next_odm_pipe)) { - is_pipe_split = true; - break; - } + if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + subvp_active = true; + break; } + } + /* For SubVP when adding or removing planes we need to add a minimal transition + * (even when disabling all planes). Whenever disabling a phantom pipe, we + * must use the minimal transition path to disable the pipe correctly. + * + * We want to use the minimal transition whenever subvp is active, not only if + * a plane is being added / removed from a subvp stream (MPO plane can be added + * to a DRR pipe of SubVP + DRR config, in which case we still want to run through + * a min transition to disable subvp. + */ + if (cur_stream_status && subvp_active) { /* determine if minimal transition is required due to SubVP*/ - if (surface_count > 0 && is_pipe_split) { - if (cur_stream_status->plane_count > surface_count) { - force_minimal_pipe_splitting = true; - } else if (cur_stream_status->plane_count < surface_count) { - force_minimal_pipe_splitting = true; - *is_plane_addition = true; - } + if (cur_stream_status->plane_count > surface_count) { + force_minimal_pipe_splitting = true; + } else if (cur_stream_status->plane_count < surface_count) { + force_minimal_pipe_splitting = true; + *is_plane_addition = true; } } return force_minimal_pipe_splitting; } +/** + * commit_minimal_transition_state - Create a transition pipe split state + * + * @dc: Used to get the current state status + * @transition_base_context: New transition state + * + * In some specific configurations, such as pipe split on multi-display with + * MPO and/or Dynamic ODM, removing a plane may cause unsupported pipe + * programming when moving to new planes. To mitigate those types of problems, + * this function adds a transition state that minimizes pipe usage before + * programming the new configuration. When adding a new plane, the current + * state requires the least pipes, so it is applied without splitting. When + * removing a plane, the new state requires the least pipes, so it is applied + * without splitting. + * + * Return: + * Return false if something is wrong in the transition state. 
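With this rework the minimal-transition decision no longer looks for a pipe split on the SubVP main stream; it fires whenever any current pipe carries a SubVP stream and the plane count is about to change, covering the MPO-on-DRR case called out in the comment. A condensed sketch of just that decision (field names shortened, not the DC structs):

#include <stdbool.h>

/* Decide whether a plane-count change must go through the minimal
 * transition path.
 * subvp_active: any current pipe is SUBVP_MAIN or SUBVP_PHANTOM.
 * cur_planes:   planes attached to the stream today.
 * new_planes:   planes the update wants to attach. */
static bool needs_minimal_transition(bool subvp_active,
                                     int cur_planes, int new_planes,
                                     bool *is_plane_addition)
{
        *is_plane_addition = false;

        if (!subvp_active || cur_planes == new_planes)
                return false;

        if (new_planes > cur_planes)
                *is_plane_addition = true;  /* adding a plane, e.g. MPO video */

        return true;                        /* removal or addition: go minimal */
}

int main(void)
{
        bool add = false;

        /* removing one of two planes while SubVP is active -> minimal path */
        return needs_minimal_transition(true, 2, 1, &add) ? 0 : 1;
}

On removal the minimal state is the new context and on addition it is the current one, which is how dc_update_planes_and_stream() consumes the is_plane_addition flag further down.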
+ */ static bool commit_minimal_transition_state(struct dc *dc, struct dc_state *transition_base_context) { @@ -3650,9 +3847,48 @@ static bool commit_minimal_transition_state(struct dc *dc, bool temp_subvp_policy; enum dc_status ret = DC_ERROR_UNEXPECTED; unsigned int i, j; + unsigned int pipe_in_use = 0; + bool subvp_in_use = false; if (!transition_context) return false; + /* Setup: + * Store the current ODM and MPC config in some temp variables to be + * restored after we commit the transition state. + */ + + /* check current pipes in use*/ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &transition_base_context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) + pipe_in_use++; + } + + /* If SubVP is enabled and we are adding or removing planes from any main subvp + * pipe, we must use the minimal transition. + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + subvp_in_use = true; + break; + } + } + + /* When the OS add a new surface if we have been used all of pipes with odm combine + * and mpc split feature, it need use commit_minimal_transition_state to transition safely. + * After OS exit MPO, it will back to use odm and mpc split with all of pipes, we need + * call it again. Otherwise return true to skip. + * + * Reduce the scenarios to use dc_commit_state_no_check in the stage of flip. Especially + * enter/exit MPO when DCN still have enough resources. + */ + if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use) { + dc_release_state(transition_context); + return true; + } if (!dc->config.is_vmin_only_asic) { tmp_mpc_policy = dc->debug.pipe_split_policy; @@ -3667,7 +3903,7 @@ static bool commit_minimal_transition_state(struct dc *dc, dc_resource_state_copy_construct(transition_base_context, transition_context); - //commit minimal state + /* commit minimal state */ if (dc->res_pool->funcs->validate_bandwidth(dc, transition_context, false)) { for (i = 0; i < transition_context->stream_count; i++) { struct dc_stream_status *stream_status = &transition_context->stream_status[i]; @@ -3685,10 +3921,12 @@ static bool commit_minimal_transition_state(struct dc *dc, ret = dc_commit_state_no_check(dc, transition_context); } - /*always release as dc_commit_state_no_check retains in good case*/ + /* always release as dc_commit_state_no_check retains in good case */ dc_release_state(transition_context); - /*restore previous pipe split and odm policy*/ + /* TearDown: + * Restore original configuration for ODM and MPO. + */ if (!dc->config.is_vmin_only_asic) dc->debug.pipe_split_policy = tmp_mpc_policy; @@ -3696,12 +3934,12 @@ static bool commit_minimal_transition_state(struct dc *dc, dc->debug.force_disable_subvp = temp_subvp_policy; if (ret != DC_OK) { - /*this should never happen*/ + /* this should never happen */ BREAK_TO_DEBUGGER(); return false; } - /*force full surface update*/ + /* force full surface update */ for (i = 0; i < dc->current_state->stream_count; i++) { for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) { dc->current_state->stream_status[i].plane_states[j]->update_flags.raw = 0xFFFFFFFF; @@ -3719,6 +3957,7 @@ bool dc_update_planes_and_stream(struct dc *dc, struct dc_state *context; enum surface_update_type update_type; int i; + struct mall_temp_config mall_temp_config; /* In cases where MPO and split or ODM are used transitions can * cause underflow. 
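commit_minimal_transition_state() now starts by proving whether the intermediate commit is needed at all: it counts occupied pipes and checks for phantom pipes, and bails out early when spare pipes exist and SubVP is not involved, which keeps MPO enter/exit cheap while DCN still has free resources. A small sketch of that test (plain structs stand in for pipe_ctx and the MALL stream config):

#include <stdbool.h>
#include <stddef.h>

enum mall_type { SUBVP_NONE, SUBVP_MAIN, SUBVP_PHANTOM };

struct pipe {
        bool has_plane;          /* stands in for pipe->plane_state != NULL */
        enum mall_type mall;     /* stands in for mall_stream_config.type   */
};

/* Walk the pipe array the way the new code does: count occupied pipes and
 * note whether any phantom pipe exists.  The intermediate "minimal" commit
 * is only required when all pipes are busy or SubVP phantoms are in play. */
static bool minimal_transition_needed(const struct pipe *pipes, size_t pipe_count)
{
        size_t i, pipes_in_use = 0;
        bool subvp_in_use = false;

        for (i = 0; i < pipe_count; i++) {
                if (pipes[i].has_plane)
                        pipes_in_use++;
                if (pipes[i].mall == SUBVP_PHANTOM)
                        subvp_in_use = true;
        }

        return pipes_in_use == pipe_count || subvp_in_use;
}

int main(void)
{
        struct pipe pipes[4] = {
                { .has_plane = true,  .mall = SUBVP_NONE },
                { .has_plane = true,  .mall = SUBVP_NONE },
                { .has_plane = false, .mall = SUBVP_NONE },
                { .has_plane = false, .mall = SUBVP_NONE },
        };

        /* two idle pipes and no phantoms: the minimal commit can be skipped */
        return minimal_transition_needed(pipes, 4) ? 1 : 0;
}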
Apply stream configuration with minimal pipe @@ -3750,11 +3989,29 @@ bool dc_update_planes_and_stream(struct dc *dc, /* on plane removal, minimal state is the new one */ if (force_minimal_pipe_splitting && !is_plane_addition) { + /* Since all phantom pipes are removed in full validation, + * we have to save and restore the subvp/mall config when + * we do a minimal transition since the flags marking the + * pipe as subvp/phantom will be cleared (dc copy constructor + * creates a shallow copy). + */ + if (dc->res_pool->funcs->save_mall_state) + dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); if (!commit_minimal_transition_state(dc, context)) { dc_release_state(context); return false; } - + if (dc->res_pool->funcs->restore_mall_state) + dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); + + /* If we do a minimal transition with plane removal and the context + * has subvp we also have to retain back the phantom stream / planes + * since the refcount is decremented as part of the min transition + * (we commit a state with no subvp, so the phantom streams / planes + * had to be removed). + */ + if (dc->res_pool->funcs->retain_phantom_pipes) + dc->res_pool->funcs->retain_phantom_pipes(dc, context); update_type = UPDATE_TYPE_FULL; } @@ -3806,6 +4063,18 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_context *dc_ctx = dc->ctx; int i, j; + /* TODO: Since change commit sequence can have a huge impact, + * we decided to only enable it for DCN3x. However, as soon as + * we get more confident about this change we'll need to enable + * the new sequence for all ASICs. + */ + if (dc->ctx->dce_version >= DCN_VERSION_3_2) { + dc_update_planes_and_stream(dc, srf_updates, + surface_count, stream, + stream_update); + return; + } + stream_status = dc_stream_get_status(stream); context = dc->current_state; @@ -4387,21 +4656,17 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) dc->current_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down = true; } -/* - ***************************************************************************** - * Function: dc_is_dmub_outbox_supported - +/** + * dc_is_dmub_outbox_supported - Check if DMUB firmware support outbox notification * - * @brief - * Checks whether DMUB FW supports outbox notifications, if supported - * DM should register outbox interrupt prior to actually enabling interrupts - * via dc_enable_dmub_outbox + * @dc: [in] dc structure * - * @param - * [in] dc: dc structure + * Checks whether DMUB FW supports outbox notifications, if supported DM + * should register outbox interrupt prior to actually enabling interrupts + * via dc_enable_dmub_outbox * - * @return - * True if DMUB FW supports outbox notifications, False otherwise - ***************************************************************************** + * Return: + * True if DMUB FW supports outbox notifications, False otherwise */ bool dc_is_dmub_outbox_supported(struct dc *dc) { @@ -4419,21 +4684,17 @@ bool dc_is_dmub_outbox_supported(struct dc *dc) return dc->debug.enable_dmub_aux_for_legacy_ddc; } -/* - ***************************************************************************** - * Function: dc_enable_dmub_notifications +/** + * dc_enable_dmub_notifications - Check if dmub fw supports outbox * - * @brief - * Calls dc_is_dmub_outbox_supported to check if dmub fw supports outbox - * notifications. All DMs shall switch to dc_is_dmub_outbox_supported. - * This API shall be removed after switching. 
+ * @dc: [in] dc structure * - * @param - * [in] dc: dc structure + * Calls dc_is_dmub_outbox_supported to check if dmub fw supports outbox + * notifications. All DMs shall switch to dc_is_dmub_outbox_supported. This + * API shall be removed after switching. * - * @return - * True if DMUB FW supports outbox notifications, False otherwise - ***************************************************************************** + * Return: + * True if DMUB FW supports outbox notifications, False otherwise */ bool dc_enable_dmub_notifications(struct dc *dc) { @@ -4441,18 +4702,11 @@ bool dc_enable_dmub_notifications(struct dc *dc) } /** - ***************************************************************************** - * Function: dc_enable_dmub_outbox + * dc_enable_dmub_outbox - Enables DMUB unsolicited notification * - * @brief - * Enables DMUB unsolicited notifications to x86 via outbox + * dc: [in] dc structure * - * @param - * [in] dc: dc structure - * - * @return - * None - ***************************************************************************** + * Enables DMUB unsolicited notifications to x86 via outbox. */ void dc_enable_dmub_outbox(struct dc *dc) { @@ -4553,21 +4807,17 @@ uint8_t get_link_index_from_dpia_port_index(const struct dc *dc, } /** - ***************************************************************************** - * Function: dc_process_dmub_set_config_async + * dc_process_dmub_set_config_async - Submits set_config command * - * @brief - * Submits set_config command to dmub via inbox message + * @dc: [in] dc structure + * @link_index: [in] link_index: link index + * @payload: [in] aux payload + * @notify: [out] set_config immediate reply * - * @param - * [in] dc: dc structure - * [in] link_index: link index - * [in] payload: aux payload - * [out] notify: set_config immediate reply + * Submits set_config command to dmub via inbox message. 
* - * @return - * True if successful, False if failure - ***************************************************************************** + * Return: + * True if successful, False if failure */ bool dc_process_dmub_set_config_async(struct dc *dc, uint32_t link_index, @@ -4603,21 +4853,17 @@ bool dc_process_dmub_set_config_async(struct dc *dc, } /** - ***************************************************************************** - * Function: dc_process_dmub_set_mst_slots + * dc_process_dmub_set_mst_slots - Submits MST solt allocation * - * @brief - * Submits mst slot allocation command to dmub via inbox message + * @dc: [in] dc structure + * @link_index: [in] link index + * @mst_alloc_slots: [in] mst slots to be allotted + * @mst_slots_in_use: [out] mst slots in use returned in failure case * - * @param - * [in] dc: dc structure - * [in] link_index: link index - * [in] mst_alloc_slots: mst slots to be allotted - * [out] mst_slots_in_use: mst slots in use returned in failure case + * Submits mst slot allocation command to dmub via inbox message * - * @return - * DC_OK if successful, DC_ERROR if failure - ***************************************************************************** + * Return: + * DC_OK if successful, DC_ERROR if failure */ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, uint32_t link_index, @@ -4657,19 +4903,12 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, } /** - ***************************************************************************** - * Function: dc_process_dmub_dpia_hpd_int_enable + * dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption * - * @brief - * Submits dpia hpd int enable command to dmub via inbox message + * @dc [in]: dc structure + * @hpd_int_enable [in]: 1 for hpd int enable, 0 to disable * - * @param - * [in] dc: dc structure - * [in] hpd_int_enable: 1 for hpd int enable, 0 to disable - * - * @return - * None - ***************************************************************************** + * Submits dpia hpd int enable command to dmub via inbox message */ void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, uint32_t hpd_int_enable) @@ -4698,16 +4937,13 @@ void dc_disable_accelerated_mode(struct dc *dc) /** - ***************************************************************************** - * dc_notify_vsync_int_state() - notifies vsync enable/disable state + * dc_notify_vsync_int_state - notifies vsync enable/disable state * @dc: dc structure - * @stream: stream where vsync int state changed - * @enable: whether vsync is enabled or disabled - * - * Called when vsync is enabled/disabled - * Will notify DMUB to start/stop ABM interrupts after steady state is reached + * @stream: stream where vsync int state changed + * @enable: whether vsync is enabled or disabled * - ***************************************************************************** + * Called when vsync is enabled/disabled Will notify DMUB to start/stop ABM + * interrupts after steady state is reached. 
*/ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bool enable) { @@ -4749,14 +4985,18 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause) pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst); } -/* - * dc_extended_blank_supported: Decide whether extended blank is supported + +/** + * dc_extended_blank_supported 0 Decide whether extended blank is supported + * + * @dc: [in] Current DC state * - * Extended blank is a freesync optimization feature to be enabled in the future. - * During the extra vblank period gained from freesync, we have the ability to enter z9/z10. + * Extended blank is a freesync optimization feature to be enabled in the + * future. During the extra vblank period gained from freesync, we have the + * ability to enter z9/z10. * - * @param [in] dc: Current DC state - * @return: Indicate whether extended blank is supported (true or false) + * Return: + * Indicate whether extended blank is supported (true or false) */ bool dc_extended_blank_supported(struct dc *dc) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 7c2e3b8dc26a..471078fc3900 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -366,6 +366,7 @@ void get_hdr_visual_confirm_color( struct tg_color *color) { uint32_t color_value = MAX_TG_COLOR_VALUE; + bool is_sdr = false; /* Determine the overscan color based on the top-most (desktop) plane's context */ struct pipe_ctx *top_pipe_ctx = pipe_ctx; @@ -382,7 +383,8 @@ void get_hdr_visual_confirm_color( /* FreeSync 2 ARGB2101010 - set border color to pink */ color->color_r_cr = color_value; color->color_b_cb = color_value; - } + } else + is_sdr = true; break; case PIXEL_FORMAT_FP16: if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) { @@ -391,14 +393,19 @@ void get_hdr_visual_confirm_color( } else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) { /* FreeSync 2 HDR - set border color to green */ color->color_g_y = color_value; - } + } else + is_sdr = true; break; default: + is_sdr = true; + break; + } + + if (is_sdr) { /* SDR - set border color to Gray */ color->color_r_cr = color_value/2; color->color_b_cb = color_value/2; color->color_g_y = color_value/2; - break; } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index d7b1ace6328a..342e906ae26e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3378,7 +3378,7 @@ bool dc_link_setup_psr(struct dc_link *link, case FAMILY_YELLOW_CARP: case AMDGPU_FAMILY_GC_10_3_6: case AMDGPU_FAMILY_GC_11_0_1: - if (dc->debug.disable_z10) + if (dc->debug.disable_z10 || dc->debug.psr_skip_crtc_disable) psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true; break; default: @@ -4229,6 +4229,7 @@ static void fpga_dp_hpo_enable_link_and_stream(struct dc_state *state, struct pi link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp); dc->hwss.unblank_stream(pipe_ctx, &stream->link->cur_link_settings); + dc->hwss.enable_audio_stream(pipe_ctx); } void core_link_enable_stream( @@ -4308,10 +4309,7 @@ void core_link_enable_stream( /* Still enable stream features & audio on seamless boot for DP external displays */ if 
(pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT) { enable_stream_features(pipe_ctx); - if (pipe_ctx->stream_res.audio != NULL) { - pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc); - dc->hwss.enable_audio_stream(pipe_ctx); - } + dc->hwss.enable_audio_stream(pipe_ctx); } #if defined(CONFIG_DRM_AMD_DC_HDCP) @@ -4665,6 +4663,10 @@ void dc_link_set_preferred_training_settings(struct dc *dc, link->preferred_link_setting.link_rate = LINK_RATE_UNKNOWN; } + if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && + link->type == dc_connection_mst_branch) + dm_helpers_dp_mst_update_branch_bandwidth(dc->ctx, link); + /* Retrain now, or wait until next stream update to apply */ if (skip_immediate_retrain == false) dc_link_set_preferred_link_settings(dc, &link->preferred_link_setting, link); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 651231387043..ce8d6a54ca54 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -82,6 +82,7 @@ struct dp_hdmi_dongle_signature_data { #define HDMI_SCDC_STATUS_FLAGS 0x40 #define HDMI_SCDC_ERR_DETECT 0x50 #define HDMI_SCDC_TEST_CONFIG 0xC0 +#define HDMI_SCDC_DEVICE_ID 0xD3 union hdmi_scdc_update_read_data { uint8_t byte[2]; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 1254d38f1778..dedd1246ce58 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1912,7 +1912,7 @@ enum dc_status dpcd_configure_lttpr_mode(struct dc_link *link, struct link_train return status; } -static void dpcd_exit_training_mode(struct dc_link *link) +static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding) { uint8_t sink_status = 0; uint8_t i; @@ -1920,12 +1920,14 @@ static void dpcd_exit_training_mode(struct dc_link *link) /* clear training pattern set */ dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); - /* poll for intra-hop disable */ - for (i = 0; i < 10; i++) { - if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) && - (sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0) - break; - udelay(1000); + if (encoding == DP_128b_132b_ENCODING) { + /* poll for intra-hop disable */ + for (i = 0; i < 10; i++) { + if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) && + (sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0) + break; + udelay(1000); + } } } @@ -2649,7 +2651,7 @@ enum link_training_result dc_link_dp_perform_link_training( <_settings); /* reset previous training states */ - dpcd_exit_training_mode(link); + dpcd_exit_training_mode(link, encoding); /* configure link prior to entering training mode */ dpcd_configure_lttpr_mode(link, <_settings); @@ -2670,7 +2672,7 @@ enum link_training_result dc_link_dp_perform_link_training( ASSERT(0); /* exit training mode */ - dpcd_exit_training_mode(link); + dpcd_exit_training_mode(link, encoding); /* switch to video idle */ if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) @@ -2771,8 +2773,11 @@ bool perform_link_training_with_retries( /* Update verified link settings to current one * Because DPIA LT might fallback to lower link setting. 
*/ - link->verified_link_cap.link_rate = link->cur_link_settings.link_rate; - link->verified_link_cap.lane_count = link->cur_link_settings.lane_count; + if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + link->verified_link_cap.link_rate = link->cur_link_settings.link_rate; + link->verified_link_cap.lane_count = link->cur_link_settings.lane_count; + dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link); + } } } else { status = dc_link_dp_perform_link_training(link, @@ -3020,7 +3025,7 @@ static enum dc_link_rate get_lttpr_max_link_rate(struct dc_link *link) static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link) { - enum dc_link_rate cable_max_link_rate = LINK_RATE_HIGH3; + enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN; if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) cable_max_link_rate = LINK_RATE_UHBR20; @@ -3083,15 +3088,29 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link) max_link_cap.link_spread = link->reported_link_cap.link_spread; - /* Lower link settings based on cable attributes */ + /* Lower link settings based on cable attributes + * Cable ID is a DP2 feature to identify max certified link rate that + * a cable can carry. The cable identification method requires both + * cable and display hardware support. Since the specs comes late, it is + * anticipated that the first round of DP2 cables and displays may not + * be fully compatible to reliably return cable ID data. Therefore the + * decision of our cable id policy is that if the cable can return non + * zero cable id data, we will take cable's link rate capability into + * account. However if we get zero data, the cable link rate capability + * is considered inconclusive. In this case, we will not take cable's + * capability into account to avoid of over limiting hardware capability + * from users. The max overall link rate capability is still determined + * after actual dp pre-training. Cable id is considered as an auxiliary + * method of determining max link bandwidth capability. + */ cable_max_link_rate = get_cable_max_link_rate(link); if (!link->dc->debug.ignore_cable_id && + cable_max_link_rate != LINK_RATE_UNKNOWN && cable_max_link_rate < max_link_cap.link_rate) max_link_cap.link_rate = cable_max_link_rate; - /* - * account for lttpr repeaters cap + /* account for lttpr repeaters cap * notes: repeaters do not snoop in the DPRX Capabilities addresses (3.6.3). 
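The cable ID policy paragraph above reduces to a three-part guard before the cable's reported rate may lower the link capability: the ignore_cable_id debug override must be off, the cable must have returned usable (non-zero) data, and the cable must actually be the limiting factor. A small sketch of that clamp (the rate values are illustrative integers, not the dc_link_rate enum):

#include <stdbool.h>
#include <stdio.h>

#define LINK_RATE_UNKNOWN 0   /* cable ID returned no usable data */

/* Lower the max link rate only when the cable reported a real capability
 * and that capability is below what the link could otherwise do. */
static unsigned int clamp_by_cable_id(unsigned int max_link_rate,
                                      unsigned int cable_max_link_rate,
                                      bool ignore_cable_id)
{
        if (!ignore_cable_id &&
            cable_max_link_rate != LINK_RATE_UNKNOWN &&
            cable_max_link_rate < max_link_rate)
                return cable_max_link_rate;

        return max_link_rate;
}

int main(void)
{
        /* Inconclusive cable ID: capability is left untouched. */
        printf("%u\n", clamp_by_cable_id(20, LINK_RATE_UNKNOWN, false)); /* 20 */
        /* Cable certified lower than the sink: clamp to the cable. */
        printf("%u\n", clamp_by_cable_id(20, 10, false));                /* 10 */
        return 0;
}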
*/ if (dp_is_lttpr_present(link)) { @@ -4540,9 +4559,19 @@ void dc_link_dp_handle_link_loss(struct dc_link *link) for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && - pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) + if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off + && pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) { + // Always use max settings here for DP 1.4a LL Compliance CTS + if (link->is_automated) { + pipe_ctx->link_config.dp_link_settings.lane_count = + link->verified_link_cap.lane_count; + pipe_ctx->link_config.dp_link_settings.link_rate = + link->verified_link_cap.link_rate; + pipe_ctx->link_config.dp_link_settings.link_spread = + link->verified_link_cap.link_spread; + } core_link_enable_stream(link->dc->current_state, pipe_ctx); + } } } @@ -4583,6 +4612,8 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd } if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { + // Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC + link->is_automated = true; device_service_clear.bits.AUTOMATED_TEST = 1; core_link_write_dpcd( link, @@ -5031,7 +5062,7 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) return true; } -bool dp_retrieve_lttpr_cap(struct dc_link *link) +enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) { uint8_t lttpr_dpcd_data[8]; enum dc_status status = DC_ERROR_UNEXPECTED; @@ -5099,7 +5130,7 @@ bool dp_retrieve_lttpr_cap(struct dc_link *link) CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present); - return is_lttpr_present; + return status; } bool dp_is_lttpr_present(struct dc_link *link) @@ -5227,76 +5258,45 @@ static void retrieve_cable_id(struct dc_link *link) &link->dpcd_caps.cable_id, &usbc_cable_id); } -/* DPRX may take some time to respond to AUX messages after HPD asserted. - * If AUX read unsuccessful, try to wake unresponsive DPRX by toggling DPCD SET_POWER (0x600). - */ -static enum dc_status wa_try_to_wake_dprx(struct dc_link *link, uint64_t timeout_ms) +static enum dc_status wake_up_aux_channel(struct dc_link *link) { enum dc_status status = DC_ERROR_UNEXPECTED; - uint8_t dpcd_data = 0; - uint64_t start_ts = 0; - uint64_t current_ts = 0; - uint64_t time_taken_ms = 0; - enum dc_connection_type type = dc_connection_none; - bool lttpr_present; - bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; + uint32_t aux_channel_retry_cnt = 0; + uint8_t dpcd_power_state = '\0'; - lttpr_present = dp_is_lttpr_present(link) || - (!vbios_lttpr_interop || !link->dc->caps.extended_aux_timeout_support); - DC_LOG_DC("lttpr_present = %d.\n", lttpr_present ? 1 : 0); + while (status != DC_OK && aux_channel_retry_cnt < 10) { + status = core_link_read_dpcd(link, DP_SET_POWER, + &dpcd_power_state, sizeof(dpcd_power_state)); - /* Issue an AUX read to test DPRX responsiveness. If LTTPR is supported the first read is expected to - * be to determine LTTPR capabilities. Otherwise trying to read power state should be an innocuous AUX read. - */ - if (lttpr_present) - status = core_link_read_dpcd( - link, - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, - &dpcd_data, - sizeof(dpcd_data)); - else - status = core_link_read_dpcd( - link, - DP_SET_POWER, - &dpcd_data, - sizeof(dpcd_data)); + /* Delay 1 ms if AUX CH is in power down state. 
Based on spec + * section 2.3.1.2, if AUX CH may be powered down due to + * write to DPCD 600h = 2. Sink AUX CH is monitoring differential + * signal and may need up to 1 ms before being able to reply. + */ + if (status != DC_OK || dpcd_power_state == DP_SET_POWER_D3) { + udelay(1000); + aux_channel_retry_cnt++; + } + } if (status != DC_OK) { - DC_LOG_WARNING("%s: Read DPCD LTTPR_CAP failed - try to toggle DPCD SET_POWER for %lld ms.", - __func__, - timeout_ms); - start_ts = dm_get_timestamp(link->ctx); - - do { - if (!dc_link_detect_sink(link, &type) || type == dc_connection_none) - break; - - dpcd_data = DP_SET_POWER_D3; - status = core_link_write_dpcd( - link, - DP_SET_POWER, - &dpcd_data, - sizeof(dpcd_data)); - - dpcd_data = DP_SET_POWER_D0; - status = core_link_write_dpcd( - link, - DP_SET_POWER, - &dpcd_data, - sizeof(dpcd_data)); - - current_ts = dm_get_timestamp(link->ctx); - time_taken_ms = div_u64(dm_get_elapse_time_in_ns(link->ctx, current_ts, start_ts), 1000000); - } while (status != DC_OK && time_taken_ms < timeout_ms); + dpcd_power_state = DP_SET_POWER_D0; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_power_state, + sizeof(dpcd_power_state)); - DC_LOG_WARNING("%s: DPCD SET_POWER %s after %lld ms%s", - __func__, - (status == DC_OK) ? "succeeded" : "failed", - time_taken_ms, - (type == dc_connection_none) ? ". Unplugged." : "."); + dpcd_power_state = DP_SET_POWER_D3; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_power_state, + sizeof(dpcd_power_state)); + return DC_ERROR_UNEXPECTED; } - return status; + return DC_OK; } static bool retrieve_link_cap(struct dc_link *link) @@ -5308,7 +5308,6 @@ static bool retrieve_link_cap(struct dc_link *link) /*Only need to read 1 byte starting from DP_DPRX_FEATURE_ENUMERATION_LIST. */ uint8_t dpcd_dprx_data = '\0'; - uint8_t dpcd_power_state = '\0'; struct dp_device_vendor_id sink_id; union down_stream_port_count down_strm_port_count; @@ -5316,11 +5315,9 @@ static bool retrieve_link_cap(struct dc_link *link) union dp_downstream_port_present ds_port = { 0 }; enum dc_status status = DC_ERROR_UNEXPECTED; uint32_t read_dpcd_retry_cnt = 3; - uint32_t aux_channel_retry_cnt = 0; int i; struct dp_sink_hw_fw_revision dp_hw_fw_revision; const uint32_t post_oui_delay = 30; // 30ms - bool is_lttpr_present = false; memset(dpcd_data, '\0', sizeof(dpcd_data)); memset(&down_strm_port_count, @@ -5335,51 +5332,17 @@ static bool retrieve_link_cap(struct dc_link *link) dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); - /* Try to ensure AUX channel active before proceeding. */ - if (link->dc->debug.aux_wake_wa.bits.enable_wa) { - uint64_t timeout_ms = link->dc->debug.aux_wake_wa.bits.timeout_ms; + status = dp_retrieve_lttpr_cap(link); - if (link->dc->debug.aux_wake_wa.bits.use_default_timeout) - timeout_ms = LINK_AUX_WAKE_TIMEOUT_MS; - status = wa_try_to_wake_dprx(link, timeout_ms); - } - - while (status != DC_OK && aux_channel_retry_cnt < 10) { - status = core_link_read_dpcd(link, DP_SET_POWER, - &dpcd_power_state, sizeof(dpcd_power_state)); - - /* Delay 1 ms if AUX CH is in power down state. Based on spec - * section 2.3.1.2, if AUX CH may be powered down due to - * write to DPCD 600h = 2. Sink AUX CH is monitoring differential - * signal and may need up to 1 ms before being able to reply. 
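The old timed SET_POWER toggle workaround is folded into wake_up_aux_channel(), a bounded retry loop: read DPCD 600h, and if the read fails or the sink still reports D3, wait 1 ms and try again, giving up after ten attempts. A userspace-flavoured sketch of that loop, with a simulated sink standing in for core_link_read_dpcd() and usleep() for udelay():

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define DP_SET_POWER_D0 1
#define DP_SET_POWER_D3 2

static int attempts;

/* Simulated sink: ignores the first two AUX reads while waking up. */
static bool dpcd_read_set_power(unsigned char *power_state)
{
        if (attempts++ < 2)
                return false;
        *power_state = DP_SET_POWER_D0;
        return true;
}

/* Retry while the AUX read fails, at most 10 times.  Per DP spec section
 * 2.3.1.2 a sink whose AUX channel was powered down (D3) may need up to
 * 1 ms before it can reply, hence the delay between attempts. */
static bool wake_up_aux_channel(void)
{
        unsigned char power_state = 0;
        int retry = 0;
        bool ok = false;

        while (!ok && retry < 10) {
                ok = dpcd_read_set_power(&power_state);

                if (!ok || power_state == DP_SET_POWER_D3) {
                        usleep(1000);
                        retry++;
                }
        }
        return ok;
}

int main(void)
{
        printf("aux awake: %d\n", wake_up_aux_channel());
        return 0;
}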
- */ - if (status != DC_OK || dpcd_power_state == DP_SET_POWER_D3) { - udelay(1000); - aux_channel_retry_cnt++; - } - } - - /* If aux channel is not active, return false and trigger another detect*/ if (status != DC_OK) { - dpcd_power_state = DP_SET_POWER_D0; - status = core_link_write_dpcd( - link, - DP_SET_POWER, - &dpcd_power_state, - sizeof(dpcd_power_state)); - - dpcd_power_state = DP_SET_POWER_D3; - status = core_link_write_dpcd( - link, - DP_SET_POWER, - &dpcd_power_state, - sizeof(dpcd_power_state)); - return false; + status = wake_up_aux_channel(link); + if (status == DC_OK) + dp_retrieve_lttpr_cap(link); + else + return false; } - is_lttpr_present = dp_retrieve_lttpr_cap(link); - - if (is_lttpr_present) + if (dp_is_lttpr_present(link)) configure_lttpr_mode_transparent(link); /* Read DP tunneling information. */ @@ -5406,7 +5369,7 @@ static bool retrieve_link_cap(struct dc_link *link) return false; } - if (!is_lttpr_present) + if (!dp_is_lttpr_present(link)) dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); { @@ -7294,6 +7257,7 @@ void dp_retrain_link_dp_test(struct dc_link *link, struct pipe_ctx *pipes = &link->dc->current_state->res_ctx.pipe_ctx[0]; unsigned int i; + bool do_fallback = false; for (i = 0; i < MAX_PIPES; i++) { @@ -7326,32 +7290,23 @@ void dp_retrain_link_dp_test(struct dc_link *link, memset(&link->cur_link_settings, 0, sizeof(link->cur_link_settings)); + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + do_fallback = true; + perform_link_training_with_retries( link_setting, skip_video_pattern, LINK_TRAINING_ATTEMPTS, &pipes[i], SIGNAL_TYPE_DISPLAY_PORT, - false); + do_fallback); link->dc->hwss.enable_stream(&pipes[i]); link->dc->hwss.unblank_stream(&pipes[i], link_setting); - if (pipes[i].stream_res.audio) { - /* notify audio driver for - * audio modes of monitor */ - pipes[i].stream_res.audio->funcs->az_enable( - pipes[i].stream_res.audio); - - /* un-mute audio */ - /* TODO: audio should be per stream rather than - * per link */ - pipes[i].stream_res.stream_enc->funcs-> - audio_mute_control( - pipes[i].stream_res.stream_enc, false); - } + link->dc->hwss.enable_audio_stream(&pipes[i]); } } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c index 74e36b34d3f7..d130d58ac08e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c @@ -791,10 +791,14 @@ static enum link_training_result dpia_training_eq_transparent( } if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) && - dp_is_symbol_locked(link->cur_link_settings.lane_count, dpcd_lane_status) && - dp_is_interlane_aligned(dpcd_lane_status_updated)) { - result = LINK_TRAINING_SUCCESS; - break; + dp_is_symbol_locked(link->cur_link_settings.lane_count, dpcd_lane_status)) { + /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4 + * has to share encoders unlike DP and USBC + */ + if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) { + result = LINK_TRAINING_SUCCESS; + break; + } } /* Update VS/PE. 
*/ @@ -1008,7 +1012,8 @@ enum link_training_result dc_link_dpia_perform_link_training( */ if (result == LINK_TRAINING_SUCCESS) { msleep(5); - result = dp_check_link_loss_status(link, <_settings); + if (!link->is_automated) + result = dp_check_link_loss_status(link, <_settings); } else if (result == LINK_TRAINING_ABORT) { dpia_training_abort(link, <_settings, repeater_id); } else { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index fd8db482e56f..da164685547d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1768,6 +1768,17 @@ bool dc_remove_plane_from_context( return true; } +/** + * dc_rem_all_planes_for_stream - Remove planes attached to the target stream. + * + * @dc: Current dc state. + * @stream: Target stream, which we want to remove the attached plans. + * @context: New context. + * + * Return: + * Return true if DC was able to remove all planes from the target + * stream, otherwise, return false. + */ bool dc_rem_all_planes_for_stream( const struct dc *dc, struct dc_stream_state *stream, @@ -2562,9 +2573,12 @@ enum dc_status resource_map_pool_resources( /** * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state - * Is a shallow copy. Increments refcounts on existing streams and planes. + * * @dc: copy out of dc->current_state * @dst_ctx: copy into this + * + * This function makes a shallow copy of the current DC state and increments + * refcounts on existing streams and planes. */ void dc_resource_state_copy_construct_current( const struct dc *dc, @@ -2593,15 +2607,241 @@ bool dc_resource_is_dsc_encoding_supported(const struct dc *dc) return dc->res_pool->res_cap->num_dsc > 0; } +static bool planes_changed_for_existing_stream(struct dc_state *context, + struct dc_stream_state *stream, + const struct dc_validation_set set[], + int set_count) +{ + int i, j; + struct dc_stream_status *stream_status = NULL; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i] == stream) { + stream_status = &context->stream_status[i]; + break; + } + } + + if (!stream_status) + ASSERT(0); + + for (i = 0; i < set_count; i++) + if (set[i].stream == stream) + break; + + if (i == set_count) + ASSERT(0); + + if (set[i].plane_count != stream_status->plane_count) + return true; + + for (j = 0; j < set[i].plane_count; j++) + if (set[i].plane_states[j] != stream_status->plane_states[j]) + return true; + + return false; +} /** - * dc_validate_global_state() - Determine if HW can support a given state - * Checks HW resource availability and bandwidth requirement. + * dc_validate_with_context - Validate and update the potential new stream in the context object + * + * @dc: Used to get the current state status + * @set: An array of dc_validation_set with all the current streams reference + * @set_count: Total of streams + * @context: New context + * @fast_validate: Enable or disable fast validation + * + * This function updates the potential new stream in the context object. It + * creates multiple lists for the add, remove, and unchanged streams. In + * particular, if the unchanged streams have a plane that changed, it is + * necessary to remove all planes from the unchanged streams. In summary, this + * function is responsible for validating the new context. + * + * Return: + * In case of success, return DC_OK (1), otherwise, return a DC error. 
+ */ +enum dc_status dc_validate_with_context(struct dc *dc, + const struct dc_validation_set set[], + int set_count, + struct dc_state *context, + bool fast_validate) +{ + struct dc_stream_state *unchanged_streams[MAX_PIPES] = { 0 }; + struct dc_stream_state *del_streams[MAX_PIPES] = { 0 }; + struct dc_stream_state *add_streams[MAX_PIPES] = { 0 }; + int old_stream_count = context->stream_count; + enum dc_status res = DC_ERROR_UNEXPECTED; + int unchanged_streams_count = 0; + int del_streams_count = 0; + int add_streams_count = 0; + bool found = false; + int i, j, k; + + DC_LOGGER_INIT(dc->ctx->logger); + + /* First build a list of streams to be remove from current context */ + for (i = 0; i < old_stream_count; i++) { + struct dc_stream_state *stream = context->streams[i]; + + for (j = 0; j < set_count; j++) { + if (stream == set[j].stream) { + found = true; + break; + } + } + + if (!found) + del_streams[del_streams_count++] = stream; + + found = false; + } + + /* Second, build a list of new streams */ + for (i = 0; i < set_count; i++) { + struct dc_stream_state *stream = set[i].stream; + + for (j = 0; j < old_stream_count; j++) { + if (stream == context->streams[j]) { + found = true; + break; + } + } + + if (!found) + add_streams[add_streams_count++] = stream; + + found = false; + } + + /* Build a list of unchanged streams which is necessary for handling + * planes change such as added, removed, and updated. + */ + for (i = 0; i < set_count; i++) { + /* Check if stream is part of the delete list */ + for (j = 0; j < del_streams_count; j++) { + if (set[i].stream == del_streams[j]) { + found = true; + break; + } + } + + if (!found) { + /* Check if stream is part of the add list */ + for (j = 0; j < add_streams_count; j++) { + if (set[i].stream == add_streams[j]) { + found = true; + break; + } + } + } + + if (!found) + unchanged_streams[unchanged_streams_count++] = set[i].stream; + + found = false; + } + + /* Remove all planes for unchanged streams if planes changed */ + for (i = 0; i < unchanged_streams_count; i++) { + if (planes_changed_for_existing_stream(context, + unchanged_streams[i], + set, + set_count)) { + if (!dc_rem_all_planes_for_stream(dc, + unchanged_streams[i], + context)) { + res = DC_FAIL_DETACH_SURFACES; + goto fail; + } + } + } + + /* Remove all planes for removed streams and then remove the streams */ + for (i = 0; i < del_streams_count; i++) { + /* Need to cpy the dwb data from the old stream in order to efc to work */ + if (del_streams[i]->num_wb_info > 0) { + for (j = 0; j < add_streams_count; j++) { + if (del_streams[i]->sink == add_streams[j]->sink) { + add_streams[j]->num_wb_info = del_streams[i]->num_wb_info; + for (k = 0; k < del_streams[i]->num_wb_info; k++) + add_streams[j]->writeback_info[k] = del_streams[i]->writeback_info[k]; + } + } + } + + if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) { + res = DC_FAIL_DETACH_SURFACES; + goto fail; + } + + res = dc_remove_stream_from_ctx(dc, context, del_streams[i]); + if (res != DC_OK) + goto fail; + } + + /* Swap seamless boot stream to pipe 0 (if needed) to ensure pipe_ctx + * matches. This may change in the future if seamless_boot_stream can be + * multiple. 
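dc_validate_with_context() begins by partitioning streams with simple pointer-identity set differences: streams present in the current context but absent from the request are deletions, streams in the request but not in the context are additions, and everything else is unchanged (and only has its planes re-attached when planes_changed_for_existing_stream() says so). A compact sketch of that partitioning step (opaque pointers stand in for dc_stream_state):

#include <stddef.h>
#include <stdbool.h>

static bool contains(void *set[], size_t count, const void *item)
{
        size_t i;

        for (i = 0; i < count; i++)
                if (set[i] == item)
                        return true;
        return false;
}

/* Partition "current" vs "requested" by pointer identity:
 * del receives streams only in current, add receives streams only in
 * requested; anything in both is left alone ("unchanged"). */
static void partition_streams(void *current[], size_t current_count,
                              void *requested[], size_t requested_count,
                              void *del[], size_t *del_count,
                              void *add[], size_t *add_count)
{
        size_t i;

        *del_count = 0;
        *add_count = 0;

        for (i = 0; i < current_count; i++)
                if (!contains(requested, requested_count, current[i]))
                        del[(*del_count)++] = current[i];

        for (i = 0; i < requested_count; i++)
                if (!contains(current, current_count, requested[i]))
                        add[(*add_count)++] = requested[i];
}

int main(void)
{
        int a, b, c;
        void *cur[] = { &a, &b };
        void *req[] = { &b, &c };
        void *del[2], *add[2];
        size_t ndel, nadd;

        partition_streams(cur, 2, req, 2, del, &ndel, add, &nadd);
        /* ndel == 1 (&a removed), nadd == 1 (&c added), &b unchanged */
        return (ndel == 1 && nadd == 1) ? 0 : 1;
}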
+ */ + for (i = 0; i < add_streams_count; i++) { + mark_seamless_boot_stream(dc, add_streams[i]); + if (add_streams[i]->apply_seamless_boot_optimization && i != 0) { + struct dc_stream_state *temp = add_streams[0]; + + add_streams[0] = add_streams[i]; + add_streams[i] = temp; + break; + } + } + + /* Add new streams and then add all planes for the new stream */ + for (i = 0; i < add_streams_count; i++) { + calculate_phy_pix_clks(add_streams[i]); + res = dc_add_stream_to_ctx(dc, context, add_streams[i]); + if (res != DC_OK) + goto fail; + + if (!add_all_planes_for_stream(dc, add_streams[i], set, set_count, context)) { + res = DC_FAIL_ATTACH_SURFACES; + goto fail; + } + } + + /* Add all planes for unchanged streams if planes changed */ + for (i = 0; i < unchanged_streams_count; i++) { + if (planes_changed_for_existing_stream(context, + unchanged_streams[i], + set, + set_count)) { + if (!add_all_planes_for_stream(dc, unchanged_streams[i], set, set_count, context)) { + res = DC_FAIL_ATTACH_SURFACES; + goto fail; + } + } + } + + res = dc_validate_global_state(dc, context, fast_validate); + +fail: + if (res != DC_OK) + DC_LOG_WARNING("%s:resource validation failed, dc_status:%d\n", + __func__, + res); + + return res; +} + +/** + * dc_validate_global_state() - Determine if hardware can support a given state + * * @dc: dc struct for this driver * @new_ctx: state to be validated * @fast_validate: set to true if only yes/no to support matters * - * Return: DC_OK if the result can be programmed. Otherwise, an error code. + * Checks hardware resource availability and bandwidth requirement. + * + * Return: + * DC_OK if the result can be programmed. Otherwise, an error code. */ enum dc_status dc_validate_global_state( struct dc *dc, @@ -2789,6 +3029,12 @@ static void set_avi_info_frame( hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } + if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR && + stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) { + hdmi_info.bits.EC0_EC2 = 0; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; + } + /* TODO: un-hardcode aspect ratio */ aspect = stream->timing.aspect_ratio; @@ -3734,4 +3980,4 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm( } return true; -}
\ No newline at end of file +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 38d71b5c1f2d..20e534f73513 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -332,9 +332,21 @@ bool dc_stream_set_cursor_attributes( dc = stream->ctx->dc; - if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) - if (stream->mall_stream_config.type == SUBVP_MAIN) + /* SubVP is not compatible with HW cursor larger than 64 x 64 x 4. + * Therefore, if cursor is greater than 64 x 64 x 4, fallback to SW cursor in the following case: + * 1. For single display cases, if resolution is >= 5K and refresh rate < 120hz + * 2. For multi display cases, if resolution is >= 4K and refresh rate < 120hz + * + * [< 120hz is a requirement for SubVP configs] + */ + if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) { + if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 && + ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) return false; + else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 && + ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) + return false; + } stream->cursor_attributes = *attributes; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0598465fd1a1..85ebeaa2de18 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.207" +#define DC_VER "3.2.215" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -56,9 +56,7 @@ struct dmub_notification; #define MIN_VIEWPORT_SIZE 12 #define MAX_NUM_EDP 2 -/******************************************************************************* - * Display Core Interfaces - ******************************************************************************/ +/* Display Core Interfaces */ struct dc_versions { const char *dc_ver; struct dmcu_version dmcu_version; @@ -263,11 +261,13 @@ struct dc_caps { uint32_t cache_line_size; uint32_t cache_num_ways; uint16_t subvp_fw_processing_delay_us; + uint8_t subvp_drr_max_vblank_margin_us; uint16_t subvp_prefetch_end_to_mall_start_us; uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height uint16_t subvp_pstate_allow_width_us; uint16_t subvp_vertical_int_margin_us; bool seamless_odm; + uint8_t subvp_drr_vblank_start_margin_us; }; struct dc_bug_wa { @@ -395,6 +395,7 @@ struct dc_config { bool disable_dmcu; bool enable_4to1MPC; bool enable_windowed_mpo_odm; + bool forceHBR2CP2520; // Used for switching between test patterns TPS4 and CP2520 uint32_t allow_edp_hotplug_detection; bool clamp_min_dcfclk; uint64_t vblank_alignment_dto_params; @@ -408,6 +409,7 @@ struct dc_config { bool use_default_clock_table; bool force_bios_enable_lttpr; uint8_t force_bios_fixed_vs; + int sdpif_request_limit_words_per_umc; }; @@ -457,15 +459,15 @@ enum pipe_split_policy { MPC_SPLIT_DYNAMIC = 0, /** - * @MPC_SPLIT_DYNAMIC: Avoid pipe split, which means that DC will not + * @MPC_SPLIT_AVOID: Avoid pipe split, which means that DC will not * try any sort of split optimization. 
*/ MPC_SPLIT_AVOID = 1, /** - * @MPC_SPLIT_DYNAMIC: With this option, DC will only try to optimize - * the pipe utilization when using a single display; if the user - * connects to a second display, DC will avoid pipe split. + * @MPC_SPLIT_AVOID_MULT_DISP: With this option, DC will only try to + * optimize the pipe utilization when using a single display; if the + * user connects to a second display, DC will avoid pipe split. */ MPC_SPLIT_AVOID_MULT_DISP = 2, }; @@ -491,12 +493,17 @@ enum dcn_pwr_state { enum dcn_zstate_support_state { DCN_ZSTATE_SUPPORT_UNKNOWN, DCN_ZSTATE_SUPPORT_ALLOW, + DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY, + DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY, DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY, DCN_ZSTATE_SUPPORT_DISALLOW, }; -/* - * For any clocks that may differ per pipe - * only the max is stored in this structure + +/** + * struct dc_clocks - DC pipe clocks + * + * For any clocks that may differ per pipe only the max is stored in this + * structure */ struct dc_clocks { int dispclk_khz; @@ -523,6 +530,16 @@ struct dc_clocks { bool prev_p_state_change_support; bool fclk_prev_p_state_change_support; int num_ways; + + /* + * @fw_based_mclk_switching + * + * DC has a mechanism that leverage the variable refresh rate to switch + * memory clock in cases that we have a large latency to achieve the + * memory clock change and a short vblank window. DC has some + * requirements to enable this feature, and this field describes if the + * system support or not such a feature. + */ bool fw_based_mclk_switching; bool fw_based_mclk_switching_shut_down; int prev_num_ways; @@ -764,7 +781,6 @@ struct dc_debug_options { bool disable_mem_low_power; bool pstate_enabled; bool disable_dmcu; - bool disable_psr; bool force_abm_enable; bool disable_stereo_support; bool vsr_support; @@ -828,6 +844,7 @@ struct dc_debug_options { int crb_alloc_policy_min_disp_count; bool disable_z10; bool enable_z9_disable_interface; + bool psr_skip_crtc_disable; union dpia_debug_options dpia_debug; bool disable_fixed_vs_aux_timeout_wa; bool force_disable_subvp; @@ -836,6 +853,7 @@ struct dc_debug_options { unsigned int force_subvp_num_ways; unsigned int force_mall_ss_num_ways; bool alloc_extra_way_for_cursor; + uint32_t subvp_extra_lines; bool force_usr_allow; /* uses value at boot and disables switch */ bool disable_dtb_ref_clk_switch; @@ -990,9 +1008,7 @@ void dc_init_callbacks(struct dc *dc, void dc_deinit_callbacks(struct dc *dc); void dc_destroy(struct dc **dc); -/******************************************************************************* - * Surface Interfaces - ******************************************************************************/ +/* Surface Interfaces */ enum { TRANSFER_FUNC_POINTS = 1025 @@ -1271,12 +1287,23 @@ void dc_post_update_surfaces_to_stream( #include "dc_stream.h" -/* - * Structure to store surface/stream associations for validation +/** + * struct dc_validation_set - Struct to store surface/stream associations for validation */ struct dc_validation_set { + /** + * @stream: Stream state properties + */ struct dc_stream_state *stream; + + /** + * @plane_state: Surface state + */ struct dc_plane_state *plane_states[MAX_SURFACES]; + + /** + * @plane_count: Total of active planes + */ uint8_t plane_count; }; @@ -1288,6 +1315,12 @@ enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *pla void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info); +enum dc_status dc_validate_with_context(struct dc *dc, + const struct dc_validation_set set[], + int 
set_count, + struct dc_state *context, + bool fast_validate); + bool dc_set_generic_gpio_for_stereo(bool enable, struct gpio_service *gpio_service); @@ -1323,15 +1356,12 @@ void dc_resource_state_destruct(struct dc_state *context); bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); -/* - * TODO update to make it about validation sets - * Set up streams and links associated to drive sinks - * The streams parameter is an absolute set of all active streams. - * - * After this call: - * Phy, Encoder, Timing Generator are programmed and enabled. - * New streams are enabled with blank stream; no memory read. - */ +enum dc_status dc_commit_streams(struct dc *dc, + struct dc_stream_state *streams[], + uint8_t stream_count); + +/* TODO: When the transition to the new commit sequence is done, remove this + * function in favor of dc_commit_streams. */ bool dc_commit_state(struct dc *dc, struct dc_state *context); struct dc_state *dc_create_state(struct dc *dc); @@ -1339,9 +1369,7 @@ struct dc_state *dc_copy_state(struct dc_state *src_ctx); void dc_retain_state(struct dc_state *context); void dc_release_state(struct dc_state *context); -/******************************************************************************* - * Link Interfaces - ******************************************************************************/ +/* Link Interfaces */ struct dpcd_caps { union dpcd_rev dpcd_rev; @@ -1443,9 +1471,7 @@ struct hdcp_caps { uint32_t dc_get_opp_for_plane(struct dc *dc, struct dc_plane_state *plane); -/******************************************************************************* - * Sink Interfaces - A sink corresponds to a display output device - ******************************************************************************/ +/* Sink Interfaces - A sink corresponds to a display output device */ struct dc_container_id { // 128bit GUID in binary form @@ -1528,9 +1554,7 @@ struct dc_cursor { }; -/******************************************************************************* - * Interrupt interfaces - ******************************************************************************/ +/* Interrupt interfaces */ enum dc_irq_source dc_interrupt_to_irq_source( struct dc *dc, uint32_t src_id, @@ -1542,9 +1566,7 @@ enum dc_irq_source dc_get_hpd_irq_source_at_index( void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bool enable); -/******************************************************************************* - * Power Interfaces - ******************************************************************************/ +/* Power Interfaces */ void dc_set_power_state( struct dc *dc, @@ -1617,14 +1639,10 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, uint32_t hpd_int_enable); -/******************************************************************************* - * DSC Interfaces - ******************************************************************************/ +/* DSC Interfaces */ #include "dc_dsc.h" -/******************************************************************************* - * Disable acc mode Interfaces - ******************************************************************************/ +/* Disable acc mode Interfaces */ void dc_disable_accelerated_mode(struct dc *dc); #endif /* DC_INTERFACE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 0541e87e4f38..6ccf477d1c4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -423,25 +423,20 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi #ifdef CONFIG_DRM_AMD_DC_DCN /** - * *********************************************************************************************** - * populate_subvp_cmd_drr_info: Helper to populate DRR pipe info for the DMCUB subvp command + * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command * - * Populate the DMCUB SubVP command with DRR pipe info. All the information required for calculating - * the SubVP + DRR microschedule is populated here. + * @dc: [in] current dc state + * @subvp_pipe: [in] pipe_ctx for the SubVP pipe + * @vblank_pipe: [in] pipe_ctx for the DRR pipe + * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info + * + * Populate the DMCUB SubVP command with DRR pipe info. All the information + * required for calculating the SubVP + DRR microschedule is populated here. * * High level algorithm: * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe * 2. Calculate the min and max vtotal which supports SubVP + DRR microschedule * 3. Populate the drr_info with the min and max supported vtotal values - * - * @param [in] dc: current dc state - * @param [in] subvp_pipe: pipe_ctx for the SubVP pipe - * @param [in] vblank_pipe: pipe_ctx for the DRR pipe - * @param [in] pipe_data: Pipe data which stores the VBLANK/DRR info - * - * @return: void - * - * *********************************************************************************************** */ static void populate_subvp_cmd_drr_info(struct dc *dc, struct pipe_ctx *subvp_pipe, @@ -482,33 +477,38 @@ static void populate_subvp_cmd_drr_info(struct dc *dc, (((uint64_t)main_timing->pix_clk_100hz * 100))); drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000), (((uint64_t)drr_timing->pix_clk_100hz * 100))); - max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us; - max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us; + max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - + dc->caps.subvp_fw_processing_delay_us - drr_active_us), 2) + drr_active_us; + max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us - dc->caps.subvp_fw_processing_delay_us; max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us; max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us), (((uint64_t)drr_timing->h_total * 1000000))); + /* When calculating the max vtotal supported for SubVP + DRR cases, add + * margin due to possible rounding errors (being off by 1 line in the + * FW calculation can incorrectly push the P-State switch to wait 1 frame + * longer). + */ + max_vtotal_supported = max_vtotal_supported - dc->caps.subvp_drr_max_vblank_margin_us; + pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported; pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported; + pipe_data->pipe_config.vblank_data.drr_info.drr_vblank_start_margin = dc->caps.subvp_drr_vblank_start_margin_us; } /** - * *********************************************************************************************** - * populate_subvp_cmd_vblank_pipe_info: Helper to populate VBLANK pipe info for the DMUB subvp command - * - * Populate the DMCUB SubVP command with VBLANK pipe info. 
All the information required to calculate - * the microschedule for SubVP + VBLANK case is stored in the pipe_data (subvp_data and vblank_data). - * Also check if the VBLANK pipe is a DRR display -- if it is make a call to populate drr_info. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] cmd: DMUB cmd to be populated with SubVP info - * @param [in] vblank_pipe: pipe_ctx for the VBLANK pipe - * @param [in] cmd_pipe_index: index for the pipe array in DMCUB SubVP cmd + * populate_subvp_cmd_vblank_pipe_info - Helper to populate VBLANK pipe info for the DMUB subvp command * - * @return: void + * @dc: [in] current dc state + * @context: [in] new dc state + * @cmd: [in] DMUB cmd to be populated with SubVP info + * @vblank_pipe: [in] pipe_ctx for the VBLANK pipe + * @cmd_pipe_index: [in] index for the pipe array in DMCUB SubVP cmd * - * *********************************************************************************************** + * Populate the DMCUB SubVP command with VBLANK pipe info. All the information + * required to calculate the microschedule for SubVP + VBLANK case is stored in + * the pipe_data (subvp_data and vblank_data). Also check if the VBLANK pipe + * is a DRR display -- if it is make a call to populate drr_info. */ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc, struct dc_state *context, @@ -551,22 +551,18 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc, } /** - * *********************************************************************************************** - * update_subvp_prefetch_end_to_mall_start: Helper for SubVP + SubVP case + * update_subvp_prefetch_end_to_mall_start - Helper for SubVP + SubVP case * - * For SubVP + SubVP, we use a single vertical interrupt to start the microschedule for both - * SubVP pipes. In order for this to work correctly, the MALL REGION of both SubVP pipes must - * start at the same time. This function lengthens the prefetch end to mall start delay of the - * SubVP pipe that has the shorter prefetch so that both MALL REGION's will start at the same time. + * @dc: [in] current dc state + * @context: [in] new dc state + * @cmd: [in] DMUB cmd to be populated with SubVP info + * @subvp_pipes: [in] Array of SubVP pipes (should always be length 2) * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] cmd: DMUB cmd to be populated with SubVP info - * @param [in] subvp_pipes: Array of SubVP pipes (should always be length 2) - * - * @return: void - * - * *********************************************************************************************** + * For SubVP + SubVP, we use a single vertical interrupt to start the + * microschedule for both SubVP pipes. In order for this to work correctly, the + * MALL REGION of both SubVP pipes must start at the same time. This function + * lengthens the prefetch end to mall start delay of the SubVP pipe that has + * the shorter prefetch so that both MALL REGION's will start at the same time. */ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, struct dc_state *context, @@ -608,22 +604,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, } /** - * *************************************************************************************** - * setup_subvp_dmub_command: Helper to populate the SubVP pipe info for the DMUB subvp command - * - * Populate the DMCUB SubVP command with SubVP pipe info. 
All the information required to - * calculate the microschedule for the SubVP pipe is stored in the pipe_data of the DMCUB - * SubVP command. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] cmd: DMUB cmd to be populated with SubVP info - * @param [in] subvp_pipe: pipe_ctx for the SubVP pipe - * @param [in] cmd_pipe_index: index for the pipe array in DMCUB SubVP cmd + * populate_subvp_cmd_pipe_info - Helper to populate the SubVP pipe info for the DMUB subvp command * - * @return: void + * @dc: [in] current dc state + * @context: [in] new dc state + * @cmd: [in] DMUB cmd to be populated with SubVP info + * @subvp_pipe: [in] pipe_ctx for the SubVP pipe + * @cmd_pipe_index: [in] index for the pipe array in DMCUB SubVP cmd * - * *************************************************************************************** + * Populate the DMCUB SubVP command with SubVP pipe info. All the information + * required to calculate the microschedule for the SubVP pipe is stored in the + * pipe_data of the DMCUB SubVP command. */ static void populate_subvp_cmd_pipe_info(struct dc *dc, struct dc_state *context, @@ -703,19 +694,14 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, } /** - * *************************************************************************************** - * dc_dmub_setup_subvp_dmub_command: Populate the DMCUB SubVP command + * dc_dmub_setup_subvp_dmub_command - Populate the DMCUB SubVP command * - * This function loops through each pipe and populates the DMUB - * SubVP CMD info based on the pipe (e.g. SubVP, VBLANK). + * @dc: [in] current dc state + * @context: [in] new dc state + * @cmd: [in] DMUB cmd to be populated with SubVP info * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] cmd: DMUB cmd to be populated with SubVP info - * - * @return: void - * - * *************************************************************************************** + * This function loops through each pipe and populates the DMUB SubVP CMD info + * based on the pipe (e.g. SubVP, VBLANK). */ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, struct dc_state *context, @@ -882,11 +868,59 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) diag_data.is_cw6_enabled); } +static bool dc_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx) +{ + struct pipe_ctx *test_pipe, *split_pipe; + const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data; + struct rect r1 = scl_data->recout, r2, r2_half; + int r1_r = r1.x + r1.width, r1_b = r1.y + r1.height, r2_r, r2_b; + int cur_layer = pipe_ctx->plane_state->layer_index; + + /** + * Disable the cursor if there's another pipe above this with a + * plane that contains this pipe's viewport to prevent double cursor + * and incorrect scaling artifacts. + */ + for (test_pipe = pipe_ctx->top_pipe; test_pipe; + test_pipe = test_pipe->top_pipe) { + // Skip invisible layer and pipe-split plane on same layer + if (!test_pipe->plane_state->visible || test_pipe->plane_state->layer_index == cur_layer) + continue; + + r2 = test_pipe->plane_res.scl_data.recout; + r2_r = r2.x + r2.width; + r2_b = r2.y + r2.height; + split_pipe = test_pipe; + + /** + * There is another half plane on same layer because of + * pipe-split, merge together per same height. 
+ */ + for (split_pipe = pipe_ctx->top_pipe; split_pipe; + split_pipe = split_pipe->top_pipe) + if (split_pipe->plane_state->layer_index == test_pipe->plane_state->layer_index) { + r2_half = split_pipe->plane_res.scl_data.recout; + r2.x = (r2_half.x < r2.x) ? r2_half.x : r2.x; + r2.width = r2.width + r2_half.width; + r2_r = r2.x + r2.width; + break; + } + + if (r1.x >= r2.x && r1.y >= r2.y && r1_r <= r2_r && r1_b <= r2_b) + return true; + } + + return false; +} + static bool dc_dmub_should_update_cursor_data(struct pipe_ctx *pipe_ctx) { if (pipe_ctx->plane_state != NULL) { if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) return false; + + if (dc_can_pipe_disable_cursor(pipe_ctx)) + return false; } if ((pipe_ctx->stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || @@ -962,19 +996,14 @@ static void dc_build_cursor_attribute_update_payload1( } /** - * *************************************************************************************** - * dc_send_update_cursor_info_to_dmu: Populate the DMCUB Cursor update info command - * - * This function would store the cursor related information and pass it into dmub + * dc_send_update_cursor_info_to_dmu - Populate the DMCUB Cursor update info command * - * @param [in] pCtx: pipe context - * @param [in] pipe_idx: pipe index + * @pCtx: [in] pipe context + * @pipe_idx: [in] pipe index * - * @return: void - * - * *************************************************************************************** + * This function would store the cursor related information and pass it into + * dmub */ - void dc_send_update_cursor_info_to_dmu( struct pipe_ctx *pCtx, uint8_t pipe_idx) { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index caf0c7af2d0b..2e18bcf6b11a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -117,7 +117,7 @@ struct psr_settings { * Add a struct dc_panel_config under dc_link */ struct dc_panel_config { - // extra panel power sequence parameters + /* extra panel power sequence parameters */ struct pps { unsigned int extra_t3_ms; unsigned int extra_t7_ms; @@ -127,13 +127,21 @@ struct dc_panel_config { unsigned int extra_t12_ms; unsigned int extra_post_OUI_ms; } pps; - // ABM + /* PSR */ + struct psr { + bool disable_psr; + bool disallow_psrsu; + bool rc_disable; + bool rc_allow_static_screen; + bool rc_allow_fullscreen_VPB; + } psr; + /* ABM */ struct varib { unsigned int varibright_feature_enable; unsigned int def_varibright_level; unsigned int abm_config_setting; } varib; - // edp DSC + /* edp DSC */ struct dsc { bool disable_dsc_edp; unsigned int force_dsc_edp_policy; @@ -143,6 +151,20 @@ struct dc_panel_config { bool optimize_edp_link_rate; /* eDP ILR */ } ilr; }; + +/* + * USB4 DPIA BW ALLOCATION STRUCTS + */ +struct dc_dpia_bw_alloc { + int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already + int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated + int padding_bw; // The Padding "Un-used" BW allocated by CM for padding reasons + int sink_max_bw; // The Max BW that sink can require/support + int estimated_bw; // The estimated available BW for this DPIA + int bw_granularity; // BW Granularity + bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM +}; + /* * A link contains one or more sinks and their connected status. * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. 
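The new struct dc_dpia_bw_alloc introduced in the hunk above only carries bookkeeping fields; this part of the diff does not show how they are consumed. As a minimal illustrative sketch (not part of the patch), a caller could round a bandwidth request up to the DPIA's granularity before comparing it with the Connection Manager's estimate. The helper name dpia_request_fits() and the round-up policy are assumptions for illustration only:

/* Illustrative helper, not part of this patch: round a bandwidth request up
 * to the DPIA granularity and check it against the estimated available BW
 * reported by the Connection Manager (CM).
 */
static bool dpia_request_fits(const struct dc_dpia_bw_alloc *alloc, int request_bw)
{
	int granules;

	if (!alloc->bw_alloc_enabled || alloc->bw_granularity <= 0)
		return false;

	/* Assume allocations are granted in whole granularity units. */
	granules = (request_bw + alloc->bw_granularity - 1) / alloc->bw_granularity;

	return granules * alloc->bw_granularity <= alloc->estimated_bw;
}

If the driver's actual policy does not round requests to granularity units, the comparison would simply use request_bw directly against estimated_bw.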
@@ -158,6 +180,14 @@ struct dc_link { enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ bool is_hpd_filter_disabled; bool dp_ss_off; + + /** + * @link_state_valid: + * + * If there is no link and local sink, this variable should be set to + * false. Otherwise, it should be set to true; usually, the function + * core_link_enable_stream sets this field to true. + */ bool link_state_valid; bool aux_access_disabled; bool sync_lt_in_progress; @@ -168,6 +198,7 @@ struct dc_link { bool is_dig_mapping_flexible; bool hpd_status; /* HPD status of link without physical HPD pin. */ bool is_hpd_pending; /* Indicates a new received hpd */ + bool is_automated; /* Indicates automated testing */ bool edp_sink_present; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 9e6025c98db9..dfd3df1d2f7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -41,6 +41,10 @@ struct timing_sync_info { struct dc_stream_status { int primary_otg_inst; int stream_enc_inst; + + /** + * @plane_count: Total of planes attached to a single stream + */ int plane_count; int audio_inst; struct timing_sync_info timing_sync_info; @@ -140,7 +144,7 @@ struct test_pattern { unsigned int cust_pattern_size; }; -#define SUBVP_DRR_MARGIN_US 500 // 500us for DRR margin (SubVP + DRR) +#define SUBVP_DRR_MARGIN_US 600 // 600us for DRR margin (SubVP + DRR) enum mall_stream_type { SUBVP_NONE, // subvp not in use @@ -156,6 +160,17 @@ struct mall_stream_config { struct dc_stream_state *paired_stream; // master / slave stream }; +/* Temp struct used to save and restore MALL config + * during validation. + * + * TODO: Move MALL config into dc_state instead of stream struct + * to avoid needing to save/restore. + */ +struct mall_temp_config { + struct mall_stream_config mall_stream_config[MAX_PIPES]; + bool is_phantom_plane[MAX_PIPES]; +}; + struct dc_stream_state { // sink is deprecated, new code should not reference // this pointer @@ -197,7 +212,18 @@ struct dc_stream_state { bool use_vsc_sdp_for_colorimetry; bool ignore_msa_timing_param; + /** + * @allow_freesync: + * + * It say if Freesync is enabled or not. + */ bool allow_freesync; + + /** + * @vrr_active_variable: + * + * It describes if VRR is in use. 
+ */ bool vrr_active_variable; bool freesync_on_desktop; @@ -517,10 +543,10 @@ bool dc_stream_get_crtc_position(struct dc *dc, unsigned int *nom_v_pos); #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) -bool dc_stream_forward_dmcu_crc_window(struct dc *dc, struct dc_stream_state *stream, - struct crc_params *crc_window); -bool dc_stream_stop_dmcu_crc_win_update(struct dc *dc, - struct dc_stream_state *stream); +bool dc_stream_forward_crc_window(struct dc *dc, + struct rect *rect, + struct dc_stream_state *stream, + bool is_stop); #endif bool dc_stream_configure_crc(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dc_trace.h b/drivers/gpu/drm/amd/display/dc/dc_trace.h index c711797e5c9e..bbec308a3a5e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_trace.h +++ b/drivers/gpu/drm/amd/display/dc/dc_trace.h @@ -40,3 +40,5 @@ #define TRACE_DCN_FPU(begin, function, line, ref_count) \ trace_dcn_fpu(begin, function, line, ref_count) +#define TRACE_OPTC_LOCK_UNLOCK_STATE(optc, inst, lock) \ + trace_dcn_optc_lock_unlock_state(optc, inst, lock, __func__, __LINE__) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index ad9041472cca..dc78e2404b48 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -993,4 +993,11 @@ struct display_endpoint_id { enum display_endpoint_type ep_type; }; +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) +struct otg_phy_mux { + uint8_t phy_output_num; + uint8_t otg_output_num; +}; +#endif + #endif /* DC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index fbb19e253f50..d3cc5ec46956 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -927,19 +927,20 @@ static bool dcn10_recv_edid_cea_ack(struct dmcu *dmcu, int *offset) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) static void dcn10_forward_crc_window(struct dmcu *dmcu, - struct crc_region *crc_win, + struct rect *rect, struct otg_phy_mux *mux_mapping) { struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); unsigned int dmcu_max_retry_on_wait_reg_ready = 801; unsigned int dmcu_wait_reg_ready_interval = 100; unsigned int crc_start = 0, crc_end = 0, otg_phy_mux = 0; + int x_start, y_start, x_end, y_end; /* If microcontroller is not running, do nothing */ if (dmcu->dmcu_state != DMCU_RUNNING) return; - if (!crc_win) + if (!rect) return; /* waitDMCUReadyForCmd */ @@ -947,9 +948,14 @@ static void dcn10_forward_crc_window(struct dmcu *dmcu, dmcu_wait_reg_ready_interval, dmcu_max_retry_on_wait_reg_ready); + x_start = rect->x; + y_start = rect->y; + x_end = x_start + rect->width; + y_end = y_start + rect->height; + /* build up nitification data */ - crc_start = (((unsigned int) crc_win->x_start) << 16) | crc_win->y_start; - crc_end = (((unsigned int) crc_win->x_end) << 16) | crc_win->y_end; + crc_start = (((unsigned int) x_start) << 16) | y_start; + crc_end = (((unsigned int) x_end) << 16) | y_end; otg_phy_mux = (((unsigned int) mux_mapping->otg_output_num) << 16) | mux_mapping->phy_output_num; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index bec5e9f787fc..2d3201b77d6a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -399,7 +399,11 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, link->psr_settings.force_ffu_mode = 0; copy_settings_data->force_ffu_mode = 
link->psr_settings.force_ffu_mode; - if (link->fec_state == dc_link_fec_enabled && + if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE && + !link->dc->debug.disable_fec) && + (link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT && + !link->panel_config.dsc.disable_dsc_edp && + link->dc->caps.edp_dsc_support)) && link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_38EC11 && (!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1, sizeof(DP_SINK_DEVICE_STR_ID_1)) || @@ -409,6 +413,11 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, else copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 0; + //WA for PSR1 on specific TCON, require frame delay for frame re-lock + copy_settings_data->relock_delay_frame_cnt = 0; + if (link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_001CF8) + copy_settings_data->relock_delay_frame_cnt = 2; + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index d260eaa1509e..913a1fe6b3da 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -688,16 +688,6 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) early_control = lane_count; tg->funcs->set_early_control(tg, early_control); - - /* enable audio only within mode set */ - if (pipe_ctx->stream_res.audio != NULL) { - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc); - } - - - - } static enum bp_result link_transmitter_control( @@ -1081,12 +1071,14 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) struct dc *dc; struct clk_mgr *clk_mgr; unsigned int i, num_audio = 1; + const struct link_hwss *link_hwss; if (!pipe_ctx->stream) return; dc = pipe_ctx->stream->ctx->dc; clk_mgr = dc->clk_mgr; + link_hwss = get_link_hwss(pipe_ctx->stream->link, &pipe_ctx->link_res); if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true) return; @@ -1103,56 +1095,35 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) if (num_audio >= 1 && clk_mgr->funcs->enable_pme_wa) /*this is the first audio. 
apply the PME w/a in order to wake AZ from D3*/ clk_mgr->funcs->enable_pme_wa(clk_mgr); - /* un-mute audio */ - /* TODO: audio should be per stream rather than per link */ - if (is_dp_128b_132b_signal(pipe_ctx)) - pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.hpo_dp_stream_enc, false); - else - pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.stream_enc, false); + + link_hwss->enable_audio_packet(pipe_ctx); + if (pipe_ctx->stream_res.audio) pipe_ctx->stream_res.audio->enabled = true; } - - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_ENABLE_AUDIO_STREAM); } void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx) { struct dc *dc; struct clk_mgr *clk_mgr; + const struct link_hwss *link_hwss; if (!pipe_ctx || !pipe_ctx->stream) return; dc = pipe_ctx->stream->ctx->dc; clk_mgr = dc->clk_mgr; + link_hwss = get_link_hwss(pipe_ctx->stream->link, &pipe_ctx->link_res); if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == false) return; - if (is_dp_128b_132b_signal(pipe_ctx)) - pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.hpo_dp_stream_enc, true); - else - pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.stream_enc, true); + link_hwss->disable_audio_packet(pipe_ctx); + if (pipe_ctx->stream_res.audio) { pipe_ctx->stream_res.audio->enabled = false; - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - if (is_dp_128b_132b_signal(pipe_ctx)) - pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_disable( - pipe_ctx->stream_res.hpo_dp_stream_enc); - else - pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable( - pipe_ctx->stream_res.stream_enc); - else - pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_disable( - pipe_ctx->stream_res.stream_enc); - if (clk_mgr->funcs->enable_pme_wa) /*this is the first audio. 
apply the PME w/a in order to wake AZ from D3*/ clk_mgr->funcs->enable_pme_wa(clk_mgr); @@ -1163,9 +1134,6 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx) * stream->stream_engine_id); */ } - - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_DISABLE_AUDIO_STREAM); } void dce110_disable_stream(struct pipe_ctx *pipe_ctx) @@ -1487,6 +1455,9 @@ static enum dc_status apply_single_controller_ctx_to_hw( unsigned int event_triggers = 0; struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe; struct dce_hwseq *hws = dc->hwseq; + const struct link_hwss *link_hwss = get_link_hwss( + link, &pipe_ctx->link_res); + if (hws->funcs.disable_stream_gating) { hws->funcs.disable_stream_gating(dc, pipe_ctx); @@ -1497,23 +1468,8 @@ static enum dc_status apply_single_controller_ctx_to_hw( build_audio_output(context, pipe_ctx, &audio_output); - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - if (is_dp_128b_132b_signal(pipe_ctx)) - pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_setup( - pipe_ctx->stream_res.hpo_dp_stream_enc, - pipe_ctx->stream_res.audio->inst, - &pipe_ctx->stream->audio_info); - else - pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup( - pipe_ctx->stream_res.stream_enc, - pipe_ctx->stream_res.audio->inst, - &pipe_ctx->stream->audio_info); - else - pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_setup( - pipe_ctx->stream_res.stream_enc, - pipe_ctx->stream_res.audio->inst, - &pipe_ctx->stream->audio_info, - &audio_output.crtc_info); + link_hwss->setup_audio_output(pipe_ctx, &audio_output, + pipe_ctx->stream_res.audio->inst); pipe_ctx->stream_res.audio->funcs->az_configure( pipe_ctx->stream_res.audio, @@ -1605,8 +1561,13 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != NULL; - pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false; - + /* Phantom and main stream share the same link (because the stream + * is constructed with the same sink). Make sure not to override + * and link programming on the main. 
+ */ + if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false; + } return DC_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c index fc6aa098bda0..8db9f7514466 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c @@ -1128,6 +1128,7 @@ struct resource_pool *dce60_create_resource_pool( if (dce60_construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } @@ -1325,6 +1326,7 @@ struct resource_pool *dce61_create_resource_pool( if (dce61_construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } @@ -1518,6 +1520,7 @@ struct resource_pool *dce64_create_resource_pool( if (dce64_construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index b28025960050..5825e6f412bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -1137,6 +1137,7 @@ struct resource_pool *dce80_create_resource_pool( if (dce80_construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } @@ -1336,6 +1337,7 @@ struct resource_pool *dce81_create_resource_pool( if (dce81_construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index e48fd044f572..ba1c0621f0f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -171,6 +171,7 @@ struct dcn_hubbub_registers { uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B; uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C; uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D; + uint32_t SDPIF_REQUEST_RATE_LIMIT; }; #define HUBBUB_REG_FIELD_LIST_DCN32(type) \ @@ -360,7 +361,8 @@ struct dcn_hubbub_registers { type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C;\ type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C;\ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D;\ - type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D + type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D;\ + type SDPIF_REQUEST_RATE_LIMIT struct dcn_hubbub_shift { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 11e4c4e46947..c8ec11839b4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -97,10 +97,12 @@ void dcn10_lock_all_pipes(struct dc *dc, bool lock) { struct pipe_ctx *pipe_ctx; + struct pipe_ctx *old_pipe_ctx; struct timing_generator *tg; int i; for (i = 0; i < dc->res_pool->pipe_count; i++) { + old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; pipe_ctx = &context->res_ctx.pipe_ctx[i]; tg = pipe_ctx->stream_res.tg; @@ -110,7 +112,7 @@ void dcn10_lock_all_pipes(struct dc *dc, */ if (pipe_ctx->top_pipe || !pipe_ctx->stream || - !pipe_ctx->plane_state || + (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) || !tg->funcs->is_tg_enabled(tg)) continue; @@ -867,6 +869,32 @@ static void 
false_optc_underflow_wa( tg->funcs->clear_optc_underflow(tg); } +static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) +{ + struct pipe_ctx *other_pipe; + int vready_offset = pipe->pipe_dlg_param.vready_offset; + + /* Always use the largest vready_offset of all connected pipes */ + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + + return vready_offset; +} + enum dc_status dcn10_enable_stream_timing( struct pipe_ctx *pipe_ctx, struct dc_state *context, @@ -910,7 +938,7 @@ enum dc_status dcn10_enable_stream_timing( pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, &stream->timing, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, @@ -2188,6 +2216,12 @@ void dcn10_enable_vblanks_synchronization( opp = grouped_pipes[i]->stream_res.opp; tg = grouped_pipes[i]->stream_res.tg; tg->funcs->get_otg_active_size(tg, &width, &height); + + if (!tg->funcs->is_tg_enabled(tg)) { + DC_SYNC_INFO("Skipping timing sync on disabled OTG\n"); + return; + } + if (opp->funcs->opp_program_dpg_dimensions) opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1); } @@ -2250,6 +2284,12 @@ void dcn10_enable_timing_synchronization( opp = grouped_pipes[i]->stream_res.opp; tg = grouped_pipes[i]->stream_res.tg; tg->funcs->get_otg_active_size(tg, &width, &height); + + if (!tg->funcs->is_tg_enabled(tg)) { + DC_SYNC_INFO("Skipping timing sync on disabled OTG\n"); + return; + } + if (opp->funcs->opp_program_dpg_dimensions) opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1); } @@ -2900,7 +2940,7 @@ void dcn10_program_pipe( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 33d780218790..c9e53dc49c92 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -27,6 +27,7 @@ #include "reg_helper.h" #include "dcn10_optc.h" #include "dc.h" +#include "dc_trace.h" #define REG(reg)\ optc1->tg_regs->reg @@ -657,6 +658,8 @@ void optc1_lock(struct timing_generator *optc) REG_WAIT(OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, 1, 1, 10); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); } void optc1_unlock(struct timing_generator *optc) @@ -665,6 +668,8 @@ 
void optc1_unlock(struct timing_generator *optc) REG_SET(OTG_MASTER_UPDATE_LOCK, 0, OTG_MASTER_UPDATE_LOCK, 0); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, false); } void optc1_get_position(struct timing_generator *optc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 56d30baf12df..6bfac8088ab0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx) return value; } -/* - * Some architectures don't support soft-float (e.g. aarch64), on those - * this function has to be called with hardfloat enabled, make sure not - * to inline it so whatever fp stuff is done stays inside - */ -static noinline void dcn10_resource_construct_fp( - struct dc *dc) -{ - if (dc->ctx->dce_version == DCN_VERSION_1_01) { - struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc; - struct dcn_ip_params *dcn_ip = dc->dcn_ip; - struct display_mode_lib *dml = &dc->dml; - - dml->ip.max_num_dpp = 3; - /* TODO how to handle 23.84? */ - dcn_soc->dram_clock_change_latency = 23; - dcn_ip->max_num_dpp = 3; - } - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { - dc->dcn_soc->urgent_latency = 3; - dc->debug.disable_dmcu = true; - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f; - } - - - dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width; - ASSERT(dc->dcn_soc->number_of_channels < 3); - if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/ - dc->dcn_soc->number_of_channels = 2; - - if (dc->dcn_soc->number_of_channels == 1) { - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f; - dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f; - dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f; - dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f; - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f; - } - } -} - static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks) { int i; @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); - /* Other architectures we build for build this with soft-float */ + DC_FP_START(); dcn10_resource_construct_fp(dc); + DC_FP_END(); if (!dc->config.is_vmin_only_asic) if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h index a85ed228dfc2..a9dd9ae23ec9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h @@ -27,204 +27,177 @@ #define TO_DCN20_DWBC(dwbc_base) \ container_of(dwbc_base, struct dcn20_dwbc, base) -/* DCN */ -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -#define BASE(seg) \ - BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - 
-#define SF(reg_name, field_name, post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - - #define DWBC_COMMON_REG_LIST_DCN2_0(inst) \ - SRI2(WB_ENABLE, CNV, inst),\ - SRI2(WB_EC_CONFIG, CNV, inst),\ - SRI2(CNV_MODE, CNV, inst),\ - SRI2(CNV_WINDOW_START, CNV, inst),\ - SRI2(CNV_WINDOW_SIZE, CNV, inst),\ - SRI2(CNV_UPDATE, CNV, inst),\ - SRI2(CNV_SOURCE_SIZE, CNV, inst),\ - SRI2(CNV_TEST_CNTL, CNV, inst),\ - SRI2(CNV_TEST_CRC_RED, CNV, inst),\ - SRI2(CNV_TEST_CRC_GREEN, CNV, inst),\ - SRI2(CNV_TEST_CRC_BLUE, CNV, inst),\ - SRI2(WBSCL_COEF_RAM_SELECT, WBSCL, inst),\ - SRI2(WBSCL_COEF_RAM_TAP_DATA, WBSCL, inst),\ - SRI2(WBSCL_MODE, WBSCL, inst),\ - SRI2(WBSCL_TAP_CONTROL, WBSCL, inst),\ - SRI2(WBSCL_DEST_SIZE, WBSCL, inst),\ - SRI2(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL, inst),\ - SRI2(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL, inst),\ - SRI2(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL, inst),\ - SRI2(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL, inst),\ - SRI2(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL, inst),\ - SRI2(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL, inst),\ - SRI2(WBSCL_ROUND_OFFSET, WBSCL, inst),\ - SRI2(WBSCL_OVERFLOW_STATUS, WBSCL, inst),\ - SRI2(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL, inst),\ - SRI2(WBSCL_TEST_CNTL, WBSCL, inst),\ - SRI2(WBSCL_TEST_CRC_RED, WBSCL, inst),\ - SRI2(WBSCL_TEST_CRC_GREEN, WBSCL, inst),\ - SRI2(WBSCL_TEST_CRC_BLUE, WBSCL, inst),\ - SRI2(WBSCL_BACKPRESSURE_CNT_EN, WBSCL, inst),\ - SRI2(WB_MCIF_BACKPRESSURE_CNT, WBSCL, inst),\ - SRI2(WBSCL_CLAMP_Y_RGB, WBSCL, inst),\ - SRI2(WBSCL_CLAMP_CBCR, WBSCL, inst),\ - SRI2(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL, inst),\ - SRI2(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL, inst),\ - SRI2(WBSCL_DEBUG, WBSCL, inst),\ - SRI2(WBSCL_TEST_DEBUG_INDEX, WBSCL, inst),\ - SRI2(WBSCL_TEST_DEBUG_DATA, WBSCL, inst),\ - SRI2(WB_DEBUG_CTRL, CNV, inst),\ - SRI2(WB_DBG_MODE, CNV, inst),\ - SRI2(WB_HW_DEBUG, CNV, inst),\ - SRI2(CNV_TEST_DEBUG_INDEX, CNV, inst),\ - SRI2(CNV_TEST_DEBUG_DATA, CNV, inst),\ - SRI2(WB_SOFT_RESET, CNV, inst),\ - SRI2(WB_WARM_UP_MODE_CTL1, CNV, inst),\ - SRI2(WB_WARM_UP_MODE_CTL2, CNV, inst) + SRI2_DWB(WB_ENABLE, CNV, inst),\ + SRI2_DWB(WB_EC_CONFIG, CNV, inst),\ + SRI2_DWB(CNV_MODE, CNV, inst),\ + SRI2_DWB(CNV_WINDOW_START, CNV, inst),\ + SRI2_DWB(CNV_WINDOW_SIZE, CNV, inst),\ + SRI2_DWB(CNV_UPDATE, CNV, inst),\ + SRI2_DWB(CNV_SOURCE_SIZE, CNV, inst),\ + SRI2_DWB(CNV_TEST_CNTL, CNV, inst),\ + SRI2_DWB(CNV_TEST_CRC_RED, CNV, inst),\ + SRI2_DWB(CNV_TEST_CRC_GREEN, CNV, inst),\ + SRI2_DWB(CNV_TEST_CRC_BLUE, CNV, inst),\ + SRI2_DWB(WBSCL_COEF_RAM_SELECT, WBSCL, inst),\ + SRI2_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL, inst),\ + SRI2_DWB(WBSCL_MODE, WBSCL, inst),\ + SRI2_DWB(WBSCL_TAP_CONTROL, WBSCL, inst),\ + SRI2_DWB(WBSCL_DEST_SIZE, WBSCL, inst),\ + SRI2_DWB(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL, inst),\ + SRI2_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL, inst),\ + SRI2_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL, inst),\ + SRI2_DWB(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL, inst),\ + SRI2_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL, inst),\ + SRI2_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL, inst),\ + SRI2_DWB(WBSCL_ROUND_OFFSET, WBSCL, inst),\ + SRI2_DWB(WBSCL_OVERFLOW_STATUS, WBSCL, inst),\ + SRI2_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL, inst),\ + SRI2_DWB(WBSCL_TEST_CNTL, WBSCL, inst),\ + SRI2_DWB(WBSCL_TEST_CRC_RED, WBSCL, inst),\ + SRI2_DWB(WBSCL_TEST_CRC_GREEN, WBSCL, inst),\ + SRI2_DWB(WBSCL_TEST_CRC_BLUE, WBSCL, inst),\ + SRI2_DWB(WBSCL_BACKPRESSURE_CNT_EN, WBSCL, inst),\ + SRI2_DWB(WB_MCIF_BACKPRESSURE_CNT, WBSCL, inst),\ + SRI2_DWB(WBSCL_CLAMP_Y_RGB, WBSCL, 
inst),\ + SRI2_DWB(WBSCL_CLAMP_CBCR, WBSCL, inst),\ + SRI2_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL, inst),\ + SRI2_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL, inst),\ + SRI2_DWB(WBSCL_DEBUG, WBSCL, inst),\ + SRI2_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL, inst),\ + SRI2_DWB(WBSCL_TEST_DEBUG_DATA, WBSCL, inst),\ + SRI2_DWB(WB_DEBUG_CTRL, CNV, inst),\ + SRI2_DWB(WB_DBG_MODE, CNV, inst),\ + SRI2_DWB(WB_HW_DEBUG, CNV, inst),\ + SRI2_DWB(CNV_TEST_DEBUG_INDEX, CNV, inst),\ + SRI2_DWB(CNV_TEST_DEBUG_DATA, CNV, inst),\ + SRI2_DWB(WB_SOFT_RESET, CNV, inst),\ + SRI2_DWB(WB_WARM_UP_MODE_CTL1, CNV, inst),\ + SRI2_DWB(WB_WARM_UP_MODE_CTL2, CNV, inst) #define DWBC_COMMON_MASK_SH_LIST_DCN2_0(mask_sh) \ - SF(WB_ENABLE, WB_ENABLE, mask_sh),\ - SF(WB_EC_CONFIG, DISPCLK_R_WB_GATE_DIS, mask_sh),\ - SF(WB_EC_CONFIG, DISPCLK_G_WB_GATE_DIS, mask_sh),\ - SF(WB_EC_CONFIG, DISPCLK_G_WBSCL_GATE_DIS, mask_sh),\ - SF(WB_EC_CONFIG, WB_TEST_CLK_SEL, mask_sh),\ - SF(WB_EC_CONFIG, WB_LB_LS_DIS, mask_sh),\ - SF(WB_EC_CONFIG, WB_LB_SD_DIS, mask_sh),\ - SF(WB_EC_CONFIG, WB_LUT_LS_DIS, mask_sh),\ - SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_MODE_SEL, mask_sh),\ - SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_DIS, mask_sh),\ - SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_FORCE, mask_sh),\ - SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_STATE, mask_sh),\ - SF(WB_EC_CONFIG, WB_RAM_PW_SAVE_MODE, mask_sh),\ - SF(WB_EC_CONFIG, WBSCL_LUT_MEM_PWR_STATE, mask_sh),\ - SF(CNV_MODE, CNV_OUT_BPC, mask_sh),\ - SF(CNV_MODE, CNV_FRAME_CAPTURE_RATE, mask_sh),\ - SF(CNV_MODE, CNV_WINDOW_CROP_EN, mask_sh),\ - SF(CNV_MODE, CNV_STEREO_TYPE, mask_sh),\ - SF(CNV_MODE, CNV_INTERLACED_MODE, mask_sh),\ - SF(CNV_MODE, CNV_EYE_SELECTION, mask_sh),\ - SF(CNV_MODE, CNV_STEREO_POLARITY, mask_sh),\ - SF(CNV_MODE, CNV_INTERLACED_FIELD_ORDER, mask_sh),\ - SF(CNV_MODE, CNV_STEREO_SPLIT, mask_sh),\ - SF(CNV_MODE, CNV_NEW_CONTENT, mask_sh),\ - SF(CNV_MODE, CNV_FRAME_CAPTURE_EN_CURRENT, mask_sh),\ - SF(CNV_MODE, CNV_FRAME_CAPTURE_EN, mask_sh),\ - SF(CNV_WINDOW_START, CNV_WINDOW_START_X, mask_sh),\ - SF(CNV_WINDOW_START, CNV_WINDOW_START_Y, mask_sh),\ - SF(CNV_WINDOW_SIZE, CNV_WINDOW_WIDTH, mask_sh),\ - SF(CNV_WINDOW_SIZE, CNV_WINDOW_HEIGHT, mask_sh),\ - SF(CNV_UPDATE, CNV_UPDATE_PENDING, mask_sh),\ - SF(CNV_UPDATE, CNV_UPDATE_TAKEN, mask_sh),\ - SF(CNV_UPDATE, CNV_UPDATE_LOCK, mask_sh),\ - SF(CNV_SOURCE_SIZE, CNV_SOURCE_WIDTH, mask_sh),\ - SF(CNV_SOURCE_SIZE, CNV_SOURCE_HEIGHT, mask_sh),\ - SF(CNV_TEST_CNTL, CNV_TEST_CRC_EN, mask_sh),\ - SF(CNV_TEST_CNTL, CNV_TEST_CRC_CONT_EN, mask_sh),\ - SF(CNV_TEST_CRC_RED, CNV_TEST_CRC_RED_MASK, mask_sh),\ - SF(CNV_TEST_CRC_RED, CNV_TEST_CRC_SIG_RED, mask_sh),\ - SF(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_GREEN_MASK, mask_sh),\ - SF(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_SIG_GREEN, mask_sh),\ - SF(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_BLUE_MASK, mask_sh),\ - SF(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_SIG_BLUE, mask_sh),\ - SF(WB_DEBUG_CTRL, WB_DEBUG_EN, mask_sh),\ - SF(WB_DEBUG_CTRL, WB_DEBUG_SEL, mask_sh),\ - SF(WB_DBG_MODE, WB_DBG_MODE_EN, mask_sh),\ - SF(WB_DBG_MODE, WB_DBG_DIN_FMT, mask_sh),\ - SF(WB_DBG_MODE, WB_DBG_36MODE, mask_sh),\ - SF(WB_DBG_MODE, WB_DBG_CMAP, mask_sh),\ - SF(WB_DBG_MODE, WB_DBG_PXLRATE_ERROR, mask_sh),\ - SF(WB_DBG_MODE, WB_DBG_SOURCE_WIDTH, mask_sh),\ - SF(WB_HW_DEBUG, WB_HW_DEBUG, mask_sh),\ - SF(WB_SOFT_RESET, WB_SOFT_RESET, mask_sh),\ - SF(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_INDEX, mask_sh),\ - SF(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_WRITE_EN, mask_sh),\ - SF(CNV_TEST_DEBUG_DATA, CNV_TEST_DEBUG_DATA, mask_sh),\ - SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_TAP_PAIR_IDX, 
mask_sh),\ - SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_PHASE, mask_sh),\ - SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_FILTER_TYPE, mask_sh),\ - SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\ - SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\ - SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\ - SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\ - SF(WBSCL_MODE, WBSCL_MODE, mask_sh),\ - SF(WBSCL_MODE, WBSCL_OUT_BIT_DEPTH, mask_sh),\ - SF(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_Y_RGB, mask_sh),\ - SF(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_CBCR, mask_sh),\ - SF(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_Y_RGB, mask_sh),\ - SF(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_CBCR, mask_sh),\ - SF(WBSCL_DEST_SIZE, WBSCL_DEST_HEIGHT, mask_sh),\ - SF(WBSCL_DEST_SIZE, WBSCL_DEST_WIDTH, mask_sh),\ - SF(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, mask_sh),\ - SF(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_FRAC_Y_RGB, mask_sh),\ - SF(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_INT_Y_RGB, mask_sh),\ - SF(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_FRAC_CBCR, mask_sh),\ - SF(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_INT_CBCR, mask_sh),\ - SF(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, mask_sh),\ - SF(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_FRAC_Y_RGB, mask_sh),\ - SF(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_INT_Y_RGB, mask_sh),\ - SF(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_FRAC_CBCR, mask_sh),\ - SF(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_INT_CBCR, mask_sh),\ - SF(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_Y_RGB, mask_sh),\ - SF(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_CBCR, mask_sh),\ - SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_FLAG, mask_sh),\ - SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_ACK, mask_sh),\ - SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_MASK, mask_sh),\ - SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_STATUS, mask_sh),\ - SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_TYPE, mask_sh),\ - SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_FLAG, mask_sh),\ - SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_ACK, mask_sh),\ - SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_MASK, mask_sh),\ - SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_STATUS, mask_sh),\ - SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_TYPE, mask_sh),\ - SF(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_EN, mask_sh),\ - SF(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_CONT_EN, mask_sh),\ - SF(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_RED_MASK, mask_sh),\ - SF(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_SIG_RED, mask_sh),\ - SF(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_GREEN_MASK, mask_sh),\ - SF(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_SIG_GREEN, mask_sh),\ - SF(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_BLUE_MASK, mask_sh),\ - SF(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_SIG_BLUE, mask_sh),\ - SF(WBSCL_BACKPRESSURE_CNT_EN, WBSCL_BACKPRESSURE_CNT_EN, mask_sh),\ - SF(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_Y_MAX_BACKPRESSURE, mask_sh),\ - SF(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_C_MAX_BACKPRESSURE, mask_sh),\ - SF(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_UPPER_Y_RGB, mask_sh),\ - SF(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_LOWER_Y_RGB, mask_sh),\ - SF(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_UPPER_CBCR, mask_sh),\ - SF(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_LOWER_CBCR, mask_sh),\ - SF(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_OUTSIDE_PIX_STRATEGY, mask_sh),\ - SF(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_BLACK_COLOR_G_Y, mask_sh),\ - SF(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_B_CB, mask_sh),\ - 
SF(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_R_CR, mask_sh),\ - SF(WBSCL_DEBUG, WBSCL_DEBUG, mask_sh),\ - SF(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_INDEX, mask_sh),\ - SF(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_WRITE_EN, mask_sh),\ - SF(WBSCL_TEST_DEBUG_DATA, WBSCL_TEST_DEBUG_DATA, mask_sh),\ - SF(WB_WARM_UP_MODE_CTL1, WIDTH_WARMUP, mask_sh),\ - SF(WB_WARM_UP_MODE_CTL1, HEIGHT_WARMUP, mask_sh),\ - SF(WB_WARM_UP_MODE_CTL1, GMC_WARM_UP_ENABLE, mask_sh),\ - SF(WB_WARM_UP_MODE_CTL2, DATA_VALUE_WARMUP, mask_sh),\ - SF(WB_WARM_UP_MODE_CTL2, MODE_WARMUP, mask_sh),\ - SF(WB_WARM_UP_MODE_CTL2, DATA_DEPTH_WARMUP, mask_sh) + SF_DWB(WB_ENABLE, WB_ENABLE, mask_sh),\ + SF_DWB(WB_EC_CONFIG, DISPCLK_R_WB_GATE_DIS, mask_sh),\ + SF_DWB(WB_EC_CONFIG, DISPCLK_G_WB_GATE_DIS, mask_sh),\ + SF_DWB(WB_EC_CONFIG, DISPCLK_G_WBSCL_GATE_DIS, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WB_TEST_CLK_SEL, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WB_LB_LS_DIS, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WB_LB_SD_DIS, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WB_LUT_LS_DIS, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_MODE_SEL, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_DIS, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_FORCE, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_STATE, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WB_RAM_PW_SAVE_MODE, mask_sh),\ + SF_DWB(WB_EC_CONFIG, WBSCL_LUT_MEM_PWR_STATE, mask_sh),\ + SF_DWB(CNV_MODE, CNV_OUT_BPC, mask_sh),\ + SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_RATE, mask_sh),\ + SF_DWB(CNV_MODE, CNV_WINDOW_CROP_EN, mask_sh),\ + SF_DWB(CNV_MODE, CNV_STEREO_TYPE, mask_sh),\ + SF_DWB(CNV_MODE, CNV_INTERLACED_MODE, mask_sh),\ + SF_DWB(CNV_MODE, CNV_EYE_SELECTION, mask_sh),\ + SF_DWB(CNV_MODE, CNV_STEREO_POLARITY, mask_sh),\ + SF_DWB(CNV_MODE, CNV_INTERLACED_FIELD_ORDER, mask_sh),\ + SF_DWB(CNV_MODE, CNV_STEREO_SPLIT, mask_sh),\ + SF_DWB(CNV_MODE, CNV_NEW_CONTENT, mask_sh),\ + SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_EN_CURRENT, mask_sh),\ + SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_EN, mask_sh),\ + SF_DWB(CNV_WINDOW_START, CNV_WINDOW_START_X, mask_sh),\ + SF_DWB(CNV_WINDOW_START, CNV_WINDOW_START_Y, mask_sh),\ + SF_DWB(CNV_WINDOW_SIZE, CNV_WINDOW_WIDTH, mask_sh),\ + SF_DWB(CNV_WINDOW_SIZE, CNV_WINDOW_HEIGHT, mask_sh),\ + SF_DWB(CNV_UPDATE, CNV_UPDATE_PENDING, mask_sh),\ + SF_DWB(CNV_UPDATE, CNV_UPDATE_TAKEN, mask_sh),\ + SF_DWB(CNV_UPDATE, CNV_UPDATE_LOCK, mask_sh),\ + SF_DWB(CNV_SOURCE_SIZE, CNV_SOURCE_WIDTH, mask_sh),\ + SF_DWB(CNV_SOURCE_SIZE, CNV_SOURCE_HEIGHT, mask_sh),\ + SF_DWB(CNV_TEST_CNTL, CNV_TEST_CRC_EN, mask_sh),\ + SF_DWB(CNV_TEST_CNTL, CNV_TEST_CRC_CONT_EN, mask_sh),\ + SF_DWB(CNV_TEST_CRC_RED, CNV_TEST_CRC_RED_MASK, mask_sh),\ + SF_DWB(CNV_TEST_CRC_RED, CNV_TEST_CRC_SIG_RED, mask_sh),\ + SF_DWB(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_GREEN_MASK, mask_sh),\ + SF_DWB(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_SIG_GREEN, mask_sh),\ + SF_DWB(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_BLUE_MASK, mask_sh),\ + SF_DWB(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_SIG_BLUE, mask_sh),\ + SF_DWB(WB_DEBUG_CTRL, WB_DEBUG_EN, mask_sh),\ + SF_DWB(WB_DEBUG_CTRL, WB_DEBUG_SEL, mask_sh),\ + SF_DWB(WB_DBG_MODE, WB_DBG_MODE_EN, mask_sh),\ + SF_DWB(WB_DBG_MODE, WB_DBG_DIN_FMT, mask_sh),\ + SF_DWB(WB_DBG_MODE, WB_DBG_36MODE, mask_sh),\ + SF_DWB(WB_DBG_MODE, WB_DBG_CMAP, mask_sh),\ + SF_DWB(WB_DBG_MODE, WB_DBG_PXLRATE_ERROR, mask_sh),\ + SF_DWB(WB_DBG_MODE, WB_DBG_SOURCE_WIDTH, mask_sh),\ + SF_DWB(WB_HW_DEBUG, WB_HW_DEBUG, mask_sh),\ + SF_DWB(WB_SOFT_RESET, WB_SOFT_RESET, mask_sh),\ + SF_DWB(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_INDEX, mask_sh),\ + 
SF_DWB(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_WRITE_EN, mask_sh),\ + SF_DWB(CNV_TEST_DEBUG_DATA, CNV_TEST_DEBUG_DATA, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_TAP_PAIR_IDX, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_PHASE, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_FILTER_TYPE, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\ + SF_DWB(WBSCL_MODE, WBSCL_MODE, mask_sh),\ + SF_DWB(WBSCL_MODE, WBSCL_OUT_BIT_DEPTH, mask_sh),\ + SF_DWB(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_CBCR, mask_sh),\ + SF_DWB(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_CBCR, mask_sh),\ + SF_DWB(WBSCL_DEST_SIZE, WBSCL_DEST_HEIGHT, mask_sh),\ + SF_DWB(WBSCL_DEST_SIZE, WBSCL_DEST_WIDTH, mask_sh),\ + SF_DWB(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, mask_sh),\ + SF_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_FRAC_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_INT_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_FRAC_CBCR, mask_sh),\ + SF_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_INT_CBCR, mask_sh),\ + SF_DWB(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, mask_sh),\ + SF_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_FRAC_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_INT_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_FRAC_CBCR, mask_sh),\ + SF_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_INT_CBCR, mask_sh),\ + SF_DWB(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_CBCR, mask_sh),\ + SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_FLAG, mask_sh),\ + SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_ACK, mask_sh),\ + SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_MASK, mask_sh),\ + SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_STATUS, mask_sh),\ + SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_TYPE, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_FLAG, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_ACK, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_MASK, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_STATUS, mask_sh),\ + SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_TYPE, mask_sh),\ + SF_DWB(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_EN, mask_sh),\ + SF_DWB(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_CONT_EN, mask_sh),\ + SF_DWB(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_RED_MASK, mask_sh),\ + SF_DWB(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_SIG_RED, mask_sh),\ + SF_DWB(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_GREEN_MASK, mask_sh),\ + SF_DWB(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_SIG_GREEN, mask_sh),\ + SF_DWB(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_BLUE_MASK, mask_sh),\ + SF_DWB(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_SIG_BLUE, mask_sh),\ + SF_DWB(WBSCL_BACKPRESSURE_CNT_EN, WBSCL_BACKPRESSURE_CNT_EN, mask_sh),\ + SF_DWB(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_Y_MAX_BACKPRESSURE, mask_sh),\ + SF_DWB(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_C_MAX_BACKPRESSURE, mask_sh),\ + SF_DWB(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_UPPER_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_CLAMP_Y_RGB, 
WBSCL_CLAMP_LOWER_Y_RGB, mask_sh),\ + SF_DWB(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_UPPER_CBCR, mask_sh),\ + SF_DWB(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_LOWER_CBCR, mask_sh),\ + SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_OUTSIDE_PIX_STRATEGY, mask_sh),\ + SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_BLACK_COLOR_G_Y, mask_sh),\ + SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_B_CB, mask_sh),\ + SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_R_CR, mask_sh),\ + SF_DWB(WBSCL_DEBUG, WBSCL_DEBUG, mask_sh),\ + SF_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_INDEX, mask_sh),\ + SF_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_WRITE_EN, mask_sh),\ + SF_DWB(WBSCL_TEST_DEBUG_DATA, WBSCL_TEST_DEBUG_DATA, mask_sh),\ + SF_DWB(WB_WARM_UP_MODE_CTL1, WIDTH_WARMUP, mask_sh),\ + SF_DWB(WB_WARM_UP_MODE_CTL1, HEIGHT_WARMUP, mask_sh),\ + SF_DWB(WB_WARM_UP_MODE_CTL1, GMC_WARM_UP_ENABLE, mask_sh),\ + SF_DWB(WB_WARM_UP_MODE_CTL2, DATA_VALUE_WARMUP, mask_sh),\ + SF_DWB(WB_WARM_UP_MODE_CTL2, MODE_WARMUP, mask_sh),\ + SF_DWB(WB_WARM_UP_MODE_CTL2, DATA_DEPTH_WARMUP, mask_sh) #define DWBC_REG_FIELD_LIST_DCN2_0(type) \ type WB_ENABLE;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index a7e0001a8f46..6291a241158a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1079,6 +1079,29 @@ void dcn20_blank_pixel_data( 0); } + if (!blank && dc->debug.enable_single_display_2to1_odm_policy) { + /* when exiting dynamic ODM need to reinit DPG state for unused pipes */ + struct pipe_ctx *old_odm_pipe = dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx].next_odm_pipe; + + odm_pipe = pipe_ctx->next_odm_pipe; + + while (old_odm_pipe) { + if (!odm_pipe || old_odm_pipe->pipe_idx != odm_pipe->pipe_idx) + dc->hwss.set_disp_pattern_generator(dc, + old_odm_pipe, + CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, + CONTROLLER_DP_COLOR_SPACE_UDEFINED, + COLOR_DEPTH_888, + NULL, + 0, + 0, + 0); + old_odm_pipe = old_odm_pipe->next_odm_pipe; + if (odm_pipe) + odm_pipe = odm_pipe->next_odm_pipe; + } + } + if (!blank) if (stream_res->abm) { dc->hwss.set_pipe(pipe_ctx); @@ -1287,6 +1310,19 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx { new_pipe->update_flags.raw = 0; + /* If non-phantom pipe is being transitioned to a phantom pipe, + * set disable and return immediately. This is because the pipe + * that was previously in use must be fully disabled before we + * can "enable" it as a phantom pipe (since the OTG will certainly + * be different). The post_unlock sequence will set the correct + * update flags to enable the phantom pipe. 
+ */ + if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom && + new_pipe->plane_state && new_pipe->plane_state->is_phantom) { + new_pipe->update_flags.bits.disable = 1; + return; + } + /* Exit on unchanged, unused pipe */ if (!old_pipe->plane_state && !new_pipe->plane_state) return; @@ -1616,6 +1652,31 @@ static void dcn20_update_dchubp_dpp( hubp->funcs->phantom_hubp_post_enable(hubp); } +static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) +{ + struct pipe_ctx *other_pipe; + int vready_offset = pipe->pipe_dlg_param.vready_offset; + + /* Always use the largest vready_offset of all connected pipes */ + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) + vready_offset = other_pipe->pipe_dlg_param.vready_offset; + } + + return vready_offset; +} static void dcn20_program_pipe( struct dc *dc, @@ -1634,16 +1695,14 @@ static void dcn20_program_pipe( && !pipe_ctx->prev_odm_pipe) { pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { - pipe_ctx->stream_res.tg->funcs->wait_for_state( - pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); - pipe_ctx->stream_res.tg->funcs->wait_for_state( - pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); } pipe_ctx->stream_res.tg->funcs->set_vtg_params( @@ -1682,7 +1741,10 @@ static void dcn20_program_pipe( * only do gamma programming for powering on, internal memcmp to avoid * updating on slave planes */ - if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf) + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf || + pipe_ctx->plane_state->update_flags.bits.output_tf_change) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1813,6 +1875,17 @@ void dcn20_program_front_end_for_ctx( context->stream_status[0].plane_count > 1) { pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp); } + + /* when dynamic ODM is active, pipes must be reconfigured when all planes are + * disabled, as some transitions will leave software and hardware state + * mismatched. 
+ */ + if (dc->debug.enable_single_display_2to1_odm_policy && + pipe->stream && + pipe->update_flags.bits.disable && + !pipe->prev_odm_pipe && + hws->funcs.update_odm) + hws->funcs.update_odm(dc, context, pipe); } } @@ -1852,26 +1925,6 @@ void dcn20_post_unlock_program_front_end( for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - - /* If an active, non-phantom pipe is being transitioned into a phantom - * pipe, wait for the double buffer update to complete first before we do - * phantom pipe programming (HUBP_VTG_SEL updates right away so that can - * cause issues). - */ - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM && - old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { - old_pipe->stream_res.tg->funcs->wait_for_state( - old_pipe->stream_res.tg, - CRTC_STATE_VBLANK); - old_pipe->stream_res.tg->funcs->wait_for_state( - old_pipe->stream_res.tg, - CRTC_STATE_VACTIVE); - } - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->plane_state && !pipe->top_pipe) { /* Program phantom pipe here to prevent a frame of underflow in the MPO transition @@ -1881,6 +1934,11 @@ void dcn20_post_unlock_program_front_end( */ while (pipe) { if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + /* When turning on the phantom pipe we want to run through the + * entire enable sequence, so apply all the "enable" flags. + */ + if (dc->hwss.apply_update_flags_for_phantom) + dc->hwss.apply_update_flags_for_phantom(pipe); if (dc->hwss.update_phantom_vp_position) dc->hwss.update_phantom_vp_position(dc, context, pipe); dcn20_program_pipe(dc, pipe, context); @@ -1953,10 +2011,13 @@ void dcn20_prepare_bandwidth( /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_size) { - if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) + if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - else + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); } @@ -2037,7 +2098,7 @@ bool dcn20_update_bandwidth( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg, - pipe_ctx->pipe_dlg_param.vready_offset, + calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); @@ -2601,14 +2662,6 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) if (dc->hwseq->funcs.set_pixels_per_cycle) dc->hwseq->funcs.set_pixels_per_cycle(pipe_ctx); - - /* enable audio only within mode set */ - if (pipe_ctx->stream_res.audio != NULL) { - if (is_dp_128b_132b_signal(pipe_ctx)) - pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.hpo_dp_stream_enc); - else if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc); - } } void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h index 7bcee5894d2e..5ab32aa51e13 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h @@ -29,13 +29,6 @@ #define TO_DCN20_MMHUBBUB(mcif_wb_base) \ container_of(mcif_wb_base, struct dcn20_mmhubbub, base) -/* DCN */ -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -#define BASE(seg) \ - BASE_INNER(seg) - #define MCIF_WB_COMMON_REG_LIST_DCN2_0(inst) \ SRI(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst),\ SRI(MCIF_WB_BUFMGR_CUR_LINE_R, MCIF_WB, inst),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 8224b9bf01d1..8a0dd0d7134b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -124,8 +124,6 @@ enum dcn20_clk_src_array_id { * macros to expend register list macro defined in HW object header file */ /* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -138,6 +136,15 @@ enum dcn20_clk_src_array_id { .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name +#define SRI2_DWB(reg_name, block, id)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name +#define SF_DWB(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define SRIR(var_name, reg_name, block, id)\ .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name @@ -1454,6 +1461,22 @@ enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ return result; } +/** + * dcn20_split_stream_for_odm - Check if stream can be splited for ODM + * + * @dc: DC object with resource pool info required for pipe split + * @res_ctx: Persistent state of resources + * @prev_odm_pipe: Reference to the previous ODM pipe + * @next_odm_pipe: Reference to the next ODM pipe + * + * This function takes a logically active pipe and a logically free pipe and + * halves all the scaling parameters that need to be halved while populating + * the free pipe with the required resources and configuring the next/previous + * ODM pipe pointers. + * + * Return: + * Return true if split stream for ODM is possible, otherwise, return false. 
+ */ bool dcn20_split_stream_for_odm( const struct dc *dc, struct resource_context *res_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h index f1ef46e8da5b..e7a1b7fa2cce 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h @@ -28,12 +28,6 @@ #include "vmid.h" -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -#define BASE(seg) \ - BASE_INNER(seg) - #define DCN20_VMID_REG_LIST(id)\ SRI(CNTL, DCN_VM_CONTEXT, id),\ SRI(PAGE_TABLE_BASE_ADDR_HI32, DCN_VM_CONTEXT, id),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 887081472c0d..fbcf0afeae0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -94,8 +94,6 @@ * macros to expend register list macro defined in HW object header file */ /* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -671,12 +669,15 @@ static const struct dc_debug_options debug_defaults_diags = { .disable_pplib_wm_range = true, .disable_stutter = true, .disable_48mhz_pwrdwn = true, - .disable_psr = true, .enable_tri_buf = true, .use_max_lb = true }; static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, .ilr = { .optimize_edp_link_rate = true, }, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h index 1010930cf071..fc00ec0a0881 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h @@ -27,21 +27,6 @@ #define TO_DCN30_DWBC(dwbc_base) \ container_of(dwbc_base, struct dcn30_dwbc, base) -/* DCN */ -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -#define BASE(seg) \ - BASE_INNER(seg) - -#define SF_DWB(reg_name, block, id, field_name, post_fix)\ - .field_name = block ## id ## _ ## reg_name ## __ ## field_name ## post_fix - - /* set field name */ -#define SF_DWB2(reg_name, block, id, field_name, post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - - #define DWBC_COMMON_REG_LIST_DCN30(inst) \ SR(DWB_ENABLE_CLK_CTRL),\ SR(DWB_MEM_PWR_CTRL),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h index 7446e54bf5aa..376620a8f02f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h @@ -31,13 +31,6 @@ #define TO_DCN30_MMHUBBUB(mcif_wb_base) \ container_of(mcif_wb_base, struct dcn30_mmhubbub, base) -/* DCN */ -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -#define BASE(seg) \ - BASE_INNER(seg) - #define MCIF_WB_COMMON_REG_LIST_DCN3_0(inst) \ SRI(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst),\ SRI(MCIF_WB_BUFMGR_STATUS, MCIF_WB, inst),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 892d3c4d01a1..867d60151aeb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -30,6 +30,7 @@ #include "dc_dmub_srv.h" #include "dml/dcn30/dcn30_fpu.h" +#include "dc_trace.h" #define REG(reg)\ optc1->tg_regs->reg @@ -58,6 +59,8 @@ void optc3_triplebuffer_lock(struct timing_generator *optc) 
REG_WAIT(OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, 1, 1, 10); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); } void optc3_lock_doublebuffer_enable(struct timing_generator *optc) @@ -93,6 +96,8 @@ void optc3_lock_doublebuffer_enable(struct timing_generator *optc) MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, 100, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); } void optc3_lock_doublebuffer_disable(struct timing_generator *optc) @@ -108,6 +113,8 @@ void optc3_lock_doublebuffer_disable(struct timing_generator *optc) REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0); REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 0); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); } void optc3_lock(struct timing_generator *optc) @@ -122,6 +129,8 @@ void optc3_lock(struct timing_generator *optc) REG_WAIT(OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, 1, 1, 10); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); } void optc3_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 020f512e9690..c18c52a60100 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -108,8 +108,6 @@ enum dcn30_clk_src_array_id { */ /* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -142,6 +140,9 @@ enum dcn30_clk_src_array_id { .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define DCCG_SRII(reg_name, block, id)\ .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name @@ -723,7 +724,6 @@ static const struct dc_debug_options debug_defaults_drv = { .underflow_assert_delay_us = 0xFFFFFFFF, .dwb_fi_phase = -1, // -1 = disable, .dmub_command_table = true, - .disable_psr = false, .use_max_lb = true, .exit_idle_opt_for_cursor_updates = true }; @@ -742,11 +742,17 @@ static const struct dc_debug_options debug_defaults_diags = { .scl_reset_length10 = true, .dwb_fi_phase = -1, // -1 = disable .dmub_command_table = true, - .disable_psr = true, .enable_tri_buf = true, .use_max_lb = true }; +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, +}; + static void dcn30_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN20_DPP(*dpp)); @@ -1323,6 +1329,7 @@ static struct clock_source *dcn30_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -2212,6 +2219,11 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params } } +static void dcn30_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + static const struct resource_funcs dcn30_res_pool_funcs = { .destroy = dcn30_destroy_resource_pool, .link_enc_create = dcn30_link_encoder_create, @@ -2231,6 +2243,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = { .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn30_update_bw_bounding_box, 
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn30_get_panel_config_defaults, }; #define CTX ctx diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index f04595b750ab..8cf10351f271 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -107,8 +107,6 @@ enum dcn301_clk_src_array_id { */ /* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -146,6 +144,9 @@ enum dcn301_clk_src_array_id { .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define DCCG_SRII(reg_name, block, id)\ .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name @@ -1288,6 +1289,7 @@ static struct clock_source *dcn301_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -1491,6 +1493,8 @@ static bool dcn301_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.dp_hdmi21_pcon_support = true; + /* read VBIOS LTTPR caps */ if (ctx->dc_bios->funcs->get_lttpr_caps) { enum bp_result bp_query_result; diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index b925b6ddde5a..47cffd0e6830 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -112,10 +112,16 @@ static const struct dc_debug_options debug_defaults_diags = { .dwb_fi_phase = -1, // -1 = disable .dmub_command_table = true, .enable_tri_buf = true, - .disable_psr = true, .use_max_lb = true }; +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, +}; + enum dcn302_clk_src_array_id { DCN302_CLK_SRC_PLL0, DCN302_CLK_SRC_PLL1, @@ -177,7 +183,6 @@ static const struct dc_plane_cap plane_cap = { mm ## reg_name /* DCN */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -210,6 +215,9 @@ static const struct dc_plane_cap plane_cap = { .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define SRII_MPC_RMU(reg_name, block, id)\ .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name @@ -458,6 +466,7 @@ static struct clock_source *dcn302_clock_source_create(struct dc_context *ctx, s return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -1132,6 +1141,11 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param DC_FP_END(); } +static void dcn302_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + static struct resource_funcs dcn302_res_pool_funcs = { .destroy = dcn302_destroy_resource_pool, .link_enc_create = dcn302_link_encoder_create, @@ -1151,6 +1165,7 @@ static struct resource_funcs dcn302_res_pool_funcs = { 
.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn302_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn302_get_panel_config_defaults, }; static struct dc_cap_funcs cap_funcs = { @@ -1266,6 +1281,8 @@ static bool dcn302_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.dp_hdmi21_pcon_support = true; + /* read VBIOS LTTPR caps */ if (ctx->dc_bios->funcs->get_lttpr_caps) { enum bp_result bp_query_result; diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 527d5c902878..c14d35894b2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -96,7 +96,13 @@ static const struct dc_debug_options debug_defaults_diags = { .dwb_fi_phase = -1, // -1 = disable .dmub_command_table = true, .enable_tri_buf = true, - .disable_psr = true, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, }; enum dcn303_clk_src_array_id { @@ -156,7 +162,6 @@ static const struct dc_plane_cap plane_cap = { mm ## reg_name /* DCN */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -189,6 +194,9 @@ static const struct dc_plane_cap plane_cap = { .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define SRII_MPC_RMU(reg_name, block, id)\ .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name @@ -425,6 +433,7 @@ static struct clock_source *dcn303_clock_source_create(struct dc_context *ctx, s return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -1055,6 +1064,10 @@ static void dcn303_destroy_resource_pool(struct resource_pool **pool) *pool = NULL; } +static void dcn303_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { @@ -1082,6 +1095,7 @@ static struct resource_funcs dcn303_res_pool_funcs = { .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn303_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn303_get_panel_config_defaults, }; static struct dc_cap_funcs cap_funcs = { @@ -1198,6 +1212,8 @@ static bool dcn303_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.dp_hdmi21_pcon_support = true; + /* read VBIOS LTTPR caps */ if (ctx->dc_bios->funcs->get_lttpr_caps) { enum bp_result bp_query_result; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c index de5e18c2a3ac..24e9ff65434d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c @@ -134,23 +134,10 @@ static void apg31_se_audio_setup( /* Disable forced mem power off */ REG_UPDATE(APG_MEM_PWR, APG_MEM_PWR_FORCE, 0); - - apg31_enable(apg); -} - -static void apg31_audio_mute_control( - struct apg *apg, - bool mute) -{ - if (mute) - 
apg31_disable(apg); - else - apg31_enable(apg); } static struct apg_funcs dcn31_apg_funcs = { .se_audio_setup = apg31_se_audio_setup, - .audio_mute_control = apg31_audio_mute_control, .enable_apg = apg31_enable, .disable_apg = apg31_disable, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h index 24f568e120d8..1b81f6773c53 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h @@ -84,10 +84,6 @@ struct apg_funcs { unsigned int az_inst, struct audio_info *audio_info); - void (*audio_mute_control)( - struct apg *apg, - bool mute); - void (*enable_apg)( struct apg *apg); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c index 814f401db3b3..16639bd03adf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c @@ -600,14 +600,6 @@ static void dcn31_hpo_dp_stream_enc_map_stream_to_link( } } -static void dcn31_hpo_dp_stream_enc_mute_control( - struct hpo_dp_stream_encoder *enc, - bool mute) -{ - ASSERT(enc->apg); - enc->apg->funcs->audio_mute_control(enc->apg, mute); -} - static void dcn31_hpo_dp_stream_enc_audio_setup( struct hpo_dp_stream_encoder *enc, unsigned int az_inst, @@ -726,7 +718,6 @@ static const struct hpo_dp_stream_encoder_funcs dcn30_str_enc_funcs = { .stop_dp_info_packets = dcn31_hpo_dp_stream_enc_stop_dp_info_packets, .dp_set_dsc_pps_info_packet = dcn31_hpo_dp_stream_enc_set_dsc_pps_info_packet, .map_stream_to_link = dcn31_hpo_dp_stream_enc_map_stream_to_link, - .audio_mute_control = dcn31_hpo_dp_stream_enc_mute_control, .dp_audio_setup = dcn31_hpo_dp_stream_enc_audio_setup, .dp_audio_enable = dcn31_hpo_dp_stream_enc_audio_enable, .dp_audio_disable = dcn31_hpo_dp_stream_enc_audio_disable, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index bdf101547484..4226a051df41 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -89,7 +89,8 @@ static void enable_memory_low_power(struct dc *dc) REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1); } - if (dc->debug.enable_mem_low_power.bits.mpc) + if (dc->debug.enable_mem_low_power.bits.mpc && + dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode) dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode(dc->res_pool->mpc); @@ -141,7 +142,8 @@ void dcn31_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb)) { hws->funcs.bios_golden_init(dc); - hws->funcs.disable_vga(dc->hwseq); + if (hws->funcs.disable_vga) + hws->funcs.disable_vga(dc->hwseq); } // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) @@ -621,3 +623,43 @@ void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable) if (hws->ctx->dc->debug.hpo_optimization) REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable); } +void dcn31_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust) +{ + int i = 0; + struct drr_params params = {0}; + unsigned int event_triggers = 0x2;/*Bit[1]: OTG_TRIG_A*/ + unsigned int num_frames = 2; + params.vertical_total_max = adjust.v_total_max; + params.vertical_total_min = adjust.v_total_min; + params.vertical_total_mid = adjust.v_total_mid; + params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; + for (i = 0; i < num_pipes; i++) { + if 
((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) + pipe_ctx[i]->stream_res.tg->funcs->set_drr( + pipe_ctx[i]->stream_res.tg, ¶ms); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx[i]->stream_res.tg, + event_triggers, num_frames); + } + } +} +void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx, + int num_pipes, const struct dc_static_screen_params *params) +{ + unsigned int i; + unsigned int triggers = 0; + if (params->triggers.surface_update) + triggers |= 0x600;/*bit 9 and bit10 : 110 0000 0000*/ + if (params->triggers.cursor_update) + triggers |= 0x10;/*bit4*/ + if (params->triggers.force_trigger) + triggers |= 0x1; + for (i = 0; i < num_pipes; i++) + pipe_ctx[i]->stream_res.tg->funcs-> + set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h index edfc01d6ad73..e7e03a8722e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h @@ -56,4 +56,8 @@ bool dcn31_is_abm_supported(struct dc *dc, void dcn31_init_pipes(struct dc *dc, struct dc_state *context); void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable); +void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx, + int num_pipes, const struct dc_static_screen_params *params); +void dcn31_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust); #endif /* __DC_HWSS_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 3a32810bbe38..7c2da70ffe21 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -64,9 +64,9 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .prepare_bandwidth = dcn20_prepare_bandwidth, .optimize_bandwidth = dcn20_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn31_set_drr, .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, + .set_static_screen_control = dcn31_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, .set_avmute = dcn30_set_avmute, .log_hw_state = dcn10_log_hw_state, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index 63a677c8ee27..fe449f7aa771 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -40,6 +40,7 @@ #define FN(reg_name, field_name) \ optc1->tg_shift->field_name, optc1->tg_mask->field_name +#define STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN 0x2000 /*bit 13*/ static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, struct dc_crtc_timing *timing) { @@ -231,6 +232,32 @@ void optc3_init_odm(struct timing_generator *optc) OPTC_MEM_SEL, 0); optc1->opp_count = 1; } +void optc31_set_static_screen_control( + struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t framecount; + uint32_t events; + + if (num_frames > 0xFF) + num_frames = 0xFF; + 
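/*
 * [Editor's note: illustrative sketch only, not part of the patch.]
 * In this hunk the new dcn31_set_static_screen_control() packs its trigger
 * selection into a raw OTG_STATIC_SCREEN_EVENT_MASK value (bits 9/10 for
 * surface updates, bit 4 for cursor updates, bit 0 for a forced trigger),
 * and the OPTC-level optc31_set_static_screen_control() then strips the DRR
 * double-buffer-update bit (bit 13) before programming the register.
 * A self-contained plain-C sketch of that bit packing, using made-up demo
 * types in place of the real dc structs:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_DRR_DOUBLE_BUFFER_UPDATE_EN 0x2000 /* bit 13 */

/* hypothetical stand-in for dc_static_screen_params.triggers */
struct demo_triggers {
	bool surface_update;
	bool cursor_update;
	bool force_trigger;
};

static uint32_t demo_build_event_mask(const struct demo_triggers *t)
{
	uint32_t triggers = 0;

	if (t->surface_update)
		triggers |= 0x600;	/* bits 9 and 10 */
	if (t->cursor_update)
		triggers |= 0x10;	/* bit 4 */
	if (t->force_trigger)
		triggers |= 0x1;	/* bit 0 */

	/* the OPTC layer refuses to leave the DRR double-buffer bit set */
	return triggers & ~DEMO_DRR_DOUBLE_BUFFER_UPDATE_EN;
}

int main(void)
{
	struct demo_triggers t = { .surface_update = true };

	/* prints 0x600: only the surface-update bits survive */
	printf("event mask: %#x\n", (unsigned)demo_build_event_mask(&t));
	return 0;
}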
REG_GET_2(OTG_STATIC_SCREEN_CONTROL, + OTG_STATIC_SCREEN_EVENT_MASK, &events, + OTG_STATIC_SCREEN_FRAME_COUNT, &framecount); + + if (events == event_triggers && num_frames == framecount) + return; + if ((event_triggers & STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN) + != 0) + event_triggers = event_triggers & + ~STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN; + + REG_UPDATE_2(OTG_STATIC_SCREEN_CONTROL, + OTG_STATIC_SCREEN_EVENT_MASK, event_triggers, + OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); +} static struct timing_generator_funcs dcn31_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -266,7 +293,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = { .set_drr = optc31_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, + .set_static_screen_control = optc31_set_static_screen_control, .program_stereo = optc1_program_stereo, .is_stereo_left_eye = optc1_is_stereo_left_eye, .tg_init = optc3_tg_init, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h index 30b81a448ce2..5fc6c63580d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h @@ -263,5 +263,8 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc); void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); void optc3_init_odm(struct timing_generator *optc); - +void optc31_set_static_screen_control( + struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames); #endif /* __DC_OPTC_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index fddc21a5a04c..3ca517dcc82d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -119,8 +119,6 @@ enum dcn31_clk_src_array_id { */ /* DCN */ -/* TODO awful hack. 
fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -153,6 +151,9 @@ enum dcn31_clk_src_array_id { .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define DCCG_SRII(reg_name, block, id)\ .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name @@ -911,6 +912,10 @@ static const struct dc_debug_options debug_defaults_diags = { }; static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, .ilr = { .optimize_edp_link_rate = true, }, @@ -1625,6 +1630,7 @@ static struct clock_source *dcn31_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -1634,6 +1640,31 @@ static bool is_dual_plane(enum surface_pixel_format format) return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; } +int dcn31x_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + uint32_t pipe_cnt; + int i; + + dc_assert_fp_enabled(); + + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + for (i = 0; i < pipe_cnt; i++) { + pipes[i].pipe.src.gpuvm = 1; + if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) { + //pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; + pipes[i].pipe.src.hostvm = dc->vm_pa_config.is_hvm_enabled; + } else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_FALSE) + pipes[i].pipe.src.hostvm = false; + else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_TRUE) + pipes[i].pipe.src.hostvm = true; + } + return pipe_cnt; +} + int dcn31_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1645,7 +1676,7 @@ int dcn31_populate_dml_pipes_from_context( bool upscaled = false; DC_FP_START(); - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); DC_FP_END(); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { @@ -1675,12 +1706,6 @@ int dcn31_populate_dml_pipes_from_context( dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); DC_FP_END(); - if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) - pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; - else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_FALSE) - pipes[pipe_cnt].pipe.src.hostvm = false; - else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_TRUE) - pipes[pipe_cnt].pipe.src.hostvm = true; if (pipes[pipe_cnt].dout.dsc_enable) { switch (timing->display_color_depth) { @@ -1898,6 +1923,8 @@ static bool dcn31_resource_construct( dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; dc->caps.dp_hpo = true; dc->caps.dp_hdmi21_pcon_support = true; dc->caps.edp_dsc_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 389a8938ee45..0b769ee71405 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -104,7 +104,7 @@ static void dccg314_set_pixel_rate_div( } dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); - if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA || (k1 == cur_k1 && k2 == cur_k2)) + if (k1 == cur_k1 && k2 == cur_k2) return; switch (otg_inst) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index 5b6c2d94ec71..31feb4b0edee 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -66,9 +66,9 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .prepare_bandwidth = dcn20_prepare_bandwidth, .optimize_bandwidth = dcn20_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn31_set_drr, .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, + .set_static_screen_control = dcn31_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, .set_avmute = dcn30_set_avmute, .log_hw_state = dcn10_log_hw_state, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 7dd36e402bac..41edbd64ea21 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -228,7 +228,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .set_drr = optc31_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, + .set_static_screen_control = optc31_set_static_screen_control, .program_stereo = optc1_program_stereo, .is_stereo_left_eye = optc1_is_stereo_left_eye, .tg_init = optc3_tg_init, @@ -241,7 +241,6 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .set_dsc_config = optc3_set_dsc_config, .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, - .set_odm_bypass = optc3_set_odm_bypass, .set_odm_combine = optc314_set_odm_combine, .get_optc_source = optc2_get_optc_source, .set_out_mux = optc3_set_out_mux, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 9066c511a052..f9ea1e86707f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -184,6 +184,9 @@ enum dcn31_clk_src_array_id { .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define DCCG_SRII(reg_name, block, id)\ .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name @@ -883,6 +886,7 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .enable_z9_disable_interface = true, + .psr_skip_crtc_disable = true, .disable_dmcu = true, .force_abm_enable = false, .timing_trace = false, @@ -937,6 +941,10 @@ static const struct dc_debug_options debug_defaults_diags = { }; static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, .ilr = { .optimize_edp_link_rate 
= true, }, @@ -1766,6 +1774,8 @@ static bool dcn314_resource_construct( dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; dc->caps.dp_hpo = true; dc->caps.dp_hdmi21_pcon_support = true; dc->caps.edp_dsc_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index 58746c437554..7887078c5f64 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -151,8 +151,6 @@ enum dcn31_clk_src_array_id { */ /* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -185,6 +183,9 @@ enum dcn31_clk_src_array_id { .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define DCCG_SRII(reg_name, block, id)\ .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name @@ -907,6 +908,10 @@ static const struct dc_debug_options debug_defaults_diags = { }; static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, .ilr = { .optimize_edp_link_rate = true, }, @@ -1623,6 +1628,7 @@ static struct clock_source *dcn31_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -1643,7 +1649,7 @@ static int dcn315_populate_dml_pipes_from_context( const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB; DC_FP_START(); - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); DC_FP_END(); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { @@ -1662,7 +1668,6 @@ static int dcn315_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; @@ -1703,7 +1708,9 @@ static int dcn315_populate_dml_pipes_from_context( dc->config.enable_4to1MPC = true; context->bw_ctx.dml.ip.det_buffer_size_kbytes = (max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / 4) * DCN3_15_CRB_SEGMENT_SIZE_KB; - } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { + } else if (!is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 5120 + && pipe->stream->timing.pix_clk_100hz < dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)) { /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; pipes[0].pipe.src.unbounded_req_mode = true; @@ -1779,6 +1786,8 @@ static bool dcn315_resource_construct( dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; dc->caps.dp_hpo = true; 
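/*
 * [Editor's note: illustrative sketch only, not part of the patch.]
 * dcn315_populate_dml_pipes_from_context() in the hunks above sizes the
 * per-pipe DET out of whatever CRB is left after the minimum compressed
 * buffer, rounding down to whole CRB segments. A rough standalone model of
 * that rounding; the 64 KB segment and 128 KB minimum-compbuf values here
 * are assumed for the example rather than taken from the DCN3.15 headers:
 */
#include <stdio.h>

#define DEMO_CRB_SEGMENT_SIZE_KB 64   /* assumed segment granularity */
#define DEMO_MIN_COMPBUF_SIZE_KB 128  /* assumed minimum compressed buffer */

static int demo_det_size_kb(int config_return_buffer_kb, int pipe_cnt)
{
	int max_usable_det = config_return_buffer_kb - DEMO_MIN_COMPBUF_SIZE_KB;

	/* split across the pipes, then align down to a whole segment */
	return (max_usable_det / DEMO_CRB_SEGMENT_SIZE_KB / pipe_cnt)
		* DEMO_CRB_SEGMENT_SIZE_KB;
}

int main(void)
{
	/* e.g. a 1664 KB return buffer shared four ways -> 384 KB of DET per pipe */
	printf("%d KB per pipe\n", demo_det_size_kb(1664, 4));
	return 0;
}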
dc->caps.dp_hdmi21_pcon_support = true; dc->caps.edp_dsc_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 6b40a11ac83a..b4d5076e124c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -142,8 +142,6 @@ enum dcn31_clk_src_array_id { */ /* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg #define BASE(seg) BASE_INNER(seg) @@ -176,6 +174,9 @@ enum dcn31_clk_src_array_id { .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define DCCG_SRII(reg_name, block, id)\ .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name @@ -906,6 +907,10 @@ static const struct dc_debug_options debug_defaults_diags = { }; static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, .ilr = { .optimize_edp_link_rate = true, }, @@ -1646,7 +1651,7 @@ static int dcn316_populate_dml_pipes_from_context( const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_16_MIN_COMPBUF_SIZE_KB; DC_FP_START(); - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); DC_FP_END(); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { @@ -1665,7 +1670,6 @@ static int dcn316_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; @@ -1781,6 +1785,8 @@ static bool dcn316_resource_construct( dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; dc->caps.dp_hpo = true; dc->caps.dp_hdmi21_pcon_support = true; dc->caps.edp_dsc_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index df4f25119142..e4472c6be6c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -225,11 +225,7 @@ static void dccg32_set_dtbclk_dto( } else { REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], DTBCLK_DTO_ENABLE[params->otg_inst], 0, - PIPE_DTO_SRC_SEL[params->otg_inst], 1); - if (params->is_hdmi) - REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], - PIPE_DTO_SRC_SEL[params->otg_inst], 0); - + PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 
0 : 1); REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c index 9fbb72369c10..5947c2cb0f30 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c @@ -41,6 +41,10 @@ #define FN(reg_name, field_name) \ hubbub2->shifts->field_name, hubbub2->masks->field_name +/** + * @DCN32_CRB_SEGMENT_SIZE_KB: Maximum Configurable Return Buffer size for + * DCN32 + */ #define DCN32_CRB_SEGMENT_SIZE_KB 64 static void dcn32_init_crb(struct hubbub *hubbub) @@ -68,6 +72,23 @@ static void dcn32_init_crb(struct hubbub *hubbub) REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x47F); } +void hubbub32_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + uint32_t request_limit = 3 * memory_channel_count * words_per_channel / 4; + + ASSERT((request_limit & (~0xFFF)) == 0); //field is only 24 bits long + ASSERT(request_limit > 0); //field is only 24 bits long + + if (request_limit > 0xFFF) + request_limit = 0xFFF; + + if (request_limit > 0) + REG_UPDATE(SDPIF_REQUEST_RATE_LIMIT, SDPIF_REQUEST_RATE_LIMIT, request_limit); +} + + void dcn32_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigned int det_buffer_size_in_kbyte) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); @@ -945,6 +966,7 @@ static const struct hubbub_funcs hubbub32_funcs = { .init_crb = dcn32_init_crb, .hubbub_read_state = hubbub2_read_state, .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow, + .set_request_limit = hubbub32_set_request_limit }; void hubbub32_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h index cda94e0e31bf..786f9ce07f92 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h @@ -82,7 +82,8 @@ SR(DCN_VM_FAULT_ADDR_MSB),\ SR(DCN_VM_FAULT_ADDR_LSB),\ SR(DCN_VM_FAULT_CNTL),\ - SR(DCN_VM_FAULT_STATUS) + SR(DCN_VM_FAULT_STATUS),\ + SR(SDPIF_REQUEST_RATE_LIMIT) #define HUBBUB_MASK_SH_LIST_DCN32(mask_sh)\ HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \ @@ -159,7 +160,8 @@ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \ - HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh) + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh),\ + HUBBUB_SF(SDPIF_REQUEST_RATE_LIMIT, SDPIF_REQUEST_RATE_LIMIT, mask_sh) bool hubbub32_program_urgent_watermarks( struct hubbub *hubbub, @@ -200,4 +202,6 @@ void hubbub32_construct(struct dcn20_hubbub *hubbub2, int pixel_chunk_size_kb, int config_return_buffer_size_kb); +void hubbub32_set_request_limit(struct hubbub *hubbub, int umc_count, int words_per_umc); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index d0b46a3e0155..b8767be1e4c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -227,8 +227,13 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c for (i = 0; i < 
dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + /* If PSR is supported on an eDP panel that's connected, but that panel is + * not in PSR at the time of trying to enter MALL SS, we have to include it + * in the static screen CAB calculation + */ if (!pipe->stream || !pipe->plane_state || - pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED || + (pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED && + pipe->stream->link->psr_settings.psr_allow_active) || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) continue; @@ -257,11 +262,11 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); - /* For DCC: - * meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1) + /*For DCC: + * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1) */ if (pipe->plane_state->dcc.enable) - num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel + + num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel + (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES); bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; @@ -311,8 +316,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2; + break; } - break; } } @@ -698,11 +703,7 @@ void dcn32_subvp_update_force_pstate(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // For SubVP + DRR, also force disallow on the DRR pipe - // (We will force allow in the DMUB sequence -- some DRR timings by default won't allow P-State so we have - // to force once the vblank is stretched). 
- if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN || - (pipe->stream->mall_stream_config.type == SUBVP_NONE && pipe->stream->ignore_msa_timing_param))) { + if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN)) { struct hubp *hubp = pipe->plane_res.hubp; if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) @@ -780,6 +781,10 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context) if (hws && hws->funcs.update_mall_sel) hws->funcs.update_mall_sel(dc, context); + //update subvp force pstate + if (hws && hws->funcs.subvp_update_force_pstate) + dc->hwseq->funcs.subvp_update_force_pstate(dc, context); + // Program FORCE_ONE_ROW_FOR_FRAME and CURSOR_REQ_MODE for main subvp pipes for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -980,15 +985,14 @@ void dcn32_init_hw(struct dc *dc) if (dc->res_pool->hubbub->funcs->init_crb) dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub); + if (dc->res_pool->hubbub->funcs->set_request_limit && dc->config.sdpif_request_limit_words_per_umc > 0) + dc->res_pool->hubbub->funcs->set_request_limit(dc->res_pool->hubbub, dc->ctx->dc_bios->vram_info.num_chans, dc->config.sdpif_request_limit_words_per_umc); + // Get DMCUB capabilities if (dc->ctx->dmub_srv) { dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub); dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; } - - /* Enable support for ODM and windowed MPO if policy flag is set */ - if (dc->debug.enable_single_display_2to1_odm_policy) - dc->config.enable_windowed_mpo_odm = true; } static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, @@ -1180,7 +1184,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + } else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; @@ -1365,6 +1369,33 @@ void dcn32_update_phantom_vp_position(struct dc *dc, } } +/* Treat the phantom pipe as if it needs to be fully enabled. + * If the pipe was previously in use but not phantom, it would + * have been disabled earlier in the sequence so we need to run + * the full enable sequence. 
+ */ +void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe) +{ + phantom_pipe->update_flags.raw = 0; + if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (phantom_pipe->stream && phantom_pipe->plane_state) { + phantom_pipe->update_flags.bits.enable = 1; + phantom_pipe->update_flags.bits.mpcc = 1; + phantom_pipe->update_flags.bits.dppclk = 1; + phantom_pipe->update_flags.bits.hubp_interdependent = 1; + phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; + phantom_pipe->update_flags.bits.gamut_remap = 1; + phantom_pipe->update_flags.bits.scaler = 1; + phantom_pipe->update_flags.bits.viewport = 1; + phantom_pipe->update_flags.bits.det_size = 1; + if (!phantom_pipe->top_pipe && !phantom_pipe->prev_odm_pipe) { + phantom_pipe->update_flags.bits.odm = 1; + phantom_pipe->update_flags.bits.global_sync = 1; + } + } + } +} + bool dcn32_dsc_pg_status( struct dce_hwseq *hws, unsigned int dsc_inst) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h index ac3657a5b9ea..7de36529cf99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h @@ -92,6 +92,8 @@ void dcn32_update_phantom_vp_position(struct dc *dc, struct dc_state *context, struct pipe_ctx *phantom_pipe); +void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe); + bool dcn32_dsc_pg_status( struct dce_hwseq *hws, unsigned int dsc_inst); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c index 45a949ba6f3f..dc4649458567 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c @@ -110,6 +110,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .update_visual_confirm_color = dcn20_update_visual_confirm_color, .update_phantom_vp_position = dcn32_update_phantom_vp_position, .update_dsc_pg = dcn32_update_dsc_pg, + .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c index 41b0baf8e183..c3b089ba511a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c @@ -211,7 +211,7 @@ static void mmhubbub32_config_mcif_arb(struct mcif_wb *mcif_wb, REG_UPDATE(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB_CLIENT_ARBITRATION_SLICE, params->arbitration_slice); } -const struct mcif_wb_funcs dcn32_mmhubbub_funcs = { +static const struct mcif_wb_funcs dcn32_mmhubbub_funcs = { .warmup_mcif = mmhubbub32_warmup_mcif, .enable_mcif = mmhubbub2_enable_mcif, .disable_mcif = mmhubbub2_disable_mcif, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 4edd0655965b..206a5ddbaf6d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -982,7 +982,7 @@ static bool mpc32_program_3dlut( return true; } -const struct mpc_funcs dcn32_mpc_funcs = { +static const struct mpc_funcs dcn32_mpc_funcs = { .read_mpcc_state = mpc1_read_mpcc_state, .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index 2b33eeb213e2..2ee798965bc2 
100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -167,6 +167,13 @@ static void optc32_phantom_crtc_post_enable(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); } +static void optc32_disable_phantom_otg(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); +} + static void optc32_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) { @@ -260,6 +267,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .enable_crtc = optc32_enable_crtc, .disable_crtc = optc32_disable_crtc, .phantom_crtc_post_enable = optc32_phantom_crtc_post_enable, + .disable_phantom_crtc = optc32_disable_phantom_otg, /* used by enable_timing_synchronization. Not need for FPGA */ .is_counter_moving = optc1_is_counter_moving, .get_position = optc1_get_position, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index d1598e3131f6..e4dbc8353ea3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -106,8 +106,6 @@ enum dcn32_clk_src_array_id { */ /* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] #define BASE(seg) BASE_INNER(seg) @@ -167,6 +165,9 @@ enum dcn32_clk_src_array_id { REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## temp_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define DCCG_SRII(reg_name, block, id)\ REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name @@ -722,7 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { /* Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ .enable_double_buffered_dsc_pg_support = true, .enable_dp_dig_pixel_rate_div_policy = 1, - .allow_sw_cursor_fallback = false, + .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" .alloc_extra_way_for_cursor = true, .min_prefetch_in_strobe_ns = 60000, // 60us }; @@ -830,6 +831,7 @@ static struct clock_source *dcn32_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -1679,7 +1681,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc, /* Shadow pipe has small viewport. 
*/ phantom_plane->clip_rect.y = 0; - phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; + phantom_plane->clip_rect.height = phantom_stream->src.height; phantom_plane->is_phantom = true; @@ -1719,8 +1721,29 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, return phantom_stream; } +void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context) +{ + int i; + struct dc_plane_state *phantom_plane = NULL; + struct dc_stream_state *phantom_stream = NULL; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->top_pipe && !pipe->prev_odm_pipe && + pipe->plane_state && pipe->stream && + pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + phantom_plane = pipe->plane_state; + phantom_stream = pipe->stream; + + dc_plane_state_retain(phantom_plane); + dc_stream_retain(phantom_stream); + } + } +} + // return true if removed piped from ctx, false otherwise -bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context) +bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update) { int i; bool removed_pipe = false; @@ -1747,14 +1770,23 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context) removed_pipe = true; } - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } + /* For non-full updates, a shallow copy of the current state + * is created. In this case we don't want to erase the current + * state (there can be 2 HIRQL threads, one in flip, and one in + * checkMPO) that can cause a race condition. + * + * This is just a workaround, needs a proper fix. + */ + if (!fast_update) { + // Clear all phantom stream info + if (pipe->stream) { + pipe->stream->mall_stream_config.type = SUBVP_NONE; + pipe->stream->mall_stream_config.paired_stream = NULL; + } - if (pipe->plane_state) { - pipe->plane_state->is_phantom = false; + if (pipe->plane_state) { + pipe->plane_state->is_phantom = false; + } } } return removed_pipe; @@ -1901,7 +1933,7 @@ int dcn32_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; if (context->stream_count == 1 && - context->stream_status[0].plane_count <= 1 && + context->stream_status[0].plane_count == 1 && !dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) && is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) && pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ && @@ -1919,30 +1951,36 @@ int dcn32_populate_dml_pipes_from_context( timing = &pipe->stream->timing; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + DC_FP_START(); + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19; - switch (pipe->stream->mall_stream_config.type) { - case SUBVP_MAIN: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; - subvp_in_use = true; - break; - case SUBVP_PHANTOM: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; - 
pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; - // Disallow unbounded req for SubVP according to DCHUB programming guide - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - break; - case SUBVP_NONE: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; - pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; - break; - default: - break; + /* Only populate DML input with subvp info for full updates. + * This is just a workaround -- needs a proper fix. + */ + if (!fast_validate) { + switch (pipe->stream->mall_stream_config.type) { + case SUBVP_MAIN: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; + subvp_in_use = true; + break; + case SUBVP_PHANTOM: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; + // Disallow unbounded req for SubVP according to DCHUB programming guide + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + break; + case SUBVP_NONE: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; + break; + default: + break; + } } pipes[pipe_cnt].dout.dsc_input_bpc = 0; @@ -2030,6 +2068,9 @@ static struct resource_funcs dcn32_res_pool_funcs = { .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, .remove_phantom_pipes = dcn32_remove_phantom_pipes, + .retain_phantom_pipes = dcn32_retain_phantom_pipes, + .save_mall_state = dcn32_save_mall_state, + .restore_mall_state = dcn32_restore_mall_state, }; @@ -2116,16 +2157,20 @@ static bool dcn32_resource_construct( dc->caps.cache_num_ways = 16; dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64 dc->caps.subvp_fw_processing_delay_us = 15; + dc->caps.subvp_drr_max_vblank_margin_us = 40; dc->caps.subvp_prefetch_end_to_mall_start_us = 15; dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.subvp_vertical_int_margin_us = 30; + dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin dc->caps.max_slave_planes = 2; dc->caps.max_slave_yuv_planes = 2; dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; dc->caps.dp_hpo = true; dc->caps.dp_hdmi21_pcon_support = true; dc->caps.edp_dsc_support = true; @@ -2409,6 +2454,9 @@ static bool dcn32_resource_construct( pool->base.oem_device = NULL; } + if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) + dc->config.sdpif_request_limit_words_per_umc = 16; + DC_FP_END(); return true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index f76120e67c16..13fbc574910b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -45,17 +45,6 @@ extern struct _vcs_dpi_ip_params_st dcn3_2_ip; extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; -/* Temp struct used to save and restore MALL config - * during validation. - * - * TODO: Move MALL config into dc_state instead of stream struct - * to avoid needing to save/restore. 
- */ -struct mall_temp_config { - struct mall_stream_config mall_stream_config[MAX_PIPES]; - bool is_phantom_plane[MAX_PIPES]; -}; - struct dcn32_resource_pool { struct resource_pool base; }; @@ -81,6 +70,9 @@ bool dcn32_release_post_bldn_3dlut( struct dc_transfer_func **shaper); bool dcn32_remove_phantom_pipes(struct dc *dc, + struct dc_state *context, bool fast_update); + +void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context); void dcn32_add_phantom_pipes(struct dc *dc, @@ -1244,7 +1236,8 @@ void dcn32_restore_mall_state(struct dc *dc, SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C), \ SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D), \ SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \ - SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS) \ + SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \ + SR(SDPIF_REQUEST_RATE_LIMIT) \ ) /* DCCG */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index fa3778849db1..783935c4e664 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -97,14 +97,14 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat * FLOOR(vp_x_start, blk_width) */ full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + - pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) + + pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) - (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) - * FLOOR(vp_y_start, blk_height) */ full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y + - full_vp_height + mblk_height - 1) / mblk_height * mblk_height) + + full_vp_height + mblk_height - 1) / mblk_height * mblk_height) - (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height); /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ @@ -121,14 +121,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat */ num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); + + /*For DCC: + * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1) + */ + if (pipe->plane_state->dcc.enable) + num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel + + (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES); + bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment // (MALL is 64-byte aligned) cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2; - /* For DCC divide by 256 */ - if (pipe->plane_state->dcc.enable) - cache_lines_per_plane = cache_lines_per_plane + (cache_lines_per_plane / 256) + 1; cache_lines_used += cache_lines_per_plane; } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 6292ac515d1a..d1f36df03c2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -109,8 +109,6 @@ enum dcn321_clk_src_array_id { */ /* DCN */ -/* TODO awful hack. 
fixup dcn20_dwb.h */ -#undef BASE_INNER #define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] #define BASE(seg) BASE_INNER(seg) @@ -174,6 +172,9 @@ enum dcn321_clk_src_array_id { REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + #define VUPDATE_SRII(reg_name, block, id)\ REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ reg ## reg_name ## _ ## block ## id @@ -720,7 +721,7 @@ static const struct dc_debug_options debug_defaults_drv = { /*must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ .enable_double_buffered_dsc_pg_support = true, .enable_dp_dig_pixel_rate_div_policy = 1, - .allow_sw_cursor_fallback = false, + .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" .alloc_extra_way_for_cursor = true, .min_prefetch_in_strobe_ns = 60000, // 60us }; @@ -742,7 +743,7 @@ static const struct dc_debug_options debug_defaults_diags = { .dmub_command_table = true, .enable_tri_buf = true, .use_max_lb = true, - .force_disable_subvp = true + .force_disable_subvp = true, }; @@ -829,6 +830,7 @@ static struct clock_source *dcn321_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } @@ -1619,6 +1621,9 @@ static struct resource_funcs dcn321_res_pool_funcs = { .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, .remove_phantom_pipes = dcn32_remove_phantom_pipes, + .retain_phantom_pipes = dcn32_retain_phantom_pipes, + .save_mall_state = dcn32_save_mall_state, + .restore_mall_state = dcn32_restore_mall_state, }; @@ -1704,10 +1709,12 @@ static bool dcn321_resource_construct( dc->caps.cache_num_ways = 16; dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32 dc->caps.subvp_fw_processing_delay_us = 15; + dc->caps.subvp_drr_max_vblank_margin_us = 40; dc->caps.subvp_prefetch_end_to_mall_start_us = 15; dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.subvp_vertical_int_margin_us = 30; + dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index e3e5c39895a3..af1c50ed905a 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -116,6 +116,11 @@ bool dm_helpers_dp_mst_start_top_mgr( bool dm_helpers_dp_mst_stop_top_mgr( struct dc_context *ctx, struct dc_link *link); + +void dm_helpers_dp_mst_update_branch_bandwidth( + struct dc_context *ctx, + struct dc_link *link); + /** * OS specific aux read callback. 
*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ca7d24000621..0ecea87cf48f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -33,6 +33,10 @@ ifdef CONFIG_PPC64 dml_ccflags := -mhard-float -maltivec endif +ifdef CONFIG_ARM64 +dml_rcflags := -mgeneral-regs-only +endif + ifdef CONFIG_CC_IS_GCC ifneq ($(call gcc-min-version, 70100),y) IS_OLD_GCC = 1 @@ -55,8 +59,6 @@ frame_warn_flag := -Wframe-larger-than=2048 endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) - -ifdef CONFIG_DRM_AMD_DC_DCN CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) @@ -88,7 +90,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) @@ -105,7 +106,18 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcf CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) -endif +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags) CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h index 74e86732e301..2cbdd75429ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h @@ -29,6 +29,13 @@ #define DC__PRESENT 1 #define DC__PRESENT__1 1 #define DC__NUM_DPP 4 + +/** + * @DC__VOLTAGE_STATES: + * + * Define the maximum amount of states supported by the ASIC. Every ASIC has a + * specific number of states; this macro defines the maximum number of states. 
+ */ #define DC__VOLTAGE_STATES 20 #define DC__NUM_DPP__4 1 #define DC__NUM_DPP__0_PRESENT 1 diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c index 99644d896222..c5e84190c17a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c @@ -27,6 +27,8 @@ #include "dcn10/dcn10_resource.h" #include "dcn10_fpu.h" +#include "resource.h" +#include "amdgpu_dm/dc_fpu.h" /** * DOC: DCN10 FPU manipulation Overview @@ -121,3 +123,37 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = { .writeback_dram_clock_change_latency_us = 23.0, .return_bus_width_bytes = 64, }; + +void dcn10_resource_construct_fp(struct dc *dc) +{ + dc_assert_fp_enabled(); + if (dc->ctx->dce_version == DCN_VERSION_1_01) { + struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc; + struct dcn_ip_params *dcn_ip = dc->dcn_ip; + struct display_mode_lib *dml = &dc->dml; + + dml->ip.max_num_dpp = 3; + /* TODO how to handle 23.84? */ + dcn_soc->dram_clock_change_latency = 23; + dcn_ip->max_num_dpp = 3; + } + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { + dc->dcn_soc->urgent_latency = 3; + dc->debug.disable_dmcu = true; + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f; + } + + dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width; + ASSERT(dc->dcn_soc->number_of_channels < 3); + if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/ + dc->dcn_soc->number_of_channels = 2; + + if (dc->dcn_soc->number_of_channels == 1) { + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f; + dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f; + dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f; + dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f; + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h index e74ed4b4ce5b..63219ecd8478 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h @@ -27,4 +27,6 @@ #ifndef __DCN10_FPU_H__ #define __DCN10_FPU_H__ +void dcn10_resource_construct_fp(struct dc *dc); + #endif /* __DCN20_FPU_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 45db40c41882..c26da3bb2892 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -565,7 +565,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dppclk_mhz = 847.06, .phyclk_mhz = 810.0, .socclk_mhz = 953.0, - .dscclk_mhz = 489.0, + .dscclk_mhz = 300.0, .dram_speed_mts = 2400.0, }, { @@ -576,7 +576,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dppclk_mhz = 960.00, .phyclk_mhz = 810.0, .socclk_mhz = 278.0, - .dscclk_mhz = 287.67, + .dscclk_mhz = 342.86, .dram_speed_mts = 2666.0, }, { @@ -587,7 +587,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dppclk_mhz = 1028.57, .phyclk_mhz = 810.0, .socclk_mhz = 715.0, - .dscclk_mhz = 318.334, + .dscclk_mhz = 369.23, .dram_speed_mts = 3200.0, }, { @@ -949,6 +949,7 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc int plane_count; int i; unsigned int optimized_min_dst_y_next_start_us; + bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > 1000.0; plane_count = 0; 
optimized_min_dst_y_next_start_us = 0; @@ -963,6 +964,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc * 2. single eDP, on link 0, 1 plane and stutter period > 5ms * Z10 only cases: * 1. single eDP, on link 0, 1 plane and stutter period >= 5ms + * Z8 cases: + * 1. stutter period sufficient * Zstate not allowed cases: * 1. Everything else */ @@ -989,12 +992,15 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000) return DCN_ZSTATE_SUPPORT_ALLOW; - else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr) - return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; + else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) + return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; else - return DCN_ZSTATE_SUPPORT_DISALLOW; - } else + return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY : DCN_ZSTATE_SUPPORT_DISALLOW; + } else if (allow_z8) { + return DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; + } else { return DCN_ZSTATE_SUPPORT_DISALLOW; + } } void dcn20_calculate_dlg_params( @@ -1296,6 +1302,8 @@ int dcn20_populate_dml_pipes_from_context( case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: pipes[pipe_cnt].dout.output_type = dm_dp; + if (is_dp_128b_132b_signal(&res_ctx->pipe_ctx[i])) + pipes[pipe_cnt].dout.output_type = dm_dp2p0; break; case SIGNAL_TYPE_EDP: pipes[pipe_cnt].dout.output_type = dm_edp; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index e1e92daba668..d4c0f9cdac8e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -520,9 +520,7 @@ void dcn30_fpu_calculate_wm_and_dlg( pipe_idx++; } - DC_FP_START(); dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); if (!pstate_en) /* Restore full p-state latency */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 479e2c1a1301..379729b02847 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -4851,7 +4851,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], - v->UrgentLatency, + v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 7dd0845d1bd9..b37d14369a62 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -483,7 +483,7 @@ void dcn31_calculate_wm_and_dlg_fp( int pipe_cnt, int vlevel) { - int i, pipe_idx, active_dpp_count = 0; + int i, pipe_idx, active_hubp_count = 0; double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; dc_assert_fp_enabled(); @@ -529,7 +529,7 @@ void dcn31_calculate_wm_and_dlg_fp( continue; if (context->res_ctx.pipe_ctx[i].plane_state) - active_dpp_count++; + active_hubp_count++; pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); pipes[pipe_idx].clks_cfg.dppclk_mhz = 
get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); @@ -547,9 +547,22 @@ void dcn31_calculate_wm_and_dlg_fp( } dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - /* For 31x apu pstate change is only supported if possible in vactive or if there are no active dpps */ + /* For 31x apu pstate change is only supported if possible in vactive*/ context->bw_ctx.bw.dcn.clk.p_state_change_support = - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive || !active_dpp_count; + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive; + /* If DCN isn't making memory requests we can allow pstate change and lower clocks */ + if (!active_hubp_count) { + context->bw_ctx.bw.dcn.clk.socclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0; + context->bw_ctx.bw.dcn.clk.dramclk_khz = 0; + context->bw_ctx.bw.dcn.clk.fclk_khz = 0; + context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (context->res_ctx.pipe_ctx[i].stream) + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; + } } void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) @@ -797,3 +810,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param else dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA); } + +int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc) +{ + return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index fd58b2561ec9..687d3522cc33 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -46,5 +46,10 @@ void dcn31_calculate_wm_and_dlg_fp( void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc); +int dcn31x_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); #endif /* __DCN31_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index b612edb14417..ec351c8418cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -1052,17 +1052,16 @@ static bool CalculatePrefetchSchedule( else bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; /*rev 99*/ - prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane); + prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; + prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; - 
prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; Tsw_oto = Lsw_oto * LineTime; - prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; #ifdef __DML_VBA_DEBUG__ dml_print("DML: HTotal: %d\n", myPipe->HTotal); @@ -5083,7 +5082,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], - v->UrgentLatency, + v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], @@ -5361,6 +5360,58 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->ModeSupport[i][j] = true; } else { v->ModeSupport[i][j] = false; +#ifdef __DML_VBA_DEBUG__ + if (v->ScaleRatioAndTapsSupport == false) + dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed"); + if (v->SourceFormatPixelAndScanSupport == false) + dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed"); + if (v->ViewportSizeSupport[i][j] == false) + dml_print("DML SUPPORT: ViewportSizeSupport failed"); + if (v->LinkCapacitySupport[i] == false) + dml_print("DML SUPPORT: LinkCapacitySupport failed"); + if (v->ODMCombine4To1SupportCheckOK[i] == false) + dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); + if (v->NotEnoughDSCUnits[i] == true) + dml_print("DML SUPPORT: NotEnoughDSCUnits"); + if (v->DTBCLKRequiredMoreThanSupported[i] == true) + dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported"); + if (v->ROBSupport[i][j] == false) + dml_print("DML SUPPORT: ROBSupport failed"); + if (v->DISPCLK_DPPCLK_Support[i][j] == false) + dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed"); + if (v->TotalAvailablePipesSupport[i][j] == false) + dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); + if (EnoughWritebackUnits == false) + dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); + if (v->WritebackLatencySupport == false) + dml_print("DML SUPPORT: WritebackLatencySupport failed"); + if (v->WritebackScaleRatioAndTapsSupport == false) + dml_print("DML SUPPORT: DSC422NativeNotSupported "); + if (v->CursorSupport == false) + dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); + if (v->PitchSupport == false) + dml_print("DML SUPPORT: PitchSupport failed"); + if (ViewportExceedsSurface == true) + dml_print("DML SUPPORT: ViewportExceedsSurface failed"); + if (v->PrefetchSupported[i][j] == false) + dml_print("DML SUPPORT: PrefetchSupported failed"); + if (v->DynamicMetadataSupported[i][j] == false) + dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); + if (v->TotalVerticalActiveBandwidthSupport[i][j] == false) + dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed"); + if (v->VRatioInPrefetchSupported[i][j] == false) + dml_print("DML SUPPORT: VRatioInPrefetchSupported failed"); + if (v->PTEBufferSizeNotExceeded[i][j] == false) + dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed"); + if (v->NonsupportedDSCInputBPC == true) + dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed"); + if (!((v->HostVMEnable == false + && v->ImmediateFlipRequirement[0] != 
dm_immediate_flip_required) + || v->ImmediateFlipSupportedForState[i][j] == true)) + dml_print("DML SUPPORT: ImmediateFlipRequirement failed"); + if (FMTBufferExceeded == true) + dml_print("DML SUPPORT: FMTBufferExceeded failed"); +#endif } } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 34b6c763a455..6a1cf6adea77 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -29,6 +29,7 @@ #include "dcn31/dcn31_hubbub.h" #include "dcn314_fpu.h" #include "dml/dcn20/dcn20_fpu.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dml/display_mode_vba.h" struct _vcs_dpi_ip_params_st dcn3_14_ip = { @@ -148,8 +149,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .num_states = 5, .sr_exit_time_us = 16.5, .sr_enter_plus_exit_time_us = 18.5, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, + .sr_exit_z8_time_us = 280.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .writeback_latency_us = 12.0, .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, @@ -264,11 +265,8 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; } - if ((int)(dcn3_14_soc.dram_clock_change_latency_us * 1000) - != dc->debug.dram_clock_change_latency_ns - && dc->debug.dram_clock_change_latency_ns) { - dcn3_14_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000; - } + dcn20_patch_bounding_box(dc, &dcn3_14_soc); + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314); else @@ -291,7 +289,7 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c dc_assert_fp_enabled(); - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { struct dc_crtc_timing *timing; @@ -318,8 +316,6 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; - pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 0d12fd079cd6..950669f2c10d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -1074,17 +1074,16 @@ static bool CalculatePrefetchSchedule( else bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; /*rev 99*/ - prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane); + prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; + prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 
- prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; Tsw_oto = Lsw_oto * LineTime; - prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; #ifdef __DML_VBA_DEBUG__ dml_print("DML: HTotal: %d\n", myPipe->HTotal); @@ -5180,7 +5179,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_ v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], - v->UrgentLatency, + v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 2abe3967f7fb..f94abd124021 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -256,16 +256,24 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, int vlevel) { const int max_latency_table_entries = 4; - const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; int dummy_latency_index = 0; + enum clock_change_support temp_clock_change_support = vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; dc_assert_fp_enabled(); while (dummy_latency_index < max_latency_table_entries) { + if (temp_clock_change_support != dm_dram_clock_change_unsupported) + vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + /* for subvp + DRR case, if subvp pipes are still present we support pstate */ + if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && + dcn32_subvp_in_use(dc, context)) + vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; + if (vlevel < context->bw_ctx.dml.vba.soc.num_states && vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) break; @@ -531,9 +539,12 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, unsigned int i, pipe_idx; struct pipe_ctx *pipe; uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; + unsigned int num_dpp; unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + struct dc_stream_state *main_stream = ref_pipe->stream; dc_assert_fp_enabled(); @@ -569,13 +580,26 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines; + // W/A for DCC corruption with certain high resolution timings. 
+ // Determing if pipesplit is used. If so, add meta_row_height to the phantom vactive. + num_dpp = vba->NoOfDPP[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]]; + phantom_vactive += num_dpp > 1 ? vba->meta_row_height[vba->pipe_plane[pipe_idx]] : 0; + + /* dc->debug.subvp_extra_lines 0 by default*/ + phantom_vactive += dc->debug.subvp_extra_lines; + // For backporch of phantom pipe, use vstartup of the main pipe phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); phantom_stream->dst.y = 0; phantom_stream->dst.height = phantom_vactive; + /* When scaling, DML provides the end to end required number of lines for MALL. + * dst.height is always correct for this case, but src.height is not which causes a + * delta between main and phantom pipe scaling outputs. Need to adjust src.height on + * phantom for this case. + */ phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; + phantom_stream->src.height = (double)phantom_vactive * (double)main_stream->src.height / (double)main_stream->dst.height; phantom_stream->timing.v_addressable = phantom_vactive; phantom_stream->timing.v_front_porch = 1; @@ -1128,7 +1152,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final == dm_prefetch_support_uclk_fclk_and_stutter) { context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; + dm_prefetch_support_fclk_and_stutter; /* There are params (such as FabricClock) that need to be recalculated * after validation fails (otherwise it will be 0). Calculation for * phantom vactive requires call into DML, so we must ensure all the @@ -1179,7 +1203,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) // remove phantom pipes and repopulate dml pipes if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context); + dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); @@ -1191,9 +1215,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, } } else { // Most populate phantom DLG params before programming hardware / timing for phantom pipe - DC_FP_START(); dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); - DC_FP_END(); /* Call validate_apply_pipe_split flags after calling DML getters for * phantom dlg params, or some of the VBA params indicating pipe split @@ -1230,7 +1252,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel) { - int i, pipe_idx; + int i, pipe_idx, active_hubp_count = 0; bool usr_retraining_support = false; bool unbounded_req_enabled = false; @@ -1275,6 +1297,8 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; + if (context->res_ctx.pipe_ctx[i].plane_state) + active_hubp_count++; pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, @@ -1296,10 +1320,23 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state 
*context, if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + if (context->res_ctx.pipe_ctx[i].plane_state) + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + else + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; pipe_idx++; } + /* If DCN isn't making memory requests we can allow pstate change and lower clocks */ + if (!active_hubp_count) { + context->bw_ctx.bw.dcn.clk.socclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0; + context->bw_ctx.bw.dcn.clk.dramclk_khz = 0; + context->bw_ctx.bw.dcn.clk.fclk_khz = 0; + context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + } /*save a original dppclock copy*/ context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; @@ -1481,7 +1518,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, return false; // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context); + dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate); dc->res_pool->funcs->update_soc_for_wm_a(dc, context); @@ -1494,11 +1531,8 @@ bool dcn32_internal_validate_bw(struct dc *dc, dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - if (!fast_validate) { - DC_FP_START(); + if (!fast_validate) dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); - DC_FP_END(); - } if (fast_validate || (dc->debug.dml_disallow_alternate_prefetch_modes && @@ -1734,6 +1768,10 @@ bool dcn32_internal_validate_bw(struct dc *dc, } if (repopulate_pipes) { + int flag_max_mpc_comb = vba->maxMpcComb; + int flag_vlevel = vlevel; + int i; + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); /* repopulate_pipes = 1 means the pipes were either split or merged. In this case @@ -1741,10 +1779,28 @@ bool dcn32_internal_validate_bw(struct dc *dc, * ensure all the params are calculated correctly. We do not need to run the * pipe split check again after this call (pipes are already split / merged). 
* */ - if (!fast_validate) { - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_uclk_fclk_and_stutter_if_possible; - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel == context->bw_ctx.dml.soc.num_states) { + /* failed after DET size changes */ + goto validate_fail; + } else if (flag_max_mpc_comb == 0 && + flag_max_mpc_comb != context->bw_ctx.dml.vba.maxMpcComb) { + /* check the context constructed with pipe split flags is still valid*/ + bool flags_valid = false; + for (i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) { + if (vba->ModeSupport[i][flag_max_mpc_comb]) { + vba->maxMpcComb = flag_max_mpc_comb; + vba->VoltageLevel = i; + vlevel = i; + flags_valid = true; + } + } + + /* this should never happen */ + if (!flags_valid) + goto validate_fail; } } *vlevel_out = vlevel; @@ -1775,14 +1831,38 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int dummy_latency_index = 0; int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + bool subvp_in_use = dcn32_subvp_in_use(dc, context); unsigned int min_dram_speed_mts_margin; + bool need_fclk_lat_as_dummy = false; + bool is_subvp_p_drr = true; dc_assert_fp_enabled(); - // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK - if (!pstate_en && dcn32_subvp_in_use(dc, context)) { - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; - pstate_en = true; + /* need to find dummy latency index for subvp */ + if (subvp_in_use) { + /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */ + if (!pstate_en) { + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; + pstate_en = true; + is_subvp_p_drr = true; + } + dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc, + context, pipes, pipe_cnt, vlevel); + + /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so prefetch is + * scheduled correctly to account for dummy pstate. + */ + if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) { + need_fclk_lat_as_dummy = true; + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + } + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + if (is_subvp_p_drr) { + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; + } } context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; @@ -1806,9 +1886,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so * prefetch is scheduled correctly to account for dummy pstate. 
*/ - if (dummy_latency_index == 0) + if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) { + need_fclk_lat_as_dummy = true; context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + } dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; @@ -1916,7 +1998,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; } - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) { /* find largest table entry that is lower than dram speed, * but lower than DPM0 still uses DPM0 */ @@ -1996,7 +2078,8 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && dummy_latency_index == 0) + /* for proper prefetch calculations, if dummy lat > fclk lat, use fclk lat = dummy lat */ + if (need_fclk_lat_as_dummy) context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; @@ -2009,10 +2092,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); - if (dummy_latency_index == 0) - context->bw_ctx.dml.soc.fclk_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; } + + /* revert fclk lat changes if required */ + if (need_fclk_lat_as_dummy) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; } static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, @@ -2159,9 +2244,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); } // Insert the max DCFCLK @@ -2169,9 +2252,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); // Insert the UCLK DPMS for (i = 0; i < num_uclk_dpms; i++) { @@ -2179,9 +2260,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); } // If FCLK is coarse grained, insert individual DPMs. 
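The hunks above and below drop the per-call DC_FP_START()/DC_FP_END() pairs around insert_entry_into_table_sorted(). A minimal sketch of the assumed pattern: kernel-FPU protection is taken once by a non-FPU caller, while the *_fpu helpers only assert that it is already held. The dcn32_update_bw_bounding_box() wrapper and example_fpu_helper() below are illustrative assumptions, not part of this patch; only DC_FP_START()/DC_FP_END(), dc_assert_fp_enabled() and dcn32_update_bw_bounding_box_fpu() appear in the surrounding code.

    /* Assumed caller: enter the FPU context once around the whole table build. */
    void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
    {
    	DC_FP_START();                                   /* save x86 FPU state, preemption off   */
    	dcn32_update_bw_bounding_box_fpu(dc, bw_params); /* runs build_synthetic_soc_states()    */
    	DC_FP_END();                                     /* restore FPU state                    */
    }

    /* FPU-only helpers no longer re-guard; they only check the context. */
    static void example_fpu_helper(void)
    {
    	dc_assert_fp_enabled();  /* warns if called without an active DC_FP_START() */
    	/* ... double-precision DML arithmetic ... */
    }

If the surrounding code already runs under DC_FP_START(), the per-entry guards were redundant, and removing them avoids repeated save/restore of the FPU state for every synthetic DPM entry.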
@@ -2191,9 +2270,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; - DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); } } // If FCLK fine grained, only insert max @@ -2202,9 +2279,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = max_fclk_mhz; entry.dram_speed_mts = 0; - DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); } // At this point, the table contains all "points of interest" based on @@ -2539,3 +2614,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa } } +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 3a3dc2ce4c73..ab010e7e840b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, void dcn32_patch_dpm_table(struct clk_bw_params *bw_params); +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9afd9ba23fb2..4b8f5fa0f0ad 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -670,6 +670,25 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; } + v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->UrgentLatency, + mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + v->swath_width_luma_ub, + v->swath_width_chroma_ub, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + mode_lib->vba.DETBufferSizeY, + mode_lib->vba.DETBufferSizeC, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] && !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ? 
@@ -1665,6 +1684,8 @@ static void mode_support_configuration(struct vba_vars_st *v, && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true && mode_lib->vba.NonsupportedDSCInputBPC == false && !mode_lib->vba.ExceededMALLSize + && (mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] == false + || i == v->soc.num_states - 1) && ((mode_lib->vba.HostVMEnable == false && !mode_lib->vba.ImmediateFlipRequiredFinal) || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) @@ -3158,6 +3179,25 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); + mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.UrgLatency[i], + mode_lib->vba.SwathHeightYThisState, + mode_lib->vba.SwathHeightCThisState, + mode_lib->vba.swath_width_luma_ub_this_state, + mode_lib->vba.swath_width_chroma_ub_this_state, + mode_lib->vba.BytePerPixelInDETY, + mode_lib->vba.BytePerPixelInDETC, + mode_lib->vba.DETBufferSizeYThisState, + mode_lib->vba.DETBufferSizeCThisState, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index debe46b24a3e..5af601cff1a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -6228,3 +6228,72 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; } + +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]) +{ + int k; + double SwathSizeAllSurfaces = 0; + double SwathSizeAllSurfacesInFetchTimeUs; + double DETSwathLatencyHidingUs; + double DETSwathLatencyHidingYUs; + double DETSwathLatencyHidingCUs; + double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; + double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; + bool NotEnoughDETSwathFillLatencyHiding = false; + + /* calculate sum of single swath size for all pipes in bytes*/ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; + + if (SwathHeightC[k] != 0) + SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; + else + SwathSizePerSurfaceC[k] = 0; + + SwathSizeAllSurfaces +=
SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; + } + + SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; + + /* ensure all DET - 1 swath can hide a fetch for all surfaces */ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + double LineTime = HTotal[k] / PixelClock[k]; + + /* only care if surface is not phantom */ + if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { + DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; + + if (SwathHeightC[k] != 0) { + DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; + + DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); + } else { + DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; + } + + /* DET must be able to hide time to fetch 1 swath for each surface */ + if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { + NotEnoughDETSwathFillLatencyHiding = true; + break; + } + } + } + + return NotEnoughDETSwathFillLatencyHiding; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 3989c2a28fae..779c6805f599 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -1141,4 +1141,22 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf double *FractionOfUrgentBandwidth, bool *ImmediateFlipBandwidthSupport); +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index f394b3f3922a..0bffae95f3a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -105,14 +105,39 @@ enum source_macro_tile_size { enum cursor_bpp { dm_cur_2bit = 0, dm_cur_32bit = 1, dm_cur_64bit = 2 }; + +/** + * @enum clock_change_support - It represents possible reasons to change the DRAM clock. + * + * DC may change the DRAM clock during its execution, and this enum tracks all + * the available methods. Note that every ASIC has their specific way to deal + * with these clock switch. 
+ */ enum clock_change_support { + /** + * @dm_dram_clock_change_uninitialized: If you see this, we might have + * a code initialization issue + */ dm_dram_clock_change_uninitialized = 0, + + /** + * @dm_dram_clock_change_vactive: Support DRAM switch in VActive + */ dm_dram_clock_change_vactive, + + /** + * @dm_dram_clock_change_vblank: Support DRAM switch in VBlank + */ dm_dram_clock_change_vblank, + dm_dram_clock_change_vactive_w_mall_full_frame, dm_dram_clock_change_vactive_w_mall_sub_vp, dm_dram_clock_change_vblank_w_mall_full_frame, dm_dram_clock_change_vblank_w_mall_sub_vp, + + /** + * @dm_dram_clock_change_unsupported: Do not support DRAM switch + */ dm_dram_clock_change_unsupported }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index a0207a8f8756..81e53e67cd0b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -419,6 +419,15 @@ struct vba_vars_st { double MinPixelChunkSizeBytes; unsigned int DCCMetaBufferSizeBytes; // Pipe/Plane Parameters + + /** @VoltageLevel: + * Every ASIC has a fixed number of DPM states, and some devices might + * have some particular voltage configuration that does not map + * directly to the DPM states. This field tells how many states the + * target device supports; even though this field combines the DPM and + * special SOC voltages, it mostly matches the total number of DPM + * states. + */ int VoltageLevel; double FabricClock; double DRAMSpeed; @@ -1041,6 +1050,7 @@ struct vba_vars_st { double MinFullDETBufferingTime; double AverageReadBandwidthGBytePerSecond; bool FirstMainPlane; + bool NotEnoughDETSwathFillLatencyHiding; unsigned int ViewportWidthChroma[DC__NUM_DPP__MAX]; unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX]; @@ -1224,6 +1234,7 @@ struct vba_vars_st { unsigned int BlockWidthC[DC__NUM_DPP__MAX]; unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX]; bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2]; + bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2]; struct dummy_vars dummy_vars; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h index e5fac9f4181d..dcff0dd2b6a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h @@ -25,7 +25,7 @@ */ -const qp_table qp_table_422_10bpc_min = { +static const qp_table qp_table_422_10bpc_min = { { 6, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} }, { 6.5, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} }, { 7, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 7, 9, 9, 9, 11, 15} }, @@ -58,7 +58,7 @@ const qp_table qp_table_422_10bpc_min = { }; -const qp_table qp_table_444_8bpc_max = { +static const qp_table qp_table_444_8bpc_max = { { 6, { 4, 6, 8, 8, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 15} }, { 6.5, { 4, 6, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15} }, { 7, { 4, 5, 7, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 13, 14} }, @@ -99,7 +99,7 @@ const qp_table qp_table_444_8bpc_max = { }; -const qp_table qp_table_420_12bpc_max = { +static const qp_table qp_table_420_12bpc_max = { { 4, {11, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 21, 22} }, { 4.5, {10, 11, 12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} }, { 5, { 9, 11, 12, 13, 14, 15, 15, 16, 17, 17, 18, 18, 19, 20, 21} }, @@ -132,7 +132,7 @@ const qp_table qp_table_420_12bpc_max = { }; -const qp_table qp_table_444_10bpc_min = { +static const qp_table 
qp_table_444_10bpc_min = { { 6, { 0, 4, 7, 7, 9, 9, 9, 9, 9, 10, 10, 10, 10, 12, 18} }, { 6.5, { 0, 4, 6, 7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 12, 18} }, { 7, { 0, 4, 6, 6, 8, 8, 8, 8, 8, 9, 9, 10, 10, 12, 17} }, @@ -185,7 +185,7 @@ const qp_table qp_table_444_10bpc_min = { }; -const qp_table qp_table_420_8bpc_max = { +static const qp_table qp_table_420_8bpc_max = { { 4, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 13, 14} }, { 4.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} }, { 5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 13} }, @@ -206,7 +206,7 @@ const qp_table qp_table_420_8bpc_max = { }; -const qp_table qp_table_444_8bpc_min = { +static const qp_table qp_table_444_8bpc_min = { { 6, { 0, 1, 3, 3, 5, 5, 5, 5, 5, 6, 6, 6, 6, 9, 14} }, { 6.5, { 0, 1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 9, 14} }, { 7, { 0, 0, 2, 2, 4, 4, 4, 4, 4, 5, 5, 6, 6, 9, 13} }, @@ -247,7 +247,7 @@ const qp_table qp_table_444_8bpc_min = { }; -const qp_table qp_table_444_12bpc_min = { +static const qp_table qp_table_444_12bpc_min = { { 6, { 0, 5, 11, 11, 13, 13, 13, 13, 13, 14, 14, 14, 14, 17, 22} }, { 6.5, { 0, 5, 10, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 17, 22} }, { 7, { 0, 5, 10, 10, 12, 12, 12, 12, 12, 13, 13, 14, 14, 17, 21} }, @@ -312,7 +312,7 @@ const qp_table qp_table_444_12bpc_min = { }; -const qp_table qp_table_420_12bpc_min = { +static const qp_table qp_table_420_12bpc_min = { { 4, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 21} }, { 4.5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} }, { 5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} }, @@ -345,7 +345,7 @@ const qp_table qp_table_420_12bpc_min = { }; -const qp_table qp_table_422_12bpc_min = { +static const qp_table qp_table_422_12bpc_min = { { 6, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} }, { 6.5, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} }, { 7, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} }, @@ -386,7 +386,7 @@ const qp_table qp_table_422_12bpc_min = { }; -const qp_table qp_table_422_12bpc_max = { +static const qp_table qp_table_422_12bpc_max = { { 6, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} }, { 6.5, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} }, { 7, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20} }, @@ -427,7 +427,7 @@ const qp_table qp_table_422_12bpc_max = { }; -const qp_table qp_table_444_12bpc_max = { +static const qp_table qp_table_444_12bpc_max = { { 6, {12, 14, 16, 16, 17, 17, 17, 18, 19, 20, 20, 20, 20, 21, 23} }, { 6.5, {12, 14, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 20, 21, 23} }, { 7, {12, 13, 15, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 21, 22} }, @@ -492,7 +492,7 @@ const qp_table qp_table_444_12bpc_max = { }; -const qp_table qp_table_420_8bpc_min = { +static const qp_table qp_table_420_8bpc_min = { { 4, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 9, 13} }, { 4.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} }, { 5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} }, @@ -513,7 +513,7 @@ const qp_table qp_table_420_8bpc_min = { }; -const qp_table qp_table_422_8bpc_min = { +static const qp_table qp_table_422_8bpc_min = { { 6, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} }, { 6.5, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} }, { 7, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} }, @@ -538,7 +538,7 @@ const qp_table qp_table_422_8bpc_min = { }; -const qp_table qp_table_422_10bpc_max = { +static const qp_table qp_table_422_10bpc_max = { { 6, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 
16, 17} }, { 6.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} }, { 7, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16} }, @@ -571,7 +571,7 @@ const qp_table qp_table_422_10bpc_max = { }; -const qp_table qp_table_420_10bpc_max = { +static const qp_table qp_table_420_10bpc_max = { { 4, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 17, 18} }, { 4.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} }, { 5, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 16, 17} }, @@ -598,7 +598,7 @@ const qp_table qp_table_420_10bpc_max = { }; -const qp_table qp_table_420_10bpc_min = { +static const qp_table qp_table_420_10bpc_min = { { 4, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 13, 17} }, { 4.5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} }, { 5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} }, @@ -625,7 +625,7 @@ const qp_table qp_table_420_10bpc_min = { }; -const qp_table qp_table_444_10bpc_max = { +static const qp_table qp_table_444_10bpc_max = { { 6, { 8, 10, 12, 12, 13, 13, 13, 14, 15, 16, 16, 16, 16, 17, 19} }, { 6.5, { 8, 10, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 16, 17, 19} }, { 7, { 8, 9, 11, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 17, 18} }, @@ -678,7 +678,7 @@ const qp_table qp_table_444_10bpc_max = { }; -const qp_table qp_table_422_8bpc_max = { +static const qp_table qp_table_422_8bpc_max = { { 6, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} }, { 6.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} }, { 7, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12} }, diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c index 0ea52ba5ac82..9fd8b269dd79 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c @@ -256,8 +256,8 @@ static const struct hw_factory_funcs funcs = { */ void dal_hw_factory_dcn32_init(struct hw_factory *factory) { - factory->number_of_pins[GPIO_ID_DDC_DATA] = 6; - factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 6; + factory->number_of_pins[GPIO_ID_DDC_DATA] = 8; + factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 8; factory->number_of_pins[GPIO_ID_GENERIC] = 4; factory->number_of_pins[GPIO_ID_HPD] = 5; factory->number_of_pins[GPIO_ID_GPIO_PAD] = 28; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 9498105c98ab..525f8f0b8732 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -115,6 +115,13 @@ struct resource_funcs { int vlevel); void (*update_soc_for_wm_a)( struct dc *dc, struct dc_state *context); + + /** + * @populate_dml_pipes - Populate pipe data struct + * + * Returns: + * Total of pipes available in the specific ASIC. 
+ */ int (*populate_dml_pipes)( struct dc *dc, struct dc_state *context, @@ -233,8 +240,11 @@ struct resource_funcs { unsigned int pipe_cnt, unsigned int index); - bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context); + bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update); + void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context); void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); + void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); + void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); }; struct audio_support{ @@ -413,7 +423,10 @@ struct pipe_ctx { struct pll_settings pll_settings; - /* link config records software decision for what link config should be + /** + * @link_config: + * + * link config records software decision for what link config should be * enabled given current link capability and stream during hw resource * mapping. This is to decouple the dependency on link capability during * dc commit or update. @@ -507,33 +520,62 @@ struct bw_context { union bw_output bw; struct display_mode_lib dml; }; + /** - * struct dc_state - The full description of a state requested by a user - * - * @streams: Stream properties - * @stream_status: The planes on a given stream - * @res_ctx: Persistent state of resources - * @bw_ctx: The output from bandwidth and watermark calculations and the DML - * @pp_display_cfg: PowerPlay clocks and settings - * @dcn_bw_vars: non-stack memory to support bandwidth calculations - * + * struct dc_state - The full description of a state requested by users */ struct dc_state { + /** + * @streams: Stream state properties + */ struct dc_stream_state *streams[MAX_PIPES]; + + /** + * @stream_status: Planes status on a given stream + */ struct dc_stream_status stream_status[MAX_PIPES]; + + /** + * @stream_count: Total of streams in use + */ uint8_t stream_count; uint8_t stream_mask; + /** + * @res_ctx: Persistent state of resources + */ struct resource_context res_ctx; + /** + * @bw_ctx: The output from bandwidth and watermark calculations and the DML + * + * Each context must have its own instance of VBA, and in order to + * initialize and obtain IP and SOC, the base DML instance from DC is + * initially copied into every context. + */ struct bw_context bw_ctx; - /* Note: these are big structures, do *not* put on stack! */ + /** + * @pp_display_cfg: PowerPlay clocks and settings + * Note: this is a big struct, do *not* put on stack! + */ struct dm_pp_display_configuration pp_display_cfg; + + /** + * @dcn_bw_vars: non-stack memory to support bandwidth calculations + * Note: this is a big struct, do *not* put on stack! + */ struct dcn_bw_internal_vars dcn_bw_vars; struct clk_mgr *clk_mgr; + /** + * @refcount: refcount reference + * + * Notice that dc_state is used around the code to capture the current + * context, so we need to pass it everywhere. That's why we want to use + * kref in this struct. 
+ */ struct kref refcount; struct { diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index b304d450b038..e8d8c5cb1309 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -193,7 +193,7 @@ enum dc_status dpcd_configure_lttpr_mode( struct link_training_settings *lt_settings); enum dp_link_encoding dp_get_link_encoding_format(const struct dc_link_settings *link_settings); -bool dp_retrieve_lttpr_cap(struct dc_link *link); +enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link); bool dp_is_lttpr_present(struct dc_link *link); enum lttpr_mode dp_decide_lttpr_mode(struct dc_link *link, struct dc_link_settings *link_setting); void dp_get_lttpr_mode_override(struct dc_link *link, enum lttpr_mode *override); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index e7571c6f5ead..f2e1fcb668fb 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -167,10 +167,26 @@ struct hubbub_funcs { void (*force_pstate_change_control)(struct hubbub *hubbub, bool force, bool allow); void (*init_watermarks)(struct hubbub *hubbub); + + /** + * @program_det_size: + * + * DE-Tile buffers (DET) is a memory that is used to convert the tiled + * data into linear, which the rest of the display can use to generate + * the graphics output. One of the main features of this component is + * that each pipe has a configurable DET buffer which means that when a + * pipe is not enabled, the device can assign the memory to other + * enabled pipes to try to be more efficient. + * + * DET logic is handled by dchubbub. Some ASICs provide a feature named + * Configurable Return Buffer (CRB) segments which can be allocated to + * compressed or detiled buffers. 
+ */ void (*program_det_size)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_in_kbyte); void (*program_compbuf_size)(struct hubbub *hubbub, unsigned compbuf_size_kb, bool safe_to_increase); void (*init_crb)(struct hubbub *hubbub); void (*force_usr_retraining_allow)(struct hubbub *hubbub, bool allow); + void (*set_request_limit)(struct hubbub *hubbub, int memory_channel_count, int words_per_channel); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h index 8df2765cce78..de3113ecbc77 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h @@ -56,20 +56,6 @@ struct dmcu { bool auto_load_dmcu; }; -#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) -struct crc_region { - uint16_t x_start; - uint16_t y_start; - uint16_t x_end; - uint16_t y_end; -}; - -struct otg_phy_mux { - uint8_t phy_output_num; - uint8_t otg_output_num; -}; -#endif - struct dmcu_funcs { bool (*dmcu_init)(struct dmcu *dmcu); bool (*load_iram)(struct dmcu *dmcu, @@ -100,7 +86,7 @@ struct dmcu_funcs { bool (*recv_edid_cea_ack)(struct dmcu *dmcu, int *offset); #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) void (*forward_crc_window)(struct dmcu *dmcu, - struct crc_region *crc_win, + struct rect *rect, struct otg_phy_mux *mux_mapping); void (*stop_crc_win_update)(struct dmcu *dmcu, struct otg_phy_mux *mux_mapping); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index dcb80c4747b0..131fcfa28bca 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -83,10 +83,15 @@ static const struct dpp_input_csc_matrix __maybe_unused dpp_input_csc_matrix[] = {COLOR_SPACE_YCBCR709, {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0, 0x2000, 0x3b61, 0xe24f} }, - {COLOR_SPACE_YCBCR709_LIMITED, {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, - 0x2568, 0x43ee, 0xdbb2} } + 0x2568, 0x43ee, 0xdbb2} }, + {COLOR_SPACE_2020_YCBCR, + {0x2F30, 0x2000, 0, 0xE869, 0xEDB7, 0x2000, 0xFABC, 0xBC6, 0, + 0x2000, 0x3C34, 0xE1E6} }, + {COLOR_SPACE_2020_RGB_LIMITEDRANGE, + {0x35E0, 0x255F, 0, 0xE2B3, 0xEB20, 0x255F, 0xF9FD, 0xB1E, 0, + 0x255F, 0x44BD, 0xDB43} } }; struct dpp_grph_csc_adjustment { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index cd2be729846b..a819f0f97c5f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -35,6 +35,13 @@ ******************************************************************************/ #define MAX_AUDIOS 7 + +/** + * @MAX_PIPES: + * + * Every ASIC support a fixed number of pipes; MAX_PIPES defines a large number + * to be used inside loops and for determining array sizes. + */ #define MAX_PIPES 6 #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 42afa1952890..42db4b7b79fd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -243,6 +243,9 @@ struct stream_encoder_funcs { uint32_t hubp_requestor_id, enum dynamic_metadata_mode dmdata_mode); + /** + * @dp_set_odm_combine: Sets up DP stream encoder for ODM. 
+ */ void (*dp_set_odm_combine)( struct stream_encoder *enc, bool odm_combine); @@ -317,9 +320,6 @@ struct hpo_dp_stream_encoder_funcs { uint32_t stream_enc_inst, uint32_t link_enc_inst); - void (*audio_mute_control)( - struct hpo_dp_stream_encoder *enc, bool mute); - void (*dp_audio_setup)( struct hpo_dp_stream_encoder *enc, unsigned int az_inst, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 25a1df45b264..0e42e721dd15 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -185,6 +185,7 @@ struct timing_generator_funcs { #ifdef CONFIG_DRM_AMD_DC_DCN void (*phantom_crtc_post_enable)(struct timing_generator *tg); #endif + void (*disable_phantom_crtc)(struct timing_generator *tg); bool (*immediate_disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); void (*get_position)(struct timing_generator *tg, @@ -301,6 +302,11 @@ struct timing_generator_funcs { void (*get_dsc_status)(struct timing_generator *optc, uint32_t *dsc_mode); void (*set_odm_bypass)(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing); + + /** + * @set_odm_combine: Set up the ODM block to read from the correct + * OPP(s) and turn on/off ODM memory. + */ void (*set_odm_combine)(struct timing_generator *optc, int *opp_id, int opp_cnt, struct dc_crtc_timing *timing); void (*set_h_timing_div_manual_mode)(struct timing_generator *optc, bool manual_mode); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index d04b68dad413..c43523f9ff6d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -263,6 +263,7 @@ struct hw_sequencer_funcs { void (*update_phantom_vp_position)(struct dc *dc, struct dc_state *context, struct pipe_ctx *phantom_pipe); + void (*apply_update_flags_for_phantom)(struct pipe_ctx *phantom_pipe); void (*commit_subvp_config)(struct dc *dc, struct dc_state *context); void (*subvp_pipe_control_lock)(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h index 89964c980b87..0f69946cce9f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h @@ -38,6 +38,7 @@ struct link_resource; struct pipe_ctx; struct encoder_set_dp_phy_pattern_param; struct link_mst_stream_allocation_table; +struct audio_output; struct link_hwss_ext { /* function pointers below may require to check for NULL if caller @@ -79,6 +80,10 @@ struct link_hwss { void (*disable_link_output)(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal); + void (*setup_audio_output)(struct pipe_ctx *pipe_ctx, + struct audio_output *audio_output, uint32_t audio_inst); + void (*enable_audio_packet)(struct pipe_ctx *pipe_ctx); + void (*disable_audio_packet)(struct pipe_ctx *pipe_ctx); }; #endif /* __DC_LINK_HWSS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c index 45f99351a0ab..5f4f6dd79511 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c @@ -28,20 +28,19 @@ #include "include/logger_interface.h" #include "../dce110/irq_service_dce110.h" +#include 
"irq_service_dcn201.h" #include "dcn/dcn_2_0_3_offset.h" #include "dcn/dcn_2_0_3_sh_mask.h" #include "cyan_skillfish_ip_offset.h" #include "soc15_hw_ip.h" - -#include "irq_service_dcn201.h" - #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -static enum dc_irq_source to_dal_irq_source_dcn201(struct irq_service *irq_service, - uint32_t src_id, - uint32_t ext_id) +enum dc_irq_source to_dal_irq_source_dcn201( + struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) { switch (src_id) { case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: @@ -79,7 +78,6 @@ static enum dc_irq_source to_dal_irq_source_dcn201(struct irq_service *irq_servi default: return DC_IRQ_SOURCE_INVALID; } - return DC_IRQ_SOURCE_INVALID; } static bool hpd_ack( @@ -138,6 +136,11 @@ static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs dmub_outbox_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h index 8e27c5e219a3..0cfd2f2d62e8 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h @@ -1,5 +1,5 @@ /* - * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c index 7bad39bba86b..d100edaedbbb 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c @@ -112,8 +112,15 @@ bool dal_irq_service_set( dal_irq_service_ack(irq_service, source); - if (info->funcs && info->funcs->set) + if (info->funcs && info->funcs->set) { + if (info->funcs->set == dal_irq_service_dummy_set) { + DC_LOG_WARNING("%s: src: %d, st: %d\n", __func__, + source, enable); + ASSERT(0); + } + return info->funcs->set(irq_service, info, enable); + } dal_irq_service_set_generic(irq_service, info, enable); @@ -146,8 +153,14 @@ bool dal_irq_service_ack( return false; } - if (info->funcs && info->funcs->ack) + if (info->funcs && info->funcs->ack) { + if (info->funcs->ack == dal_irq_service_dummy_ack) { + DC_LOG_WARNING("%s: src: %d\n", __func__, source); + ASSERT(0); + } + return info->funcs->ack(irq_service, info); + } dal_irq_service_ack_generic(irq_service, info); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c new file mode 100644 index 000000000000..801a95b34e8c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c @@ -0,0 +1,28 @@ + +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +/*********************************************************************/ +// USB4 DPIA BANDWIDTH ALLOCATION LOGIC +/*********************************************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h new file mode 100644 index 000000000000..669e995f825f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h @@ -0,0 +1,69 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef DC_INC_LINK_DP_DPIA_BW_H_ +#define DC_INC_LINK_DP_DPIA_BW_H_ + +// XXX: TODO: Re-add for Phase 2 +/* Number of Host Routers per motherboard is 2 and 2 DPIA per host router */ +#define MAX_HR_NUM 2 + +struct dc_host_router_bw_alloc { + int max_bw[MAX_HR_NUM]; // The Max BW that each Host Router has available to be shared btw DPIAs + int total_estimated_bw[MAX_HR_NUM]; // The Total Verified and available BW that Host Router has +}; + +/* + * Enable BW Allocation Mode Support from the DP-Tx side + * + * @link: pointer to the dc_link struct instance + * + * return: SUCCESS or FAILURE + */ +bool set_dptx_usb4_bw_alloc_support(struct dc_link *link); + +/* + * Send a request from DP-Tx requesting to allocate BW remotely after + * allocating it locally. 
This will get processed by CM and a CB function + * will be called. + * + * @link: pointer to the dc_link struct instance + * @req_bw: The requested bw in Kbyte to allocated + * + * return: none + */ +void set_usb4_req_bw_req(struct dc_link *link, int req_bw); + +/* + * CB function for when the status of the Req above is complete. We will + * find out the result of allocating on CM and update structs accordingly + * + * @link: pointer to the dc_link struct instance + * + * return: none + */ +void get_usb4_req_bw_resp(struct dc_link *link); + +#endif /* DC_INC_LINK_DP_DPIA_BW_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c index 4227adbc646a..33148b753c03 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c @@ -170,11 +170,63 @@ static void update_dio_stream_allocation_table(struct dc_link *link, link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); } +void setup_dio_audio_output(struct pipe_ctx *pipe_ctx, + struct audio_output *audio_output, uint32_t audio_inst) +{ + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup( + pipe_ctx->stream_res.stream_enc, + audio_inst, + &pipe_ctx->stream->audio_info); + else + pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_setup( + pipe_ctx->stream_res.stream_enc, + audio_inst, + &pipe_ctx->stream->audio_info, + &audio_output->crtc_info); +} + +void enable_dio_audio_packet(struct pipe_ctx *pipe_ctx) +{ + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable( + pipe_ctx->stream_res.stream_enc); + + pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( + pipe_ctx->stream_res.stream_enc, false); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + dp_source_sequence_trace(pipe_ctx->stream->link, + DPCD_SOURCE_SEQ_AFTER_ENABLE_AUDIO_STREAM); +} + +void disable_dio_audio_packet(struct pipe_ctx *pipe_ctx) +{ + pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( + pipe_ctx->stream_res.stream_enc, true); + + if (pipe_ctx->stream_res.audio) { + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable( + pipe_ctx->stream_res.stream_enc); + else + pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_disable( + pipe_ctx->stream_res.stream_enc); + } + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + dp_source_sequence_trace(pipe_ctx->stream->link, + DPCD_SOURCE_SEQ_AFTER_DISABLE_AUDIO_STREAM); +} + static const struct link_hwss dio_link_hwss = { .setup_stream_encoder = setup_dio_stream_encoder, .reset_stream_encoder = reset_dio_stream_encoder, .setup_stream_attribute = setup_dio_stream_attribute, .disable_link_output = disable_dio_link_output, + .setup_audio_output = setup_dio_audio_output, + .enable_audio_packet = enable_dio_audio_packet, + .disable_audio_packet = disable_dio_audio_packet, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h index 126d37f847a1..9a108c3d7831 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h @@ -50,5 +50,9 @@ void set_dio_dp_lane_settings(struct dc_link *link, const struct link_resource *link_res, const struct dc_link_settings *link_settings, const struct 
dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]); +void setup_dio_audio_output(struct pipe_ctx *pipe_ctx, + struct audio_output *audio_output, uint32_t audio_inst); +void enable_dio_audio_packet(struct pipe_ctx *pipe_ctx); +void disable_dio_audio_packet(struct pipe_ctx *pipe_ctx); #endif /* __LINK_HWSS_DIO_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c index 64f7ea6a9aa3..861f3cd5b356 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c @@ -57,6 +57,9 @@ static const struct link_hwss dpia_link_hwss = { .reset_stream_encoder = reset_dio_stream_encoder, .setup_stream_attribute = setup_dio_stream_attribute, .disable_link_output = disable_dio_link_output, + .setup_audio_output = setup_dio_audio_output, + .enable_audio_packet = enable_dio_audio_packet, + .disable_audio_packet = disable_dio_audio_packet, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index 153a88381f2c..2f46e1ac4ce0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -262,11 +262,36 @@ static void update_hpo_dp_stream_allocation_table(struct dc_link *link, table); } +static void setup_hpo_dp_audio_output(struct pipe_ctx *pipe_ctx, + struct audio_output *audio_output, uint32_t audio_inst) +{ + pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_setup( + pipe_ctx->stream_res.hpo_dp_stream_enc, + audio_inst, + &pipe_ctx->stream->audio_info); +} + +static void enable_hpo_dp_audio_packet(struct pipe_ctx *pipe_ctx) +{ + pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_enable( + pipe_ctx->stream_res.hpo_dp_stream_enc); +} + +static void disable_hpo_dp_audio_packet(struct pipe_ctx *pipe_ctx) +{ + if (pipe_ctx->stream_res.audio) + pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_disable( + pipe_ctx->stream_res.hpo_dp_stream_enc); +} + static const struct link_hwss hpo_dp_link_hwss = { .setup_stream_encoder = setup_hpo_dp_stream_encoder, .reset_stream_encoder = reset_hpo_dp_stream_encoder, .setup_stream_attribute = setup_hpo_dp_stream_attribute, .disable_link_output = disable_hpo_dp_link_output, + .setup_audio_output = setup_hpo_dp_audio_output, + .enable_audio_packet = enable_hpo_dp_audio_packet, + .disable_audio_packet = disable_hpo_dp_audio_packet, .ext = { .set_throttled_vcp_size = set_hpo_dp_throttled_vcp_size, .set_hblank_min_symbol_width = set_hpo_dp_hblank_min_symbol_width, diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7a8f61517424..33907feefebb 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -225,6 +225,12 @@ union dmub_psr_debug_flags { * Use TPS3 signal when restore main link. 
*/ uint32_t force_wakeup_by_tps3 : 1; + + /** + * Back to back flip, therefore cannot power down PHY + */ + uint32_t back_to_back_flip : 1; + } bitfields; /** @@ -401,8 +407,9 @@ union dmub_fw_boot_options { uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/ uint32_t usb4_cm_version: 1; /**< 1 CM support */ uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */ + uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */ - uint32_t reserved : 16; /**< reserved */ + uint32_t reserved : 15; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; @@ -731,6 +738,11 @@ enum dmub_cmd_type { */ /** + * Command type used for all SECURE_DISPLAY commands. + */ + DMUB_CMD__SECURE_DISPLAY = 85, + + /** * Command type used to set DPIA HPD interrupt state */ DMUB_CMD__DPIA_HPD_INT_ENABLE = 86, @@ -1017,13 +1029,14 @@ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 { uint16_t vtotal; uint16_t htotal; uint8_t vblank_pipe_index; - uint8_t padding[2]; + uint8_t padding[1]; struct { uint8_t drr_in_use; uint8_t drr_window_size_ms; // Indicates largest VMIN/VMAX adjustment per frame uint16_t min_vtotal_supported; // Min VTOTAL that supports switching in VBLANK uint16_t max_vtotal_supported; // Max VTOTAL that can support SubVP static scheduling uint8_t use_ramping; // Use ramping or not + uint8_t drr_vblank_start_margin; } drr_info; // DRR considered as part of SubVP + VBLANK case } vblank_data; } pipe_config; @@ -1866,9 +1879,13 @@ struct dmub_cmd_psr_copy_settings_data { */ uint8_t use_phy_fsm; /** + * frame delay for frame re-lock + */ + uint8_t relock_delay_frame_cnt; + /** * Explicit padding to 2 byte boundary. */ - uint8_t pad3[2]; + uint8_t pad3; }; /** @@ -3144,6 +3161,33 @@ struct dmub_rb_cmd_get_usbc_cable_id { }; /** + * Command type of a DMUB_CMD__SECURE_DISPLAY command + */ +enum dmub_cmd_secure_display_type { + DMUB_CMD__SECURE_DISPLAY_TEST_CMD = 0, /* test command to only check if inbox message works */ + DMUB_CMD__SECURE_DISPLAY_CRC_STOP_UPDATE, + DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY +}; + +/** + * Definition of a DMUB_CMD__SECURE_DISPLAY command + */ +struct dmub_rb_cmd_secure_display { + struct dmub_cmd_header header; + /** + * Data passed from driver to dmub firmware. + */ + struct dmub_cmd_roi_info { + uint16_t x_start; + uint16_t x_end; + uint16_t y_start; + uint16_t y_end; + uint8_t otg_id; + uint8_t phy_id; + } roi_info; +}; + +/** * union dmub_rb_cmd - DMUB inbox command. */ union dmub_rb_cmd { @@ -3348,6 +3392,11 @@ union dmub_rb_cmd { */ struct dmub_rb_cmd_query_hpd_state query_hpd; /** + * Definition of a DMUB_CMD__SECURE_DISPLAY command. + */ + struct dmub_rb_cmd_secure_display secure_display; + + /** * Definition of a DMUB_CMD__DPIA_HPD_INT_ENABLE command. 
*/ struct dmub_rb_cmd_dpia_hpd_int_enable dpia_hpd_int_enable; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 447a0ec9cbe2..f6034213c700 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -61,7 +61,7 @@ static const int32_t numerator01[] = { 31308, 180000, 0, 0, 0}; static const int32_t numerator02[] = { 12920, 4500, 0, 0, 0}; static const int32_t numerator03[] = { 55, 99, 0, 0, 0}; static const int32_t numerator04[] = { 55, 99, 0, 0, 0}; -static const int32_t numerator05[] = { 2400, 2200, 2200, 2400, 2600}; +static const int32_t numerator05[] = { 2400, 2222, 2200, 2400, 2600}; /* one-time setup of X points */ void setup_x_points_distribution(void) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 0f39ab9dc5b4..c2e00f7b8381 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -688,10 +688,10 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, if (app_tf != TRANSFER_FUNC_UNKNOWN) { infopacket->valid = true; - infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active] - - if (app_tf == TRANSFER_FUNC_GAMMA_22) { - infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active] + if (app_tf != TRANSFER_FUNC_PQ2084) { + infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active] + if (app_tf == TRANSFER_FUNC_GAMMA_22) + infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active] } } } diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 1d8b746b02f2..edf5845f6a1f 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -35,7 +35,8 @@ struct mod_vrr_params; void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs); + enum dc_color_space cs, + enum color_transfer_func tf); void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 27ceba9d6d65..69691058ab89 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -132,7 +132,8 @@ enum ColorimetryYCCDP { void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs) + enum dc_color_space cs, + enum color_transfer_func tf) { unsigned int vsc_packet_revision = vsc_packet_undefined; unsigned int i; @@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, colorimetryFormat = ColorimetryYCC_DP_AdobeYCC; else if (cs == COLOR_SPACE_2020_YCBCR) colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr; + + if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22) + colorimetryFormat = ColorimetryYCC_DP_ITU709; break; default: diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 235259d6c5a1..9b5d9b2c9a6a 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ 
b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -102,9 +102,18 @@ static const struct abm_parameters abm_settings_config1[abm_defines_max_level] = {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xcccc, 0xcccc}, }; +static const struct abm_parameters abm_settings_config2[abm_defines_max_level] = { +// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blRed blStart + {0xf0, 0xbf, 0x20, 0x00, 0x88, 0x99, 0xb3, 0x40, 0xe0, 0x0000, 0xcccc}, + {0xd8, 0x85, 0x20, 0x00, 0x70, 0x90, 0xa8, 0x40, 0xc8, 0x0700, 0xb333}, + {0xb8, 0x58, 0x20, 0x00, 0x64, 0x88, 0x78, 0x70, 0xa0, 0x7000, 0x9999}, + {0x82, 0x40, 0x20, 0x00, 0x00, 0xb8, 0xb3, 0x70, 0x70, 0xc333, 0xb333}, +}; + static const struct abm_parameters * const abm_settings[] = { abm_settings_config0, abm_settings_config1, + abm_settings_config2, }; #define NUM_AMBI_LEVEL 5 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h index 483769fb1736..537aee0536d3 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT #ifndef _dcn_3_0_0_OFFSET_HEADER #define _dcn_3_0_0_OFFSET_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h index b79be3a25a80..f9d90b098519 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT #ifndef _dcn_3_0_0_SH_MASK_HEADER #define _dcn_3_0_0_SH_MASK_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h index 363d2139cea2..db7e22720d00 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h @@ -993,7 +993,8 @@ #define mmUVD_RAS_VCPU_VCODEC_STATUS_BASE_IDX 1 #define mmUVD_RAS_MMSCH_FATAL_ERROR 0x0058 #define mmUVD_RAS_MMSCH_FATAL_ERROR_BASE_IDX 1 - +#define mmVCN_RAS_CNTL 0x04b9 +#define mmVCN_RAS_CNTL_BASE_IDX 1 /* JPEG 2_6_0 regs */ #define mmUVD_RAS_JPEG0_STATUS 0x0059 diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h index 8de883b76d90..874a8b7e1feb 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h @@ -3618,6 +3618,33 @@ #define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF_MASK 0x7FFFFFFFL #define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF_MASK 0x80000000L +//VCN 2_6_0 VCN_RAS_CNTL +#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN__SHIFT 0x0 +#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN__SHIFT 0x1 +#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN__SHIFT 0x4 +#define VCN_RAS_CNTL__MMSCH_PMI_EN__SHIFT 0x5 +#define VCN_RAS_CNTL__VCPU_VCODEC_REARM__SHIFT 0x8 +#define VCN_RAS_CNTL__MMSCH_REARM__SHIFT 0x9 +#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN__SHIFT 0xc +#define VCN_RAS_CNTL__VCPU_VCODEC_READY__SHIFT 0x10 +#define VCN_RAS_CNTL__MMSCH_READY__SHIFT 0x11 +#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK 0x00000001L +#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN_MASK 0x00000002L +#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK 0x00000010L +#define VCN_RAS_CNTL__MMSCH_PMI_EN_MASK 0x00000020L +#define VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK 0x00000100L +#define 
VCN_RAS_CNTL__MMSCH_REARM_MASK 0x00000200L +#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK 0x00001000L +#define VCN_RAS_CNTL__VCPU_VCODEC_READY_MASK 0x00010000L +#define VCN_RAS_CNTL__MMSCH_READY_MASK 0x00020000L + +//VCN 2_6_0 UVD_VCPU_INT_EN +#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN__SHIFT 0x16 +#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK 0x00400000L + +//VCN 2_6_0 UVD_SYS_INT_EN +#define UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK 0x04000000L + /* JPEG 2_6_0 UVD_RAS_JPEG0_STATUS */ #define UVD_RAS_JPEG0_STATUS__POISONED_VF__SHIFT 0x0 #define UVD_RAS_JPEG0_STATUS__POISONED_PF__SHIFT 0x1f diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 15943bc21bc5..b78360a71bc9 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -4107,7 +4107,7 @@ typedef struct _ATOM_FAKE_EDID_PATCH_RECORD { UCHAR ucRecordType; UCHAR ucFakeEDIDLength; // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 - UCHAR ucFakeEDIDString[1]; // This actually has ucFakeEdidLength elements. + UCHAR ucFakeEDIDString[]; // This actually has ucFakeEdidLength elements. } ATOM_FAKE_EDID_PATCH_RECORD; typedef struct _ATOM_PANEL_RESOLUTION_PATCH_RECORD @@ -4386,7 +4386,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT typedef struct _ATOM_GPIO_PIN_LUT { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[1]; + ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[]; }ATOM_GPIO_PIN_LUT; /****************************************************************************/ @@ -4513,7 +4513,7 @@ typedef struct _ATOM_DISPLAY_OBJECT_PATH USHORT usSize; //the size of ATOM_DISPLAY_OBJECT_PATH USHORT usConnObjectId; //Connector Object ID USHORT usGPUObjectId; //GPU ID - USHORT usGraphicObjIds[1]; //1st Encoder Obj source from GPU to last Graphic Obj destinate to connector. + USHORT usGraphicObjIds[]; //1st Encoder Obj source from GPU to last Graphic Obj destinate to connector. 
}ATOM_DISPLAY_OBJECT_PATH; typedef struct _ATOM_DISPLAY_EXTERNAL_OBJECT_PATH @@ -4530,7 +4530,7 @@ typedef struct _ATOM_DISPLAY_OBJECT_PATH_TABLE UCHAR ucNumOfDispPath; UCHAR ucVersion; UCHAR ucPadding[2]; - ATOM_DISPLAY_OBJECT_PATH asDispPath[1]; + ATOM_DISPLAY_OBJECT_PATH asDispPath[]; }ATOM_DISPLAY_OBJECT_PATH_TABLE; typedef struct _ATOM_OBJECT //each object has this structure @@ -4545,7 +4545,7 @@ typedef struct _ATOM_OBJECT_TABLE //Above 4 object table { UCHAR ucNumberOfObjects; UCHAR ucPadding[3]; - ATOM_OBJECT asObjects[1]; + ATOM_OBJECT asObjects[]; }ATOM_OBJECT_TABLE; typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT //usSrcDstTableOffset pointing to this structure @@ -4733,7 +4733,7 @@ typedef struct _ATOM_CONNECTOR_DEVICE_TAG_RECORD ATOM_COMMON_RECORD_HEADER sheader; UCHAR ucNumberOfDevice; UCHAR ucReserved; - ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[1]; //This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation + ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[]; //This Id is same as "ATOM_DEVICE_XXX_SUPPORT" }ATOM_CONNECTOR_DEVICE_TAG_RECORD; @@ -4793,7 +4793,7 @@ typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD ATOM_COMMON_RECORD_HEADER sheader; UCHAR ucFlags; // Future expnadibility UCHAR ucNumberOfPins; // Number of GPIO pins used to control the object - ATOM_GPIO_PIN_CONTROL_PAIR asGpio[1]; // the real gpio pin pair determined by number of pins ucNumberOfPins + ATOM_GPIO_PIN_CONTROL_PAIR asGpio[]; // the real gpio pin pair determined by number of pins ucNumberOfPins }ATOM_OBJECT_GPIO_CNTL_RECORD; //Definitions for GPIO pin state @@ -4982,7 +4982,7 @@ typedef struct _ATOM_BRACKET_LAYOUT_RECORD UCHAR ucWidth; UCHAR ucConnNum; UCHAR ucReserved; - ATOM_CONNECTOR_LAYOUT_INFO asConnInfo[1]; + ATOM_CONNECTOR_LAYOUT_INFO asConnInfo[]; }ATOM_BRACKET_LAYOUT_RECORD; @@ -5146,7 +5146,7 @@ typedef struct _ATOM_I2C_VOLTAGE_OBJECT_V3 UCHAR ucVoltageControlOffset; UCHAR ucVoltageControlFlag; // Bit0: 0 - One byte data; 1 - Two byte data UCHAR ulReserved[3]; - VOLTAGE_LUT_ENTRY asVolI2cLut[1]; // end with 0xff + VOLTAGE_LUT_ENTRY asVolI2cLut[]; // end with 0xff }ATOM_I2C_VOLTAGE_OBJECT_V3; // ATOM_I2C_VOLTAGE_OBJECT_V3.ucVoltageControlFlag @@ -5161,7 +5161,7 @@ typedef struct _ATOM_GPIO_VOLTAGE_OBJECT_V3 UCHAR ucPhaseDelay; // phase delay in unit of micro second UCHAR ucReserved; ULONG ulGpioMaskVal; // GPIO Mask value - VOLTAGE_LUT_ENTRY_V2 asVolGpioLut[1]; + VOLTAGE_LUT_ENTRY_V2 asVolGpioLut[]; }ATOM_GPIO_VOLTAGE_OBJECT_V3; typedef struct _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 @@ -5171,7 +5171,7 @@ typedef struct _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 UCHAR ucLeakageEntryNum; // indicate the entry number of LeakageId/Voltage Lut table UCHAR ucReserved[2]; ULONG ulMaxVoltageLevel; - LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[1]; + LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[]; }ATOM_LEAKAGE_VOLTAGE_OBJECT_V3; @@ -6599,7 +6599,7 @@ typedef struct _ATOM_FUSION_SYSTEM_INFO_V3 typedef struct _ATOM_I2C_DATA_RECORD { UCHAR ucNunberOfBytes; //Indicates how many bytes SW needs to write to the external ASIC for one block, besides to "Start" and "Stop" - UCHAR ucI2CData[1]; //I2C data in bytes, should be less than 16 bytes usually + UCHAR ucI2CData[]; //I2C data in bytes, should be less than 16 bytes usually }ATOM_I2C_DATA_RECORD; @@ -6610,14 +6610,14 @@ typedef struct _ATOM_I2C_DEVICE_SETUP_INFO UCHAR ucSSChipID; //SS chip being used UCHAR ucSSChipSlaveAddr; //Slave Address to set up this SS chip UCHAR ucNumOfI2CDataRecords; //number of data block - ATOM_I2C_DATA_RECORD asI2CData[1]; + ATOM_I2C_DATA_RECORD 
asI2CData[]; }ATOM_I2C_DEVICE_SETUP_INFO; //========================================================================================== typedef struct _ATOM_ASIC_MVDD_INFO { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[1]; + ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[]; }ATOM_ASIC_MVDD_INFO; //========================================================================================== @@ -6679,7 +6679,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V2 { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_ASIC_SS_ASSIGNMENT_V2 asSpreadSpectrum[1]; //this is point only. + ATOM_ASIC_SS_ASSIGNMENT_V2 asSpreadSpectrum[]; //this is point only. }ATOM_ASIC_INTERNAL_SS_INFO_V2; typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3 @@ -6701,7 +6701,7 @@ typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3 typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_ASIC_SS_ASSIGNMENT_V3 asSpreadSpectrum[1]; //this is pointer only. + ATOM_ASIC_SS_ASSIGNMENT_V3 asSpreadSpectrum[]; //this is pointer only. }ATOM_ASIC_INTERNAL_SS_INFO_V3; @@ -9292,7 +9292,7 @@ typedef struct { typedef struct { VFCT_IMAGE_HEADER VbiosHeader; - UCHAR VbiosContent[1]; + UCHAR VbiosContent[]; }GOP_VBIOS_CONTENT; typedef struct { diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index ff855cb21d3f..bbe1337a8cee 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -705,20 +705,65 @@ struct atom_gpio_pin_lut_v2_1 }; -/* - *************************************************************************** - Data Table vram_usagebyfirmware structure - *************************************************************************** -*/ +/* + * VBIOS/PRE-OS always reserve a FB region at the top of frame buffer. driver should not write + * access that region. driver can allocate their own reservation region as long as it does not + * overlap firwmare's reservation region. + * if (pre-NV1X) atom data table firmwareInfoTable version < 3.3: + * in this case, atom data table vram_usagebyfirmwareTable version always <= 2.1 + * if VBIOS/UEFI GOP is posted: + * VBIOS/UEFIGOP update used_by_firmware_in_kb = total reserved size by VBIOS + * update start_address_in_kb = total_mem_size_in_kb - used_by_firmware_in_kb; + * ( total_mem_size_in_kb = reg(CONFIG_MEMSIZE)<<10) + * driver can allocate driver reservation region under firmware reservation, + * used_by_driver_in_kb = driver reservation size + * driver reservation start address = (start_address_in_kb - used_by_driver_in_kb) + * Comment1[hchan]: There is only one reservation at the beginning of the FB reserved by + * host driver. Host driver would overwrite the table with the following + * used_by_firmware_in_kb = total reserved size for pf-vf info exchange and + * set SRIOV_MSG_SHARE_RESERVATION mask start_address_in_kb = 0 + * else there is no VBIOS reservation region: + * driver must allocate driver reservation region at top of FB. 
+ * driver set used_by_driver_in_kb = driver reservation size + * driver reservation start address = (total_mem_size_in_kb - used_by_driver_in_kb) + * same as Comment1 + * else (NV1X and after): + * if VBIOS/UEFI GOP is posted: + * VBIOS/UEFIGOP update: + * used_by_firmware_in_kb = atom_firmware_Info_v3_3.fw_reserved_size_in_kb; + * start_address_in_kb = total_mem_size_in_kb - used_by_firmware_in_kb; + * (total_mem_size_in_kb = reg(CONFIG_MEMSIZE)<<10) + * if vram_usagebyfirmwareTable version <= 2.1: + * driver can allocate driver reservation region under firmware reservation, + * driver set used_by_driver_in_kb = driver reservation size + * driver reservation start address = start_address_in_kb - used_by_driver_in_kb + * same as Comment1 + * else driver can: + * allocate it reservation any place as long as it does overlap pre-OS FW reservation area + * set used_by_driver_region0_in_kb = driver reservation size + * set driver_region0_start_address_in_kb = driver reservation region start address + * Comment2[hchan]: Host driver can set used_by_firmware_in_kb and start_address_in_kb to + * zero as the reservation for VF as it doesn’t exist. And Host driver should also + * update atom_firmware_Info table to remove the same VBIOS reservation as well. + */ struct vram_usagebyfirmware_v2_1 { - struct atom_common_table_header table_header; - uint32_t start_address_in_kb; - uint16_t used_by_firmware_in_kb; - uint16_t used_by_driver_in_kb; + struct atom_common_table_header table_header; + uint32_t start_address_in_kb; + uint16_t used_by_firmware_in_kb; + uint16_t used_by_driver_in_kb; }; +struct vram_usagebyfirmware_v2_2 { + struct atom_common_table_header table_header; + uint32_t fw_region_start_address_in_kb; + uint16_t used_by_firmware_in_kb; + uint16_t reserved; + uint32_t driver_region0_start_address_in_kb; + uint32_t used_by_driver_region0_in_kb; + uint32_t reserved32[7]; +}; /* *************************************************************************** diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h index a81138c9e491..03cfa0517df2 100644 --- a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h +++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h @@ -38,4 +38,7 @@ #define VCN_4_0__SRCID__JPEG6_DECODE 174 // 0xae JRBC6 Decode interrupt #define VCN_4_0__SRCID__JPEG7_DECODE 175 // 0xaf JRBC7 Decode interrupt +#define VCN_4_0__SRCID_UVD_POISON 160 +#define VCN_4_0__SRCID_DJPEG0_POISON 161 +#define VCN_4_0__SRCID_EJPEG0_POISON 162 #endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index a40ead44778a..d18162e9ed1d 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -354,7 +354,8 @@ struct amd_pm_funcs { int (*get_power_profile_mode)(void *handle, char *buf); int (*set_power_profile_mode)(void *handle, long *input, uint32_t size); int (*set_fine_grain_clk_vol)(void *handle, uint32_t type, long *input, uint32_t size); - int (*odn_edit_dpm_table)(void *handle, uint32_t type, long *input, uint32_t size); + int (*odn_edit_dpm_table)(void *handle, enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size); int (*set_mp1_state)(void *handle, enum pp_mp1_state mp1_state); int (*smu_i2c_bus_access)(void *handle, bool acquire); int (*gfx_state_change_set)(void *handle, uint32_t state); diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h 
b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 7e85cdc5bd34..dc694cb246d9 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -222,7 +222,11 @@ union MESAPI_SET_HW_RESOURCES { uint32_t apply_grbm_remote_register_dummy_read_wa : 1; uint32_t second_gfx_pipe_enabled : 1; uint32_t enable_level_process_quantum_check : 1; - uint32_t reserved : 25; + uint32_t legacy_sch_mode : 1; + uint32_t disable_add_queue_wptr_mc_addr : 1; + uint32_t enable_mes_event_int_logging : 1; + uint32_t enable_reg_active_poll : 1; + uint32_t reserved : 21; }; uint32_t uint32_t_all; }; diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h b/drivers/gpu/drm/amd/include/yellow_carp_offset.h index 28a56b56bcaf..0fea6a746611 100644 --- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h +++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT #ifndef YELLOW_CARP_OFFSET_H #define YELLOW_CARP_OFFSET_H diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index ec055858eb95..304190d5c9d2 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -838,7 +838,8 @@ static int pp_set_fine_grain_clk_vol(void *handle, uint32_t type, long *input, u return hwmgr->hwmgr_func->set_fine_grain_clk_vol(hwmgr, type, input, size); } -static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint32_t size) +static int pp_odn_edit_dpm_table(void *handle, enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size) { struct pp_hwmgr *hwmgr = handle; @@ -1507,7 +1508,7 @@ static void pp_pm_compute_clocks(void *handle) struct pp_hwmgr *hwmgr = handle; struct amdgpu_device *adev = hwmgr->adev; - if (!amdgpu_device_has_dc_support(adev)) { + if (!adev->dc_enabled) { amdgpu_dpm_get_active_displays(adev); adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c index 67d7da0b6fed..1d829402cd2e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c @@ -75,8 +75,10 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr) for (i = 0; i < table_entries; i++) { result = hwmgr->hwmgr_func->get_pp_table_entry(hwmgr, i, state); if (result) { + kfree(hwmgr->current_ps); kfree(hwmgr->request_ps); kfree(hwmgr->ps); + hwmgr->current_ps = NULL; hwmgr->request_ps = NULL; hwmgr->ps = NULL; return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c index 97b3ad369046..b30684c84e20 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c @@ -2961,7 +2961,8 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, data->od8_settings.od8_settings_array; OverDriveTable_t *od_table = &(data->smc_state_table.overdrive_table); - int32_t input_index, input_clk, input_vol, i; + int32_t input_clk, input_vol, i; + uint32_t input_index; int od8_id; int ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b880f4d7d67e..ca3beb5d8f27 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -161,7 +161,7 @@ int 
smu_get_dpm_freq_range(struct smu_context *smu, int smu_set_gfx_power_up_by_imu(struct smu_context *smu) { - if (!smu->ppt_funcs && !smu->ppt_funcs->set_gfx_power_up_by_imu) + if (!smu->ppt_funcs || !smu->ppt_funcs->set_gfx_power_up_by_imu) return -EOPNOTSUPP; return smu->ppt_funcs->set_gfx_power_up_by_imu(smu); @@ -585,6 +585,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) yellow_carp_set_ppt_funcs(smu); break; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): smu_v13_0_4_set_ppt_funcs(smu); break; case IP_VERSION(13, 0, 5): @@ -1448,6 +1449,7 @@ static int smu_disable_dpms(struct smu_context *smu) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): return 0; default: break; @@ -1516,7 +1518,7 @@ static int smu_disable_dpms(struct smu_context *smu) } if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) && - adev->gfx.rlc.funcs->stop) + !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index f816b1dd110e..3bc4128a22ac 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -168,6 +168,7 @@ struct smu_temperature_range { int mem_crit_max; int mem_emergency_max; int software_shutdown_temp; + int software_shutdown_temp_offset; }; struct smu_state_validation_block { @@ -568,6 +569,10 @@ struct smu_context u32 param_reg; u32 msg_reg; u32 resp_reg; + + u32 debug_param_reg; + u32 debug_msg_reg; + u32 debug_resp_reg; }; struct i2c_adapter; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h index 9ebb8f39732a..8b8266890a10 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h @@ -131,7 +131,13 @@ #define PPSMC_MSG_EnableAudioStutterWA 0x44 #define PPSMC_MSG_PowerUpUmsch 0x45 #define PPSMC_MSG_PowerDownUmsch 0x46 -#define PPSMC_Message_Count 0x47 +#define PPSMC_MSG_SetDcsArch 0x47 +#define PPSMC_MSG_TriggerVFFLR 0x48 +#define PPSMC_MSG_SetNumBadMemoryPagesRetired 0x49 +#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A +#define PPSMC_MSG_SetPriorityDeltaGain 0x4B +#define PPSMC_MSG_AllowIHHostInterrupt 0x4C +#define PPSMC_Message_Count 0x4D //Debug Dump Message #define DEBUGSMC_MSG_TestMessage 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 58098b82df66..a4e3425b1027 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -239,7 +239,9 @@ __SMU_DUMMY_MAP(DriverMode2Reset), \ __SMU_DUMMY_MAP(GetGfxOffStatus), \ __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ - __SMU_DUMMY_MAP(LogGfxOffResidency), + __SMU_DUMMY_MAP(LogGfxOffResidency), \ + __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \ + __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 70b560737687..ad66d57aa102 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -79,6 +79,17 @@ MODULE_FIRMWARE("amdgpu/beige_goby_smc.bin"); #define mmTHM_BACO_CNTL_ARCT 0xA7 #define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0 
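The amdgpu_smu.c hunk above changes the guard in smu_set_gfx_power_up_by_imu from && to ||. With &&, a NULL ppt_funcs table makes the left operand true and the right operand then dereferences the NULL pointer, while a valid table short-circuits the whole test to false so a missing callback is never rejected; with || the call is refused whenever either the ops table or the specific hook is absent, and short-circuit evaluation guarantees the table is non-NULL before it is dereferenced. Below is a minimal, self-contained user-space sketch of that guard pattern, not driver code; struct smu_ops and the hook name are invented for illustration, and EOPNOTSUPP is assumed to come from a Linux/glibc errno.h.

/* Sketch of the "optional callback" guard the hunk above switches to. */
#include <errno.h>
#include <stdio.h>

struct smu_ops {
	int (*power_up_by_imu)(void);	/* optional per-ASIC hook */
};

static int do_power_up(void)
{
	puts("powering up GFX via IMU");
	return 0;
}

static int power_up_by_imu(const struct smu_ops *ops)
{
	/* Correct guard: bail out unless both the table and the hook exist. */
	if (!ops || !ops->power_up_by_imu)
		return -EOPNOTSUPP;

	return ops->power_up_by_imu();
}

int main(void)
{
	struct smu_ops with_hook = { .power_up_by_imu = do_power_up };
	struct smu_ops without_hook = { 0 };

	printf("with hook:    %d\n", power_up_by_imu(&with_hook));    /* prints 0 */
	printf("without hook: %d\n", power_up_by_imu(&without_hook)); /* prints -EOPNOTSUPP */
	printf("no table:     %d\n", power_up_by_imu(NULL));          /* prints -EOPNOTSUPP */
	return 0;
}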
+static void smu_v11_0_poll_baco_exit(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t data, loop = 0; + + do { + usleep_range(1000, 1100); + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); + } while ((data & 0x100) && (++loop < 100)); +} + int smu_v11_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1588,6 +1599,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu) if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) return false; + /* return true if ASIC is in BACO state already */ + if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; + /* Arcturus does not support this bit mask */ if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) @@ -1685,7 +1700,18 @@ int smu_v11_0_baco_enter(struct smu_context *smu) int smu_v11_0_baco_exit(struct smu_context *smu) { - return smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); + int ret; + + ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); + if (!ret) { + /* + * Poll BACO exit status to ensure FW has completed + * BACO exit process to avoid timing issues. + */ + smu_v11_0_poll_baco_exit(smu); + } + + return ret; } int smu_v11_0_mode1_reset(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 89f0f6eb19f3..f5e90e0a99df 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -250,6 +250,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): mp1_fw_flags = RREG32_PCIE(MP1_Public | (smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff)); break; @@ -301,6 +302,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP; break; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4; break; case IP_VERSION(13, 0, 5): @@ -841,6 +843,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return 0; if (enable) @@ -1376,6 +1379,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, */ uint32_t ctxid = entry->src_data[0]; uint32_t data; + uint32_t high; if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1432,6 +1436,36 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, schedule_work(&smu->throttling_logging_work); break; + case 0x8: + high = smu->thermal_range.software_shutdown_temp + + smu->thermal_range.software_shutdown_temp_offset; + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + high); + dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n", + high, + smu->thermal_range.software_shutdown_temp_offset); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; + case 0x9: + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high); + + data = 
RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; } } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index f0121d171630..87d7c66e49ef 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -70,6 +70,26 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 +#define mmMP1_SMN_C2PMSG_66 0x0282 +#define mmMP1_SMN_C2PMSG_66_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_82 0x0292 +#define mmMP1_SMN_C2PMSG_82_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_90 0x029a +#define mmMP1_SMN_C2PMSG_90_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_75 0x028b +#define mmMP1_SMN_C2PMSG_75_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_53 0x0275 +#define mmMP1_SMN_C2PMSG_53_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_54 0x0276 +#define mmMP1_SMN_C2PMSG_54_BASE_IDX 0 + +#define DEBUGSMC_MSG_Mode1Reset 2 + static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -121,6 +141,9 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0), + MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel, + PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -1789,6 +1812,69 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu, NULL); } +static int smu_v13_0_0_mode1_reset(struct smu_context *smu) +{ + int ret; + struct amdgpu_device *adev = smu->adev; + + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) + ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset); + else + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + + if (!ret) + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); + + return ret; +} + +static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); + smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); + smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); + + smu->debug_param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_53); + smu->debug_msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_75); + smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54); +} + +static int smu_v13_0_0_smu_send_bad_mem_page_num(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad page number on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetNumBadMemoryPagesRetired, + size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages number\n", + __func__); + + return ret; +} + +static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad channel info on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, + 
size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages channel info\n", + __func__); + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1856,9 +1942,11 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_enter = smu_v13_0_0_baco_enter, .baco_exit = smu_v13_0_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, - .mode1_reset = smu_v13_0_mode1_reset, + .mode1_reset = smu_v13_0_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, .set_df_cstate = smu_v13_0_0_set_df_cstate, + .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, + .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) @@ -1870,5 +1958,5 @@ void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) smu->table_map = smu_v13_0_0_table_map; smu->pwr_src_map = smu_v13_0_0_pwr_src_map; smu->workload_map = smu_v13_0_0_workload_map; - smu_v13_0_set_smu_mailbox_registers(smu); + smu_v13_0_0_set_smu_mailbox_registers(smu); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 97e1d55dcaad..8fa9a36c38b6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -1026,6 +1026,15 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = { .set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu, }; +static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); + smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); + smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); +} + void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1035,7 +1044,9 @@ void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu) smu->feature_map = smu_v13_0_4_feature_mask_map; smu->table_map = smu_v13_0_4_table_map; smu->is_apu = true; - smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); - smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); - smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); + + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4)) + smu_v13_0_4_set_smu_mailbox_registers(smu); + else + smu_v13_0_set_smu_mailbox_registers(smu); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index d74debc584f8..c3c9ef523e59 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1223,6 +1223,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu, range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index e4f8f90ac5aa..768b6e7dbd77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -233,6 +233,18 @@ 
static void __smu_cmn_send_msg(struct smu_context *smu, WREG32(smu->msg_reg, msg); } +static int __smu_cmn_send_debug_msg(struct smu_context *smu, + u32 msg, + u32 param) +{ + struct amdgpu_device *adev = smu->adev; + + WREG32(smu->debug_param_reg, param); + WREG32(smu->debug_msg_reg, msg); + WREG32(smu->debug_resp_reg, 0); + + return 0; +} /** * smu_cmn_send_msg_without_waiting -- send the message; don't wait for status * @smu: pointer to an SMU context @@ -386,6 +398,12 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, read_arg); } +int smu_cmn_send_debug_smc_msg(struct smu_context *smu, + uint32_t msg) +{ + return __smu_cmn_send_debug_msg(smu, msg, 0); +} + int smu_cmn_to_asic_specific_index(struct smu_context *smu, enum smu_cmn2asic_mapping_type type, uint32_t index) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 1526ce09c399..f82cf76dd3a4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -42,6 +42,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, enum smu_message_type msg, uint32_t *read_arg); +int smu_cmn_send_debug_smc_msg(struct smu_context *smu, + uint32_t msg); + int smu_cmn_wait_for_response(struct smu_context *smu); int smu_cmn_to_asic_specific_index(struct smu_context *smu, |
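The closing smu_cmn.c and smu_cmn.h hunks add a second, debug-only SMU mailbox alongside the regular one: smu_v13_0_0_set_smu_mailbox_registers earlier in this patch points debug_param_reg/debug_msg_reg/debug_resp_reg at MP1_SMN_C2PMSG_53/75/54, and smu_v13_0_0_mode1_reset posts DEBUGSMC_MSG_Mode1Reset (2) through it on SMU 13.0.10. The new __smu_cmn_send_debug_msg helper only writes the argument register, then the message register, then clears the response register, and returns without waiting. The sketch below is a stand-alone mock of that write ordering, not driver code; the register map and the wreg32() stub are invented for illustration.

/* Mock of the debug-mailbox posting sequence added in smu_cmn.c above. */
#include <stdint.h>
#include <stdio.h>

enum { DEBUG_PARAM_REG, DEBUG_MSG_REG, DEBUG_RESP_REG, NUM_REGS };

static uint32_t regs[NUM_REGS];

static void wreg32(unsigned int reg, uint32_t val)
{
	regs[reg] = val;
	printf("WREG32(reg %u) <- 0x%08x\n", reg, (unsigned int)val);
}

/* Mirrors the ordering in __smu_cmn_send_debug_msg(): param, message, clear response. */
static int send_debug_msg(uint32_t msg, uint32_t param)
{
	wreg32(DEBUG_PARAM_REG, param);
	wreg32(DEBUG_MSG_REG, msg);
	wreg32(DEBUG_RESP_REG, 0);
	return 0;	/* fire-and-forget; the caller decides whether/how to wait */
}

int main(void)
{
	/* e.g. DEBUGSMC_MSG_Mode1Reset (2) with no argument, as in the 13.0.10 path */
	return send_debug_msg(2, 0);
}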