author      Linus Torvalds <torvalds@linux-foundation.org>   2021-06-29 00:40:51 +0200
committer   Linus Torvalds <torvalds@linux-foundation.org>   2021-06-29 00:40:51 +0200
commit      36824f198c621cebeb22966b5e244378fa341295 (patch)
tree        ee1e358a4ed0cd022ae12b4b7ba1fa3d0e5746d5 /arch/arm64/include/asm
parent      Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/a... (diff)
parent      Merge tag 'kvmarm-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kvma... (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
 "This covers all architectures (except MIPS) so I don't expect any
  other feature pull requests this merge window.

  ARM:
   - Add MTE support in guests, complete with tag save/restore interface
   - Reduce the impact of CMOs by moving them in the page-table code
   - Allow device block mappings at stage-2
   - Reduce the footprint of the vmemmap in protected mode
   - Support the vGIC on dumb systems such as the Apple M1
   - Add selftest infrastructure to support multiple configuration and
     apply that to PMU/non-PMU setups
   - Add selftests for the debug architecture
   - The usual crop of PMU fixes

  PPC:
   - Support for the H_RPT_INVALIDATE hypercall
   - Conversion of Book3S entry/exit to C
   - Bug fixes

  S390:
   - new HW facilities for guests
   - make inline assembly more robust with KASAN and co

  x86:
   - Allow userspace to handle emulation errors (unknown instructions)
   - Lazy allocation of the rmap (host physical -> guest physical
     address)
   - Support for virtualizing TSC scaling on VMX machines
   - Optimizations to avoid shattering huge pages at the beginning of
     live migration
   - Support for initializing the PDPTRs without loading them from
     memory
   - Many TLB flushing cleanups
   - Refuse to load if two-stage paging is available but NX is not (this
     has been a requirement in practice for over a year)
   - A large series that separates the MMU mode (WP/SMAP/SMEP etc.) from
     CR0/CR4/EFER, using the MMU mode everywhere once it is computed
     from the CPU registers
   - Use PM notifier to notify the guest about host suspend or hibernate
   - Support for passing arguments to Hyper-V hypercalls using XMM
     registers
   - Support for Hyper-V TLB flush hypercalls and enlightened MSR bitmap
     on AMD processors
   - Hide Hyper-V hypercalls that are not included in the guest CPUID
   - Fixes for live migration of virtual machines that use the Hyper-V
     "enlightened VMCS" optimization of nested virtualization
   - Bugfixes (not many)

  Generic:
   - Support for retrieving statistics without debugfs
   - Cleanups for the KVM selftests API"
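The "statistics without debugfs" item refers to the new binary stats file descriptor interface. For orientation only, a minimal userspace sketch, assuming the KVM_GET_STATS_FD ioctl and struct kvm_stats_header from this release's uapi headers (error handling elided):

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    /* Fetch the binary stats fd for a VM and dump the header layout.
     * The same ioctl also works on a vCPU fd for per-vCPU stats. */
    static void dump_stats_header(int vm_fd)
    {
        struct kvm_stats_header header;
        int stats_fd = ioctl(vm_fd, KVM_GET_STATS_FD, NULL);

        /* Descriptors and data live at fixed offsets given by the header. */
        pread(stats_fd, &header, sizeof(header), 0);
        printf("%u stats, descriptors at %u, data at %u\n",
               header.num_desc, header.desc_offset, header.data_offset);
        close(stats_fd);
    }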
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (314 commits)
KVM: x86: rename apic_access_page_done to apic_access_memslot_enabled
kvm: x86: disable the narrow guest module parameter on unload
selftests: kvm: Allows userspace to handle emulation errors.
kvm: x86: Allow userspace to handle emulation errors
KVM: x86/mmu: Let guest use GBPAGES if supported in hardware and TDP is on
KVM: x86/mmu: Get CR4.SMEP from MMU, not vCPU, in shadow page fault
KVM: x86/mmu: Get CR0.WP from MMU, not vCPU, in shadow page fault
KVM: x86/mmu: Drop redundant rsvd bits reset for nested NPT
KVM: x86/mmu: Optimize and clean up so called "last nonleaf level" logic
KVM: x86: Enhance comments for MMU roles and nested transition trickiness
KVM: x86/mmu: WARN on any reserved SPTE value when making a valid SPTE
KVM: x86/mmu: Add helpers to do full reserved SPTE checks w/ generic MMU
KVM: x86/mmu: Use MMU's role to determine PTTYPE
KVM: x86/mmu: Collapse 32-bit PAE and 64-bit statements for helpers
KVM: x86/mmu: Add a helper to calculate root from role_regs
KVM: x86/mmu: Add helper to update paging metadata
KVM: x86/mmu: Don't update nested guest's paging bitmasks if CR0.PG=0
KVM: x86/mmu: Consolidate reset_rsvds_bits_mask() calls
KVM: x86/mmu: Use MMU role_regs to get LA57, and drop vCPU LA57 helper
KVM: x86/mmu: Get nested MMU's root level from the MMU's role
...
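The "handle emulation errors" work in the shortlog above is opt-in for userspace. A hedged sketch of enabling it, assuming the capability name KVM_CAP_EXIT_ON_EMULATION_FAILURE introduced by that series; once enabled, emulation failures surface as KVM_EXIT_INTERNAL_ERROR exits rather than being injected into the guest:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Ask KVM to exit to userspace on instruction-emulation failure. */
    static int enable_emulation_exits(int vm_fd)
    {
        struct kvm_enable_cap cap = {
            .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE, /* assumed name */
            .args = { 1 },
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }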
Diffstat (limited to 'arch/arm64/include/asm')
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h      |  3
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h  |  3
-rw-r--r--  arch/arm64/include/asm/kvm_host.h     | 23
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h      | 12
-rw-r--r--  arch/arm64/include/asm/kvm_mte.h      | 66
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h  | 42
-rw-r--r--  arch/arm64/include/asm/mte-def.h      |  1
-rw-r--r--  arch/arm64/include/asm/mte.h          |  4
-rw-r--r--  arch/arm64/include/asm/pgtable.h      | 22
-rw-r--r--  arch/arm64/include/asm/sysreg.h       |  3
10 files changed, 139 insertions, 40 deletions
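The hunks below are dominated by the guest MTE enablement named first in the ARM list. For context, a sketch of the matching userspace flow under stated assumptions (the KVM_CAP_ARM_MTE capability and kvm_arm_copy_mte_tags uapi struct added by this series; kvm_vm_ioctl_mte_copy_tags() declared in kvm_host.h below is the in-kernel handler):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Copy the MTE tags for one 4K page of guest memory out to 'tags':
     * one tag byte per 16-byte granule, i.e. 256 bytes per page. */
    static int save_page_tags(int vm_fd, __u64 guest_ipa, void *tags)
    {
        struct kvm_arm_copy_mte_tags copy = {
            .guest_ipa = guest_ipa,
            .length    = 4096,    /* assumes 4K pages */
            .addr      = tags,
            .flags     = KVM_ARM_TAGS_FROM_GUEST,
        };

        return ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
    }

KVM_ARM_TAGS_TO_GUEST restores tags on the destination; the capability itself must be enabled (KVM_ENABLE_CAP with KVM_CAP_ARM_MTE) before any vCPUs are created.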
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 692c9049befa..d436831dd706 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,7 +12,8 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
-#define HCR_ATA		(UL(1) << 56)
+#define HCR_ATA_SHIFT	56
+#define HCR_ATA		(UL(1) << HCR_ATA_SHIFT)
 #define HCR_FWB		(UL(1) << 46)
 #define HCR_API		(UL(1) << 41)
 #define HCR_APK		(UL(1) << 40)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 01b9857757f2..fd418955e31e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
 	    vcpu_el1_is_32bit(vcpu))
 		vcpu->arch.hcr_el2 |= HCR_TID2;
+
+	if (kvm_has_mte(vcpu->kvm))
+		vcpu->arch.hcr_el2 |= HCR_ATA;
 }
 
 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cd7d5c8c4bc..41911585ae0c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
 #define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 #define KVM_REQ_RELOAD_GICv4	KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
				      KVM_DIRTY_LOG_INITIALLY_SET)
@@ -132,6 +133,9 @@ struct kvm_arch {
 
 	u8 pfr0_csv2;
 	u8 pfr0_csv3;
+
+	/* Memory Tagging Extension enabled for the guest */
+	bool mte_enabled;
 };
 
 struct kvm_vcpu_fault_info {
@@ -206,6 +210,12 @@ enum vcpu_sysreg {
 	CNTP_CVAL_EL0,
 	CNTP_CTL_EL0,
 
+	/* Memory Tagging Extension registers */
+	RGSR_EL1,	/* Random Allocation Tag Seed Register */
+	GCR_EL1,	/* Tag Control Register */
+	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
+	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */
+
 	/* 32bit specific registers. Keep them at the end of the range */
 	DACR32_EL2,	/* Domain Access Control Register */
 	IFSR32_EL2,	/* Instruction Fault Status Register */
@@ -556,16 +566,11 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
 }
 
 struct kvm_vm_stat {
-	ulong remote_tlb_flush;
+	struct kvm_vm_stat_generic generic;
 };
 
 struct kvm_vcpu_stat {
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
+	struct kvm_vcpu_stat_generic generic;
 	u64 hvc_exit_stat;
 	u64 wfe_exit_stat;
 	u64 wfi_exit_stat;
@@ -721,6 +726,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
			       struct kvm_device_attr *attr);
 
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+				struct kvm_arm_copy_mte_tags *copy_tags);
+
 /* Guest/host FPSIMD coordination helpers */
 int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
@@ -769,6 +777,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 #define kvm_arm_vcpu_sve_finalized(vcpu) \
 	((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
 
+#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
 #define kvm_vcpu_has_pmu(vcpu)				\
 	(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index f4cbfa9025a8..b52c5c4b9a3d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -188,10 +188,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+static inline void __clean_dcache_guest_page(void *va, size_t size)
 {
-	void *va = page_address(pfn_to_page(pfn));
-
 	/*
 	 * With FWB, we ensure that the guest always accesses memory using
 	 * cacheable attributes, and we don't have to clean to PoC when
@@ -204,18 +202,14 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 	kvm_flush_dcache_to_poc(va, size);
 }
 
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
-						  unsigned long size)
+static inline void __invalidate_icache_guest_page(void *va, size_t size)
 {
 	if (icache_is_aliasing()) {
 		/* any kind of VIPT cache */
 		icache_inval_all_pou();
 	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
 		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
-		void *va = page_address(pfn_to_page(pfn));
-
-		icache_inval_pou((unsigned long)va,
-				 (unsigned long)va + size);
+		icache_inval_pou((unsigned long)va, (unsigned long)va + size);
 	}
 }
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..de002636eb1f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	tbz	\reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	tbz	\reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+	isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index c3674c47d48c..f004c0115d89 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -27,23 +27,29 @@ typedef u64 kvm_pte_t;
 
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
- * @zalloc_page:		Allocate a single zeroed memory page. The @arg parameter
- *				can be used by the walker to pass a memcache. The
- *				initial refcount of the page is 1.
- * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages. The
- *				@size parameter is in bytes, and is rounded-up to the
- *				next page boundary. The resulting allocation is
- *				physically contiguous.
- * @free_pages_exact:		Free an exact number of memory pages previously
- *				allocated by zalloc_pages_exact.
- * @get_page:			Increment the refcount on a page.
- * @put_page:			Decrement the refcount on a page. When the refcount
- *				reaches 0 the page is automatically freed.
- * @page_count:			Return the refcount of a page.
- * @phys_to_virt:		Convert a physical address into a virtual address mapped
- *				in the current context.
- * @virt_to_phys:		Convert a virtual address mapped in the current context
- *				into a physical address.
+ * @zalloc_page:		Allocate a single zeroed memory page.
+ *				The @arg parameter can be used by the walker
+ *				to pass a memcache. The initial refcount of
+ *				the page is 1.
+ * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages.
+ *				The @size parameter is in bytes, and is rounded
+ *				up to the next page boundary. The resulting
+ *				allocation is physically contiguous.
+ * @free_pages_exact:		Free an exact number of memory pages previously
+ *				allocated by zalloc_pages_exact.
+ * @get_page:			Increment the refcount on a page.
+ * @put_page:			Decrement the refcount on a page. When the
+ *				refcount reaches 0 the page is automatically
+ *				freed.
+ * @page_count:			Return the refcount of a page.
+ * @phys_to_virt:		Convert a physical address into a virtual
+ *				address mapped in the current context.
+ * @virt_to_phys:		Convert a virtual address mapped in the current
+ *				context into a physical address.
+ * @dcache_clean_inval_poc:	Clean and invalidate the data cache to the PoC
+ *				for the specified memory address range.
+ * @icache_inval_pou:		Invalidate the instruction cache to the PoU
+ *				for the specified memory address range.
  */
 struct kvm_pgtable_mm_ops {
 	void*		(*zalloc_page)(void *arg);
@@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops {
 	int		(*page_count)(void *addr);
 	void*		(*phys_to_virt)(phys_addr_t phys);
 	phys_addr_t	(*virt_to_phys)(void *addr);
+	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
+	void		(*icache_inval_pou)(void *addr, size_t size);
 };
 
 /**
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index cf241b0f0a42..626d359b396e 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -7,6 +7,7 @@
 
 #define MTE_GRANULE_SIZE	UL(16)
 #define MTE_GRANULE_MASK	(~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE	(PAGE_SIZE / MTE_GRANULE_SIZE)
 #define MTE_TAG_SHIFT		56
 #define MTE_TAG_SIZE		4
 #define MTE_TAG_MASK		GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 67bf259ae768..58c7f80f5596 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -38,7 +38,7 @@ void mte_free_tag_storage(char *storage);
 #define PG_mte_tagged	PG_arch_2
 
 void mte_zero_clear_page_tags(void *addr);
-void mte_sync_tags(pte_t *ptep, pte_t pte);
+void mte_sync_tags(pte_t old_pte, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
@@ -57,7 +57,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
 static inline void mte_zero_clear_page_tags(void *addr)
 {
 }
-static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
 {
 }
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 11e60d0cd9b6..c0ba8cdfa10a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
 		__sync_icache_dcache(pte);
 
-	if (system_supports_mte() &&
-	    pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
-		mte_sync_tags(ptep, pte);
+	/*
+	 * If the PTE would provide user space access to the tags associated
+	 * with it then ensure that the MTE tags are synchronised. Although
+	 * pte_access_permitted() returns false for exec only mappings, they
+	 * don't expose tags (instruction fetches don't check tags).
+	 */
+	if (system_supports_mte() && pte_access_permitted(pte, false) &&
+	    !pte_special(pte)) {
+		pte_t old_pte = READ_ONCE(*ptep);
+		/*
+		 * We only need to synchronise if the new PTE has tags enabled
+		 * or if swapping in (in which case another mapping may have
+		 * set tags in the past even if this PTE isn't tagged).
+		 * (!pte_none() && !pte_present()) is an open coded version of
+		 * is_swap_pte()
+		 */
+		if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
+			mte_sync_tags(old_pte, pte);
+	}
 
 	__check_racy_pte_update(mm, ptep, pte);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 9ea84bcddf85..7b9c3acba684 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -651,7 +651,8 @@
 
 #define INIT_SCTLR_EL2_MMU_ON						\
	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
-	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
+	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
 
 #define INIT_SCTLR_EL2_MMU_OFF \
	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)