Diffstat (limited to 'arch')
-rw-r--r-- arch/arm/Kconfig | 1
-rw-r--r-- arch/arm/boot/dts/rockchip/rk3066a.dtsi | 1
-rw-r--r-- arch/arm/include/asm/cmpxchg.h | 7
-rw-r--r-- arch/arm/kernel/Makefile | 2
-rw-r--r-- arch/arm/kernel/perf_event_v6.c | 448
-rw-r--r-- arch/arm/kernel/perf_event_v7.c | 2005
-rw-r--r-- arch/arm/kernel/perf_event_xscale.c | 748
-rw-r--r-- arch/arm/mach-davinci/pm.c | 2
-rw-r--r-- arch/arm/tools/syscall.tbl | 1
-rw-r--r-- arch/arm64/Kconfig | 40
-rw-r--r-- arch/arm64/Kconfig.platforms | 2
-rw-r--r-- arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts | 2
-rw-r--r-- arch/arm64/boot/dts/qcom/qdu1000.dtsi | 16
-rw-r--r-- arch/arm64/boot/dts/qcom/sc8280xp-crd.dts | 3
-rw-r--r-- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 15
-rw-r--r-- arch/arm64/boot/dts/qcom/sm6115.dtsi | 1
-rw-r--r-- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 4
-rw-r--r-- arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi | 11
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts | 18
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3308.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts | 4
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3368.dtsi | 3
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts | 2
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts | 1
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts | 1
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts | 1
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi | 5
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts | 4
-rw-r--r-- arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts | 2
-rw-r--r-- arch/arm64/configs/defconfig | 1
-rw-r--r-- arch/arm64/include/asm/acpi.h | 12
-rw-r--r-- arch/arm64/include/asm/arch_gicv3.h | 15
-rw-r--r-- arch/arm64/include/asm/arch_timer.h | 2
-rw-r--r-- arch/arm64/include/asm/arm_pmuv3.h | 2
-rw-r--r-- arch/arm64/include/asm/asm-extable.h | 3
-rw-r--r-- arch/arm64/include/asm/cpucaps.h | 2
-rw-r--r-- arch/arm64/include/asm/cpufeature.h | 4
-rw-r--r-- arch/arm64/include/asm/cputype.h | 6
-rw-r--r-- arch/arm64/include/asm/esr.h | 33
-rw-r--r-- arch/arm64/include/asm/mmu_context.h | 4
-rw-r--r-- arch/arm64/include/asm/mte.h | 4
-rw-r--r-- arch/arm64/include/asm/pgtable-hwdef.h | 1
-rw-r--r-- arch/arm64/include/asm/ptrace.h | 35
-rw-r--r-- arch/arm64/include/asm/runtime-const.h | 88
-rw-r--r-- arch/arm64/include/asm/smp.h | 13
-rw-r--r-- arch/arm64/include/asm/sysreg.h | 4
-rw-r--r-- arch/arm64/include/asm/uaccess.h | 169
-rw-r--r-- arch/arm64/include/asm/unistd32.h | 2
-rw-r--r-- arch/arm64/include/asm/word-at-a-time.h | 11
-rw-r--r-- arch/arm64/kernel/Makefile | 1
-rw-r--r-- arch/arm64/kernel/acpi.c | 129
-rw-r--r-- arch/arm64/kernel/acpi_numa.c | 11
-rw-r--r-- arch/arm64/kernel/cpu_errata.c | 17
-rw-r--r-- arch/arm64/kernel/cpufeature.c | 5
-rw-r--r-- arch/arm64/kernel/cpuidle.c | 74
-rw-r--r-- arch/arm64/kernel/image-vars.h | 5
-rw-r--r-- arch/arm64/kernel/mte.c | 12
-rw-r--r-- arch/arm64/kernel/pi/map_kernel.c | 2
-rw-r--r-- arch/arm64/kernel/proton-pack.c | 2
-rw-r--r-- arch/arm64/kernel/psci.c | 2
-rw-r--r-- arch/arm64/kernel/reloc_test_core.c | 1
-rw-r--r-- arch/arm64/kernel/smp.c | 74
-rw-r--r-- arch/arm64/kernel/syscall.c | 16
-rw-r--r-- arch/arm64/kernel/vmlinux.lds.S | 3
-rw-r--r-- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 10
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 4
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 2
-rw-r--r-- arch/arm64/kvm/pmu-emul.c | 1
-rw-r--r-- arch/arm64/mm/mmu.c | 3
-rw-r--r-- arch/csky/include/uapi/asm/unistd.h | 1
-rw-r--r-- arch/csky/kernel/syscall.c | 2
-rw-r--r-- arch/hexagon/include/asm/syscalls.h | 6
-rw-r--r-- arch/hexagon/include/uapi/asm/unistd.h | 1
-rw-r--r-- arch/hexagon/kernel/syscalltab.c | 7
-rw-r--r-- arch/loongarch/kernel/syscall.c | 2
-rw-r--r-- arch/m68k/amiga/config.c | 9
-rw-r--r-- arch/m68k/atari/ataints.c | 6
-rw-r--r-- arch/m68k/configs/amiga_defconfig | 1
-rw-r--r-- arch/m68k/configs/apollo_defconfig | 1
-rw-r--r-- arch/m68k/configs/atari_defconfig | 1
-rw-r--r-- arch/m68k/configs/bvme6000_defconfig | 1
-rw-r--r-- arch/m68k/configs/hp300_defconfig | 1
-rw-r--r-- arch/m68k/configs/mac_defconfig | 1
-rw-r--r-- arch/m68k/configs/multi_defconfig | 1
-rw-r--r-- arch/m68k/configs/mvme147_defconfig | 1
-rw-r--r-- arch/m68k/configs/mvme16x_defconfig | 1
-rw-r--r-- arch/m68k/configs/q40_defconfig | 1
-rw-r--r-- arch/m68k/configs/sun3_defconfig | 1
-rw-r--r-- arch/m68k/configs/sun3x_defconfig | 1
-rw-r--r-- arch/m68k/emu/nfblock.c | 4
-rw-r--r-- arch/m68k/emu/nfcon.c | 1
-rw-r--r-- arch/m68k/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/microblaze/kernel/sys_microblaze.c | 2
-rw-r--r-- arch/mips/kernel/syscalls/syscall_n32.tbl | 2
-rw-r--r-- arch/mips/kernel/syscalls/syscall_o32.tbl | 2
-rw-r--r-- arch/parisc/Kconfig | 1
-rw-r--r-- arch/parisc/kernel/sys_parisc32.c | 9
-rw-r--r-- arch/parisc/kernel/syscalls/syscall.tbl | 6
-rw-r--r-- arch/powerpc/kernel/eeh_pe.c | 7
-rw-r--r-- arch/powerpc/kernel/head_64.S | 5
-rw-r--r-- arch/powerpc/kernel/syscalls/syscall.tbl | 6
-rw-r--r-- arch/powerpc/kexec/core_64.c | 11
-rw-r--r-- arch/powerpc/platforms/pseries/kexec.c | 8
-rw-r--r-- arch/powerpc/platforms/pseries/pseries.h | 1
-rw-r--r-- arch/powerpc/platforms/pseries/setup.c | 5
-rw-r--r-- arch/riscv/boot/dts/canaan/canaan_kd233.dts | 7
-rw-r--r-- arch/riscv/boot/dts/canaan/k210.dtsi | 23
-rw-r--r-- arch/riscv/boot/dts/canaan/k210_generic.dts | 5
-rw-r--r-- arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts | 9
-rw-r--r-- arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts | 7
-rw-r--r-- arch/riscv/boot/dts/canaan/sipeed_maix_go.dts | 9
-rw-r--r-- arch/riscv/boot/dts/canaan/sipeed_maixduino.dts | 10
-rw-r--r-- arch/riscv/boot/dts/starfive/jh7110-common.dtsi | 2
-rw-r--r-- arch/riscv/include/asm/insn.h | 2
-rw-r--r-- arch/riscv/kernel/ftrace.c | 7
-rw-r--r-- arch/riscv/kernel/machine_kexec.c | 10
-rw-r--r-- arch/riscv/kernel/patch.c | 26
-rw-r--r-- arch/riscv/kernel/stacktrace.c | 5
-rw-r--r-- arch/riscv/kernel/sys_riscv.c | 4
-rw-r--r-- arch/riscv/kvm/vcpu_pmu.c | 2
-rw-r--r-- arch/s390/boot/startup.c | 11
-rw-r--r-- arch/s390/configs/debug_defconfig | 5
-rw-r--r-- arch/s390/configs/defconfig | 5
-rw-r--r-- arch/s390/include/asm/entry-common.h | 2
-rw-r--r-- arch/s390/include/asm/kvm_host.h | 1
-rw-r--r-- arch/s390/kernel/syscall.c | 27
-rw-r--r-- arch/s390/kernel/syscalls/syscall.tbl | 2
-rw-r--r-- arch/s390/kvm/kvm-s390.c | 1
-rw-r--r-- arch/s390/kvm/kvm-s390.h | 15
-rw-r--r-- arch/s390/kvm/priv.c | 32
-rw-r--r-- arch/s390/mm/pgalloc.c | 4
-rw-r--r-- arch/s390/pci/pci_irq.c | 2
-rw-r--r-- arch/sh/kernel/sys_sh32.c | 11
-rw-r--r-- arch/sh/kernel/syscalls/syscall.tbl | 3
-rw-r--r-- arch/sparc/kernel/sys32.S | 221
-rw-r--r-- arch/sparc/kernel/syscalls/syscall.tbl | 8
-rw-r--r-- arch/um/drivers/ubd_kern.c | 53
-rw-r--r-- arch/x86/Kconfig | 7
-rw-r--r-- arch/x86/Kconfig.assembler | 2
-rw-r--r-- arch/x86/boot/compressed/misc.c | 5
-rw-r--r-- arch/x86/boot/compressed/sev.c | 86
-rw-r--r-- arch/x86/boot/cpucheck.c | 2
-rw-r--r-- arch/x86/boot/main.c | 42
-rw-r--r-- arch/x86/coco/Makefile | 1
-rw-r--r-- arch/x86/coco/core.c | 1
-rw-r--r-- arch/x86/coco/sev/Makefile | 15
-rw-r--r-- arch/x86/coco/sev/core.c (renamed from arch/x86/kernel/sev.c) | 449
-rw-r--r-- arch/x86/coco/sev/shared.c (renamed from arch/x86/kernel/sev-shared.c) | 460
-rw-r--r-- arch/x86/coco/tdx/tdx.c | 121
-rw-r--r-- arch/x86/entry/entry_64_compat.S | 14
-rw-r--r-- arch/x86/entry/syscall_32.c | 10
-rw-r--r-- arch/x86/entry/syscall_64.c | 9
-rw-r--r-- arch/x86/entry/syscall_x32.c | 7
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl | 9
-rw-r--r-- arch/x86/entry/syscalls/syscall_64.tbl | 7
-rw-r--r-- arch/x86/events/intel/core.c | 212
-rw-r--r-- arch/x86/events/rapl.c | 90
-rw-r--r-- arch/x86/hyperv/ivm.c | 22
-rw-r--r-- arch/x86/include/asm/acpi.h | 7
-rw-r--r-- arch/x86/include/asm/alternative.h | 241
-rw-r--r-- arch/x86/include/asm/amd_nb.h | 4
-rw-r--r-- arch/x86/include/asm/cmpxchg_32.h | 12
-rw-r--r-- arch/x86/include/asm/cpu_device_id.h | 8
-rw-r--r-- arch/x86/include/asm/cpufeatures.h | 801
-rw-r--r-- arch/x86/include/asm/entry-common.h | 15
-rw-r--r-- arch/x86/include/asm/init.h | 3
-rw-r--r-- arch/x86/include/asm/intel_pconfig.h | 65
-rw-r--r-- arch/x86/include/asm/irqflags.h | 20
-rw-r--r-- arch/x86/include/asm/mce.h | 3
-rw-r--r-- arch/x86/include/asm/msr-index.h | 3
-rw-r--r-- arch/x86/include/asm/page_64.h | 2
-rw-r--r-- arch/x86/include/asm/pgtable.h | 5
-rw-r--r-- arch/x86/include/asm/pgtable_types.h | 1
-rw-r--r-- arch/x86/include/asm/processor.h | 12
-rw-r--r-- arch/x86/include/asm/runtime-const.h | 61
-rw-r--r-- arch/x86/include/asm/set_memory.h | 3
-rw-r--r-- arch/x86/include/asm/setup.h | 8
-rw-r--r-- arch/x86/include/asm/sev-common.h | 18
-rw-r--r-- arch/x86/include/asm/sev.h | 135
-rw-r--r-- arch/x86/include/asm/smp.h | 1
-rw-r--r-- arch/x86/include/asm/tsc.h | 3
-rw-r--r-- arch/x86/include/asm/vdso/gettimeofday.h | 5
-rw-r--r-- arch/x86/include/asm/vdso/vsyscall.h | 1
-rw-r--r-- arch/x86/include/asm/vgtod.h | 5
-rw-r--r-- arch/x86/include/asm/vmware.h | 336
-rw-r--r-- arch/x86/include/asm/vmxfeatures.h | 110
-rw-r--r-- arch/x86/include/asm/word-at-a-time.h | 57
-rw-r--r-- arch/x86/include/asm/x86_init.h | 14
-rw-r--r-- arch/x86/include/uapi/asm/svm.h | 1
-rw-r--r-- arch/x86/kernel/Makefile | 6
-rw-r--r-- arch/x86/kernel/acpi/Makefile | 1
-rw-r--r-- arch/x86/kernel/acpi/boot.c | 86
-rw-r--r-- arch/x86/kernel/acpi/madt_playdead.S | 28
-rw-r--r-- arch/x86/kernel/acpi/madt_wakeup.c | 292
-rw-r--r-- arch/x86/kernel/alternative.c | 22
-rw-r--r-- arch/x86/kernel/amd_nb.c | 44
-rw-r--r-- arch/x86/kernel/cpu/Makefile | 2
-rw-r--r-- arch/x86/kernel/cpu/amd.c | 11
-rw-r--r-- arch/x86/kernel/cpu/bugs.c | 16
-rw-r--r-- arch/x86/kernel/cpu/intel.c | 188
-rw-r--r-- arch/x86/kernel/cpu/intel_pconfig.c | 84
-rw-r--r-- arch/x86/kernel/cpu/mce/core.c | 7
-rw-r--r-- arch/x86/kernel/cpu/mce/inject.c | 9
-rw-r--r-- arch/x86/kernel/cpu/mkcapflags.sh | 3
-rw-r--r-- arch/x86/kernel/cpu/resctrl/core.c | 312
-rw-r--r-- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 89
-rw-r--r-- arch/x86/kernel/cpu/resctrl/internal.h | 108
-rw-r--r-- arch/x86/kernel/cpu/resctrl/monitor.c | 250
-rw-r--r-- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 42
-rw-r--r-- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 288
-rw-r--r-- arch/x86/kernel/cpu/vmware.c | 225
-rw-r--r-- arch/x86/kernel/crash.c | 12
-rw-r--r-- arch/x86/kernel/devicetree.c | 2
-rw-r--r-- arch/x86/kernel/e820.c | 9
-rw-r--r-- arch/x86/kernel/fpu/xstate.h | 14
-rw-r--r-- arch/x86/kernel/process.c | 7
-rw-r--r-- arch/x86/kernel/reboot.c | 18
-rw-r--r-- arch/x86/kernel/relocate_kernel_64.S | 27
-rw-r--r-- arch/x86/kernel/setup.c | 2
-rw-r--r-- arch/x86/kernel/time.c | 20
-rw-r--r-- arch/x86/kernel/tsc.c | 92
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S | 3
-rw-r--r-- arch/x86/kernel/x86_init.c | 8
-rw-r--r-- arch/x86/kvm/emulate.c | 2
-rw-r--r-- arch/x86/lib/cmdline.c | 8
-rw-r--r-- arch/x86/lib/getuser.S | 69
-rw-r--r-- arch/x86/lib/iomem.c | 5
-rw-r--r-- arch/x86/mm/ident_map.c | 73
-rw-r--r-- arch/x86/mm/init_64.c | 16
-rw-r--r-- arch/x86/mm/mem_encrypt_amd.c | 16
-rw-r--r-- arch/x86/mm/pat/set_memory.c | 75
-rw-r--r-- arch/x86/pci/intel_mid_pci.c | 8
-rw-r--r-- arch/x86/pci/xen.c | 4
-rw-r--r-- arch/x86/platform/intel-mid/intel-mid.c | 6
-rw-r--r-- arch/x86/platform/intel/iosf_mbi.c | 4
-rw-r--r-- arch/x86/um/sys_call_table_32.c | 10
-rw-r--r-- arch/x86/um/sys_call_table_64.c | 11
-rw-r--r-- arch/x86/virt/svm/sev.c | 44
-rw-r--r-- arch/x86/virt/vmx/tdx/tdx.c | 8
-rw-r--r-- arch/xtensa/include/asm/current.h | 2
-rw-r--r-- arch/xtensa/include/asm/thread_info.h | 2
-rw-r--r-- arch/xtensa/platforms/iss/simdisk.c | 5
243 files changed, 4909 insertions(+), 6105 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ee5115252aac..a867a7d967aa 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -34,6 +34,7 @@ config ARM
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
+ select ARCH_NEED_CMPXCHG_1_EMU if CPU_V6
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_CFI_CLANG
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
diff --git a/arch/arm/boot/dts/rockchip/rk3066a.dtsi b/arch/arm/boot/dts/rockchip/rk3066a.dtsi
index a4e815cd6707..5e0750547ab5 100644
--- a/arch/arm/boot/dts/rockchip/rk3066a.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3066a.dtsi
@@ -144,6 +144,7 @@
pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>;
power-domains = <&power RK3066_PD_VIO>;
rockchip,grf = <&grf>;
+ #sound-dai-cells = <0>;
status = "disabled";
ports {
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index 44667bdb4707..9beb64d30586 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -5,6 +5,7 @@
#include <linux/irqflags.h>
#include <linux/prefetch.h>
#include <asm/barrier.h>
+#include <linux/cmpxchg-emu.h>
#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
/*
@@ -162,7 +163,11 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
prefetchw((const void *)ptr);
switch (size) {
-#ifndef CONFIG_CPU_V6 /* min ARCH >= ARMv6K */
+#ifdef CONFIG_CPU_V6 /* ARCH == ARMv6 */
+ case 1:
+ oldval = cmpxchg_emu_u8((volatile u8 *)ptr, old, new);
+ break;
+#else /* min ARCH > ARMv6 */
case 1:
do {
asm volatile("@ __cmpxchg1\n"
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 89a77e3f51d2..aaae31b8c4a5 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -78,8 +78,6 @@ obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o
obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o
obj-$(CONFIG_IWMMXT) += iwmmxt.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \
- perf_event_v7.o
AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt
obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
obj-$(CONFIG_VDSO) += vdso.o
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
deleted file mode 100644
index d9fd53841591..000000000000
--- a/arch/arm/kernel/perf_event_v6.c
+++ /dev/null
@@ -1,448 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARMv6 Performance counter handling code.
- *
- * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- *
- * ARMv6 has 2 configurable performance counters and a single cycle counter.
- * They all share a single reset bit but can be written to zero so we can use
- * that for a reset.
- *
- * The counters can't be individually enabled or disabled so when we remove
- * one event and replace it with another we could get spurious counts from the
- * wrong event. However, we can take advantage of the fact that the
- * performance counters can export events to the event bus, and the event bus
- * itself can be monitored. This requires that we *don't* export the events to
- * the event bus. The procedure for disabling a configurable counter is:
- * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
- * effectively stops the counter from counting.
- * - disable the counter's interrupt generation (each counter has its
- * own interrupt enable bit).
- * Once stopped, the counter value can be written as 0 to reset.
- *
- * To enable a counter:
- * - enable the counter's interrupt generation.
- * - set the new event type.
- *
- * Note: the dedicated cycle counter only counts cycles and can't be
- * enabled/disabled independently of the others. When we want to disable the
- * cycle counter, we have to just disable the interrupt reporting and start
- * ignoring that counter. When re-enabling, we have to reset the value and
- * enable the interrupt.
- */
-
-#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
-
-#include <asm/cputype.h>
-#include <asm/irq_regs.h>
-
-#include <linux/of.h>
-#include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
-
-enum armv6_perf_types {
- ARMV6_PERFCTR_ICACHE_MISS = 0x0,
- ARMV6_PERFCTR_IBUF_STALL = 0x1,
- ARMV6_PERFCTR_DDEP_STALL = 0x2,
- ARMV6_PERFCTR_ITLB_MISS = 0x3,
- ARMV6_PERFCTR_DTLB_MISS = 0x4,
- ARMV6_PERFCTR_BR_EXEC = 0x5,
- ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
- ARMV6_PERFCTR_INSTR_EXEC = 0x7,
- ARMV6_PERFCTR_DCACHE_HIT = 0x9,
- ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
- ARMV6_PERFCTR_DCACHE_MISS = 0xB,
- ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
- ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
- ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
- ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
- ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
- ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
- ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
- ARMV6_PERFCTR_NOP = 0x20,
-};
-
-enum armv6_counters {
- ARMV6_CYCLE_COUNTER = 0,
- ARMV6_COUNTER0,
- ARMV6_COUNTER1,
-};
-
-/*
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
-};
-
-static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- /*
- * The performance counters don't differentiate between read and write
- * accesses/misses so this isn't strictly correct, but it's the best we
- * can do. Writes and reads get combined.
- */
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
-
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
-
- /*
- * The ARM performance counters can count micro DTLB misses, micro ITLB
- * misses and main TLB misses. There isn't an event for TLB misses, so
- * use the micro misses here and if users want the main TLB misses they
- * can use a raw counter.
- */
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
-};
-
-static inline unsigned long
-armv6_pmcr_read(void)
-{
- u32 val;
- asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
- return val;
-}
-
-static inline void
-armv6_pmcr_write(unsigned long val)
-{
- asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
-}
-
-#define ARMV6_PMCR_ENABLE (1 << 0)
-#define ARMV6_PMCR_CTR01_RESET (1 << 1)
-#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
-#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
-#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
-#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
-#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
-#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
-#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
-#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
-#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
-#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
-#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
-#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
-
-#define ARMV6_PMCR_OVERFLOWED_MASK \
- (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
- ARMV6_PMCR_CCOUNT_OVERFLOW)
-
-static inline int
-armv6_pmcr_has_overflowed(unsigned long pmcr)
-{
- return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
-}
-
-static inline int
-armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
- enum armv6_counters counter)
-{
- int ret = 0;
-
- if (ARMV6_CYCLE_COUNTER == counter)
- ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
- else if (ARMV6_COUNTER0 == counter)
- ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
- else if (ARMV6_COUNTER1 == counter)
- ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
- else
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-
- return ret;
-}
-
-static inline u64 armv6pmu_read_counter(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int counter = hwc->idx;
- unsigned long value = 0;
-
- if (ARMV6_CYCLE_COUNTER == counter)
- asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
- else if (ARMV6_COUNTER0 == counter)
- asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
- else if (ARMV6_COUNTER1 == counter)
- asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
- else
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-
- return value;
-}
-
-static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
-{
- struct hw_perf_event *hwc = &event->hw;
- int counter = hwc->idx;
-
- if (ARMV6_CYCLE_COUNTER == counter)
- asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
- else if (ARMV6_COUNTER0 == counter)
- asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
- else if (ARMV6_COUNTER1 == counter)
- asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
- else
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-}
-
-static void armv6pmu_enable_event(struct perf_event *event)
-{
- unsigned long val, mask, evt;
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- if (ARMV6_CYCLE_COUNTER == idx) {
- mask = 0;
- evt = ARMV6_PMCR_CCOUNT_IEN;
- } else if (ARMV6_COUNTER0 == idx) {
- mask = ARMV6_PMCR_EVT_COUNT0_MASK;
- evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
- ARMV6_PMCR_COUNT0_IEN;
- } else if (ARMV6_COUNTER1 == idx) {
- mask = ARMV6_PMCR_EVT_COUNT1_MASK;
- evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
- ARMV6_PMCR_COUNT1_IEN;
- } else {
- WARN_ONCE(1, "invalid counter number (%d)\n", idx);
- return;
- }
-
- /*
- * Mask out the current event and set the counter to count the event
- * that we're interested in.
- */
- val = armv6_pmcr_read();
- val &= ~mask;
- val |= evt;
- armv6_pmcr_write(val);
-}
-
-static irqreturn_t
-armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
-{
- unsigned long pmcr = armv6_pmcr_read();
- struct perf_sample_data data;
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
- struct pt_regs *regs;
- int idx;
-
- if (!armv6_pmcr_has_overflowed(pmcr))
- return IRQ_NONE;
-
- regs = get_irq_regs();
-
- /*
- * The interrupts are cleared by writing the overflow flags back to
- * the control register. All of the other bits don't have any effect
- * if they are rewritten, so write the whole value back.
- */
- armv6_pmcr_write(pmcr);
-
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- /* Ignore if we don't have an event. */
- if (!event)
- continue;
-
- /*
- * We have a single interrupt for all counters. Check that
- * each counter has overflowed before we process it.
- */
- if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
- continue;
-
- hwc = &event->hw;
- armpmu_event_update(event);
- perf_sample_data_init(&data, 0, hwc->last_period);
- if (!armpmu_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
- }
-
- /*
- * Handle the pending perf events.
- *
- * Note: this call *must* be run with interrupts disabled. For
- * platforms that can have the PMU interrupts raised as an NMI, this
- * will not work.
- */
- irq_work_run();
-
- return IRQ_HANDLED;
-}
-
-static void armv6pmu_start(struct arm_pmu *cpu_pmu)
-{
- unsigned long val;
-
- val = armv6_pmcr_read();
- val |= ARMV6_PMCR_ENABLE;
- armv6_pmcr_write(val);
-}
-
-static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
-{
- unsigned long val;
-
- val = armv6_pmcr_read();
- val &= ~ARMV6_PMCR_ENABLE;
- armv6_pmcr_write(val);
-}
-
-static int
-armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- /* Always place a cycle counter into the cycle counter. */
- if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
- if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
- return -EAGAIN;
-
- return ARMV6_CYCLE_COUNTER;
- } else {
- /*
- * For anything other than a cycle counter, try and use
- * counter0 and counter1.
- */
- if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
- return ARMV6_COUNTER1;
-
- if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
- return ARMV6_COUNTER0;
-
- /* The counters are all in use. */
- return -EAGAIN;
- }
-}
-
-static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- clear_bit(event->hw.idx, cpuc->used_mask);
-}
-
-static void armv6pmu_disable_event(struct perf_event *event)
-{
- unsigned long val, mask, evt;
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- if (ARMV6_CYCLE_COUNTER == idx) {
- mask = ARMV6_PMCR_CCOUNT_IEN;
- evt = 0;
- } else if (ARMV6_COUNTER0 == idx) {
- mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
- evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
- } else if (ARMV6_COUNTER1 == idx) {
- mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
- evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
- } else {
- WARN_ONCE(1, "invalid counter number (%d)\n", idx);
- return;
- }
-
- /*
- * Mask out the current event and set the counter to count the number
- * of ETM bus signal assertion cycles. The external reporting should
- * be disabled and so this should never increment.
- */
- val = armv6_pmcr_read();
- val &= ~mask;
- val |= evt;
- armv6_pmcr_write(val);
-}
-
-static int armv6_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv6_perf_map,
- &armv6_perf_cache_map, 0xFF);
-}
-
-static void armv6pmu_init(struct arm_pmu *cpu_pmu)
-{
- cpu_pmu->handle_irq = armv6pmu_handle_irq;
- cpu_pmu->enable = armv6pmu_enable_event;
- cpu_pmu->disable = armv6pmu_disable_event;
- cpu_pmu->read_counter = armv6pmu_read_counter;
- cpu_pmu->write_counter = armv6pmu_write_counter;
- cpu_pmu->get_event_idx = armv6pmu_get_event_idx;
- cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx;
- cpu_pmu->start = armv6pmu_start;
- cpu_pmu->stop = armv6pmu_stop;
- cpu_pmu->map_event = armv6_map_event;
- cpu_pmu->num_events = 3;
-}
-
-static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv6pmu_init(cpu_pmu);
- cpu_pmu->name = "armv6_1136";
- return 0;
-}
-
-static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv6pmu_init(cpu_pmu);
- cpu_pmu->name = "armv6_1156";
- return 0;
-}
-
-static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv6pmu_init(cpu_pmu);
- cpu_pmu->name = "armv6_1176";
- return 0;
-}
-
-static const struct of_device_id armv6_pmu_of_device_ids[] = {
- {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
- {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
- { /* sentinel value */ }
-};
-
-static const struct pmu_probe_info armv6_pmu_probe_table[] = {
- ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
- { /* sentinel value */ }
-};
-
-static int armv6_pmu_device_probe(struct platform_device *pdev)
-{
- return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids,
- armv6_pmu_probe_table);
-}
-
-static struct platform_driver armv6_pmu_driver = {
- .driver = {
- .name = "armv6-pmu",
- .of_match_table = armv6_pmu_of_device_ids,
- },
- .probe = armv6_pmu_device_probe,
-};
-
-builtin_platform_driver(armv6_pmu_driver);
-#endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
deleted file mode 100644
index a3322e2b3ea4..000000000000
--- a/arch/arm/kernel/perf_event_v7.c
+++ /dev/null
@@ -1,2005 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
- *
- * ARMv7 support: Jean Pihet <jpihet@mvista.com>
- * 2010 (c) MontaVista Software, LLC.
- *
- * Copied from ARMv6 code, with the low level code inspired
- * by the ARMv7 Oprofile code.
- *
- * Cortex-A8 has up to 4 configurable performance counters and
- * a single cycle counter.
- * Cortex-A9 has up to 31 configurable performance counters and
- * a single cycle counter.
- *
- * All counters can be enabled/disabled and IRQ masked separately. The cycle
- * counter and all 4 performance counters together can be reset separately.
- */
-
-#ifdef CONFIG_CPU_V7
-
-#include <asm/cp15.h>
-#include <asm/cputype.h>
-#include <asm/irq_regs.h>
-#include <asm/vfp.h>
-#include "../vfp/vfpinstr.h"
-
-#include <linux/of.h>
-#include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
-
-/*
- * Common ARMv7 event types
- *
- * Note: An implementation may not be able to count all of these events
- * but the encodings are considered to be `reserved' in the case that
- * they are not available.
- */
-#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00
-#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01
-#define ARMV7_PERFCTR_ITLB_REFILL 0x02
-#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03
-#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04
-#define ARMV7_PERFCTR_DTLB_REFILL 0x05
-#define ARMV7_PERFCTR_MEM_READ 0x06
-#define ARMV7_PERFCTR_MEM_WRITE 0x07
-#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08
-#define ARMV7_PERFCTR_EXC_TAKEN 0x09
-#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A
-#define ARMV7_PERFCTR_CID_WRITE 0x0B
-
-/*
- * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
- * It counts:
- * - all (taken) branch instructions,
- * - instructions that explicitly write the PC,
- * - exception generating instructions.
- */
-#define ARMV7_PERFCTR_PC_WRITE 0x0C
-#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D
-#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E
-#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F
-#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10
-#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11
-#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12
-
-/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */
-#define ARMV7_PERFCTR_MEM_ACCESS 0x13
-#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14
-#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15
-#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16
-#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17
-#define ARMV7_PERFCTR_L2_CACHE_WB 0x18
-#define ARMV7_PERFCTR_BUS_ACCESS 0x19
-#define ARMV7_PERFCTR_MEM_ERROR 0x1A
-#define ARMV7_PERFCTR_INSTR_SPEC 0x1B
-#define ARMV7_PERFCTR_TTBR_WRITE 0x1C
-#define ARMV7_PERFCTR_BUS_CYCLES 0x1D
-
-#define ARMV7_PERFCTR_CPU_CYCLES 0xFF
-
-/* ARMv7 Cortex-A8 specific event types */
-#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43
-#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44
-#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50
-#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56
-
-/* ARMv7 Cortex-A9 specific event types */
-#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68
-#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60
-#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66
-
-/* ARMv7 Cortex-A5 specific event types */
-#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2
-#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3
-
-/* ARMv7 Cortex-A15 specific event types */
-#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
-#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
-#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42
-#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43
-
-#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C
-#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D
-
-#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50
-#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
-#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52
-#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53
-
-#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76
-
-/* ARMv7 Cortex-A12 specific event types */
-#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40
-#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41
-
-#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50
-#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51
-
-#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76
-
-#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7
-
-/* ARMv7 Krait specific event types */
-#define KRAIT_PMRESR0_GROUP0 0xcc
-#define KRAIT_PMRESR1_GROUP0 0xd0
-#define KRAIT_PMRESR2_GROUP0 0xd4
-#define KRAIT_VPMRESR0_GROUP0 0xd8
-
-#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011
-#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010
-
-#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222
-#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210
-
-/* ARMv7 Scorpion specific event types */
-#define SCORPION_LPM0_GROUP0 0x4c
-#define SCORPION_LPM1_GROUP0 0x50
-#define SCORPION_LPM2_GROUP0 0x54
-#define SCORPION_L2LPM_GROUP0 0x58
-#define SCORPION_VLPM_GROUP0 0x5c
-
-#define SCORPION_ICACHE_ACCESS 0x10053
-#define SCORPION_ICACHE_MISS 0x10052
-
-#define SCORPION_DTLB_ACCESS 0x12013
-#define SCORPION_DTLB_MISS 0x12012
-
-#define SCORPION_ITLB_MISS 0x12021
-
-/*
- * Cortex-A8 HW events mapping
- *
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE,
-};
-
-static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- /*
- * The performance counters don't differentiate between read and write
- * accesses/misses so this isn't strictly correct, but it's the best we
- * can do. Writes and reads get combined.
- */
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
-
- [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
- [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
- [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
- [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Cortex-A9 HW events mapping
- */
-static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH,
-};
-
-static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- /*
- * The performance counters don't differentiate between read and write
- * accesses/misses so this isn't strictly correct, but it's the best we
- * can do. Writes and reads get combined.
- */
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
-
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Cortex-A5 HW events mapping
- */
-static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- /*
- * The prefetch counters don't differentiate between the I side and the
- * D side.
- */
- [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
- [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Cortex-A15 HW events mapping
- */
-static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
-};
-
-static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
-
- /*
- * Not all performance counters differentiate between read and write
- * accesses/misses so we're not always strictly correct, but it's the
- * best we can do. Writes and reads get combined in these cases.
- */
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
-
- [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
- [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
- [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
- [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Cortex-A7 HW events mapping
- */
-static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
-};
-
-static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- /*
- * The performance counters don't differentiate between read and write
- * accesses/misses so this isn't strictly correct, but it's the best we
- * can do. Writes and reads get combined.
- */
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
-
- [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
- [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
- [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Cortex-A12 HW events mapping
- */
-static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
-};
-
-static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
-
- /*
- * Not all performance counters differentiate between read and write
- * accesses/misses so we're not always strictly correct, but it's the
- * best we can do. Writes and reads get combined in these cases.
- */
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
-
- [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
- [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
- [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Krait HW events mapping
- */
-static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
-};
-
-static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
-};
-
-static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- /*
- * The performance counters don't differentiate between read and write
- * accesses/misses so this isn't strictly correct, but it's the best we
- * can do. Writes and reads get combined.
- */
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/*
- * Scorpion HW events mapping
- */
-static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
-};
-
-static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
- /*
- * The performance counters don't differentiate between read and write
- * accesses/misses so this isn't strictly correct, but it's the best we
- * can do. Writes and reads get combined.
- */
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS,
- /*
- * Only ITLB misses and DTLB refills are supported. If users want the
- * DTLB refills misses a raw counter must be used.
- */
- [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS,
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS,
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7");
-
-static struct attribute *armv7_pmu_format_attrs[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group armv7_pmu_format_attr_group = {
- .name = "format",
- .attrs = armv7_pmu_format_attrs,
-};
-
-#define ARMV7_EVENT_ATTR_RESOLVE(m) #m
-#define ARMV7_EVENT_ATTR(name, config) \
- PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \
- "event=" ARMV7_EVENT_ATTR_RESOLVE(config))
-
-ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR);
-ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL);
-ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL);
-ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL);
-ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS);
-ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL);
-ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ);
-ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE);
-ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED);
-ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN);
-ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED);
-ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE);
-ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE);
-ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH);
-ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN);
-ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS);
-ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED);
-ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES);
-ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED);
-
-static struct attribute *armv7_pmuv1_event_attrs[] = {
- &armv7_event_attr_sw_incr.attr.attr,
- &armv7_event_attr_l1i_cache_refill.attr.attr,
- &armv7_event_attr_l1i_tlb_refill.attr.attr,
- &armv7_event_attr_l1d_cache_refill.attr.attr,
- &armv7_event_attr_l1d_cache.attr.attr,
- &armv7_event_attr_l1d_tlb_refill.attr.attr,
- &armv7_event_attr_ld_retired.attr.attr,
- &armv7_event_attr_st_retired.attr.attr,
- &armv7_event_attr_inst_retired.attr.attr,
- &armv7_event_attr_exc_taken.attr.attr,
- &armv7_event_attr_exc_return.attr.attr,
- &armv7_event_attr_cid_write_retired.attr.attr,
- &armv7_event_attr_pc_write_retired.attr.attr,
- &armv7_event_attr_br_immed_retired.attr.attr,
- &armv7_event_attr_br_return_retired.attr.attr,
- &armv7_event_attr_unaligned_ldst_retired.attr.attr,
- &armv7_event_attr_br_mis_pred.attr.attr,
- &armv7_event_attr_cpu_cycles.attr.attr,
- &armv7_event_attr_br_pred.attr.attr,
- NULL,
-};
-
-static struct attribute_group armv7_pmuv1_events_attr_group = {
- .name = "events",
- .attrs = armv7_pmuv1_event_attrs,
-};
-
-ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS);
-ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS);
-ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB);
-ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS);
-ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL);
-ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB);
-ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS);
-ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR);
-ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC);
-ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE);
-ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES);
-
-static struct attribute *armv7_pmuv2_event_attrs[] = {
- &armv7_event_attr_sw_incr.attr.attr,
- &armv7_event_attr_l1i_cache_refill.attr.attr,
- &armv7_event_attr_l1i_tlb_refill.attr.attr,
- &armv7_event_attr_l1d_cache_refill.attr.attr,
- &armv7_event_attr_l1d_cache.attr.attr,
- &armv7_event_attr_l1d_tlb_refill.attr.attr,
- &armv7_event_attr_ld_retired.attr.attr,
- &armv7_event_attr_st_retired.attr.attr,
- &armv7_event_attr_inst_retired.attr.attr,
- &armv7_event_attr_exc_taken.attr.attr,
- &armv7_event_attr_exc_return.attr.attr,
- &armv7_event_attr_cid_write_retired.attr.attr,
- &armv7_event_attr_pc_write_retired.attr.attr,
- &armv7_event_attr_br_immed_retired.attr.attr,
- &armv7_event_attr_br_return_retired.attr.attr,
- &armv7_event_attr_unaligned_ldst_retired.attr.attr,
- &armv7_event_attr_br_mis_pred.attr.attr,
- &armv7_event_attr_cpu_cycles.attr.attr,
- &armv7_event_attr_br_pred.attr.attr,
- &armv7_event_attr_mem_access.attr.attr,
- &armv7_event_attr_l1i_cache.attr.attr,
- &armv7_event_attr_l1d_cache_wb.attr.attr,
- &armv7_event_attr_l2d_cache.attr.attr,
- &armv7_event_attr_l2d_cache_refill.attr.attr,
- &armv7_event_attr_l2d_cache_wb.attr.attr,
- &armv7_event_attr_bus_access.attr.attr,
- &armv7_event_attr_memory_error.attr.attr,
- &armv7_event_attr_inst_spec.attr.attr,
- &armv7_event_attr_ttbr_write_retired.attr.attr,
- &armv7_event_attr_bus_cycles.attr.attr,
- NULL,
-};
-
-static struct attribute_group armv7_pmuv2_events_attr_group = {
- .name = "events",
- .attrs = armv7_pmuv2_event_attrs,
-};
-
-/*
- * Perf Events' indices
- */
-#define ARMV7_IDX_CYCLE_COUNTER 0
-#define ARMV7_IDX_COUNTER0 1
-#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \
- (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
-
-#define ARMV7_MAX_COUNTERS 32
-#define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1)
-
-/*
- * ARMv7 low level PMNC access
- */
-
-/*
- * Perf Event to low level counters mapping
- */
-#define ARMV7_IDX_TO_COUNTER(x) \
- (((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
-
-/*
- * Per-CPU PMNC: config reg
- */
-#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
-#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
-#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
-#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
-#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
-#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
-#define ARMV7_PMNC_N_MASK 0x1f
-#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
-
-/*
- * FLAG: counters overflow flag status reg
- */
-#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
-#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */
-#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */
-
-/*
- * Event filters for PMUv2
- */
-#define ARMV7_EXCLUDE_PL1 BIT(31)
-#define ARMV7_EXCLUDE_USER BIT(30)
-#define ARMV7_INCLUDE_HYP BIT(27)
-
-/*
- * Secure debug enable reg
- */
-#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */
-
-static inline u32 armv7_pmnc_read(void)
-{
- u32 val;
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
- return val;
-}
-
-static inline void armv7_pmnc_write(u32 val)
-{
- val &= ARMV7_PMNC_MASK;
- isb();
- asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
-}
-
-static inline int armv7_pmnc_has_overflowed(u32 pmnc)
-{
- return pmnc & ARMV7_OVERFLOWED_MASK;
-}
-
-static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx)
-{
- return idx >= ARMV7_IDX_CYCLE_COUNTER &&
- idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu);
-}
-
-static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
-{
- return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
-}
-
-static inline void armv7_pmnc_select_counter(int idx)
-{
- u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
- isb();
-}
-
-static inline u64 armv7pmu_read_counter(struct perf_event *event)
-{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
- u32 value = 0;
-
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
- pr_err("CPU%u reading wrong counter %d\n",
- smp_processor_id(), idx);
- } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
- } else {
- armv7_pmnc_select_counter(idx);
- asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
- }
-
- return value;
-}
-
-static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
-{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
- pr_err("CPU%u writing wrong counter %d\n",
- smp_processor_id(), idx);
- } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value));
- } else {
- armv7_pmnc_select_counter(idx);
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value));
- }
-}
-
-static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
-{
- armv7_pmnc_select_counter(idx);
- val &= ARMV7_EVTYPE_MASK;
- asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
-}
-
-static inline void armv7_pmnc_enable_counter(int idx)
-{
- u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
-}
-
-static inline void armv7_pmnc_disable_counter(int idx)
-{
- u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
-}
-
-static inline void armv7_pmnc_enable_intens(int idx)
-{
- u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
-}
-
-static inline void armv7_pmnc_disable_intens(int idx)
-{
- u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
- isb();
- /* Clear the overflow flag in case an interrupt is pending. */
- asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
- isb();
-}
-
-static inline u32 armv7_pmnc_getreset_flags(void)
-{
- u32 val;
-
- /* Read */
- asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
-
- /* Write to clear flags */
- val &= ARMV7_FLAG_MASK;
- asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
-
- return val;
-}
-
-#ifdef DEBUG
-static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
-{
- u32 val;
- unsigned int cnt;
-
- pr_info("PMNC registers dump:\n");
-
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
- pr_info("PMNC =0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
- pr_info("CNTENS=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
- pr_info("INTENS=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
- pr_info("FLAGS =0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
- pr_info("SELECT=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
- pr_info("CCNT =0x%08x\n", val);
-
- for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
- pr_info("CNT[%d] count =0x%08x\n",
- ARMV7_IDX_TO_COUNTER(cnt), val);
- asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
- pr_info("CNT[%d] evtsel=0x%08x\n",
- ARMV7_IDX_TO_COUNTER(cnt), val);
- }
-}
-#endif
-
-static void armv7pmu_enable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- int idx = hwc->idx;
-
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
- pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
- smp_processor_id(), idx);
- return;
- }
-
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
-
- /*
- * Disable counter
- */
- armv7_pmnc_disable_counter(idx);
-
- /*
- * Set event (if destined for PMNx counters)
- * We only need to set the event for the cycle counter if we
- * have the ability to perform event filtering.
- */
- if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
- armv7_pmnc_write_evtsel(idx, hwc->config_base);
-
- /*
- * Enable interrupt for this counter
- */
- armv7_pmnc_enable_intens(idx);
-
- /*
- * Enable counter
- */
- armv7_pmnc_enable_counter(idx);
-}
-
-static void armv7pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- int idx = hwc->idx;
-
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
- pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
- smp_processor_id(), idx);
- return;
- }
-
- /*
- * Disable counter and interrupt
- */
-
- /*
- * Disable counter
- */
- armv7_pmnc_disable_counter(idx);
-
- /*
- * Disable interrupt for this counter
- */
- armv7_pmnc_disable_intens(idx);
-}
-
-static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
-{
- u32 pmnc;
- struct perf_sample_data data;
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
- struct pt_regs *regs;
- int idx;
-
- /*
- * Get and reset the IRQ flags
- */
- pmnc = armv7_pmnc_getreset_flags();
-
- /*
- * Did an overflow occur?
- */
- if (!armv7_pmnc_has_overflowed(pmnc))
- return IRQ_NONE;
-
- /*
- * Handle the counter(s) overflow(s)
- */
- regs = get_irq_regs();
-
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- /* Ignore if we don't have an event. */
- if (!event)
- continue;
-
- /*
- * We have a single interrupt for all counters. Check that
- * each counter has overflowed before we process it.
- */
- if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
- continue;
-
- hwc = &event->hw;
- armpmu_event_update(event);
- perf_sample_data_init(&data, 0, hwc->last_period);
- if (!armpmu_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
- }
-
- /*
- * Handle the pending perf events.
- *
- * Note: this call *must* be run with interrupts disabled. For
- * platforms that can have the PMU interrupts raised as an NMI, this
- * will not work.
- */
- irq_work_run();
-
- return IRQ_HANDLED;
-}
-
-static void armv7pmu_start(struct arm_pmu *cpu_pmu)
-{
- /* Enable all counters */
- armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-}
-
-static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
-{
- /* Disable all counters */
- armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-}
-
-static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT;
-
- /* Always place a cycle counter into the cycle counter. */
- if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
- if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
- return -EAGAIN;
-
- return ARMV7_IDX_CYCLE_COUNTER;
- }
-
- /*
- * For anything other than a cycle counter, try and use
- * the events counters
- */
- for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
-
- /* The counters are all in use. */
- return -EAGAIN;
-}
-
-static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- clear_bit(event->hw.idx, cpuc->used_mask);
-}
-
-/*
- * Add an event filter to a given event. This will only work for PMUv2 PMUs.
- */
-static int armv7pmu_set_event_filter(struct hw_perf_event *event,
- struct perf_event_attr *attr)
-{
- unsigned long config_base = 0;
-
- if (attr->exclude_idle) {
- pr_debug("ARM performance counters do not support mode exclusion\n");
- return -EOPNOTSUPP;
- }
- if (attr->exclude_user)
- config_base |= ARMV7_EXCLUDE_USER;
- if (attr->exclude_kernel)
- config_base |= ARMV7_EXCLUDE_PL1;
- if (!attr->exclude_hv)
- config_base |= ARMV7_INCLUDE_HYP;
-
- /*
- * Install the filter into config_base as this is used to
- * construct the event type.
- */
- event->config_base = config_base;
-
- return 0;
-}
-
-static void armv7pmu_reset(void *info)
-{
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 idx, nb_cnt = cpu_pmu->num_events, val;
-
- if (cpu_pmu->secure_access) {
- asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
- val |= ARMV7_SDER_SUNIDEN;
- asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
- }
-
- /* The counter and interrupt enable registers are unknown at reset. */
- for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv7_pmnc_disable_counter(idx);
- armv7_pmnc_disable_intens(idx);
- }
-
- /* Initialize & Reset PMNC: C and P bits */
- armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
-}
-
-static int armv7_a8_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv7_a8_perf_map,
- &armv7_a8_perf_cache_map, 0xFF);
-}
-
-static int armv7_a9_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv7_a9_perf_map,
- &armv7_a9_perf_cache_map, 0xFF);
-}
-
-static int armv7_a5_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv7_a5_perf_map,
- &armv7_a5_perf_cache_map, 0xFF);
-}
-
-static int armv7_a15_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv7_a15_perf_map,
- &armv7_a15_perf_cache_map, 0xFF);
-}
-
-static int armv7_a7_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv7_a7_perf_map,
- &armv7_a7_perf_cache_map, 0xFF);
-}
-
-static int armv7_a12_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv7_a12_perf_map,
- &armv7_a12_perf_cache_map, 0xFF);
-}
-
-static int krait_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &krait_perf_map,
- &krait_perf_cache_map, 0xFFFFF);
-}
-
-static int krait_map_event_no_branch(struct perf_event *event)
-{
- return armpmu_map_event(event, &krait_perf_map_no_branch,
- &krait_perf_cache_map, 0xFFFFF);
-}
-
-static int scorpion_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &scorpion_perf_map,
- &scorpion_perf_cache_map, 0xFFFFF);
-}
-
-static void armv7pmu_init(struct arm_pmu *cpu_pmu)
-{
- cpu_pmu->handle_irq = armv7pmu_handle_irq;
- cpu_pmu->enable = armv7pmu_enable_event;
- cpu_pmu->disable = armv7pmu_disable_event;
- cpu_pmu->read_counter = armv7pmu_read_counter;
- cpu_pmu->write_counter = armv7pmu_write_counter;
- cpu_pmu->get_event_idx = armv7pmu_get_event_idx;
- cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx;
- cpu_pmu->start = armv7pmu_start;
- cpu_pmu->stop = armv7pmu_stop;
- cpu_pmu->reset = armv7pmu_reset;
-}
-
-static void armv7_read_num_pmnc_events(void *info)
-{
- int *nb_cnt = info;
-
- /* Read the nb of CNTx counters supported from PMNC */
- *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
-
- /* Add the CPU cycles counter */
- *nb_cnt += 1;
-}
-
-static int armv7_probe_num_events(struct arm_pmu *arm_pmu)
-{
- return smp_call_function_any(&arm_pmu->supported_cpus,
- armv7_read_num_pmnc_events,
- &arm_pmu->num_events, 1);
-}
-
-static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_cortex_a8";
- cpu_pmu->map_event = armv7_a8_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv7_pmuv1_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv7_pmu_format_attr_group;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_cortex_a9";
- cpu_pmu->map_event = armv7_a9_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv7_pmuv1_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv7_pmu_format_attr_group;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_cortex_a5";
- cpu_pmu->map_event = armv7_a5_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv7_pmuv1_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv7_pmu_format_attr_group;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_cortex_a15";
- cpu_pmu->map_event = armv7_a15_map_event;
- cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv7_pmuv2_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv7_pmu_format_attr_group;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_cortex_a7";
- cpu_pmu->map_event = armv7_a7_map_event;
- cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv7_pmuv2_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv7_pmu_format_attr_group;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_cortex_a12";
- cpu_pmu->map_event = armv7_a12_map_event;
- cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv7_pmuv2_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv7_pmu_format_attr_group;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv7_a12_pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_cortex_a17";
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv7_pmuv2_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv7_pmu_format_attr_group;
- return ret;
-}
-
-/*
- * Krait Performance Monitor Region Event Selection Register (PMRESRn)
- *
- * 31 30 24 16 8 0
- * +--------------------------------+
- * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0
- * +--------------------------------+
- * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1
- * +--------------------------------+
- * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2
- * +--------------------------------+
- * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ?
- * +--------------------------------+
- * EN | G=3 | G=2 | G=1 | G=0
- *
- * Event Encoding:
- *
- * hwc->config_base = 0xNRCCG
- *
- * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR)
- * R = region register
- * CC = class of events the group G is choosing from
- * G = group or particular event
- *
- * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2
- *
- * A region (R) corresponds to a piece of the CPU (execution unit, instruction
- * unit, etc.) while the event code (CC) corresponds to a particular class of
- * events (interrupts for example). An event code is broken down into
- * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
- * example).
- */
-
-#define KRAIT_EVENT (1 << 16)
-#define VENUM_EVENT (2 << 16)
-#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT)
-#define PMRESRn_EN BIT(31)
-
-#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */
-#define EVENT_GROUP(event) ((event) & 0xf) /* G */
-#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */
-#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */
-#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */
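(Decoding the 0x12021 example from the comment above with these field macros, as an illustrative sketch only:)

	u32 cfg = 0x12021;				/* N=1: Krait CPU event */
	unsigned int region = EVENT_REGION(cfg);	/* (cfg >> 12) & 0xf == 2: PMRESR2 */
	unsigned int code = EVENT_CODE(cfg);		/* (cfg >> 4) & 0xff == 2 */
	unsigned int group = EVENT_GROUP(cfg);		/* cfg & 0xf == 1 */
	bool cpu = EVENT_CPU(cfg);			/* true: the KRAIT_EVENT prefix bit is set */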
-
-static u32 krait_read_pmresrn(int n)
-{
- u32 val;
-
- switch (n) {
- case 0:
- asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val));
- break;
- case 1:
- asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val));
- break;
- case 2:
- asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val));
- break;
- default:
- BUG(); /* Should be validated in krait_pmu_get_event_idx() */
- }
-
- return val;
-}
-
-static void krait_write_pmresrn(int n, u32 val)
-{
- switch (n) {
- case 0:
- asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val));
- break;
- case 1:
- asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val));
- break;
- case 2:
- asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val));
- break;
- default:
- BUG(); /* Should be validated in krait_pmu_get_event_idx() */
- }
-}
-
-static u32 venum_read_pmresr(void)
-{
- u32 val;
- asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
- return val;
-}
-
-static void venum_write_pmresr(u32 val)
-{
- asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
-}
-
-static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val)
-{
- u32 venum_new_val;
- u32 fp_new_val;
-
- BUG_ON(preemptible());
- /* CPACR Enable CP10 and CP11 access */
- *venum_orig_val = get_copro_access();
- venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11);
- set_copro_access(venum_new_val);
-
- /* Enable FPEXC */
- *fp_orig_val = fmrx(FPEXC);
- fp_new_val = *fp_orig_val | FPEXC_EN;
- fmxr(FPEXC, fp_new_val);
-}
-
-static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val)
-{
- BUG_ON(preemptible());
- /* Restore FPEXC */
- fmxr(FPEXC, fp_orig_val);
- isb();
- /* Restore CPACR */
- set_copro_access(venum_orig_val);
-}
-
-static u32 krait_get_pmresrn_event(unsigned int region)
-{
- static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0,
- KRAIT_PMRESR1_GROUP0,
- KRAIT_PMRESR2_GROUP0 };
- return pmresrn_table[region];
-}
-
-static void krait_evt_setup(int idx, u32 config_base)
-{
- u32 val;
- u32 mask;
- u32 vval, fval;
- unsigned int region = EVENT_REGION(config_base);
- unsigned int group = EVENT_GROUP(config_base);
- unsigned int code = EVENT_CODE(config_base);
- unsigned int group_shift;
- bool venum_event = EVENT_VENUM(config_base);
-
- group_shift = group * 8;
- mask = 0xff << group_shift;
-
- /* Configure evtsel for the region and group */
- if (venum_event)
- val = KRAIT_VPMRESR0_GROUP0;
- else
- val = krait_get_pmresrn_event(region);
- val += group;
- /* Mix in mode-exclusion bits */
- val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
- armv7_pmnc_write_evtsel(idx, val);
-
- if (venum_event) {
- venum_pre_pmresr(&vval, &fval);
- val = venum_read_pmresr();
- val &= ~mask;
- val |= code << group_shift;
- val |= PMRESRn_EN;
- venum_write_pmresr(val);
- venum_post_pmresr(vval, fval);
- } else {
- val = krait_read_pmresrn(region);
- val &= ~mask;
- val |= code << group_shift;
- val |= PMRESRn_EN;
- krait_write_pmresrn(region, val);
- }
-}
-
-static u32 clear_pmresrn_group(u32 val, int group)
-{
- u32 mask;
- int group_shift;
-
- group_shift = group * 8;
- mask = 0xff << group_shift;
- val &= ~mask;
-
- /* Don't clear enable bit if entire region isn't disabled */
- if (val & ~PMRESRn_EN)
- return val |= PMRESRn_EN;
-
- return 0;
-}
-
-static void krait_clearpmu(u32 config_base)
-{
- u32 val;
- u32 vval, fval;
- unsigned int region = EVENT_REGION(config_base);
- unsigned int group = EVENT_GROUP(config_base);
- bool venum_event = EVENT_VENUM(config_base);
-
- if (venum_event) {
- venum_pre_pmresr(&vval, &fval);
- val = venum_read_pmresr();
- val = clear_pmresrn_group(val, group);
- venum_write_pmresr(val);
- venum_post_pmresr(vval, fval);
- } else {
- val = krait_read_pmresrn(region);
- val = clear_pmresrn_group(val, group);
- krait_write_pmresrn(region, val);
- }
-}
-
-static void krait_pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- /* Disable counter and interrupt */
-
- /* Disable counter */
- armv7_pmnc_disable_counter(idx);
-
- /*
- * Clear pmresr code (if destined for PMNx counters)
- */
- if (hwc->config_base & KRAIT_EVENT_MASK)
- krait_clearpmu(hwc->config_base);
-
- /* Disable interrupt for this counter */
- armv7_pmnc_disable_intens(idx);
-}
-
-static void krait_pmu_enable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
-
- /* Disable counter */
- armv7_pmnc_disable_counter(idx);
-
- /*
- * Set event (if destined for PMNx counters)
- * We set the event for the cycle counter because we
- * have the ability to perform event filtering.
- */
- if (hwc->config_base & KRAIT_EVENT_MASK)
- krait_evt_setup(idx, hwc->config_base);
- else
- armv7_pmnc_write_evtsel(idx, hwc->config_base);
-
- /* Enable interrupt for this counter */
- armv7_pmnc_enable_intens(idx);
-
- /* Enable counter */
- armv7_pmnc_enable_counter(idx);
-}
-
-static void krait_pmu_reset(void *info)
-{
- u32 vval, fval;
- struct arm_pmu *cpu_pmu = info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
-
- armv7pmu_reset(info);
-
- /* Clear all pmresrs */
- krait_write_pmresrn(0, 0);
- krait_write_pmresrn(1, 0);
- krait_write_pmresrn(2, 0);
-
- venum_pre_pmresr(&vval, &fval);
- venum_write_pmresr(0);
- venum_post_pmresr(vval, fval);
-
- /* Reset PMxEVNCTCR to sane default */
- for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv7_pmnc_select_counter(idx);
- asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
- }
-}
-
-static int krait_event_to_bit(struct perf_event *event, unsigned int region,
- unsigned int group)
-{
- int bit;
- struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-
- if (hwc->config_base & VENUM_EVENT)
- bit = KRAIT_VPMRESR0_GROUP0;
- else
- bit = krait_get_pmresrn_event(region);
- bit -= krait_get_pmresrn_event(0);
- bit += group;
- /*
- * Lower bits are reserved for use by the counters (see
- * armv7pmu_get_event_idx() for more info)
- */
- bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
-
- return bit;
-}
-
-/*
- * We check for column exclusion constraints here.
- * Two events can't use the same group within a pmresr register.
- */
-static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int idx;
- int bit = -1;
- struct hw_perf_event *hwc = &event->hw;
- unsigned int region = EVENT_REGION(hwc->config_base);
- unsigned int code = EVENT_CODE(hwc->config_base);
- unsigned int group = EVENT_GROUP(hwc->config_base);
- bool venum_event = EVENT_VENUM(hwc->config_base);
- bool krait_event = EVENT_CPU(hwc->config_base);
-
- if (venum_event || krait_event) {
- /* Ignore invalid events */
- if (group > 3 || region > 2)
- return -EINVAL;
- if (venum_event && (code & 0xe0))
- return -EINVAL;
-
- bit = krait_event_to_bit(event, region, group);
- if (test_and_set_bit(bit, cpuc->used_mask))
- return -EAGAIN;
- }
-
- idx = armv7pmu_get_event_idx(cpuc, event);
- if (idx < 0 && bit >= 0)
- clear_bit(bit, cpuc->used_mask);
-
- return idx;
-}
-
-static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int bit;
- struct hw_perf_event *hwc = &event->hw;
- unsigned int region = EVENT_REGION(hwc->config_base);
- unsigned int group = EVENT_GROUP(hwc->config_base);
- bool venum_event = EVENT_VENUM(hwc->config_base);
- bool krait_event = EVENT_CPU(hwc->config_base);
-
- armv7pmu_clear_event_idx(cpuc, event);
- if (venum_event || krait_event) {
- bit = krait_event_to_bit(event, region, group);
- clear_bit(bit, cpuc->used_mask);
- }
-}
-
-static int krait_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_krait";
- /* Some early versions of Krait don't support PC write events */
- if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
- "qcom,no-pc-write"))
- cpu_pmu->map_event = krait_map_event_no_branch;
- else
- cpu_pmu->map_event = krait_map_event;
- cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
- cpu_pmu->reset = krait_pmu_reset;
- cpu_pmu->enable = krait_pmu_enable_event;
- cpu_pmu->disable = krait_pmu_disable_event;
- cpu_pmu->get_event_idx = krait_pmu_get_event_idx;
- cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-/*
- * Scorpion Local Performance Monitor Register (LPMn)
- *
- * 31 30 24 16 8 0
- * +--------------------------------+
- * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0
- * +--------------------------------+
- * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1
- * +--------------------------------+
- * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2
- * +--------------------------------+
- * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3
- * +--------------------------------+
- * VLPM | EN | CC | CC | CC | CC | N = 2, R = ?
- * +--------------------------------+
- * EN | G=3 | G=2 | G=1 | G=0
- *
- *
- * Event Encoding:
- *
- * hwc->config_base = 0xNRCCG
- *
- * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM)
- * R = region register
- * CC = class of events the group G is choosing from
- * G = group or particular event
- *
- * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2
- *
- * A region (R) corresponds to a piece of the CPU (execution unit, instruction
- * unit, etc.) while the event code (CC) corresponds to a particular class of
- * events (interrupts for example). An event code is broken down into
- * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for
- * example).
- */
-
-static u32 scorpion_read_pmresrn(int n)
-{
- u32 val;
-
- switch (n) {
- case 0:
- asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
- break;
- case 1:
- asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
- break;
- case 2:
- asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
- break;
- case 3:
- asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
- break;
- default:
- BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
- }
-
- return val;
-}
-
-static void scorpion_write_pmresrn(int n, u32 val)
-{
- switch (n) {
- case 0:
- asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
- break;
- case 1:
- asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
- break;
- case 2:
- asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
- break;
- case 3:
- asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
- break;
- default:
- BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */
- }
-}
-
-static u32 scorpion_get_pmresrn_event(unsigned int region)
-{
- static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0,
- SCORPION_LPM1_GROUP0,
- SCORPION_LPM2_GROUP0,
- SCORPION_L2LPM_GROUP0 };
- return pmresrn_table[region];
-}
-
-static void scorpion_evt_setup(int idx, u32 config_base)
-{
- u32 val;
- u32 mask;
- u32 vval, fval;
- unsigned int region = EVENT_REGION(config_base);
- unsigned int group = EVENT_GROUP(config_base);
- unsigned int code = EVENT_CODE(config_base);
- unsigned int group_shift;
- bool venum_event = EVENT_VENUM(config_base);
-
- group_shift = group * 8;
- mask = 0xff << group_shift;
-
- /* Configure evtsel for the region and group */
- if (venum_event)
- val = SCORPION_VLPM_GROUP0;
- else
- val = scorpion_get_pmresrn_event(region);
- val += group;
- /* Mix in mode-exclusion bits */
- val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1);
- armv7_pmnc_write_evtsel(idx, val);
-
- asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
-
- if (venum_event) {
- venum_pre_pmresr(&vval, &fval);
- val = venum_read_pmresr();
- val &= ~mask;
- val |= code << group_shift;
- val |= PMRESRn_EN;
- venum_write_pmresr(val);
- venum_post_pmresr(vval, fval);
- } else {
- val = scorpion_read_pmresrn(region);
- val &= ~mask;
- val |= code << group_shift;
- val |= PMRESRn_EN;
- scorpion_write_pmresrn(region, val);
- }
-}
-
-static void scorpion_clearpmu(u32 config_base)
-{
- u32 val;
- u32 vval, fval;
- unsigned int region = EVENT_REGION(config_base);
- unsigned int group = EVENT_GROUP(config_base);
- bool venum_event = EVENT_VENUM(config_base);
-
- if (venum_event) {
- venum_pre_pmresr(&vval, &fval);
- val = venum_read_pmresr();
- val = clear_pmresrn_group(val, group);
- venum_write_pmresr(val);
- venum_post_pmresr(vval, fval);
- } else {
- val = scorpion_read_pmresrn(region);
- val = clear_pmresrn_group(val, group);
- scorpion_write_pmresrn(region, val);
- }
-}
-
-static void scorpion_pmu_disable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- /* Disable counter and interrupt */
-
- /* Disable counter */
- armv7_pmnc_disable_counter(idx);
-
- /*
- * Clear pmresr code (if destined for PMNx counters)
- */
- if (hwc->config_base & KRAIT_EVENT_MASK)
- scorpion_clearpmu(hwc->config_base);
-
- /* Disable interrupt for this counter */
- armv7_pmnc_disable_intens(idx);
-}
-
-static void scorpion_pmu_enable_event(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
-
- /* Disable counter */
- armv7_pmnc_disable_counter(idx);
-
- /*
- * Set event (if destined for PMNx counters)
- * We don't set the event for the cycle counter because we
- * don't have the ability to perform event filtering.
- */
- if (hwc->config_base & KRAIT_EVENT_MASK)
- scorpion_evt_setup(idx, hwc->config_base);
- else if (idx != ARMV7_IDX_CYCLE_COUNTER)
- armv7_pmnc_write_evtsel(idx, hwc->config_base);
-
- /* Enable interrupt for this counter */
- armv7_pmnc_enable_intens(idx);
-
- /* Enable counter */
- armv7_pmnc_enable_counter(idx);
-}
-
-static void scorpion_pmu_reset(void *info)
-{
- u32 vval, fval;
- struct arm_pmu *cpu_pmu = info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
-
- armv7pmu_reset(info);
-
- /* Clear all pmresrs */
- scorpion_write_pmresrn(0, 0);
- scorpion_write_pmresrn(1, 0);
- scorpion_write_pmresrn(2, 0);
- scorpion_write_pmresrn(3, 0);
-
- venum_pre_pmresr(&vval, &fval);
- venum_write_pmresr(0);
- venum_post_pmresr(vval, fval);
-
- /* Reset PMxEVNCTCR to sane default */
- for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv7_pmnc_select_counter(idx);
- asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
- }
-}
-
-static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
- unsigned int group)
-{
- int bit;
- struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-
- if (hwc->config_base & VENUM_EVENT)
- bit = SCORPION_VLPM_GROUP0;
- else
- bit = scorpion_get_pmresrn_event(region);
- bit -= scorpion_get_pmresrn_event(0);
- bit += group;
- /*
- * Lower bits are reserved for use by the counters (see
- * armv7pmu_get_event_idx() for more info)
- */
- bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
-
- return bit;
-}
-
-/*
- * We check for column exclusion constraints here.
- * Two events can't use the same group within a pmresr register.
- */
-static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int idx;
- int bit = -1;
- struct hw_perf_event *hwc = &event->hw;
- unsigned int region = EVENT_REGION(hwc->config_base);
- unsigned int group = EVENT_GROUP(hwc->config_base);
- bool venum_event = EVENT_VENUM(hwc->config_base);
- bool scorpion_event = EVENT_CPU(hwc->config_base);
-
- if (venum_event || scorpion_event) {
- /* Ignore invalid events */
- if (group > 3 || region > 3)
- return -EINVAL;
-
- bit = scorpion_event_to_bit(event, region, group);
- if (test_and_set_bit(bit, cpuc->used_mask))
- return -EAGAIN;
- }
-
- idx = armv7pmu_get_event_idx(cpuc, event);
- if (idx < 0 && bit >= 0)
- clear_bit(bit, cpuc->used_mask);
-
- return idx;
-}
-
-static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int bit;
- struct hw_perf_event *hwc = &event->hw;
- unsigned int region = EVENT_REGION(hwc->config_base);
- unsigned int group = EVENT_GROUP(hwc->config_base);
- bool venum_event = EVENT_VENUM(hwc->config_base);
- bool scorpion_event = EVENT_CPU(hwc->config_base);
-
- armv7pmu_clear_event_idx(cpuc, event);
- if (venum_event || scorpion_event) {
- bit = scorpion_event_to_bit(event, region, group);
- clear_bit(bit, cpuc->used_mask);
- }
-}
-
-static int scorpion_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_scorpion";
- cpu_pmu->map_event = scorpion_map_event;
- cpu_pmu->reset = scorpion_pmu_reset;
- cpu_pmu->enable = scorpion_pmu_enable_event;
- cpu_pmu->disable = scorpion_pmu_disable_event;
- cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
- cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "armv7_scorpion_mp";
- cpu_pmu->map_event = scorpion_map_event;
- cpu_pmu->reset = scorpion_pmu_reset;
- cpu_pmu->enable = scorpion_pmu_enable_event;
- cpu_pmu->disable = scorpion_pmu_disable_event;
- cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx;
- cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx;
- return armv7_probe_num_events(cpu_pmu);
-}
-
-static const struct of_device_id armv7_pmu_of_device_ids[] = {
- {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init},
- {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init},
- {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init},
- {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init},
- {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init},
- {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
- {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
- {.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
- {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init},
- {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init},
- {},
-};
-
-static const struct pmu_probe_info armv7_pmu_probe_table[] = {
- ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
- ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
- { /* sentinel value */ }
-};
-
-static int armv7_pmu_device_probe(struct platform_device *pdev)
-{
- return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids,
- armv7_pmu_probe_table);
-}
-
-static struct platform_driver armv7_pmu_driver = {
- .driver = {
- .name = "armv7-pmu",
- .of_match_table = armv7_pmu_of_device_ids,
- .suppress_bind_attrs = true,
- },
- .probe = armv7_pmu_device_probe,
-};
-
-builtin_platform_driver(armv7_pmu_driver);
-#endif /* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
deleted file mode 100644
index 7a2ba1c689a7..000000000000
--- a/arch/arm/kernel/perf_event_xscale.c
+++ /dev/null
@@ -1,748 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARMv5 [xscale] Performance counter handling code.
- *
- * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
- *
- * Based on the previous xscale OProfile code.
- *
- * There are two variants of the xscale PMU that we support:
- * - xscale1pmu: 2 event counters and a cycle counter
- * - xscale2pmu: 4 event counters and a cycle counter
- * The two variants share event definitions, but have different
- * PMU structures.
- */
-
-#ifdef CONFIG_CPU_XSCALE
-
-#include <asm/cputype.h>
-#include <asm/irq_regs.h>
-
-#include <linux/of.h>
-#include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
-
-enum xscale_perf_types {
- XSCALE_PERFCTR_ICACHE_MISS = 0x00,
- XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
- XSCALE_PERFCTR_DATA_STALL = 0x02,
- XSCALE_PERFCTR_ITLB_MISS = 0x03,
- XSCALE_PERFCTR_DTLB_MISS = 0x04,
- XSCALE_PERFCTR_BRANCH = 0x05,
- XSCALE_PERFCTR_BRANCH_MISS = 0x06,
- XSCALE_PERFCTR_INSTRUCTION = 0x07,
- XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
- XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
- XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
- XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
- XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
- XSCALE_PERFCTR_PC_CHANGED = 0x0D,
- XSCALE_PERFCTR_BCU_REQUEST = 0x10,
- XSCALE_PERFCTR_BCU_FULL = 0x11,
- XSCALE_PERFCTR_BCU_DRAIN = 0x12,
- XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
- XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
- XSCALE_PERFCTR_RMW = 0x16,
- /* XSCALE_PERFCTR_CCNT is not hardware defined */
- XSCALE_PERFCTR_CCNT = 0xFE,
- XSCALE_PERFCTR_UNUSED = 0xFF,
-};
-
-enum xscale_counters {
- XSCALE_CYCLE_COUNTER = 0,
- XSCALE_COUNTER0,
- XSCALE_COUNTER1,
- XSCALE_COUNTER2,
- XSCALE_COUNTER3,
-};
-
-static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
- [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
- [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
-};
-
-static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
-
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
- [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
-};
-
-#define XSCALE_PMU_ENABLE 0x001
-#define XSCALE_PMN_RESET 0x002
-#define XSCALE_CCNT_RESET 0x004
-#define XSCALE_PMU_RESET (XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
-#define XSCALE_PMU_CNT64 0x008
-
-#define XSCALE1_OVERFLOWED_MASK 0x700
-#define XSCALE1_CCOUNT_OVERFLOW 0x400
-#define XSCALE1_COUNT0_OVERFLOW 0x100
-#define XSCALE1_COUNT1_OVERFLOW 0x200
-#define XSCALE1_CCOUNT_INT_EN 0x040
-#define XSCALE1_COUNT0_INT_EN 0x010
-#define XSCALE1_COUNT1_INT_EN 0x020
-#define XSCALE1_COUNT0_EVT_SHFT 12
-#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
-#define XSCALE1_COUNT1_EVT_SHFT 20
-#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
-
-static inline u32
-xscale1pmu_read_pmnc(void)
-{
- u32 val;
- asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
- return val;
-}
-
-static inline void
-xscale1pmu_write_pmnc(u32 val)
-{
- /* upper 4 bits and bits 7, 11 are write-as-0 */
- val &= 0xffff77f;
- asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
-}
-
-static inline int
-xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
- enum xscale_counters counter)
-{
- int ret = 0;
-
- switch (counter) {
- case XSCALE_CYCLE_COUNTER:
- ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
- break;
- case XSCALE_COUNTER0:
- ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
- break;
- case XSCALE_COUNTER1:
- ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
- break;
- default:
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
- }
-
- return ret;
-}
-
-static irqreturn_t
-xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
-{
- unsigned long pmnc;
- struct perf_sample_data data;
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
- struct pt_regs *regs;
- int idx;
-
- /*
- * NOTE: there's an A stepping erratum that states if an overflow
- * bit already exists and another occurs, the previous
- * Overflow bit gets cleared. There's no workaround.
- * Fixed in B stepping or later.
- */
- pmnc = xscale1pmu_read_pmnc();
-
- /*
- * Write the value back to clear the overflow flags. Overflow
- * flags remain in pmnc for use below. We also disable the PMU
- * while we process the interrupt.
- */
- xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
-
- if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
- return IRQ_NONE;
-
- regs = get_irq_regs();
-
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- if (!event)
- continue;
-
- if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
- continue;
-
- hwc = &event->hw;
- armpmu_event_update(event);
- perf_sample_data_init(&data, 0, hwc->last_period);
- if (!armpmu_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
- }
-
- irq_work_run();
-
- /*
- * Re-enable the PMU.
- */
- pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
- xscale1pmu_write_pmnc(pmnc);
-
- return IRQ_HANDLED;
-}
-
-static void xscale1pmu_enable_event(struct perf_event *event)
-{
- unsigned long val, mask, evt;
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- switch (idx) {
- case XSCALE_CYCLE_COUNTER:
- mask = 0;
- evt = XSCALE1_CCOUNT_INT_EN;
- break;
- case XSCALE_COUNTER0:
- mask = XSCALE1_COUNT0_EVT_MASK;
- evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
- XSCALE1_COUNT0_INT_EN;
- break;
- case XSCALE_COUNTER1:
- mask = XSCALE1_COUNT1_EVT_MASK;
- evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
- XSCALE1_COUNT1_INT_EN;
- break;
- default:
- WARN_ONCE(1, "invalid counter number (%d)\n", idx);
- return;
- }
-
- val = xscale1pmu_read_pmnc();
- val &= ~mask;
- val |= evt;
- xscale1pmu_write_pmnc(val);
-}
-
-static void xscale1pmu_disable_event(struct perf_event *event)
-{
- unsigned long val, mask, evt;
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- switch (idx) {
- case XSCALE_CYCLE_COUNTER:
- mask = XSCALE1_CCOUNT_INT_EN;
- evt = 0;
- break;
- case XSCALE_COUNTER0:
- mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
- evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
- break;
- case XSCALE_COUNTER1:
- mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
- evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
- break;
- default:
- WARN_ONCE(1, "invalid counter number (%d)\n", idx);
- return;
- }
-
- val = xscale1pmu_read_pmnc();
- val &= ~mask;
- val |= evt;
- xscale1pmu_write_pmnc(val);
-}
-
-static int
-xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- if (XSCALE_PERFCTR_CCNT == hwc->config_base) {
- if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
- return -EAGAIN;
-
- return XSCALE_CYCLE_COUNTER;
- } else {
- if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
- return XSCALE_COUNTER1;
-
- if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
- return XSCALE_COUNTER0;
-
- return -EAGAIN;
- }
-}
-
-static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- clear_bit(event->hw.idx, cpuc->used_mask);
-}
-
-static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
-{
- unsigned long val;
-
- val = xscale1pmu_read_pmnc();
- val |= XSCALE_PMU_ENABLE;
- xscale1pmu_write_pmnc(val);
-}
-
-static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
-{
- unsigned long val;
-
- val = xscale1pmu_read_pmnc();
- val &= ~XSCALE_PMU_ENABLE;
- xscale1pmu_write_pmnc(val);
-}
-
-static inline u64 xscale1pmu_read_counter(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int counter = hwc->idx;
- u32 val = 0;
-
- switch (counter) {
- case XSCALE_CYCLE_COUNTER:
- asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
- break;
- case XSCALE_COUNTER0:
- asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
- break;
- case XSCALE_COUNTER1:
- asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
- break;
- }
-
- return val;
-}
-
-static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val)
-{
- struct hw_perf_event *hwc = &event->hw;
- int counter = hwc->idx;
-
- switch (counter) {
- case XSCALE_CYCLE_COUNTER:
- asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
- break;
- case XSCALE_COUNTER0:
- asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
- break;
- case XSCALE_COUNTER1:
- asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
- break;
- }
-}
-
-static int xscale_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &xscale_perf_map,
- &xscale_perf_cache_map, 0xFF);
-}
-
-static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
-{
- cpu_pmu->name = "armv5_xscale1";
- cpu_pmu->handle_irq = xscale1pmu_handle_irq;
- cpu_pmu->enable = xscale1pmu_enable_event;
- cpu_pmu->disable = xscale1pmu_disable_event;
- cpu_pmu->read_counter = xscale1pmu_read_counter;
- cpu_pmu->write_counter = xscale1pmu_write_counter;
- cpu_pmu->get_event_idx = xscale1pmu_get_event_idx;
- cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
- cpu_pmu->start = xscale1pmu_start;
- cpu_pmu->stop = xscale1pmu_stop;
- cpu_pmu->map_event = xscale_map_event;
- cpu_pmu->num_events = 3;
-
- return 0;
-}
-
-#define XSCALE2_OVERFLOWED_MASK 0x01f
-#define XSCALE2_CCOUNT_OVERFLOW 0x001
-#define XSCALE2_COUNT0_OVERFLOW 0x002
-#define XSCALE2_COUNT1_OVERFLOW 0x004
-#define XSCALE2_COUNT2_OVERFLOW 0x008
-#define XSCALE2_COUNT3_OVERFLOW 0x010
-#define XSCALE2_CCOUNT_INT_EN 0x001
-#define XSCALE2_COUNT0_INT_EN 0x002
-#define XSCALE2_COUNT1_INT_EN 0x004
-#define XSCALE2_COUNT2_INT_EN 0x008
-#define XSCALE2_COUNT3_INT_EN 0x010
-#define XSCALE2_COUNT0_EVT_SHFT 0
-#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
-#define XSCALE2_COUNT1_EVT_SHFT 8
-#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
-#define XSCALE2_COUNT2_EVT_SHFT 16
-#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
-#define XSCALE2_COUNT3_EVT_SHFT 24
-#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
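(A sketch of how these per-counter fields are used, mirroring xscale2pmu_enable_event() below; programming counter 2 to count D-cache misses and enabling its interrupt would look like:)

	u32 ien = xscale2pmu_read_int_enable();		/* helpers defined below */
	u32 evtsel = xscale2pmu_read_event_select();
	evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
	evtsel |= XSCALE_PERFCTR_DCACHE_MISS << XSCALE2_COUNT2_EVT_SHFT;
	ien |= XSCALE2_COUNT2_INT_EN;
	xscale2pmu_write_event_select(evtsel);
	xscale2pmu_write_int_enable(ien);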
-
-static inline u32
-xscale2pmu_read_pmnc(void)
-{
- u32 val;
- asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
- /* bits 1-2 and 4-23 are read-unpredictable */
- return val & 0xff000009;
-}
-
-static inline void
-xscale2pmu_write_pmnc(u32 val)
-{
- /* bits 4-23 are write-as-0, 24-31 are write ignored */
- val &= 0xf;
- asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
-}
-
-static inline u32
-xscale2pmu_read_overflow_flags(void)
-{
- u32 val;
- asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
- return val;
-}
-
-static inline void
-xscale2pmu_write_overflow_flags(u32 val)
-{
- asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
-}
-
-static inline u32
-xscale2pmu_read_event_select(void)
-{
- u32 val;
- asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
- return val;
-}
-
-static inline void
-xscale2pmu_write_event_select(u32 val)
-{
- asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
-}
-
-static inline u32
-xscale2pmu_read_int_enable(void)
-{
- u32 val;
- asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
- return val;
-}
-
-static void
-xscale2pmu_write_int_enable(u32 val)
-{
- asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
-}
-
-static inline int
-xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
- enum xscale_counters counter)
-{
- int ret = 0;
-
- switch (counter) {
- case XSCALE_CYCLE_COUNTER:
- ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
- break;
- case XSCALE_COUNTER0:
- ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
- break;
- case XSCALE_COUNTER1:
- ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
- break;
- case XSCALE_COUNTER2:
- ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
- break;
- case XSCALE_COUNTER3:
- ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
- break;
- default:
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
- }
-
- return ret;
-}
-
-static irqreturn_t
-xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
-{
- unsigned long pmnc, of_flags;
- struct perf_sample_data data;
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
- struct pt_regs *regs;
- int idx;
-
- /* Disable the PMU. */
- pmnc = xscale2pmu_read_pmnc();
- xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
-
- /* Check the overflow flag register. */
- of_flags = xscale2pmu_read_overflow_flags();
- if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
- return IRQ_NONE;
-
- /* Clear the overflow bits. */
- xscale2pmu_write_overflow_flags(of_flags);
-
- regs = get_irq_regs();
-
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- if (!event)
- continue;
-
- if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
- continue;
-
- hwc = &event->hw;
- armpmu_event_update(event);
- perf_sample_data_init(&data, 0, hwc->last_period);
- if (!armpmu_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
- }
-
- irq_work_run();
-
- /*
- * Re-enable the PMU.
- */
- pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
- xscale2pmu_write_pmnc(pmnc);
-
- return IRQ_HANDLED;
-}
-
-static void xscale2pmu_enable_event(struct perf_event *event)
-{
- unsigned long ien, evtsel;
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- ien = xscale2pmu_read_int_enable();
- evtsel = xscale2pmu_read_event_select();
-
- switch (idx) {
- case XSCALE_CYCLE_COUNTER:
- ien |= XSCALE2_CCOUNT_INT_EN;
- break;
- case XSCALE_COUNTER0:
- ien |= XSCALE2_COUNT0_INT_EN;
- evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
- evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
- break;
- case XSCALE_COUNTER1:
- ien |= XSCALE2_COUNT1_INT_EN;
- evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
- evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
- break;
- case XSCALE_COUNTER2:
- ien |= XSCALE2_COUNT2_INT_EN;
- evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
- evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
- break;
- case XSCALE_COUNTER3:
- ien |= XSCALE2_COUNT3_INT_EN;
- evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
- evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
- break;
- default:
- WARN_ONCE(1, "invalid counter number (%d)\n", idx);
- return;
- }
-
- xscale2pmu_write_event_select(evtsel);
- xscale2pmu_write_int_enable(ien);
-}
-
-static void xscale2pmu_disable_event(struct perf_event *event)
-{
- unsigned long ien, evtsel, of_flags;
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- ien = xscale2pmu_read_int_enable();
- evtsel = xscale2pmu_read_event_select();
-
- switch (idx) {
- case XSCALE_CYCLE_COUNTER:
- ien &= ~XSCALE2_CCOUNT_INT_EN;
- of_flags = XSCALE2_CCOUNT_OVERFLOW;
- break;
- case XSCALE_COUNTER0:
- ien &= ~XSCALE2_COUNT0_INT_EN;
- evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
- evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
- of_flags = XSCALE2_COUNT0_OVERFLOW;
- break;
- case XSCALE_COUNTER1:
- ien &= ~XSCALE2_COUNT1_INT_EN;
- evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
- evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
- of_flags = XSCALE2_COUNT1_OVERFLOW;
- break;
- case XSCALE_COUNTER2:
- ien &= ~XSCALE2_COUNT2_INT_EN;
- evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
- evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
- of_flags = XSCALE2_COUNT2_OVERFLOW;
- break;
- case XSCALE_COUNTER3:
- ien &= ~XSCALE2_COUNT3_INT_EN;
- evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
- evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
- of_flags = XSCALE2_COUNT3_OVERFLOW;
- break;
- default:
- WARN_ONCE(1, "invalid counter number (%d)\n", idx);
- return;
- }
-
- xscale2pmu_write_event_select(evtsel);
- xscale2pmu_write_int_enable(ien);
- xscale2pmu_write_overflow_flags(of_flags);
-}
-
-static int
-xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int idx = xscale1pmu_get_event_idx(cpuc, event);
- if (idx >= 0)
- goto out;
-
- if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
- idx = XSCALE_COUNTER3;
- else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
- idx = XSCALE_COUNTER2;
-out:
- return idx;
-}
-
-static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
-{
- unsigned long val;
-
- val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
- val |= XSCALE_PMU_ENABLE;
- xscale2pmu_write_pmnc(val);
-}
-
-static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
-{
- unsigned long val;
-
- val = xscale2pmu_read_pmnc();
- val &= ~XSCALE_PMU_ENABLE;
- xscale2pmu_write_pmnc(val);
-}
-
-static inline u64 xscale2pmu_read_counter(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int counter = hwc->idx;
- u32 val = 0;
-
- switch (counter) {
- case XSCALE_CYCLE_COUNTER:
- asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
- break;
- case XSCALE_COUNTER0:
- asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
- break;
- case XSCALE_COUNTER1:
- asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
- break;
- case XSCALE_COUNTER2:
- asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
- break;
- case XSCALE_COUNTER3:
- asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
- break;
- }
-
- return val;
-}
-
-static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val)
-{
- struct hw_perf_event *hwc = &event->hw;
- int counter = hwc->idx;
-
- switch (counter) {
- case XSCALE_CYCLE_COUNTER:
- asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
- break;
- case XSCALE_COUNTER0:
- asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
- break;
- case XSCALE_COUNTER1:
- asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
- break;
- case XSCALE_COUNTER2:
- asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
- break;
- case XSCALE_COUNTER3:
- asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
- break;
- }
-}
-
-static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
-{
- cpu_pmu->name = "armv5_xscale2";
- cpu_pmu->handle_irq = xscale2pmu_handle_irq;
- cpu_pmu->enable = xscale2pmu_enable_event;
- cpu_pmu->disable = xscale2pmu_disable_event;
- cpu_pmu->read_counter = xscale2pmu_read_counter;
- cpu_pmu->write_counter = xscale2pmu_write_counter;
- cpu_pmu->get_event_idx = xscale2pmu_get_event_idx;
- cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx;
- cpu_pmu->start = xscale2pmu_start;
- cpu_pmu->stop = xscale2pmu_stop;
- cpu_pmu->map_event = xscale_map_event;
- cpu_pmu->num_events = 5;
-
- return 0;
-}
-
-static const struct pmu_probe_info xscale_pmu_probe_table[] = {
- XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
- XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
- { /* sentinel value */ }
-};
-
-static int xscale_pmu_device_probe(struct platform_device *pdev)
-{
- return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table);
-}
-
-static struct platform_driver xscale_pmu_driver = {
- .driver = {
- .name = "xscale-pmu",
- },
- .probe = xscale_pmu_device_probe,
-};
-
-builtin_platform_driver(xscale_pmu_driver);
-#endif /* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c
index 8aa39db095d7..2c5155bd376b 100644
--- a/arch/arm/mach-davinci/pm.c
+++ b/arch/arm/mach-davinci/pm.c
@@ -61,7 +61,7 @@ static void davinci_pm_suspend(void)
/* Configure sleep count in deep sleep register */
val = __raw_readl(pm_config.deepsleep_reg);
- val &= ~DEEPSLEEP_SLEEPCOUNT_MASK,
+ val &= ~DEEPSLEEP_SLEEPCOUNT_MASK;
val |= pm_config.sleepcount;
__raw_writel(val, pm_config.deepsleep_reg);
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 2ed7d229c8f9..23c98203c40f 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# Linux system call numbers and entry vectors
#
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..79a656a62cbc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -5,6 +5,7 @@ config ARM64
select ACPI_CCA_REQUIRED if ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_GTDT if ACPI
+ select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU
select ACPI_IORT if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_MCFG if (ACPI && PCI)
@@ -381,7 +382,7 @@ config BROKEN_GAS_INST
config BUILTIN_RETURN_ADDRESS_STRIPS_PAC
bool
- # Clang's __builtin_return_adddress() strips the PAC since 12.0.0
+ # Clang's __builtin_return_address() strips the PAC since 12.0.0
# https://github.com/llvm/llvm-project/commit/2a96f47c5ffca84cd774ad402cacd137f4bf45e2
default y if CC_IS_CLANG
# GCC's __builtin_return_address() strips the PAC since 11.1.0,
@@ -1067,34 +1068,21 @@ config ARM64_ERRATUM_3117295
If unsure, say Y.
-config ARM64_WORKAROUND_SPECULATIVE_SSBS
- bool
-
config ARM64_ERRATUM_3194386
- bool "Cortex-X4: 3194386: workaround for MSR SSBS not self-synchronizing"
- select ARM64_WORKAROUND_SPECULATIVE_SSBS
+ bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing"
default y
help
- This option adds the workaround for ARM Cortex-X4 erratum 3194386.
+ This option adds the workaround for the following errata:
- On affected cores "MSR SSBS, #0" instructions may not affect
- subsequent speculative instructions, which may permit unexpected
- speculative store bypassing.
-
- Work around this problem by placing a speculation barrier after
- kernel changes to SSBS. The presence of the SSBS special-purpose
- register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such
- that userspace will use the PR_SPEC_STORE_BYPASS prctl to change
- SSBS.
-
- If unsure, say Y.
-
-config ARM64_ERRATUM_3312417
- bool "Neoverse-V3: 3312417: workaround for MSR SSBS not self-synchronizing"
- select ARM64_WORKAROUND_SPECULATIVE_SSBS
- default y
- help
- This option adds the workaround for ARM Neoverse-V3 erratum 3312417.
+ * ARM Cortex-A710 erratum 3324338
+ * ARM Cortex-A720 erratum 3456091
+ * ARM Cortex-X2 erratum 3324338
+ * ARM Cortex-X3 erratum 3324335
+ * ARM Cortex-X4 erratum 3194386
+ * ARM Cortex-X925 erratum 3324334
+ * ARM Neoverse-N2 erratum 3324339
+ * ARM Neoverse-V2 erratum 3324336
+ * ARM Neoverse-V3 erratum 3312417
On affected cores "MSR SSBS, #0" instructions may not affect
subsequent speculative instructions, which may permit unexpected
@@ -1108,7 +1096,6 @@ config ARM64_ERRATUM_3312417
If unsure, say Y.
-
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
@@ -1649,6 +1636,7 @@ config RODATA_FULL_DEFAULT_ENABLED
config ARM64_SW_TTBR0_PAN
bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
+ depends on !KCSAN
help
Enabling this option prevents the kernel from accessing
user-space memory directly by pointing TTBR0_EL1 to a reserved
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index a52618073de2..3738a9fc2d6c 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -312,6 +312,8 @@ config ARCH_STM32
select STM32_EXTI
select ARM_SMC_MBOX
select ARM_SCMI_PROTOCOL
+ select REGULATOR
+ select REGULATOR_ARM_SCMI
select COMMON_CLK_SCMI
select STM32_FIREWALL
help
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
index c204dd43c726..ce90327e1b2e 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts
@@ -191,7 +191,7 @@
compatible = "x-powers,axp803";
reg = <0x3a3>;
interrupt-parent = <&r_intc>;
- interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_LOW>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_LOW>;
x-powers,drive-vbus-en;
vin1-supply = <&reg_vcc5v>;
diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi
index 31329cbe0976..642ca8f0236b 100644
--- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi
+++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi
@@ -1581,9 +1581,23 @@
system-cache-controller@19200000 {
compatible = "qcom,qdu1000-llcc";
- reg = <0 0x19200000 0 0xd80000>,
+ reg = <0 0x19200000 0 0x80000>,
+ <0 0x19300000 0 0x80000>,
+ <0 0x19600000 0 0x80000>,
+ <0 0x19700000 0 0x80000>,
+ <0 0x19a00000 0 0x80000>,
+ <0 0x19b00000 0 0x80000>,
+ <0 0x19e00000 0 0x80000>,
+ <0 0x19f00000 0 0x80000>,
<0 0x1a200000 0 0x80000>;
reg-names = "llcc0_base",
+ "llcc1_base",
+ "llcc2_base",
+ "llcc3_base",
+ "llcc4_base",
+ "llcc5_base",
+ "llcc6_base",
+ "llcc7_base",
"llcc_broadcast_base";
interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
index a2627ab4db9a..b98b2f7752b5 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts
@@ -977,8 +977,7 @@
reset-n-pins {
pins = "gpio99";
function = "gpio";
- output-high;
- drive-strength = <16>;
+ bias-disable;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index 40da95b202da..b27143f81867 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -676,15 +676,16 @@
status = "okay";
- /* FIXME: verify */
touchscreen@10 {
- compatible = "hid-over-i2c";
+ compatible = "elan,ekth5015m", "elan,ekth6915";
reg = <0x10>;
- hid-descr-addr = <0x1>;
interrupts-extended = <&tlmm 175 IRQ_TYPE_LEVEL_LOW>;
- vdd-supply = <&vreg_misc_3p3>;
- vddl-supply = <&vreg_s10b>;
+ reset-gpios = <&tlmm 99 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>;
+ no-reset-on-power-off;
+
+ vcc33-supply = <&vreg_misc_3p3>;
+ vccio-supply = <&vreg_misc_3p3>;
pinctrl-names = "default";
pinctrl-0 = <&ts0_default>;
@@ -1619,8 +1620,8 @@
reset-n-pins {
pins = "gpio99";
function = "gpio";
- output-high;
- drive-strength = <16>;
+ drive-strength = <2>;
+ bias-disable;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi
index aec6ca5941c2..e374733f3b85 100644
--- a/arch/arm64/boot/dts/qcom/sm6115.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi
@@ -1092,6 +1092,7 @@
power-domains = <&rpmpd SM6115_VDDCX>;
operating-points-v2 = <&sdhc1_opp_table>;
+ iommus = <&apps_smmu 0x00c0 0x0>;
interconnects = <&system_noc MASTER_SDCC_1 RPM_ALWAYS_TAG
&bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>,
<&bimc MASTER_AMPSS_M0 RPM_ALWAYS_TAG
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
index 96f51850644b..6152bcd0bc1f 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
@@ -200,7 +200,7 @@
};
codec {
- sound-dai = <&wcd938x 1>, <&swr2 0>, <&lpass_txmacro 0>;
+ sound-dai = <&wcd938x 1>, <&swr2 1>, <&lpass_txmacro 0>;
};
platform {
@@ -873,7 +873,7 @@
wcd_tx: codec@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
- qcom,tx-port-mapping = <1 1 2 3>;
+ qcom,tx-port-mapping = <2 2 3 4>;
};
};
diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi
index b7a3e6caa386..b34855956ae0 100644
--- a/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi
@@ -54,14 +54,6 @@
};
};
- usb0_vbus_otg: regulator-usb0-vbus-otg {
- compatible = "regulator-fixed";
-
- regulator-name = "USB0_VBUS_OTG";
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
- };
-
vccq_sdhi1: regulator-vccq-sdhi1 {
compatible = "regulator-gpio";
regulator-name = "SDHI1 VccQ";
@@ -139,6 +131,9 @@
&phyrst {
status = "okay";
+ usb0_vbus_otg: regulator-vbus {
+ regulator-name = "vbus";
+ };
};
&scif0 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
index 0b973c1a5ef9..62d18ca769a1 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
@@ -5,6 +5,8 @@
*/
/dts-v1/;
+
+#include <dt-bindings/leds/common.h>
#include "rk3308.dtsi"
/ {
@@ -25,17 +27,21 @@
leds {
compatible = "gpio-leds";
pinctrl-names = "default";
- pinctrl-0 = <&green_led_gio>, <&heartbeat_led_gpio>;
+ pinctrl-0 = <&green_led>, <&heartbeat_led>;
green-led {
+ color = <LED_COLOR_ID_GREEN>;
default-state = "on";
+ function = LED_FUNCTION_POWER;
gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>;
label = "rockpis:green:power";
linux,default-trigger = "default-on";
};
blue-led {
+ color = <LED_COLOR_ID_BLUE>;
default-state = "on";
+ function = LED_FUNCTION_HEARTBEAT;
gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>;
label = "rockpis:blue:user";
linux,default-trigger = "heartbeat";
@@ -127,10 +133,12 @@
};
&emmc {
- bus-width = <4>;
cap-mmc-highspeed;
- mmc-hs200-1_8v;
+ cap-sd-highspeed;
+ no-sdio;
non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>;
vmmc-supply = <&vcc_io>;
status = "okay";
};
@@ -259,11 +267,11 @@
};
leds {
- green_led_gio: green-led-gpio {
+ green_led: green-led {
rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>;
};
- heartbeat_led_gpio: heartbeat-led-gpio {
+ heartbeat_led: heartbeat-led {
rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
index 9996e022fef8..31c25de2d689 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -840,7 +840,7 @@
clocks = <&cru SCLK_I2S2_8CH_TX_OUT>,
<&cru SCLK_I2S2_8CH_RX_OUT>,
<&cru PCLK_ACODEC>;
- reset-names = "codec-reset";
+ reset-names = "codec";
resets = <&cru SRST_ACODEC_P>;
#sound-dai-cells = <0>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
index f09d60bbe6c4..a608a219543e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
@@ -241,8 +241,8 @@
rk805: pmic@18 {
compatible = "rockchip,rk805";
reg = <0x18>;
- interrupt-parent = <&gpio2>;
- interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
#clock-cells = <1>;
clock-output-names = "xin32k", "rk805-clkout2";
gpio-controller;
diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
index 734f87db4d11..73618df7a889 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
@@ -793,6 +793,7 @@
dma-names = "tx";
pinctrl-names = "default";
pinctrl-0 = <&spdif_tx>;
+ #sound-dai-cells = <0>;
status = "disabled";
};
@@ -804,6 +805,7 @@
clocks = <&cru SCLK_I2S_2CH>, <&cru HCLK_I2S_2CH>;
dmas = <&dmac_bus 6>, <&dmac_bus 7>;
dma-names = "tx", "rx";
+ #sound-dai-cells = <0>;
status = "disabled";
};
@@ -817,6 +819,7 @@
dma-names = "tx", "rx";
pinctrl-names = "default";
pinctrl-0 = <&i2s_8ch_bus>;
+ #sound-dai-cells = <0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index 789fd0dcc88b..3cd63d1e8f15 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -450,7 +450,7 @@ ap_i2c_audio: &i2c8 {
dlg,btn-cfg = <50>;
dlg,mic-det-thr = <500>;
dlg,jack-ins-deb = <20>;
- dlg,jack-det-rate = "32ms_64ms";
+ dlg,jack-det-rate = "32_64";
dlg,jack-rem-deb = <1>;
dlg,a-d-btn-thr = <0xa>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
index a9dd60464b3f..13e599a85eb8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
@@ -289,7 +289,7 @@
regulator-name = "vdd_gpu";
regulator-always-on;
regulator-boot-on;
- regulator-min-microvolt = <900000>;
+ regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
regulator-ramp-delay = <6001>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
index 1a604429fb26..e74871491ef5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
@@ -444,6 +444,7 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
index baeb08d665c7..e4a20cda65ed 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
@@ -435,6 +435,7 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index d0ba3bf16fc6..966bbc582d89 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -420,6 +420,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
sd-uhs-sdr104;
vmmc-supply = <&vcc_3v3_s3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
index aebe1fedd2d8..615094bb8ba3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
@@ -344,6 +344,11 @@
};
};
+&pwm0 {
+ pinctrl-0 = <&pwm0m1_pins>;
+ pinctrl-names = "default";
+};
+
&saradc {
vref-supply = <&vcc_1v8_s0>;
status = "okay";
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
index 3b2ec1d0c542..074c316a9a69 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -288,9 +288,9 @@
pinctrl-0 = <&i2c7m0_xfer>;
status = "okay";
- es8316: audio-codec@11 {
+ es8316: audio-codec@10 {
compatible = "everest,es8316";
- reg = <0x11>;
+ reg = <0x10>;
assigned-clocks = <&cru I2S0_8CH_MCLKOUT>;
assigned-clock-rates = <12288000>;
clocks = <&cru I2S0_8CH_MCLKOUT>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
index 4cc1790ebd08..03ed48246d36 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
@@ -366,6 +366,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
@@ -406,6 +407,7 @@
pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc5v0_sys>;
vcc2-supply = <&vcc5v0_sys>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 57a9abe78ee4..2c7bf4da0b80 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1036,6 +1036,7 @@ CONFIG_SND_AUDIO_GRAPH_CARD2=m
CONFIG_HID_MULTITOUCH=m
CONFIG_I2C_HID_ACPI=m
CONFIG_I2C_HID_OF=m
+CONFIG_I2C_HID_OF_ELAN=m
CONFIG_USB=y
CONFIG_USB_OTG=y
CONFIG_USB_XHCI_HCD=y
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 6792a1f83f2a..a407f9cd549e 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -119,6 +119,18 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
return acpi_cpu_get_madt_gicc(cpu)->uid;
}
+static inline int get_cpu_for_acpi_id(u32 uid)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (acpi_cpu_get_madt_gicc(cpu) &&
+ uid == get_acpi_id_for_cpu(cpu))
+ return cpu;
+
+ return -EINVAL;
+}
+
static inline void arch_fix_phys_package_id(int num, u32 slot) { }
void __init acpi_init_cpus(void);
int apei_claim_sea(struct pt_regs *regs);
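As an illustration only (not part of the patch), the new reverse lookup pairs with the existing get_acpi_id_for_cpu(); a hypothetical round-trip check might look like this:

	/* Hypothetical sketch: verify the uid <-> logical-cpu mapping round-trips. */
	static bool acpi_uid_maps_back(u32 uid)
	{
		int cpu = get_cpu_for_acpi_id(uid);	/* -EINVAL if no match */

		return cpu >= 0 && get_acpi_id_for_cpu(cpu) == uid;
	}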
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 5f172611654b..9e96f024b2f1 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -175,21 +175,6 @@ static inline bool gic_prio_masking_enabled(void)
static inline void gic_pmr_mask_irqs(void)
{
- BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF |
- GIC_PRIO_PSR_I_SET));
- BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
- /*
- * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared
- * and non-secure PMR accesses are not subject to the shifts that
- * are applied to IRQ priorities
- */
- BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON);
- /*
- * Same situation as above, but now we make sure that we can mask
- * regular interrupts.
- */
- BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS |
- GIC_PRIO_PSR_I_SET));
gic_write_pmr(GIC_PRIO_IRQOFF);
}
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 934c658ee947..f5794d50f51d 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -15,7 +15,7 @@
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/jump_label.h>
-#include <linux/smp.h>
+#include <linux/percpu.h>
#include <linux/types.h>
#include <clocksource/arm_arch_timer.h>
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index c27404fa4418..a4697a0b6835 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -6,7 +6,7 @@
#ifndef __ASM_PMUV3_H
#define __ASM_PMUV3_H
-#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index 980d1dd8e1a3..b8a5861dc7b7 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -112,6 +112,9 @@
#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
+#define _ASM_EXTABLE_KACCESS(insn, fixup) \
+ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
__DEFINE_ASM_GPR_NUMS \
__ASM_EXTABLE_RAW(#insn, #fixup, \
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 7529c0263933..a6e5b07b64fd 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -59,7 +59,7 @@ cpucap_is_possible(const unsigned int cap)
case ARM64_WORKAROUND_REPEAT_TLBI:
return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
case ARM64_WORKAROUND_SPECULATIVE_SSBS:
- return IS_ENABLED(CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS);
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386);
}
return true;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8b904a757bd3..558434267271 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -588,14 +588,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
{
u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT);
- return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
+ return val == ID_AA64PFR0_EL1_EL1_AARCH32;
}
static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
{
u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT);
- return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
+ return val == ID_AA64PFR0_EL1_EL0_AARCH32;
}
static inline bool id_aa64pfr0_sve(u64 pfr0)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 7b32b99023a2..1cb0704c6163 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,9 +86,12 @@
#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
+#define ARM_CPU_PART_CORTEX_X3 0xD4E
#define ARM_CPU_PART_NEOVERSE_V2 0xD4F
+#define ARM_CPU_PART_CORTEX_A720 0xD81
#define ARM_CPU_PART_CORTEX_X4 0xD82
#define ARM_CPU_PART_NEOVERSE_V3 0xD84
+#define ARM_CPU_PART_CORTEX_X925 0xD85
#define APM_CPU_PART_XGENE 0x000
#define APM_CPU_VAR_POTENZA 0x00
@@ -162,9 +165,12 @@
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
+#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3)
#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2)
+#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720)
#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4)
#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
+#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 7abf09df7033..3f482500f71f 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -121,6 +121,14 @@
#define ESR_ELx_FSC_SECC (0x18)
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
+/* Status codes for individual page table levels */
+#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + n)
+#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + n)
+
+#define ESR_ELx_FSC_FAULT_nL (0x2C)
+#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \
+ ESR_ELx_FSC_FAULT) + (n))
+
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT)
@@ -388,20 +396,33 @@ static inline bool esr_is_data_abort(unsigned long esr)
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
{
- /* Translation fault, level -1 */
- if ((esr & ESR_ELx_FSC) == 0b101011)
- return true;
- return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_FAULT_L(3)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(2)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(1)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(0)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(-1));
}
static inline bool esr_fsc_is_permission_fault(unsigned long esr)
{
- return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_PERM_L(3)) ||
+ (esr == ESR_ELx_FSC_PERM_L(2)) ||
+ (esr == ESR_ELx_FSC_PERM_L(1)) ||
+ (esr == ESR_ELx_FSC_PERM_L(0));
}
static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
{
- return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_ACCESS_L(3)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(2)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(1)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(0));
}
/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
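As a hedged illustration (not part of the patch), the reworked predicates let a consumer classify an abort straight from its FSC field; the helper name below is invented:

	/* Hypothetical helper: classify an abort by its FSC encoding. */
	static const char *esr_fsc_class(unsigned long esr)
	{
		if (esr_fsc_is_translation_fault(esr))
			return "translation fault";	/* levels -1 to 3 */
		if (esr_fsc_is_permission_fault(esr))
			return "permission fault";	/* levels 0 to 3 */
		if (esr_fsc_is_access_flag_fault(esr))
			return "access flag fault";	/* levels 0 to 3 */
		return "other";
	}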
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index c768d16b81a4..bd19f4c758b7 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -72,11 +72,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
unsigned long tcr = read_sysreg(tcr_el1);
- if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
+ if ((tcr & TCR_T0SZ_MASK) == t0sz)
return;
tcr &= ~TCR_T0SZ_MASK;
- tcr |= t0sz << TCR_T0SZ_OFFSET;
+ tcr |= t0sz;
write_sysreg(tcr, tcr_el1);
isb();
}
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 91fbd5c8a391..0f84518632b4 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -182,7 +182,7 @@ void mte_check_tfsr_el1(void);
static inline void mte_check_tfsr_entry(void)
{
- if (!system_supports_mte())
+ if (!kasan_hw_tags_enabled())
return;
mte_check_tfsr_el1();
@@ -190,7 +190,7 @@ static inline void mte_check_tfsr_entry(void)
static inline void mte_check_tfsr_exit(void)
{
- if (!system_supports_mte())
+ if (!kasan_hw_tags_enabled())
return;
/*
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 9943ff0af4c9..1f60aa1bc750 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -170,6 +170,7 @@
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
+#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55)))
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 47ec58031f11..0abe975d68a8 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -21,35 +21,12 @@
#define INIT_PSTATE_EL2 \
(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h)
-/*
- * PMR values used to mask/unmask interrupts.
- *
- * GIC priority masking works as follows: if an IRQ's priority is a higher value
- * than the value held in PMR, that IRQ is masked. Lowering the value of PMR
- * means masking more IRQs (or at least that the same IRQs remain masked).
- *
- * To mask interrupts, we clear the most significant bit of PMR.
- *
- * Some code sections either automatically switch back to PSR.I or explicitly
- * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
- * in the priority mask, it indicates that PSR.I should be set and
- * interrupt disabling temporarily does not rely on IRQ priorities.
- */
-#define GIC_PRIO_IRQON 0xe0
-#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
-#define __GIC_PRIO_IRQOFF_NS 0xa0
-#define GIC_PRIO_PSR_I_SET (1 << 4)
-
-#define GIC_PRIO_IRQOFF \
- ({ \
- extern struct static_key_false gic_nonsecure_priorities;\
- u8 __prio = __GIC_PRIO_IRQOFF; \
- \
- if (static_branch_unlikely(&gic_nonsecure_priorities)) \
- __prio = __GIC_PRIO_IRQOFF_NS; \
- \
- __prio; \
- })
+#include <linux/irqchip/arm-gic-v3-prio.h>
+
+#define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED
+#define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ
+
+#define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET
/* Additional SPSR bits not exposed in the UABI */
#define PSR_MODE_THREAD_BIT (1 << 0)
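The masking rule described in the removed comment still governs the new definitions; a rough usage sketch (illustration only, assuming gic_write_pmr() from asm/arch_gicv3.h):

	/* Sketch: lower PMR values mask more interrupts, so IRQOFF < IRQON. */
	static void run_with_irqs_pmr_masked(void (*fn)(void))
	{
		gic_write_pmr(GIC_PRIO_IRQOFF);	/* normal IRQs masked, pseudo-NMIs live */
		fn();				/* only a pseudo-NMI can fire here */
		gic_write_pmr(GIC_PRIO_IRQON);	/* restore full delivery */
	}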
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
new file mode 100644
index 000000000000..be5915669d23
--- /dev/null
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#include <asm/cacheflush.h>
+
+/* Sigh. You can still run arm64 in BE mode */
+#include <asm/byteorder.h>
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("1:\t" \
+ "movz %0, #0xcdef\n\t" \
+ "movk %0, #0x89ab, lsl #16\n\t" \
+ "movk %0, #0x4567, lsl #32\n\t" \
+ "movk %0, #0x0123, lsl #48\n\t" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret)); \
+ __ret; })
+
+#define runtime_const_shift_right_32(val, sym) ({ \
+ unsigned long __ret; \
+ asm_inline("1:\t" \
+ "lsr %w0,%w1,#12\n\t" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"r" (0u+(val))); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
+static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
+{
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffe0001f;
+ insn |= (val & 0xffff) << 5;
+ *p = cpu_to_le32(insn);
+}
+
+static inline void __runtime_fixup_caches(void *where, unsigned int insns)
+{
+ unsigned long va = (unsigned long)where;
+ caches_clean_inval_pou(va, va + 4*insns);
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ __runtime_fixup_16(p, val);
+ __runtime_fixup_16(p+1, val >> 16);
+ __runtime_fixup_16(p+2, val >> 32);
+ __runtime_fixup_16(p+3, val >> 48);
+ __runtime_fixup_caches(where, 4);
+}
+
+/* Immediate value is 6 bits starting at bit #16 */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffc0ffff;
+ insn |= (val & 63) << 16;
+ *p = cpu_to_le32(insn);
+ __runtime_fixup_caches(where, 1);
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
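A minimal usage sketch (illustration only — my_hashtable and my_hash_shift are invented names; the real users wired up in this series are d_hash_shift and dentry_hashtable, via the RUNTIME_CONST() entries added to vmlinux.lds.S below):

	/* Boot-time constants, patched directly into the instruction stream. */
	static struct hlist_head *my_hashtable __ro_after_init;
	static unsigned int my_hash_shift __ro_after_init;

	static inline struct hlist_head *my_hash_slot(unsigned int hash)
	{
		/* Compiles to movz/movk + lsr; no memory loads at run time. */
		return runtime_const_ptr(my_hashtable) +
		       runtime_const_shift_right_32(hash, my_hash_shift);
	}

	static void __init my_hash_init(void)
	{
		/* Fix up every recorded use site once the values are final. */
		runtime_const_init(ptr, my_hashtable);
		runtime_const_init(shift, my_hash_shift);
	}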
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index efb13112b408..2510eec026f7 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -25,22 +25,11 @@
#ifndef __ASSEMBLY__
-#include <asm/percpu.h>
-
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
-
-/*
- * We don't use this_cpu_read(cpu_number) as that has implicit writes to
- * preempt_count, and associated (compiler) barriers, that we'd like to avoid
- * the expense of. If we're preemptible, the value can be stale at use anyway.
- * And we can't use this_cpu_ptr() either, as that winds up recursing back
- * here under CONFIG_DEBUG_PREEMPT=y.
- */
-#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number))
+#define raw_smp_processor_id() (current_thread_info()->cpu)
/*
* Logical CPU mapping.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index af3b206fa423..1b6e436dbb55 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -872,10 +872,6 @@
/* Position the attr at the correct index */
#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
-/* id_aa64pfr0 */
-#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1
-#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2
-
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 14be5000c5a0..28f665e0975a 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -184,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_mem_asm(load, reg, x, addr, err, type) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_mem_asm(load, reg, x, addr, label, type) \
+ asm_goto_output( \
+ "1: " load " " reg "0, [%1]\n" \
+ _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \
+ : "=r" (x) \
+ : "r" (addr) : : label)
+#else
+#define __get_mem_asm(load, reg, x, addr, label, type) do { \
+ int __gma_err = 0; \
asm volatile( \
"1: " load " " reg "1, [%2]\n" \
"2:\n" \
_ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
- : "+r" (err), "=r" (x) \
- : "r" (addr))
+ : "+r" (__gma_err), "=r" (x) \
+ : "r" (addr)); \
+ if (__gma_err) \
+ goto label; \
+} while (0)
+#endif
-#define __raw_get_mem(ldr, x, ptr, err, type) \
+#define __raw_get_mem(ldr, x, ptr, label, type) \
do { \
unsigned long __gu_val; \
switch (sizeof(*(ptr))) { \
case 1: \
- __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \
break; \
case 2: \
- __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \
break; \
case 4: \
- __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \
break; \
case 8: \
- __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \
+ __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \
break; \
default: \
BUILD_BUG(); \
@@ -219,27 +230,34 @@ do { \
* uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
* we must evaluate these outside of the critical section.
*/
-#define __raw_get_user(x, ptr, err) \
+#define __raw_get_user(x, ptr, label) \
do { \
__typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \
__typeof__(x) __rgu_val; \
__chk_user_ptr(ptr); \
- \
- uaccess_ttbr0_enable(); \
- __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \
- uaccess_ttbr0_disable(); \
- \
- (x) = __rgu_val; \
+ do { \
+ __label__ __rgu_failed; \
+ uaccess_ttbr0_enable(); \
+ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \
+ uaccess_ttbr0_disable(); \
+ (x) = __rgu_val; \
+ break; \
+ __rgu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
} while (0)
#define __get_user_error(x, ptr, err) \
do { \
+ __label__ __gu_failed; \
__typeof__(*(ptr)) __user *__p = (ptr); \
might_fault(); \
if (access_ok(__p, sizeof(*__p))) { \
__p = uaccess_mask_ptr(__p); \
- __raw_get_user((x), __p, (err)); \
+ __raw_get_user((x), __p, __gu_failed); \
} else { \
+ __gu_failed: \
(x) = (__force __typeof__(x))0; (err) = -EFAULT; \
} \
} while (0)
@@ -262,40 +280,42 @@ do { \
do { \
__typeof__(dst) __gkn_dst = (dst); \
__typeof__(src) __gkn_src = (src); \
- int __gkn_err = 0; \
- \
- __mte_enable_tco_async(); \
- __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
- (__force type *)(__gkn_src), __gkn_err, K); \
- __mte_disable_tco_async(); \
+ do { \
+ __label__ __gkn_label; \
\
- if (unlikely(__gkn_err)) \
+ __mte_enable_tco_async(); \
+ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
+ (__force type *)(__gkn_src), __gkn_label, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __gkn_label: \
+ __mte_disable_tco_async(); \
goto err_label; \
+ } while (0); \
} while (0)
-#define __put_mem_asm(store, reg, x, addr, err, type) \
- asm volatile( \
- "1: " store " " reg "1, [%2]\n" \
+#define __put_mem_asm(store, reg, x, addr, label, type) \
+ asm goto( \
+ "1: " store " " reg "0, [%1]\n" \
"2:\n" \
- _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \
- : "+r" (err) \
- : "rZ" (x), "r" (addr))
+ _ASM_EXTABLE_##type##ACCESS(1b, %l2) \
+ : : "rZ" (x), "r" (addr) : : label)
-#define __raw_put_mem(str, x, ptr, err, type) \
+#define __raw_put_mem(str, x, ptr, label, type) \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
switch (sizeof(*(ptr))) { \
case 1: \
- __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \
break; \
case 2: \
- __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \
break; \
case 4: \
- __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \
break; \
case 8: \
- __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \
+ __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \
break; \
default: \
BUILD_BUG(); \
@@ -307,25 +327,34 @@ do { \
* uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
* we must evaluate these outside of the critical section.
*/
-#define __raw_put_user(x, ptr, err) \
+#define __raw_put_user(x, ptr, label) \
do { \
+ __label__ __rpu_failed; \
__typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \
__typeof__(*(ptr)) __rpu_val = (x); \
__chk_user_ptr(__rpu_ptr); \
\
- uaccess_ttbr0_enable(); \
- __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \
- uaccess_ttbr0_disable(); \
+ do { \
+ uaccess_ttbr0_enable(); \
+ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \
+ uaccess_ttbr0_disable(); \
+ break; \
+ __rpu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
} while (0)
#define __put_user_error(x, ptr, err) \
do { \
+ __label__ __pu_failed; \
__typeof__(*(ptr)) __user *__p = (ptr); \
might_fault(); \
if (access_ok(__p, sizeof(*__p))) { \
__p = uaccess_mask_ptr(__p); \
- __raw_put_user((x), __p, (err)); \
+ __raw_put_user((x), __p, __pu_failed); \
} else { \
+ __pu_failed: \
(err) = -EFAULT; \
} \
} while (0)
@@ -348,15 +377,18 @@ do { \
do { \
__typeof__(dst) __pkn_dst = (dst); \
__typeof__(src) __pkn_src = (src); \
- int __pkn_err = 0; \
\
- __mte_enable_tco_async(); \
- __raw_put_mem("str", *((type *)(__pkn_src)), \
- (__force type *)(__pkn_dst), __pkn_err, K); \
- __mte_disable_tco_async(); \
- \
- if (unlikely(__pkn_err)) \
+ do { \
+ __label__ __pkn_err; \
+ __mte_enable_tco_async(); \
+ __raw_put_mem("str", *((type *)(__pkn_src)), \
+ (__force type *)(__pkn_dst), __pkn_err, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __pkn_err: \
+ __mte_disable_tco_async(); \
goto err_label; \
+ } while (0); \
} while(0)
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
@@ -381,6 +413,51 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
__actu_ret; \
})
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr,len)))
+ return 0;
+ uaccess_ttbr0_enable();
+ return 1;
+}
+#define user_access_begin(a,b) user_access_begin(a,b)
+#define user_access_end() uaccess_ttbr0_disable()
+#define unsafe_put_user(x, ptr, label) \
+ __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U)
+#define unsafe_get_user(x, ptr, label) \
+ __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U)
+
+/*
+ * KCSAN uses these to save and restore ttbr state.
+ * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so
+ * they are no-ops.
+ */
+static inline unsigned long user_access_save(void) { return 0; }
+static inline void user_access_restore(unsigned long enabled) { }
+
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
+
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
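A hedged sketch (not part of the patch) of the caller-side pattern the label-based accessors enable — a single fault label instead of per-access error plumbing:

	/* Hypothetical example: increment a user-space counter. */
	static int bump_user_counter(int __user *uptr)
	{
		int val;

		if (!user_access_begin(uptr, sizeof(*uptr)))
			return -EFAULT;
		unsafe_get_user(val, uptr, efault);	/* faults jump to efault */
		unsafe_put_user(val + 1, uptr, efault);
		user_access_end();
		return 0;

	efault:
		user_access_end();
		return -EFAULT;
	}

Note that user_access_end() must run on both paths, which is why __raw_get_user() and __raw_put_user() above route their failure labels through uaccess_ttbr0_disable() before jumping out.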
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 266b96acc014..1386e8e751f2 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -840,7 +840,7 @@ __SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SYSCALL(__NR_io_pgetevents_time64, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 14251abee23c..824ca6987a51 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -27,20 +27,15 @@ static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
}
#define prep_zero_mask(a, bits, c) (bits)
+#define create_zero_mask(bits) (bits)
+#define find_zero(bits) (__ffs(bits) >> 3)
-static inline unsigned long create_zero_mask(unsigned long bits)
+static inline unsigned long zero_bytemask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
return bits >> 7;
}
-static inline unsigned long find_zero(unsigned long mask)
-{
- return fls64(mask) >> 3;
-}
-
-#define zero_bytemask(mask) (mask)
-
#else /* __AARCH64EB__ */
#include <asm-generic/word-at-a-time.h>
#endif
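For intuition (illustration only, plain userspace C — __builtin_ctzll stands in for the kernel's __ffs()), the little-endian zero-byte scan behaves like this on a sample word:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint64_t word, bits;

		memcpy(&word, "AB\0CDEFG", 8);		/* little-endian load */
		/* has_zero(): set the top bit of every byte that was zero */
		bits = (word - 0x0101010101010101ULL) & ~word &
		       0x8080808080808080ULL;
		/* find_zero(): lowest set bit, divided by 8, is the byte index */
		printf("first NUL at byte %d\n", __builtin_ctzll(bits) >> 3);
		return 0;
	}

Run against "AB\0CDEFG" this prints byte index 2, the position of the first NUL.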
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 763824963ed1..2b112f3b7510 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -46,7 +46,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index e0e7b93c16cc..e6f66491fbe9 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -30,6 +30,7 @@
#include <linux/pgtable.h>
#include <acpi/ghes.h>
+#include <acpi/processor.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
@@ -45,6 +46,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
static bool param_acpi_off __initdata;
static bool param_acpi_on __initdata;
static bool param_acpi_force __initdata;
+static bool param_acpi_nospcr __initdata;
static int __init parse_acpi(char *arg)
{
@@ -58,6 +60,8 @@ static int __init parse_acpi(char *arg)
param_acpi_on = true;
else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
param_acpi_force = true;
+ else if (strcmp(arg, "nospcr") == 0) /* disable SPCR as default console */
+ param_acpi_nospcr = true;
else
return -EINVAL; /* Core will print when we return error */
@@ -237,7 +241,20 @@ done:
acpi_put_table(facs);
}
#endif
- acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
+
+ /*
+ * For varying privacy and security reasons, it is sometimes
+ * necessary to completely silence the serial console output, and
+ * only enable it when needed.
+ * But many existing systems depend on this behaviour, so use
+ * acpi=nospcr to disable the console declared in the ACPI SPCR
+ * table as the default serial console.
+ */
+ acpi_parse_spcr(earlycon_acpi_spcr_enable,
+ !param_acpi_nospcr);
+ pr_info("Use ACPI SPCR as default console: %s\n",
+ param_acpi_nospcr ? "No" : "Yes");
+
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
}
@@ -423,107 +440,23 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size)
memblock_mark_nomap(addr, size);
}
-#ifdef CONFIG_ACPI_FFH
-/*
- * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as
- * specified in https://developer.arm.com/docs/den0048/latest
- */
-struct acpi_ffh_data {
- struct acpi_ffh_info info;
- void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- unsigned long a4, unsigned long a5,
- unsigned long a6, unsigned long a7,
- struct arm_smccc_res *args,
- struct arm_smccc_quirk *res);
- void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args,
- struct arm_smccc_1_2_regs *res);
-};
-
-int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+ int *pcpu)
{
- enum arm_smccc_conduit conduit;
- struct acpi_ffh_data *ffh_ctxt;
-
- if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2)
- return -EOPNOTSUPP;
-
- conduit = arm_smccc_1_1_get_conduit();
- if (conduit == SMCCC_CONDUIT_NONE) {
- pr_err("%s: invalid SMCCC conduit\n", __func__);
- return -EOPNOTSUPP;
+ /* If an error code is passed in, this stub can't fix it */
+ if (*pcpu < 0) {
+ pr_warn_once("Unable to map CPU to valid ID\n");
+ return *pcpu;
}
- ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
- if (!ffh_ctxt)
- return -ENOMEM;
-
- if (conduit == SMCCC_CONDUIT_SMC) {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc;
- } else {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc;
- }
-
- memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info));
-
- *region_ctxt = ffh_ctxt;
- return AE_OK;
-}
-
-static bool acpi_ffh_smccc_owner_allowed(u32 fid)
-{
- int owner = ARM_SMCCC_OWNER_NUM(fid);
-
- if (owner == ARM_SMCCC_OWNER_STANDARD ||
- owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM)
- return true;
-
- return false;
+ return 0;
}
+EXPORT_SYMBOL(acpi_map_cpu);
-int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context)
+int acpi_unmap_cpu(int cpu)
{
- int ret = 0;
- struct acpi_ffh_data *ffh_ctxt = region_context;
-
- if (ffh_ctxt->info.offset == 0) {
- /* SMC/HVC 32bit call */
- struct arm_smccc_res res;
- u32 a[8] = { 0 }, *ptr = (u32 *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) ||
- !acpi_ffh_smccc_owner_allowed(*ptr) ||
- ffh_ctxt->info.length > 32) {
- ret = AE_ERROR;
- } else {
- int idx, len = ffh_ctxt->info.length >> 2;
-
- for (idx = 0; idx < len; idx++)
- a[idx] = *(ptr + idx);
-
- ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4],
- a[5], a[6], a[7], &res, NULL);
- memcpy(value, &res, sizeof(res));
- }
-
- } else if (ffh_ctxt->info.offset == 1) {
- /* SMC/HVC 64bit call */
- struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) ||
- !acpi_ffh_smccc_owner_allowed(r->a0) ||
- ffh_ctxt->info.length > sizeof(*r)) {
- ret = AE_ERROR;
- } else {
- ffh_ctxt->invoke_ffh64_fn(r, r);
- memcpy(value, r, ffh_ctxt->info.length);
- }
- } else {
- ret = AE_ERROR;
- }
-
- return ret;
+ return 0;
}
-#endif /* CONFIG_ACPI_FFH */
+EXPORT_SYMBOL(acpi_unmap_cpu);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index e51535a5f939..0c036a9a3c33 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -34,17 +34,6 @@ int __init acpi_numa_get_nid(unsigned int cpu)
return acpi_early_node_map[cpu];
}
-static inline int get_cpu_for_acpi_id(u32 uid)
-{
- int cpu;
-
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- if (uid == get_acpi_id_for_cpu(cpu))
- return cpu;
-
- return -EINVAL;
-}
-
static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header,
const unsigned long end)
{
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 828be635e7e1..617424b73f8c 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -432,14 +432,17 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = {
};
#endif
-#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS
-static const struct midr_range erratum_spec_ssbs_list[] = {
#ifdef CONFIG_ARM64_ERRATUM_3194386
+static const struct midr_range erratum_spec_ssbs_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
-#endif
-#ifdef CONFIG_ARM64_ERRATUM_3312417
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
-#endif
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
{}
};
#endif
@@ -741,9 +744,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
},
#endif
-#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS
+#ifdef CONFIG_ARM64_ERRATUM_3194386
{
- .desc = "ARM errata 3194386, 3312417",
+ .desc = "SSBS not fully self-synchronizing",
.capability = ARM64_WORKAROUND_SPECULATIVE_SSBS,
ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list),
},
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 48e7029f1054..646ecd3069fd 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -285,8 +285,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_EL1_IMP),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_EL0_IMP),
ARM64_FTR_END,
};
@@ -429,6 +429,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
deleted file mode 100644
index f372295207fb..000000000000
--- a/arch/arm64/kernel/cpuidle.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ARM64 CPU idle arch support
- *
- * Copyright (C) 2014 ARM Ltd.
- * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
- */
-
-#include <linux/acpi.h>
-#include <linux/cpuidle.h>
-#include <linux/cpu_pm.h>
-#include <linux/psci.h>
-
-#ifdef CONFIG_ACPI_PROCESSOR_IDLE
-
-#include <acpi/processor.h>
-
-#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
-
-static int psci_acpi_cpu_init_idle(unsigned int cpu)
-{
- int i, count;
- struct acpi_lpi_state *lpi;
- struct acpi_processor *pr = per_cpu(processors, cpu);
-
- if (unlikely(!pr || !pr->flags.has_lpi))
- return -EINVAL;
-
- /*
- * If the PSCI cpu_suspend function hook has not been initialized
- * idle states must not be enabled, so bail out
- */
- if (!psci_ops.cpu_suspend)
- return -EOPNOTSUPP;
-
- count = pr->power.count - 1;
- if (count <= 0)
- return -ENODEV;
-
- for (i = 0; i < count; i++) {
- u32 state;
-
- lpi = &pr->power.lpi_states[i + 1];
- /*
- * Only bits[31:0] represent a PSCI power_state while
- * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification
- */
- state = lpi->address;
- if (!psci_power_state_is_valid(state)) {
- pr_warn("Invalid PSCI power state %#x\n", state);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-int acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
- return psci_acpi_cpu_init_idle(cpu);
-}
-
-__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
-{
- u32 state = lpi->address;
-
- if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
- return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
- else
- return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
-}
-#endif
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index ba4f8f7d6a91..8f5422ed1b75 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -105,11 +105,6 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-/* Static key checked in GIC_PRIO_IRQOFF. */
-KVM_NVHE_ALIAS(gic_nonsecure_priorities);
-#endif
-
/* EL2 exception handling */
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index dcdcccd40891..6174671be7c1 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -582,12 +582,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl);
size_t mte_probe_user_range(const char __user *uaddr, size_t size)
{
const char __user *end = uaddr + size;
- int err = 0;
char val;
- __raw_get_user(val, uaddr, err);
- if (err)
- return size;
+ __raw_get_user(val, uaddr, efault);
uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
while (uaddr < end) {
@@ -595,12 +592,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size)
* A read is sufficient for mte, the caller should have probed
* for the pte write permission if required.
*/
- __raw_get_user(val, uaddr, err);
- if (err)
- return end - uaddr;
+ __raw_get_user(val, uaddr, efault);
uaddr += MTE_GRANULE_SIZE;
}
(void)val;
return 0;
+
+efault:
+ return end - uaddr;
}
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index 5fa08e13e17e..f374a3e5a5fe 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -173,7 +173,7 @@ static void __init remap_idmap_for_lpa2(void)
* Don't bother with the FDT, we no longer need it after this.
*/
memset(init_idmap_pg_dir, 0,
- (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end);
+ (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
create_init_idmap(init_idmap_pg_dir, mask);
dsb(ishst);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index baca47bd443c..da53722f95d4 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -567,7 +567,7 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
* Mitigate this with an unconditional speculation barrier, as CPUs
* could mis-speculate branches and bypass a conditional barrier.
*/
- if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS))
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386))
spec_bar();
return SPECTRE_MITIGATED;
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 29a8e444db83..fabd732d0a2d 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
{
phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry);
int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry);
- if (err)
+ if (err && err != -EPERM)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);
return err;
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index 99f2ffe9fc05..5b0891146054 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -74,4 +74,5 @@ static void __exit reloc_test_exit(void)
module_init(reloc_test_init);
module_exit(reloc_test_exit);
+MODULE_DESCRIPTION("Relocation testing module");
MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 31c8b3094dd7..5e18fbcee9a2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -55,9 +55,6 @@
#include <trace/events/ipi.h>
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -132,7 +129,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
if (ret) {
- pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+ if (ret != -EPERM)
+ pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
return ret;
}
@@ -510,6 +508,59 @@ static int __init smp_cpu_setup(int cpu)
static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;
+int arch_register_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+ if (!acpi_disabled && !acpi_handle &&
+ IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU))
+ return -EPROBE_DEFER;
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+ /* For now block anything that looks like physical CPU Hotplug */
+ if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return -ENODEV;
+ }
+#endif
+
+ /*
+ * Availability of the acpi handle is sufficient to establish
+ * that _STA has already been checked. No need to recheck here.
+ */
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
+
+ return register_cpu(c, cpu);
+}
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+void arch_unregister_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ acpi_status status;
+ unsigned long long sta;
+
+ if (!acpi_handle) {
+ pr_err_once("Removing a CPU without associated ACPI handle\n");
+ return;
+ }
+
+ status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta);
+ if (ACPI_FAILURE(status))
+ return;
+
+ /* For now do not allow anything that looks like physical CPU HP */
+ if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return;
+ }
+
+ unregister_cpu(c);
+}
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
@@ -530,7 +581,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
- if (!acpi_gicc_is_usable(processor)) {
+ if (!(processor->flags &
+ (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
@@ -749,8 +801,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
for_each_possible_cpu(cpu) {
- per_cpu(cpu_number, cpu) = cpu;
-
if (cpu == smp_processor_id())
continue;
@@ -767,13 +817,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
-static const char *ipi_types[NR_IPI] __tracepoint_string = {
+static const char *ipi_types[MAX_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
[IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
+ [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
+ [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts",
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
@@ -784,7 +836,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
{
unsigned int cpu, i;
- for (i = 0; i < NR_IPI; i++) {
+ for (i = 0; i < MAX_IPI; i++) {
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
@@ -1028,12 +1080,12 @@ void __init set_smp_ipi_range(int ipi_base, int n)
if (ipi_should_be_nmi(i)) {
err = request_percpu_nmi(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
+ "IPI", &irq_stat);
WARN(err, "Could not request IPI %d as NMI, err=%d\n",
i, err);
} else {
err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
+ "IPI", &irq_stat);
WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
i, err);
}
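
For context on the dev_id change above: request_percpu_irq() and request_percpu_nmi() treat dev_id purely as a per-CPU cookie, so once the cpu_number per-CPU variable is removed, any other per-CPU symbol (here irq_stat) can stand in; the core hands each handler the handling CPU's instance of it. A minimal kernel-style sketch of the pattern, with hypothetical handler and symbol names:

	#include <linux/interrupt.h>
	#include <linux/percpu.h>

	DEFINE_PER_CPU(int, demo_stat);		/* any per-CPU symbol works */

	static irqreturn_t demo_ipi_handler(int irq, void *dev_id)
	{
		/* dev_id is the current CPU's instance of the cookie. */
		return IRQ_HANDLED;
	}

	static int demo_request(int irq)
	{
		/* Mirrors the request_percpu_irq() call in the hunk above. */
		return request_percpu_irq(irq, demo_ipi_handler, "demo-IPI",
					  &demo_stat);
	}
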
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index ad198262b981..7230f6e20ab8 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
syscall_set_return_value(current, regs, 0, ret);
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for arm64 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 9 bits.
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints: the AAPCS mandates a
+ * 16-byte aligned SP at function boundaries, which will remove the
+ * 4 low bits from any entropy chosen here.
*
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints: the AAPCS mandates a
- * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
- *
- * The resulting 5 bits of entropy is seen in SP[8:4].
+ * The resulting 6 bits of entropy are seen in SP[9:4].
*/
- choose_random_kstack_offset(get_random_u16() & 0x1FF);
+ choose_random_kstack_offset(get_random_u16());
}
static inline bool has_syscall_work(unsigned long flags)
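
The arithmetic in the new comment can be checked in isolation: KSTACK_OFFSET_MAX() keeps 10 bits of the chosen value, and the AAPCS 16-byte SP alignment clears the 4 low bits, leaving 6 bits of effective entropy in SP[9:4]. A standalone sketch (the alignment is really applied by the compiler; the mask below only models it):

	#include <stdint.h>
	#include <stdio.h>

	#define KSTACK_OFFSET_MAX_DEMO(x)	((x) & 0x3ff)	/* 10-bit limit */
	#define AAPCS_SP_ALIGN_MASK		(~0xfUL)	/* 16-byte SP alignment */

	int main(void)
	{
		uint16_t raw = 0xffff;	/* e.g. a get_random_u16() result */
		unsigned long off = KSTACK_OFFSET_MAX_DEMO(raw) & AAPCS_SP_ALIGN_MASK;

		/* Prints 0x3f0: bits [9:4] set, i.e. 64 possible offsets. */
		printf("max offset = %#lx\n", off);
		return 0;
	}
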
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 755a22d4f840..55a8e310ea12 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -264,6 +264,9 @@ SECTIONS
EXIT_DATA
}
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index 51f043649146..f957890c7e38 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -52,11 +52,11 @@
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
+ SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \
+ SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \
+ SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \
+ SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \
+ SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \
)
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 95cf18574251..187a5f4d56c0 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -33,9 +33,9 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
/* Protected KVM does not support AArch32 guests. */
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP);
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP);
/*
* Linux guests assume support for floating-point and Advanced SIMD. Do
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index edd969a1f36b..2860548d4250 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -276,7 +276,7 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
* of AArch32 feature id registers.
*/
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_EL1_IMP);
return pvm_access_raz_wi(vcpu, p, r);
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index a35ce10e0a9f..d1a476b08f54 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -14,7 +14,6 @@
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
-#include <asm/arm_pmuv3.h>
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c927e9312f10..353ea5dc32b8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -124,7 +124,8 @@ bool pgattr_change_is_safe(u64 old, u64 new)
* The following mapping attributes may be updated in live
* kernel mappings without the need for break-before-make.
*/
- pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
+ pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG |
+ PTE_SWBITS_MASK;
/* creating or taking down mappings is always safe */
if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
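
The function reduces to "only whitelisted bits may differ between the live old and new values"; adding PTE_SWBITS_MASK whitelists the software-use bits of the descriptor. A standalone model of that test (illustrative bit position; bit 55 is one of arm64's software bits):

	#include <stdbool.h>
	#include <stdint.h>

	/* A live attribute update is safe iff old and new differ only
	 * inside the whitelisted mask, matching the diff's logic. */
	static bool attr_change_is_safe(uint64_t old, uint64_t new, uint64_t mask)
	{
		return ((old ^ new) & ~mask) == 0;
	}

	int main(void)
	{
		uint64_t swbit = 1ull << 55;	/* pretend software bit */
		return attr_change_is_safe(0, swbit, swbit) ? 0 : 1;
	}
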
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index 7ff6a2466af1..e0594b6370a6 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -6,6 +6,7 @@
#define __ARCH_WANT_SYS_CLONE3
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYNC_FILE_RANGE2
#include <asm-generic/unistd.h>
#define __NR_set_thread_area (__NR_arch_specific_syscall + 0)
diff --git a/arch/csky/kernel/syscall.c b/arch/csky/kernel/syscall.c
index 3d30e58a45d2..4540a271ee39 100644
--- a/arch/csky/kernel/syscall.c
+++ b/arch/csky/kernel/syscall.c
@@ -20,7 +20,7 @@ SYSCALL_DEFINE6(mmap2,
unsigned long, prot,
unsigned long, flags,
unsigned long, fd,
- off_t, offset)
+ unsigned long, offset)
{
if (unlikely(offset & (~PAGE_MASK >> 12)))
return -EINVAL;
diff --git a/arch/hexagon/include/asm/syscalls.h b/arch/hexagon/include/asm/syscalls.h
new file mode 100644
index 000000000000..40f2d08bec92
--- /dev/null
+++ b/arch/hexagon/include/asm/syscalls.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm-generic/syscalls.h>
+
+asmlinkage long sys_hexagon_fadvise64_64(int fd, int advice,
+ u32 a2, u32 a3, u32 a4, u32 a5);
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 432c4db1b623..21ae22306b5d 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -36,5 +36,6 @@
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYNC_FILE_RANGE2
#include <asm-generic/unistd.h>
diff --git a/arch/hexagon/kernel/syscalltab.c b/arch/hexagon/kernel/syscalltab.c
index 0fadd582cfc7..5d98bdc494ec 100644
--- a/arch/hexagon/kernel/syscalltab.c
+++ b/arch/hexagon/kernel/syscalltab.c
@@ -14,6 +14,13 @@
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
+SYSCALL_DEFINE6(hexagon_fadvise64_64, int, fd, int, advice,
+ SC_ARG64(offset), SC_ARG64(len))
+{
+ return ksys_fadvise64_64(fd, SC_VAL64(loff_t, offset), SC_VAL64(loff_t, len), advice);
+}
+#define sys_fadvise64_64 sys_hexagon_fadvise64_64
+
void *sys_call_table[__NR_syscalls] = {
#include <asm/unistd.h>
};
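
SC_ARG64() declares a 64-bit syscall argument as two 32-bit halves on 32-bit ABIs like this one, and SC_VAL64() glues them back together. Roughly, with hypothetical names (the real macros also handle endianness and register-pair alignment):

	#include <stdint.h>
	#include <stdio.h>

	/* Rough userspace model of SC_VAL64(loff_t, name): rejoin the halves. */
	static int64_t val64(uint32_t hi, uint32_t lo)
	{
		return ((int64_t)hi << 32) | lo;
	}

	int main(void)
	{
		/* A 5 GiB offset split across two 32-bit registers. */
		uint32_t lo = 0x40000000, hi = 0x1;
		printf("%lld\n", (long long)val64(hi, lo));	/* 5368709120 */
		return 0;
	}
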
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index b4c5acd7aa3b..8801611143ab 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -22,7 +22,7 @@
#define __SYSCALL(nr, call) [nr] = (call),
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long,
- prot, unsigned long, flags, unsigned long, fd, off_t, offset)
+ prot, unsigned long, flags, unsigned long, fd, unsigned long, offset)
{
if (offset & ~PAGE_MASK)
return -EINVAL;
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index d4b170c861bf..0147130dc34e 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -180,6 +180,15 @@ int __init amiga_parse_bootinfo(const struct bi_record *record)
dev->slotsize = be16_to_cpu(cd->cd_SlotSize);
dev->boardaddr = be32_to_cpu(cd->cd_BoardAddr);
dev->boardsize = be32_to_cpu(cd->cd_BoardSize);
+
+ /* CS-LAB Warp 1260 workaround */
+ if (be16_to_cpu(dev->rom.er_Manufacturer) == ZORRO_MANUF(ZORRO_PROD_CSLAB_WARP_1260) &&
+ dev->rom.er_Product == ZORRO_PROD(ZORRO_PROD_CSLAB_WARP_1260)) {
+
+ /* turn off all interrupts */
+ pr_info("Warp 1260 card detected: applying interrupt storm workaround\n");
+ *(uint32_t *)(dev->boardaddr + 0x1000) = 0xfff;
+ }
} else
pr_warn("amiga_parse_bootinfo: too many AutoConfig devices\n");
#endif /* CONFIG_ZORRO */
diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c
index 23256434191c..0465444ceb21 100644
--- a/arch/m68k/atari/ataints.c
+++ b/arch/m68k/atari/ataints.c
@@ -301,11 +301,7 @@ void __init atari_init_IRQ(void)
if (ATARIHW_PRESENT(SCU)) {
/* init the SCU if present */
- tt_scu.sys_mask = 0x10; /* enable VBL (for the cursor) and
- * disable HSYNC interrupts (who
- * needs them?) MFP and SCC are
- * enabled in VME mask
- */
+ tt_scu.sys_mask = 0x0; /* disable all interrupts */
tt_scu.vme_mask = 0x60; /* enable MFP and SCC ints */
} else {
/* If no SCU and no Hades, the HSYNC interrupt needs to be
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 6bb202d03d34..9a084943a68a 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -373,6 +373,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 3598e0cc66b0..58ec725bf392 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -353,6 +353,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 3fbb8a9a307d..63ab7e892e59 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -368,6 +368,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 9a2b0c7871ce..ca40744eec60 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -351,6 +351,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 2408785e0e48..77bcc6faf468 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -352,6 +352,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 6789d03b7b52..e73d4032b659 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -364,6 +364,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index b57af39db3b4..638df8442c98 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -407,6 +407,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 29b70f27aedd..2248db426081 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -350,6 +350,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 757c582ffe84..2975b66521f6 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -351,6 +351,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index f15d1009108a..0a0e32344033 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -357,6 +357,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 5218c1e614b5..f16f1a99c2ba 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -347,6 +347,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index acdf4eb7af28..667bd61ae9b3 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -348,6 +348,7 @@ CONFIG_VXLAN=m
CONFIG_GENEVE=m
CONFIG_BAREUDP=m
CONFIG_GTP=m
+CONFIG_PFCP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 642fb80c5c4e..83410f8184ec 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -71,7 +71,7 @@ static void nfhd_submit_bio(struct bio *bio)
len = bvec.bv_len;
len >>= 9;
nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
- page_to_phys(bvec.bv_page) + bvec.bv_offset);
+ bvec_phys(&bvec));
sec += len;
}
bio_endio(bio);
@@ -98,6 +98,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
{
struct queue_limits lim = {
.logical_block_size = bsize,
+ .features = BLK_FEAT_ROTATIONAL,
};
struct nfhd_device *dev;
int dev_id = id - NFHD_DEV_OFFSET;
@@ -193,4 +194,5 @@ static void __exit nfhd_exit(void)
module_init(nfhd_init);
module_exit(nfhd_exit);
+MODULE_DESCRIPTION("Atari NatFeat block device driver");
MODULE_LICENSE("GPL");
diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c
index 17b2987c2bf5..d41260672e24 100644
--- a/arch/m68k/emu/nfcon.c
+++ b/arch/m68k/emu/nfcon.c
@@ -173,4 +173,5 @@ static void __exit nfcon_exit(void)
module_init(nfcon_init);
module_exit(nfcon_exit);
+MODULE_DESCRIPTION("Atari NatFeat console driver");
MODULE_LICENSE("GPL");
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index d7f3de9c5d6f..4ba14f3535fc 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -32,7 +32,7 @@ static inline unsigned long __arch_xchg(unsigned long x, volatile void * ptr, in
x = tmp;
break;
default:
- tmp = __invalid_xchg_size(x, ptr, size);
+ x = __invalid_xchg_size(x, ptr, size);
break;
}
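
The one-line fix matters because __invalid_xchg_size() is the usual "declared but never defined" guard: if the compiler cannot eliminate the default branch, the leftover call fails at link time with an undefined reference, while handled sizes fold the branch away. Assigning the result to x, the value the function actually returns, keeps the branch consistent with the others. A minimal model of the guard, with hypothetical names:

	/* Declared but deliberately never defined: an unsupported size
	 * produces a link error instead of silently wrong code. */
	extern unsigned long __invalid_xchg_size_demo(unsigned long x,
						      volatile void *ptr, int size);

	static inline unsigned long xchg_demo(unsigned long x, volatile void *ptr,
					      int size)
	{
		switch (size) {
		case 4:
			/* real 32-bit exchange elided */
			break;
		default:
			x = __invalid_xchg_size_demo(x, ptr, size);
			break;
		}
		return x;
	}
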
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index ed9f34da1a2a..0850b099f300 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -35,7 +35,7 @@
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags, unsigned long, fd,
- off_t, pgoff)
+ unsigned long, pgoff)
{
if (pgoff & ~PAGE_MASK)
return -EINVAL;
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index cc869f5d5693..953f5b7dc723 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -354,7 +354,7 @@
412 n32 utimensat_time64 sys_utimensat
413 n32 pselect6_time64 compat_sys_pselect6_time64
414 n32 ppoll_time64 compat_sys_ppoll_time64
-416 n32 io_pgetevents_time64 sys_io_pgetevents
+416 n32 io_pgetevents_time64 compat_sys_io_pgetevents_time64
417 n32 recvmmsg_time64 compat_sys_recvmmsg_time64
418 n32 mq_timedsend_time64 sys_mq_timedsend
419 n32 mq_timedreceive_time64 sys_mq_timedreceive
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 81428a2eb660..2439a2491cff 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -403,7 +403,7 @@
412 o32 utimensat_time64 sys_utimensat sys_utimensat
413 o32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 o32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 o32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 o32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 o32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 o32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 o32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index daafeb20f993..dc9b902de8ea 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,6 +16,7 @@ config PARISC
select ARCH_HAS_UBSAN
select ARCH_HAS_PTE_SPECIAL
select ARCH_NO_SG_CHAIN
+ select ARCH_SPLIT_ARG64 if !64BIT
select ARCH_SUPPORTS_HUGETLBFS if PA20
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_STACKWALK
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 2a12a547b447..826c8e51b585 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -23,12 +23,3 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23,
current->comm, current->pid, r20);
return -ENOSYS;
}
-
-asmlinkage long sys32_fanotify_mark(compat_int_t fanotify_fd, compat_uint_t flags,
- compat_uint_t mask0, compat_uint_t mask1, compat_int_t dfd,
- const char __user * pathname)
-{
- return sys_fanotify_mark(fanotify_fd, flags,
- ((__u64)mask1 << 32) | mask0,
- dfd, pathname);
-}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index b13c21373974..66dc406b12e4 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -108,7 +108,7 @@
95 common fchown sys_fchown
96 common getpriority sys_getpriority
97 common setpriority sys_setpriority
-98 common recv sys_recv
+98 common recv sys_recv compat_sys_recv
99 common statfs sys_statfs compat_sys_statfs
100 common fstatfs sys_fstatfs compat_sys_fstatfs
101 common stat64 sys_stat64
@@ -135,7 +135,7 @@
120 common clone sys_clone_wrapper
121 common setdomainname sys_setdomainname
122 common sendfile sys_sendfile compat_sys_sendfile
-123 common recvfrom sys_recvfrom
+123 common recvfrom sys_recvfrom compat_sys_recvfrom
124 32 adjtimex sys_adjtimex_time32
124 64 adjtimex sys_adjtimex
125 common mprotect sys_mprotect
@@ -364,7 +364,7 @@
320 common accept4 sys_accept4
321 common prlimit64 sys_prlimit64
322 common fanotify_init sys_fanotify_init
-323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark
+323 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
324 32 clock_adjtime sys_clock_adjtime32
324 64 clock_adjtime sys_clock_adjtime
325 common name_to_handle_at sys_name_to_handle_at
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d1030bc52564..d283d281d28e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
{
struct eeh_dev *edev;
struct pci_dev *pdev;
+ struct pci_bus *bus = NULL;
if (pe->type & EEH_PE_PHB)
return pe->phb->bus;
@@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
/* Retrieve the parent PCI bus of first (top) PCI device */
edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+ pci_lock_rescan_remove();
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
- return pdev->bus;
+ bus = pdev->bus;
+ pci_unlock_rescan_remove();
- return NULL;
+ return bus;
}
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4690c219bfa4..63432a33ec49 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -647,8 +647,9 @@ __after_prom_start:
* Note: This process overwrites the OF exception vectors.
*/
LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
- mr. r4,r26 /* In some cases the loader may */
- beq 9f /* have already put us at zero */
+ mr r4,r26 /* Load the virtual source address into r4 */
+ cmpld r3,r4 /* Check if source == dest */
+ beq 9f /* If so skip the copy */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 3656f1ca7a21..ebae8415dfbb 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -230,8 +230,10 @@
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64
179 64 pread64 sys_pread64
+179 spu pread64 sys_pread64
180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64
180 64 pwrite64 sys_pwrite64
+180 spu pwrite64 sys_pwrite64
181 common chown sys_chown
182 common getcwd sys_getcwd
183 common capget sys_capget
@@ -246,6 +248,7 @@
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead
191 64 readahead sys_readahead
+191 spu readahead sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64
194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64
@@ -293,6 +296,7 @@
232 nospu set_tid_address sys_set_tid_address
233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64
233 64 fadvise64 sys_fadvise64
+233 spu fadvise64 sys_fadvise64
234 nospu exit_group sys_exit_group
235 nospu lookup_dcookie sys_ni_syscall
236 common epoll_create sys_epoll_create
@@ -502,7 +506,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 85050be08a23..72b12bc10f90 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -27,6 +27,7 @@
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h> /* _end */
+#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/svm.h>
@@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image)
if (!kdump_in_progress())
kexec_prepare_cpus();
+#ifdef CONFIG_PPC_PSERIES
+ /*
+ * This must be done after other CPUs have shut down, otherwise they
+ * could execute the 'scv' instruction, which is not supported with
+ * reloc disabled (see configure_exceptions()).
+ */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+#endif
+
printk("kexec: Starting switchover sequence.\n");
/* switch to a statically allocated stack. Based on irq stack code.
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 096d09ed89f6..431be156ca9b 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
} else
xics_kexec_teardown_cpu(secondary);
}
-
-void pseries_machine_kexec(struct kimage *image)
-{
- if (firmware_has_feature(FW_FEATURE_SET_MODE))
- pseries_disable_reloc_on_exc();
-
- default_machine_kexec(image);
-}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index bba4ad192b0f..3968a6970fa8 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { }
#endif
extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary);
-void pseries_machine_kexec(struct kimage *image);
extern void pSeries_final_fixup(void);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 284a6fa04b0c..b10a25325238 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void)
{
void (*ctor)(void *) = get_dtl_cache_ctor();
- dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, ctor);
+ dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES,
+ DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor);
if (!dtl_cache) {
pr_warn("Failed to create dispatch trace log buffer cache\n");
pr_warn("Stolen time statistics will be unreliable\n");
@@ -1159,7 +1159,6 @@ define_machine(pseries) {
.machine_check_exception = pSeries_machine_check_exception,
.machine_check_log_err = pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
- .machine_kexec = pseries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
index 8df4cf3656f2..a7d753b6fdfd 100644
--- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -15,6 +15,10 @@
model = "Kendryte KD233";
compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -46,7 +50,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -118,6 +121,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&spi0 {
@@ -125,6 +129,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "canaan,kd233-tft", "ilitek,ili9341";
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index f87c5164d9cf..4f5d40fa1e77 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -16,13 +16,6 @@
#size-cells = <1>;
compatible = "canaan,kendryte-k210";
- aliases {
- serial0 = &uarths0;
- serial1 = &uart1;
- serial2 = &uart2;
- serial3 = &uart3;
- };
-
/*
* The K210 has an sv39 MMU following the privileged specification v1.9.
* Since this is a non-ratified draft specification, the kernel does not
@@ -137,6 +130,7 @@
reg = <0x38000000 0x1000>;
interrupts = <33>;
clocks = <&sysclk K210_CLK_CPU>;
+ status = "disabled";
};
gpio0: gpio-controller@38001000 {
@@ -152,6 +146,7 @@
<62>, <63>, <64>, <65>;
gpio-controller;
ngpios = <32>;
+ status = "disabled";
};
dmac0: dma-controller@50000000 {
@@ -187,6 +182,7 @@
<&sysclk K210_CLK_GPIO>;
clock-names = "bus", "db";
resets = <&sysrst K210_RST_GPIO>;
+ status = "disabled";
gpio1_0: gpio-port@0 {
#gpio-cells = <2>;
@@ -214,6 +210,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
uart2: serial@50220000 {
@@ -230,6 +227,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
uart3: serial@50230000 {
@@ -246,6 +244,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
spi2: spi@50240000 {
@@ -259,6 +258,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ssi_clk", "pclk";
resets = <&sysrst K210_RST_SPI2>;
+ status = "disabled";
};
i2s0: i2s@50250000 {
@@ -268,6 +268,7 @@
clocks = <&sysclk K210_CLK_I2S0>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S0>;
+ status = "disabled";
};
i2s1: i2s@50260000 {
@@ -277,6 +278,7 @@
clocks = <&sysclk K210_CLK_I2S1>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S1>;
+ status = "disabled";
};
i2s2: i2s@50270000 {
@@ -286,6 +288,7 @@
clocks = <&sysclk K210_CLK_I2S2>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S2>;
+ status = "disabled";
};
i2c0: i2c@50280000 {
@@ -296,6 +299,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C0>;
+ status = "disabled";
};
i2c1: i2c@50290000 {
@@ -306,6 +310,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C1>;
+ status = "disabled";
};
i2c2: i2c@502a0000 {
@@ -316,6 +321,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C2>;
+ status = "disabled";
};
fpioa: pinmux@502b0000 {
@@ -464,6 +470,7 @@
reset-names = "spi";
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
spi1: spi@53000000 {
@@ -479,6 +486,7 @@
reset-names = "spi";
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
spi3: spi@54000000 {
@@ -495,6 +503,7 @@
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
};
};
diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts
index 396c8ca4d24d..5734cc03753b 100644
--- a/arch/riscv/boot/dts/canaan/k210_generic.dts
+++ b/arch/riscv/boot/dts/canaan/k210_generic.dts
@@ -15,6 +15,10 @@
model = "Kendryte K210 generic";
compatible = "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -24,7 +28,6 @@
&fpioa {
pinctrl-0 = <&jtag_pins>;
pinctrl-names = "default";
- status = "okay";
jtag_pins: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
index 6d25bf07481a..2ab376d609d2 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -17,6 +17,10 @@
compatible = "sipeed,maix-bit", "sipeed,maix-bitm",
"canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -58,7 +62,6 @@
&fpioa {
pinctrl-names = "default";
pinctrl-0 = <&jtag_pinctrl>;
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -156,6 +159,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -170,6 +174,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -199,6 +204,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
index f4f4d8d5e8b8..d98e20775c07 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -17,6 +17,10 @@
compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w",
"canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -63,7 +67,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -159,6 +162,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -173,6 +177,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
index 0d86df47e1ed..79ecd549700a 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -16,6 +16,10 @@
model = "SiPeed MAIX GO";
compatible = "sipeed,maix-go", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -69,7 +73,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -167,6 +170,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -181,6 +185,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -209,6 +214,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
index 5c05c498e2b8..019c03ae51f6 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -15,6 +15,10 @@
model = "SiPeed MAIXDUINO";
compatible = "sipeed,maixduino", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -39,8 +43,6 @@
};
&fpioa {
- status = "okay";
-
uarths_pinctrl: uarths-pinmux {
pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */
<K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */
@@ -132,6 +134,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -146,6 +149,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -174,6 +178,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
index 20bc8c03b821..ca2d44d59d48 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
@@ -244,7 +244,7 @@
regulator-boot-on;
regulator-always-on;
regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
regulator-name = "emmc_vdd";
};
};
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 06e439eeef9a..09fde95a5e8f 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -145,7 +145,7 @@
/* parts of opcode for RVF, RVD and RVQ */
#define RVFDQ_FL_FS_WIDTH_OFF 12
-#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0)
+#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0)
#define RVFDQ_FL_FS_WIDTH_W 2
#define RVFDQ_FL_FS_WIDTH_D 3
#define RVFDQ_LS_FS_WIDTH_Q 4
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 87cbd86576b2..4b95c574fd04 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -120,9 +120,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
mutex_unlock(&text_mutex);
- if (!mod)
- local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE);
-
return out;
}
@@ -156,9 +153,9 @@ static int __ftrace_modify_code(void *data)
} else {
while (atomic_read(&param->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return 0;
}
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ed9cad20c039..3c830a6f7ef4 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void)
for_each_irq_desc(i, desc) {
struct irq_chip *chip;
- int ret;
chip = irq_desc_get_chip(desc);
if (!chip)
continue;
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
+ if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
chip->irq_eoi(&desc->irq_data);
if (chip->irq_mask)
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 4007563fb607..ab03732d06c4 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -89,6 +89,14 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
memset(waddr, c, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -135,6 +143,14 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
ret = copy_to_kernel_nofault(waddr, insn, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -189,9 +205,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
ret = patch_insn_set(tp, c, len);
- if (!ret)
- flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_set_nosync);
@@ -224,9 +237,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
ret = patch_insn_write(tp, insns, len);
- if (!ret)
- flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_nosync);
@@ -253,9 +263,9 @@ static int patch_text_cb(void *data)
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return ret;
}
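
Taken together, the patching sequence above becomes: write through the writable alias, flush the local icache for the patched range immediately (before the fixmap is torn down and before any call can land in half-patched text), and leave the global flush to each hart inside the stop_machine() rendezvous. A condensed kernel-style sketch, with hypothetical helper names for the mapping step:

	static int patch_write_demo(void *addr, const void *insn, size_t len)
	{
		void *waddr = patch_map_demo(addr);	/* hypothetical: writable alias */
		int ret = copy_to_kernel_nofault(waddr, insn, len);

		/* Flush before the alias goes away and before anything can
		 * execute partially patched instructions. */
		local_flush_icache_range((unsigned long)waddr,
					 (unsigned long)waddr + len);
		patch_unmap_demo(waddr);		/* hypothetical */
		return ret;
	}
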
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 528ec7cc9a62..10e311b2759d 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
+ int graph_idx = 0;
int level = 0;
if (regs) {
@@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
pc = regs->ra;
} else {
fp = frame->fp;
- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+ pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
&frame->ra);
if (pc == (unsigned long)ret_from_exception) {
if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
@@ -156,7 +157,7 @@ unsigned long __get_wchan(struct task_struct *task)
return pc;
}
-noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
{
walk_stackframe(task, regs, consume_entry, cookie);
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 64155323cc92..d77afe05578f 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -23,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
#ifdef CONFIG_64BIT
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
}
@@ -32,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
/*
* Note that the shift for mmap2 is constant (12),
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 04db1f993c47..bcf41d6e0df0 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
- pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
+ pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 48ef5fe5c08a..5a36d5538dae 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -170,11 +170,14 @@ static void kaslr_adjust_got(unsigned long offset)
u64 *entry;
/*
- * Even without -fPIE, Clang still uses a global offset table for some
- * reason. Adjust the GOT entries.
+ * Adjust GOT entries, except for ones for undefined weak symbols
+ * that resolved to zero. This also skips the first three reserved
+ * entries on s390x that are zero.
*/
- for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
- *entry += offset - __START_KERNEL;
+ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
+ if (*entry)
+ *entry += offset - __START_KERNEL;
+ }
}
/*
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 8c4adece8911..f3602414a961 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -601,17 +601,16 @@ CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 6dd11d3b6aaa..d0d8925fdf09 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -592,17 +592,16 @@ CONFIG_WATCHDOG_CORE=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 7f5004065e8a..35555c944630 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void)
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
- choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+ choose_random_kstack_offset(get_tod_clock_fast());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 95990461888f..9281063636a7 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -427,6 +427,7 @@ struct kvm_vcpu_stat {
u64 instruction_io_other;
u64 instruction_lpsw;
u64 instruction_lpswe;
+ u64 instruction_lpswey;
u64 instruction_pfmf;
u64 instruction_ptff;
u64 instruction_sck;
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index dc2355c623d6..50cbcbbaa03d 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -38,33 +38,6 @@
#include "entry.h"
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
- struct s390_mmap_arg_struct a;
- int error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
#ifdef CONFIG_SYSVIPC
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls.
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bd0fee24ad10..01071182763e 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 82e9631cd9ef..54b5b2565df8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, instruction_io_other),
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+ STATS_DESC_COUNTER(VCPU, instruction_lpswey),
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
STATS_DESC_COUNTER(VCPU, instruction_ptff),
STATS_DESC_COUNTER(VCPU, instruction_sck),
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 111eb5c74784..bf8534218af3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+ u32 base1 = vcpu->arch.sie_block->ipb >> 28;
+ s64 disp1;
+
+ /* The displacement is a 20bit _SIGNED_ value */
+ disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19);
+
+ if (ar)
+ *ar = base1;
+
+ return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+}
+
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
u64 *address1, u64 *address2,
u8 *ar_b1, u8 *ar_b2)
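
The SIY format splits its 20-bit signed displacement across two fields of the instruction word, DL (12 bits) and DH (the 8 bits above it); the helper above reassembles them and sign-extends from bit 19. The same decoding as a standalone sketch:

	#include <stdint.h>
	#include <stdio.h>

	/* Reassemble and sign-extend the SIY displacement, mirroring
	 * kvm_s390_get_base_disp_siy() above. */
	static int64_t siy_disp(uint32_t ipb)
	{
		uint32_t dl  = (ipb & 0x0fff0000) >> 16;	/* bits 0..11 */
		uint32_t dh  = (ipb & 0x0000ff00) << 4;		/* bits 12..19 */
		uint64_t raw = dl + dh;

		return (int64_t)(raw << 44) >> 44;	/* sign_extend64(raw, 19) */
	}

	int main(void)
	{
		printf("%lld\n", (long long)siy_disp(0x0fff8000));	/* -520193 */
		return 0;
	}
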
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1be19cc9d73c..1a49b89706f8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
return 0;
}
+static int handle_lpswey(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_lpswey++;
+
+ if (!test_kvm_facility(vcpu->kvm, 193))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ return 0;
+}
+
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 stidp_data = vcpu->kvm->arch.model.cpuid;
@@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
case 0x61:
case 0x62:
return handle_ri(vcpu);
+ case 0x71:
+ return handle_lpswey(vcpu);
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index abb629d7e131..7e3e767ab87d 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
@@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val)
static void base_crst_free(unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index ff8f24854c64..0ef83b6ac0db 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -410,7 +410,7 @@ static void __init cpu_enable_directed_irq(void *unused)
union zpci_sic_iib iib = {{0}};
union zpci_sic_iib ziib = {{0}};
- iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+ iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector);
zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index 9dca568509a5..d6f4afcb0e87 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -59,3 +59,14 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
(u64)len0 << 32 | len1, advice);
#endif
}
+
+/*
+ * swap the arguments the way that libc wants them instead of
+ * moving flags ahead of the 64-bit nbytes argument
+ */
+SYSCALL_DEFINE6(sh_sync_file_range6, int, fd, SC_ARG64(offset),
+ SC_ARG64(nbytes), unsigned int, flags)
+{
+ return ksys_sync_file_range(fd, SC_VAL64(loff_t, offset),
+ SC_VAL64(loff_t, nbytes), flags);
+}
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index bbf83a2db986..c55fd7696d40 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -321,7 +321,7 @@
311 common set_robust_list sys_set_robust_list
312 common get_robust_list sys_get_robust_list
313 common splice sys_splice
-314 common sync_file_range sys_sync_file_range
+314 common sync_file_range sys_sh_sync_file_range6
315 common tee sys_tee
316 common vmsplice sys_vmsplice
317 common move_pages sys_move_pages
@@ -395,6 +395,7 @@
385 common pkey_alloc sys_pkey_alloc
386 common pkey_free sys_pkey_free
387 common rseq sys_rseq
+388 common sync_file_range2 sys_sync_file_range2
# room for arch specific syscalls
393 common semget sys_semget
394 common semctl sys_semctl
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index a45f0f31fe51..a3d308f2043e 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -18,224 +18,3 @@ sys32_mmap2:
sethi %hi(sys_mmap), %g1
jmpl %g1 + %lo(sys_mmap), %g0
sllx %o5, 12, %o5
-
- .align 32
- .globl sys32_socketcall
-sys32_socketcall: /* %o0=call, %o1=args */
- cmp %o0, 1
- bl,pn %xcc, do_einval
- cmp %o0, 18
- bg,pn %xcc, do_einval
- sub %o0, 1, %o0
- sllx %o0, 5, %o0
- sethi %hi(__socketcall_table_begin), %g2
- or %g2, %lo(__socketcall_table_begin), %g2
- jmpl %g2 + %o0, %g0
- nop
-do_einval:
- retl
- mov -EINVAL, %o0
-
- .align 32
-__socketcall_table_begin:
-
- /* Each entry is exactly 32 bytes. */
-do_sys_socket: /* sys_socket(int, int, int) */
-1: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_socket), %g1
-2: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_socket), %g0
-3: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */
-4: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_bind), %g1
-5: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_bind), %g0
-6: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */
-7: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_connect), %g1
-8: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_connect), %g0
-9: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_listen: /* sys_listen(int, int) */
-10: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_listen), %g1
- jmpl %g1 + %lo(sys_listen), %g0
-11: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
- nop
-do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */
-12: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_accept), %g1
-13: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_accept), %g0
-14: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */
-15: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getsockname), %g1
-16: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_getsockname), %g0
-17: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */
-18: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getpeername), %g1
-19: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_getpeername), %g0
-20: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */
-21: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_socketpair), %g1
-22: ldswa [%o1 + 0x8] %asi, %o2
-23: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_socketpair), %g0
-24: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */
-25: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_send), %g1
-26: lduwa [%o1 + 0x8] %asi, %o2
-27: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_send), %g0
-28: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */
-29: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_recv), %g1
-30: lduwa [%o1 + 0x8] %asi, %o2
-31: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_recv), %g0
-32: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */
-33: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_sendto), %g1
-34: lduwa [%o1 + 0x8] %asi, %o2
-35: lduwa [%o1 + 0xc] %asi, %o3
-36: lduwa [%o1 + 0x10] %asi, %o4
-37: ldswa [%o1 + 0x14] %asi, %o5
- jmpl %g1 + %lo(sys_sendto), %g0
-38: lduwa [%o1 + 0x4] %asi, %o1
-do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */
-39: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_recvfrom), %g1
-40: lduwa [%o1 + 0x8] %asi, %o2
-41: lduwa [%o1 + 0xc] %asi, %o3
-42: lduwa [%o1 + 0x10] %asi, %o4
-43: lduwa [%o1 + 0x14] %asi, %o5
- jmpl %g1 + %lo(sys_recvfrom), %g0
-44: lduwa [%o1 + 0x4] %asi, %o1
-do_sys_shutdown: /* sys_shutdown(int, int) */
-45: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_shutdown), %g1
- jmpl %g1 + %lo(sys_shutdown), %g0
-46: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
- nop
-do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */
-47: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_setsockopt), %g1
-48: ldswa [%o1 + 0x8] %asi, %o2
-49: lduwa [%o1 + 0xc] %asi, %o3
-50: ldswa [%o1 + 0x10] %asi, %o4
- jmpl %g1 + %lo(sys_setsockopt), %g0
-51: ldswa [%o1 + 0x4] %asi, %o1
- nop
-do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */
-52: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getsockopt), %g1
-53: ldswa [%o1 + 0x8] %asi, %o2
-54: lduwa [%o1 + 0xc] %asi, %o3
-55: lduwa [%o1 + 0x10] %asi, %o4
- jmpl %g1 + %lo(sys_getsockopt), %g0
-56: ldswa [%o1 + 0x4] %asi, %o1
- nop
-do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */
-57: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(compat_sys_sendmsg), %g1
-58: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(compat_sys_sendmsg), %g0
-59: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */
-60: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(compat_sys_recvmsg), %g1
-61: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(compat_sys_recvmsg), %g0
-62: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
-63: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_accept4), %g1
-64: lduwa [%o1 + 0x8] %asi, %o2
-65: ldswa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_accept4), %g0
-66: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-
- .section __ex_table,"a"
- .align 4
- .word 1b, __retl_efault, 2b, __retl_efault
- .word 3b, __retl_efault, 4b, __retl_efault
- .word 5b, __retl_efault, 6b, __retl_efault
- .word 7b, __retl_efault, 8b, __retl_efault
- .word 9b, __retl_efault, 10b, __retl_efault
- .word 11b, __retl_efault, 12b, __retl_efault
- .word 13b, __retl_efault, 14b, __retl_efault
- .word 15b, __retl_efault, 16b, __retl_efault
- .word 17b, __retl_efault, 18b, __retl_efault
- .word 19b, __retl_efault, 20b, __retl_efault
- .word 21b, __retl_efault, 22b, __retl_efault
- .word 23b, __retl_efault, 24b, __retl_efault
- .word 25b, __retl_efault, 26b, __retl_efault
- .word 27b, __retl_efault, 28b, __retl_efault
- .word 29b, __retl_efault, 30b, __retl_efault
- .word 31b, __retl_efault, 32b, __retl_efault
- .word 33b, __retl_efault, 34b, __retl_efault
- .word 35b, __retl_efault, 36b, __retl_efault
- .word 37b, __retl_efault, 38b, __retl_efault
- .word 39b, __retl_efault, 40b, __retl_efault
- .word 41b, __retl_efault, 42b, __retl_efault
- .word 43b, __retl_efault, 44b, __retl_efault
- .word 45b, __retl_efault, 46b, __retl_efault
- .word 47b, __retl_efault, 48b, __retl_efault
- .word 49b, __retl_efault, 50b, __retl_efault
- .word 51b, __retl_efault, 52b, __retl_efault
- .word 53b, __retl_efault, 54b, __retl_efault
- .word 55b, __retl_efault, 56b, __retl_efault
- .word 57b, __retl_efault, 58b, __retl_efault
- .word 59b, __retl_efault, 60b, __retl_efault
- .word 61b, __retl_efault, 62b, __retl_efault
- .word 63b, __retl_efault, 64b, __retl_efault
- .word 65b, __retl_efault, 66b, __retl_efault
- .previous
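
For reference, the __ex_table words above pair each numbered load label ("1:".."66:") with __retl_efault, so a fault in any of these user-space loads resumes at a stub that returns -EFAULT. A minimal sketch of the mechanism, assuming sparc's two-word entry layout; the lookup helper is illustrative, not the kernel's search_extable():

struct extable_entry {
	unsigned long insn;	/* address of a numbered ("1:".."66:") load */
	unsigned long fixup;	/* address of __retl_efault                 */
};

/*
 * Illustrative lookup only; the kernel binary-searches a sorted table
 * rather than doing this linear scan.
 */
static unsigned long find_fixup(const struct extable_entry *tbl,
				unsigned int nentries,
				unsigned long fault_pc)
{
	unsigned int i;

	for (i = 0; i < nentries; i++)
		if (tbl[i].insn == fault_pc)
			return tbl[i].fixup;	/* resume; stub returns -EFAULT */

	return 0;	/* no entry: a genuine kernel fault */
}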
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index ac6c281ccfe0..cfdfb3707c16 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -117,7 +117,7 @@
90 common dup2 sys_dup2
91 32 setfsuid32 sys_setfsuid
92 common fcntl sys_fcntl compat_sys_fcntl
-93 common select sys_select
+93 common select sys_select compat_sys_select
94 32 setfsgid32 sys_setfsgid
95 common fsync sys_fsync
96 common setpriority sys_setpriority
@@ -155,7 +155,7 @@
123 32 fchown sys_fchown16
123 64 fchown sys_fchown
124 common fchmod sys_fchmod
-125 common recvfrom sys_recvfrom
+125 common recvfrom sys_recvfrom compat_sys_recvfrom
126 32 setreuid sys_setreuid16
126 64 setreuid sys_setreuid
127 32 setregid sys_setregid16
@@ -247,7 +247,7 @@
204 32 readdir sys_old_readdir compat_sys_old_readdir
204 64 readdir sys_nis_syscall
205 common readahead sys_readahead compat_sys_readahead
-206 common socketcall sys_socketcall sys32_socketcall
+206 common socketcall sys_socketcall compat_sys_socketcall
207 common syslog sys_syslog
208 common lookup_dcookie sys_ni_syscall
209 common fadvise64 sys_fadvise64 compat_sys_fadvise64
@@ -461,7 +461,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
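
The new compat column matters because 32-bit tasks hand the kernel 32-bit pointers and sizes that the native entry point would misinterpret. A sketch of the wrapper pattern, assuming the in-kernel __sys_recvfrom() helper; the real definition lives in net/compat.c and may differ in detail:

#include <linux/compat.h>
#include <linux/socket.h>
#include <linux/syscalls.h>

/*
 * Illustrative only: widen 32-bit userspace arguments (compat_uptr_t,
 * compat_size_t) to native types before reaching common socket code.
 */
COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, compat_uptr_t, buf,
		       compat_size_t, len, unsigned int, flags,
		       compat_uptr_t, addr, compat_uptr_t, addrlen)
{
	return __sys_recvfrom(fd, compat_ptr(buf), len, flags,
			      compat_ptr(addr), compat_ptr(addrlen));
}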
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ef805eaa9e01..9f1e76ddda5a 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -447,43 +447,31 @@ static int bulk_req_safe_read(
return n;
}
-/* Called without dev->lock held, and only in interrupt context. */
-static void ubd_handler(void)
+static void ubd_end_request(struct io_thread_req *io_req)
{
- int n;
- int count;
-
- while(1){
- n = bulk_req_safe_read(
- thread_fd,
- irq_req_buffer,
- &irq_remainder,
- &irq_remainder_size,
- UBD_REQ_BUFFER_SIZE
- );
- if (n < 0) {
- if(n == -EAGAIN)
- break;
- printk(KERN_ERR "spurious interrupt in ubd_handler, "
- "err = %d\n", -n);
- return;
- }
- for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
- struct io_thread_req *io_req = (*irq_req_buffer)[count];
-
- if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
- blk_queue_max_discard_sectors(io_req->req->q, 0);
- blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
- }
- blk_mq_end_request(io_req->req, io_req->error);
- kfree(io_req);
- }
+ if (io_req->error == BLK_STS_NOTSUPP) {
+ if (req_op(io_req->req) == REQ_OP_DISCARD)
+ blk_queue_disable_discard(io_req->req->q);
+ else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
+ blk_queue_disable_write_zeroes(io_req->req->q);
}
+ blk_mq_end_request(io_req->req, io_req->error);
+ kfree(io_req);
}
static irqreturn_t ubd_intr(int irq, void *dev)
{
- ubd_handler();
+ int len, i;
+
+ while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
+ &irq_remainder, &irq_remainder_size,
+ UBD_REQ_BUFFER_SIZE)) >= 0) {
+ for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
+ ubd_end_request((*irq_req_buffer)[i]);
+ }
+
+ if (len < 0 && len != -EAGAIN)
+ pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
return IRQ_HANDLED;
}
@@ -847,6 +835,7 @@ static int ubd_add(int n, char **error_out)
struct queue_limits lim = {
.max_segments = MAX_SG,
.seg_boundary_mask = PAGE_SIZE - 1,
+ .features = BLK_FEAT_WRITE_CACHE,
};
struct gendisk *disk;
int err = 0;
@@ -893,8 +882,6 @@ static int ubd_add(int n, char **error_out)
goto out_cleanup_tags;
}
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_write_cache(disk->queue, true, false);
disk->major = UBD_MAJOR;
disk->first_minor = n << UBD_SHIFT;
disk->minors = 1 << UBD_SHIFT;
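
The reworked ubd_intr() above drains whole pointer-sized completion records from thread_fd until the pipe returns -EAGAIN. A standalone sketch of that drain pattern; drain_completions() and its arguments are illustrative, and the partial-record carry-over that bulk_req_safe_read() provides is omitted:

#include <errno.h>
#include <unistd.h>

/*
 * Read whole pointer records from a nonblocking pipe until it is
 * empty, completing each one. Error handling beyond EAGAIN omitted.
 */
static void drain_completions(int fd, void (*complete)(void *))
{
	void *reqs[64];
	ssize_t n;

	while ((n = read(fd, reqs, sizeof(reqs))) > 0) {
		for (size_t i = 0; i < n / sizeof(void *); i++)
			complete(reqs[i]);
	}
	/* n < 0 && errno == EAGAIN: pipe drained; anything else is an error */
}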
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d7122a1883e..125914536825 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1118,6 +1118,13 @@ config X86_LOCAL_APIC
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
select IRQ_DOMAIN_HIERARCHY
+config ACPI_MADT_WAKEUP
+ def_bool y
+ depends on X86_64
+ depends on ACPI
+ depends on SMP
+ depends on X86_LOCAL_APIC
+
config X86_IO_APIC
def_bool y
depends on X86_LOCAL_APIC || X86_UP_IOAPIC
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 59aedf32c4ea..6d20a6ce0507 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -36,6 +36,6 @@ config AS_VPCLMULQDQ
Supported by binutils >= 2.30 and LLVM integrated assembler
config AS_WRUSS
- def_bool $(as-instr,wrussq %rax$(comma)(%rbx))
+ def_bool $(as-instr64,wrussq %rax$(comma)(%rbx))
help
Supported by binutils >= 2.31 and LLVM integrated assembler
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b70e4a21c15f..944454306ef4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -531,8 +531,3 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
return output + entry_offset;
}
-
-void __fortify_panic(const u8 reason, size_t avail, size_t size)
-{
- error("detected buffer overflow");
-}
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 0457a9d7e515..cd44e120fe53 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -127,7 +127,35 @@ static bool fault_in_kernel_space(unsigned long address)
#include "../../lib/insn.c"
/* Include code for early handlers */
-#include "../../kernel/sev-shared.c"
+#include "../../coco/sev/shared.c"
+
+static struct svsm_ca *svsm_get_caa(void)
+{
+ return boot_svsm_caa;
+}
+
+static u64 svsm_get_caa_pa(void)
+{
+ return boot_svsm_caa_pa;
+}
+
+static int svsm_perform_call_protocol(struct svsm_call *call)
+{
+ struct ghcb *ghcb;
+ int ret;
+
+ if (boot_ghcb)
+ ghcb = boot_ghcb;
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
+ : svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
bool sev_snp_enabled(void)
{
@@ -145,8 +173,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op)
* If private -> shared then invalidate the page before requesting the
* state change in the RMP table.
*/
- if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ if (op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(paddr, paddr, false);
/* Issue VMGEXIT to change the page state in RMP table. */
sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
@@ -161,8 +189,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op)
* Now that page state is changed in the RMP table, validate it so that it is
* consistent with the RMP entry.
*/
- if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(paddr, paddr, true);
}
void snp_set_page_private(unsigned long paddr)
@@ -256,6 +284,16 @@ void sev_es_shutdown_ghcb(void)
error("SEV-ES CPU Features missing.");
/*
+ * This denotes whether to use the GHCB MSR protocol or the GHCB
+ * shared page to perform a GHCB request. Since the GHCB page is
+ * being changed to encrypted, it can't be used to perform GHCB
+ * requests. Clear the boot_ghcb variable so that the GHCB MSR
+ * protocol is used to change the GHCB page over to an encrypted
+ * page.
+ */
+ boot_ghcb = NULL;
+
+ /*
* GHCB Page must be flushed from the cache and mapped encrypted again.
* Otherwise the running kernel will see strange cache effects when
* trying to use that page.
@@ -463,6 +501,13 @@ static bool early_snp_init(struct boot_params *bp)
setup_cpuid_table(cc_info);
/*
+ * Record the SVSM Calling Area (CA) address if the guest is not
+ * running at VMPL0. The CA will be used to communicate with the
+ * SVSM and request its services.
+ */
+ svsm_setup_ca(cc_info);
+
+ /*
* Pass run-time kernel a pointer to CC info via boot_params so EFI
* config table doesn't need to be searched again during early startup
* phase.
@@ -565,22 +610,31 @@ void sev_enable(struct boot_params *bp)
* features.
*/
if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
- if (!(get_hv_features() & GHCB_HV_FT_SNP))
+ u64 hv_features;
+ int ret;
+
+ hv_features = get_hv_features();
+ if (!(hv_features & GHCB_HV_FT_SNP))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
/*
- * Enforce running at VMPL0.
- *
- * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically
- * higher) privilege level. Here, clear the VMPL1 permission mask of the
- * GHCB page. If the guest is not running at VMPL0, this will fail.
+ * Enforce running at VMPL0 or with an SVSM.
*
- * If the guest is running at VMPL0, it will succeed. Even if that operation
- * modifies permission bits, it is still ok to do so currently because Linux
- * SNP guests running at VMPL0 only run at VMPL0, so VMPL1 or higher
- * permission mask changes are a don't-care.
+ * Use RMPADJUST (see the rmpadjust() function for a description of
+ * what the instruction does) to update the VMPL1 permissions of a
+ * page. If the guest is running at VMPL0, this will succeed. If the
+ * guest is running at any other VMPL, this will fail. Linux SNP guests
+ * only ever run at a single VMPL level so permission mask changes of a
+ * lesser-privileged VMPL are a don't-care.
+ */
+ ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1);
+
+ /*
+ * Running at VMPL0 is not required if an SVSM is present and the hypervisor
+ * supports the required SVSM GHCB events.
*/
- if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1))
+ if (ret &&
+ !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL)))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
}
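
The enforcement above boils down to one predicate: boot at VMPL0, or under an SVSM whose hypervisor advertises multi-VMPL support. An illustrative restatement; the helper and its parameters are hypothetical, and the feature bit is passed in rather than taken from the GHCB headers:

/*
 * Boot proceeds at VMPL0 (the rmpadjust() probe returned 0), or under
 * an SVSM (snp_vmpl non-zero) whose hypervisor advertises the
 * multi-VMPL GHCB feature.
 */
static inline bool vmpl_policy_ok(int rmpadjust_ret, unsigned char snp_vmpl,
				  unsigned long long hv_features,
				  unsigned long long multi_vmpl_bit)
{
	if (!rmpadjust_ret)
		return true;	/* VMPL0: no SVSM required */

	return snp_vmpl && (hv_features & multi_vmpl_bit);
}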
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index fed8d13ce252..0aae4d4ed615 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -203,7 +203,7 @@ int check_knl_erratum(void)
*/
if (!is_intel() ||
cpu.family != 6 ||
- cpu.model != INTEL_FAM6_XEON_PHI_KNL)
+ cpu.model != 0x57 /*INTEL_XEON_PHI_KNL*/)
return 0;
/*
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 9049f390d834..9d0fea18d3c8 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -27,34 +27,32 @@ char *heap_end = _end; /* Default end of heap = no heap */
* screws up the old-style command line protocol, adjust by
* filling in the new-style command line pointer instead.
*/
-
static void copy_boot_params(void)
{
struct old_cmdline {
u16 cl_magic;
u16 cl_offset;
};
- const struct old_cmdline * const oldcmd =
- absolute_pointer(OLD_CL_ADDRESS);
+ const struct old_cmdline * const oldcmd = absolute_pointer(OLD_CL_ADDRESS);
BUILD_BUG_ON(sizeof(boot_params) != 4096);
memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
- if (!boot_params.hdr.cmd_line_ptr &&
- oldcmd->cl_magic == OLD_CL_MAGIC) {
- /* Old-style command line protocol. */
+ if (!boot_params.hdr.cmd_line_ptr && oldcmd->cl_magic == OLD_CL_MAGIC) {
+ /* Old-style command line protocol */
u16 cmdline_seg;
- /* Figure out if the command line falls in the region
- of memory that an old kernel would have copied up
- to 0x90000... */
+ /*
+ * Figure out if the command line falls in the region
+ * of memory that an old kernel would have copied up
+ * to 0x90000...
+ */
if (oldcmd->cl_offset < boot_params.hdr.setup_move_size)
cmdline_seg = ds();
else
cmdline_seg = 0x9000;
- boot_params.hdr.cmd_line_ptr =
- (cmdline_seg << 4) + oldcmd->cl_offset;
+ boot_params.hdr.cmd_line_ptr = (cmdline_seg << 4) + oldcmd->cl_offset;
}
}
@@ -66,6 +64,7 @@ static void copy_boot_params(void)
static void keyboard_init(void)
{
struct biosregs ireg, oreg;
+
initregs(&ireg);
ireg.ah = 0x02; /* Get keyboard status */
@@ -83,8 +82,10 @@ static void query_ist(void)
{
struct biosregs ireg, oreg;
- /* Some older BIOSes apparently crash on this call, so filter
- it from machines too old to have SpeedStep at all. */
+ /*
+ * Some older BIOSes apparently crash on this call, so filter
+ * it from machines too old to have SpeedStep at all.
+ */
if (cpu.level < 6)
return;
@@ -119,17 +120,13 @@ static void init_heap(void)
char *stack_end;
if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
- asm("leal %n1(%%esp),%0"
- : "=r" (stack_end) : "i" (STACK_SIZE));
-
- heap_end = (char *)
- ((size_t)boot_params.hdr.heap_end_ptr + 0x200);
+ stack_end = (char *) (current_stack_pointer - STACK_SIZE);
+ heap_end = (char *) ((size_t)boot_params.hdr.heap_end_ptr + 0x200);
if (heap_end > stack_end)
heap_end = stack_end;
} else {
/* Boot protocol 2.00 only, no heap available */
- puts("WARNING: Ancient bootloader, some functionality "
- "may be limited!\n");
+ puts("WARNING: Ancient bootloader, some functionality may be limited!\n");
}
}
@@ -150,12 +147,11 @@ void main(void)
/* Make sure we have all the proper CPU support */
if (validate_cpu()) {
- puts("Unable to boot - please use a kernel appropriate "
- "for your CPU.\n");
+ puts("Unable to boot - please use a kernel appropriate for your CPU.\n");
die();
}
- /* Tell the BIOS what CPU mode we intend to run in. */
+ /* Tell the BIOS what CPU mode we intend to run in */
set_bios_mode();
/* Detect memory layout */
diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile
index c816acf78b6a..eabdc7486538 100644
--- a/arch/x86/coco/Makefile
+++ b/arch/x86/coco/Makefile
@@ -6,3 +6,4 @@ CFLAGS_core.o += -fno-stack-protector
obj-y += core.o
obj-$(CONFIG_INTEL_TDX_GUEST) += tdx/
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev/
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index b31ef2424d19..0f81f70aca82 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -29,7 +29,6 @@ static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
case CC_ATTR_GUEST_UNROLL_STRING_IO:
- case CC_ATTR_HOTPLUG_DISABLED:
case CC_ATTR_GUEST_MEM_ENCRYPT:
case CC_ATTR_MEM_ENCRYPT:
return true;
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
new file mode 100644
index 000000000000..4e375e7305ac
--- /dev/null
+++ b/arch/x86/coco/sev/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += core.o
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_core.o = -pg
+endif
+
+KASAN_SANITIZE_core.o := n
+KMSAN_SANITIZE_core.o := n
+KCOV_INSTRUMENT_core.o := n
+
+# With some compiler versions the generated code results in boot hangs, caused
+# by several compilation units. To be safe, disable all instrumentation.
+KCSAN_SANITIZE := n
diff --git a/arch/x86/kernel/sev.c b/arch/x86/coco/sev/core.c
index 3342ed58e168..082d61d85dfc 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/coco/sev/core.c
@@ -133,16 +133,20 @@ struct ghcb_state {
struct ghcb *ghcb;
};
+/* For early boot SVSM communication */
+static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
+
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
+static DEFINE_PER_CPU(u64, svsm_caa_pa);
struct sev_config {
__u64 debug : 1,
/*
- * A flag used by __set_pages_state() that indicates when the
- * per-CPU GHCB has been created and registered and thus can be
- * used by the BSP instead of the early boot GHCB.
+ * Indicates when the per-CPU GHCB has been created and registered
+ * and thus can be used by the BSP instead of the early boot GHCB.
*
* For APs, the per-CPU GHCB is created before they are started
* and registered upon startup, so this flag can be used globally
@@ -150,6 +154,15 @@ struct sev_config {
*/
ghcbs_initialized : 1,
+ /*
+ * Indicates when the per-CPU SVSM CA is to be used instead of the
+ * boot SVSM CA.
+ *
+ * For APs, the per-CPU SVSM CA is created as part of the AP
+ * bringup, so this flag can be used globally for the BSP and APs.
+ */
+ use_cas : 1,
+
__reserved : 62;
};
@@ -572,8 +585,61 @@ fault:
return ES_EXCEPTION;
}
+static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+ long error_code = ctxt->fi.error_code;
+ int trapnr = ctxt->fi.vector;
+
+ ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+ switch (trapnr) {
+ case X86_TRAP_GP:
+ exc_general_protection(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_UD:
+ exc_invalid_op(ctxt->regs);
+ break;
+ case X86_TRAP_PF:
+ write_cr2(ctxt->fi.cr2);
+ exc_page_fault(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_AC:
+ exc_alignment_check(ctxt->regs, error_code);
+ break;
+ default:
+ pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
+ BUG();
+ }
+}
+
/* Include code shared with pre-decompression boot stage */
-#include "sev-shared.c"
+#include "shared.c"
+
+static inline struct svsm_ca *svsm_get_caa(void)
+{
+ /*
+ * Use rIP-relative references when called early in the boot. If
+ * ->use_cas is set, then it is late in the boot and there is
+ * no need to worry about rIP-relative references.
+ */
+ if (RIP_REL_REF(sev_cfg).use_cas)
+ return this_cpu_read(svsm_caa);
+ else
+ return RIP_REL_REF(boot_svsm_caa);
+}
+
+static u64 svsm_get_caa_pa(void)
+{
+ /*
+ * Use rIP-relative references when called early in the boot. If
+ * ->use_cas is set, then it is late in the boot and there is
+ * no need to worry about rIP-relative references.
+ */
+ if (RIP_REL_REF(sev_cfg).use_cas)
+ return this_cpu_read(svsm_caa_pa);
+ else
+ return RIP_REL_REF(boot_svsm_caa_pa);
+}
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
@@ -600,6 +666,44 @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state)
}
}
+static int svsm_perform_call_protocol(struct svsm_call *call)
+{
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int ret;
+
+ /*
+ * This can be called very early in the boot, so use native functions
+ * in order to avoid paravirt issues.
+ */
+ flags = native_local_irq_save();
+
+ /*
+ * Use rip-relative references when called early in the boot. If
+ * ghcbs_initialized is set, then it is late in the boot and there is
+ * no need to worry about rip-relative references in called functions.
+ */
+ if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
+ ghcb = __sev_get_ghcb(&state);
+ else if (RIP_REL_REF(boot_ghcb))
+ ghcb = RIP_REL_REF(boot_ghcb);
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
+ : svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
+ __sev_put_ghcb(&state);
+
+ native_local_irq_restore(flags);
+
+ return ret;
+}
+
void noinstr __sev_es_nmi_complete(void)
{
struct ghcb_state state;
@@ -709,7 +813,6 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
{
unsigned long paddr_end;
u64 val;
- int ret;
vaddr = vaddr & PAGE_MASK;
@@ -717,12 +820,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
paddr_end = paddr + (npages << PAGE_SHIFT);
while (paddr < paddr_end) {
- if (op == SNP_PAGE_STATE_SHARED) {
- /* Page validation must be rescinded before changing to shared */
- ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false);
- if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
- goto e_term;
- }
+ /* Page validation must be rescinded before changing to shared */
+ if (op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(vaddr, paddr, false);
/*
* Use the MSR protocol because this function can be called before
@@ -744,12 +844,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
paddr, GHCB_MSR_PSC_RESP_VAL(val)))
goto e_term;
- if (op == SNP_PAGE_STATE_PRIVATE) {
- /* Page validation must be performed after changing to private */
- ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true);
- if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
- goto e_term;
- }
+ /* Page validation must be performed after changing to private */
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(vaddr, paddr, true);
vaddr += PAGE_SIZE;
paddr += PAGE_SIZE;
@@ -913,22 +1010,49 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}
-static int snp_set_vmsa(void *va, bool vmsa)
+static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
- u64 attrs;
+ int ret;
- /*
- * Running at VMPL0 allows the kernel to change the VMSA bit for a page
- * using the RMPADJUST instruction. However, for the instruction to
- * succeed it must target the permissions of a lesser privileged
- * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
- * instruction in the AMD64 APM Volume 3).
- */
- attrs = 1;
- if (vmsa)
- attrs |= RMPADJUST_VMSA_PAGE_BIT;
+ if (snp_vmpl) {
+ struct svsm_call call = {};
+ unsigned long flags;
+
+ local_irq_save(flags);
- return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+ call.caa = this_cpu_read(svsm_caa);
+ call.rcx = __pa(va);
+
+ if (make_vmsa) {
+ /* Protocol 0, Call ID 2 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
+ call.rdx = __pa(caa);
+ call.r8 = apic_id;
+ } else {
+ /* Protocol 0, Call ID 3 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
+ }
+
+ ret = svsm_perform_call_protocol(&call);
+
+ local_irq_restore(flags);
+ } else {
+ /*
+ * If the kernel runs at VMPL0, it can change the VMSA
+ * bit for a page using the RMPADJUST instruction.
+ * However, for the instruction to succeed it must
+ * target the permissions of a lesser privileged (higher
+ * numbered) VMPL level, so use VMPL1.
+ */
+ u64 attrs = 1;
+
+ if (make_vmsa)
+ attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+ ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+ }
+
+ return ret;
}
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
@@ -962,11 +1086,11 @@ static void *snp_alloc_vmsa_page(int cpu)
return page_address(p + 1);
}
-static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{
int err;
- err = snp_set_vmsa(vmsa, false);
+ err = snp_set_vmsa(vmsa, NULL, apic_id, false);
if (err)
pr_err("clear VMSA page failed (%u), leaking page\n", err);
else
@@ -977,6 +1101,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
{
struct sev_es_save_area *cur_vmsa, *vmsa;
struct ghcb_state state;
+ struct svsm_ca *caa;
unsigned long flags;
struct ghcb *ghcb;
u8 sipi_vector;
@@ -1023,6 +1148,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
if (!vmsa)
return -ENOMEM;
+ /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
+ caa = per_cpu(svsm_caa, cpu);
+
/* CR4 should maintain the MCE value */
cr4 = native_read_cr4() & X86_CR4_MCE;
@@ -1070,11 +1198,11 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
* VMPL level
* SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
*/
- vmsa->vmpl = 0;
+ vmsa->vmpl = snp_vmpl;
vmsa->sev_features = sev_status >> 2;
/* Switch the page over to a VMSA page now that it is initialized */
- ret = snp_set_vmsa(vmsa, true);
+ ret = snp_set_vmsa(vmsa, caa, apic_id, true);
if (ret) {
pr_err("set VMSA page failed (%u)\n", ret);
free_page((unsigned long)vmsa);
@@ -1090,7 +1218,10 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
vc_ghcb_invalidate(ghcb);
ghcb_set_rax(ghcb, vmsa->sev_features);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
- ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
+ ghcb_set_sw_exit_info_1(ghcb,
+ ((u64)apic_id << 32) |
+ ((u64)snp_vmpl << 16) |
+ SVM_VMGEXIT_AP_CREATE);
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
sev_es_wr_ghcb_msr(__pa(ghcb));
@@ -1108,13 +1239,13 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
/* Perform cleanup if there was an error */
if (ret) {
- snp_cleanup_vmsa(vmsa);
+ snp_cleanup_vmsa(vmsa, apic_id);
vmsa = NULL;
}
/* Free up any previous VMSA page */
if (cur_vmsa)
- snp_cleanup_vmsa(cur_vmsa);
+ snp_cleanup_vmsa(cur_vmsa, apic_id);
/* Record the current VMSA page */
per_cpu(sev_vmsa, cpu) = vmsa;
@@ -1209,6 +1340,17 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
/* Is it a WRMSR? */
exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
+ if (regs->cx == MSR_SVSM_CAA) {
+ /* Writes to the SVSM CAA MSR are ignored */
+ if (exit_info_1)
+ return ES_OK;
+
+ regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
+ regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
+
+ return ES_OK;
+ }
+
ghcb_set_rcx(ghcb, regs->cx);
if (exit_info_1) {
ghcb_set_rax(ghcb, regs->ax);
@@ -1346,6 +1488,18 @@ static void __init alloc_runtime_data(int cpu)
panic("Can't allocate SEV-ES runtime data");
per_cpu(runtime_data, cpu) = data;
+
+ if (snp_vmpl) {
+ struct svsm_ca *caa;
+
+ /* Allocate the SVSM CA page if an SVSM is present */
+ caa = memblock_alloc(sizeof(*caa), PAGE_SIZE);
+ if (!caa)
+ panic("Can't allocate SVSM CA page\n");
+
+ per_cpu(svsm_caa, cpu) = caa;
+ per_cpu(svsm_caa_pa, cpu) = __pa(caa);
+ }
}
static void __init init_ghcb(int cpu)
@@ -1395,6 +1549,32 @@ void __init sev_es_init_vc_handling(void)
init_ghcb(cpu);
}
+ /* If running under an SVSM, switch to the per-cpu CA */
+ if (snp_vmpl) {
+ struct svsm_call call = {};
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+
+ /*
+ * SVSM_CORE_REMAP_CA call:
+ * RAX = 0 (Protocol=0, CallID=0)
+ * RCX = New CA GPA
+ */
+ call.caa = svsm_get_caa();
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
+ call.rcx = this_cpu_read(svsm_caa_pa);
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
+ ret, call.rax_out);
+
+ sev_cfg.use_cas = true;
+
+ local_irq_restore(flags);
+ }
+
sev_es_setup_play_dead();
/* Secondary CPUs use the runtime #VC handler */
@@ -1819,33 +1999,6 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
return result;
}
-static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
-{
- long error_code = ctxt->fi.error_code;
- int trapnr = ctxt->fi.vector;
-
- ctxt->regs->orig_ax = ctxt->fi.error_code;
-
- switch (trapnr) {
- case X86_TRAP_GP:
- exc_general_protection(ctxt->regs, error_code);
- break;
- case X86_TRAP_UD:
- exc_invalid_op(ctxt->regs);
- break;
- case X86_TRAP_PF:
- write_cr2(ctxt->fi.cr2);
- exc_page_fault(ctxt->regs, error_code);
- break;
- case X86_TRAP_AC:
- exc_alignment_check(ctxt->regs, error_code);
- break;
- default:
- pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
- BUG();
- }
-}
-
static __always_inline bool is_vc2_stack(unsigned long sp)
{
return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
@@ -2095,6 +2248,47 @@ found_cc_info:
return cc_info;
}
+static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
+{
+ struct svsm_call call = {};
+ int ret;
+ u64 pa;
+
+ /*
+ * Record the SVSM Calling Area address (CAA) if the guest is not
+ * running at VMPL0. The CA will be used to communicate with the
+ * SVSM and request its services.
+ */
+ if (!svsm_setup_ca(cc_info))
+ return;
+
+ /*
+ * It is very early in the boot and the kernel is running identity
+ * mapped but without having adjusted the pagetables to where the
+ * kernel was loaded (physbase), so the get the CA address using
+ * RIP-relative addressing.
+ */
+ pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
+
+ /*
+ * Switch over to the boot SVSM CA while the current CA is still
+ * addressable. There is no GHCB at this point so use the MSR protocol.
+ *
+ * SVSM_CORE_REMAP_CA call:
+ * RAX = 0 (Protocol=0, CallID=0)
+ * RCX = New CA GPA
+ */
+ call.caa = svsm_get_caa();
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
+ call.rcx = pa;
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", ret, call.rax_out);
+
+ RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
+ RIP_REL_REF(boot_svsm_caa_pa) = pa;
+}
+
bool __head snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -2108,6 +2302,8 @@ bool __head snp_init(struct boot_params *bp)
setup_cpuid_table(cc_info);
+ svsm_setup(cc_info);
+
/*
* The CC blob will be used later to access the secrets page. Cache
* it here like the boot kernel does.
@@ -2156,23 +2352,27 @@ static void dump_cpuid_table(void)
* expected, but that initialization happens too early in boot to print any
* sort of indicator, and there's not really any other good place to do it,
* so do it here.
+ *
+ * If running as an SNP guest, report the current VM privilege level (VMPL).
*/
-static int __init report_cpuid_table(void)
+static int __init report_snp_info(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
- if (!cpuid_table->count)
- return 0;
+ if (cpuid_table->count) {
+ pr_info("Using SNP CPUID table, %d entries present.\n",
+ cpuid_table->count);
- pr_info("Using SNP CPUID table, %d entries present.\n",
- cpuid_table->count);
+ if (sev_cfg.debug)
+ dump_cpuid_table();
+ }
- if (sev_cfg.debug)
- dump_cpuid_table();
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ pr_info("SNP running at VMPL%u.\n", snp_vmpl);
return 0;
}
-arch_initcall(report_cpuid_table);
+arch_initcall(report_snp_info);
static int __init init_sev_config(char *str)
{
@@ -2191,6 +2391,56 @@ static int __init init_sev_config(char *str)
}
__setup("sev=", init_sev_config);
+static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
+{
+ /* If (new) lengths have been returned, propagate them up */
+ if (call->rcx_out != call->rcx)
+ input->manifest_buf.len = call->rcx_out;
+
+ if (call->rdx_out != call->rdx)
+ input->certificates_buf.len = call->rdx_out;
+
+ if (call->r8_out != call->r8)
+ input->report_buf.len = call->r8_out;
+}
+
+int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
+ struct svsm_attest_call *input)
+{
+ struct svsm_attest_call *ac;
+ unsigned long flags;
+ u64 attest_call_pa;
+ int ret;
+
+ if (!snp_vmpl)
+ return -EINVAL;
+
+ local_irq_save(flags);
+
+ call->caa = svsm_get_caa();
+
+ ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
+ attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
+
+ *ac = *input;
+
+ /*
+ * Set input registers for the request and set RDX and R8 to known
+ * values in order to detect length values being returned in them.
+ */
+ call->rax = call_id;
+ call->rcx = attest_call_pa;
+ call->rdx = -1;
+ call->r8 = -1;
+ ret = svsm_perform_call_protocol(call);
+ update_attest_input(call, input);
+
+ local_irq_restore(flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
+
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
struct ghcb_state state;
@@ -2299,3 +2549,58 @@ void sev_show_status(void)
}
pr_cont("\n");
}
+
+void __init snp_update_svsm_ca(void)
+{
+ if (!snp_vmpl)
+ return;
+
+ /* Update the CAA to a proper kernel address */
+ boot_svsm_caa = &boot_svsm_ca_page;
+}
+
+#ifdef CONFIG_SYSFS
+static ssize_t vmpl_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", snp_vmpl);
+}
+
+static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
+
+static struct attribute *vmpl_attrs[] = {
+ &vmpl_attr.attr,
+ NULL
+};
+
+static struct attribute_group sev_attr_group = {
+ .attrs = vmpl_attrs,
+};
+
+static int __init sev_sysfs_init(void)
+{
+ struct kobject *sev_kobj;
+ struct device *dev_root;
+ int ret;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return -ENODEV;
+
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (!dev_root)
+ return -ENODEV;
+
+ sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
+ put_device(dev_root);
+
+ if (!sev_kobj)
+ return -ENOMEM;
+
+ ret = sysfs_create_group(sev_kobj, &sev_attr_group);
+ if (ret)
+ kobject_put(sev_kobj);
+
+ return ret;
+}
+arch_initcall(sev_sysfs_init);
+#endif // CONFIG_SYSFS
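
With the attribute group above registered under the cpu subsystem root, the VMPL should surface to userspace as /sys/devices/system/cpu/sev/vmpl. A minimal reader, assuming that path:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/sev/vmpl", "r");
	int vmpl = -1;

	if (!f) {
		perror("open vmpl");
		return 1;
	}
	if (fscanf(f, "%d", &vmpl) == 1)
		printf("SNP guest running at VMPL%d\n", vmpl);
	fclose(f);
	return vmpl < 0;
}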
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/coco/sev/shared.c
index b4f8fa0f722c..71de53194089 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/coco/sev/shared.c
@@ -21,8 +21,30 @@
#define WARN(condition, format...) (!!(condition))
#define sev_printk(fmt, ...)
#define sev_printk_rtl(fmt, ...)
+#undef vc_forward_exception
+#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n")
#endif
+/*
+ * SVSM related information:
+ * When running under an SVSM, the VMPL that Linux is executing at must be
+ * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
+ *
+ * During boot, the page tables are set up as identity mapped and later
+ * changed to use kernel virtual addresses. Maintain separate virtual and
+ * physical addresses for the CAA to allow SVSM functions to be used during
+ * early boot, both with identity mapped virtual addresses and proper kernel
+ * virtual addresses.
+ */
+u8 snp_vmpl __ro_after_init;
+EXPORT_SYMBOL_GPL(snp_vmpl);
+static struct svsm_ca *boot_svsm_caa __ro_after_init;
+static u64 boot_svsm_caa_pa __ro_after_init;
+
+static struct svsm_ca *svsm_get_caa(void);
+static u64 svsm_get_caa_pa(void);
+static int svsm_perform_call_protocol(struct svsm_call *call);
+
/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
u32 fn;
@@ -229,6 +251,126 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
return ES_VMM_ERROR;
}
+static inline int svsm_process_result_codes(struct svsm_call *call)
+{
+ switch (call->rax_out) {
+ case SVSM_SUCCESS:
+ return 0;
+ case SVSM_ERR_INCOMPLETE:
+ case SVSM_ERR_BUSY:
+ return -EAGAIN;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Issue a VMGEXIT to call the SVSM:
+ * - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
+ * - Set the CA call pending field to 1
+ * - Issue VMGEXIT
+ * - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
+ * - Perform atomic exchange of the CA call pending field
+ *
+ * - See the "Secure VM Service Module for SEV-SNP Guests" specification for
+ * details on the calling convention.
+ * - The calling convention loosely follows the Microsoft X64 calling
+ * convention by putting arguments in RCX, RDX, R8 and R9.
+ * - RAX specifies the SVSM protocol/callid as input and the return code
+ * as output.
+ */
+static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
+{
+ register unsigned long rax asm("rax") = call->rax;
+ register unsigned long rcx asm("rcx") = call->rcx;
+ register unsigned long rdx asm("rdx") = call->rdx;
+ register unsigned long r8 asm("r8") = call->r8;
+ register unsigned long r9 asm("r9") = call->r9;
+
+ call->caa->call_pending = 1;
+
+ asm volatile("rep; vmmcall\n\t"
+ : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9)
+ : : "memory");
+
+ *pending = xchg(&call->caa->call_pending, *pending);
+
+ call->rax_out = rax;
+ call->rcx_out = rcx;
+ call->rdx_out = rdx;
+ call->r8_out = r8;
+ call->r9_out = r9;
+}
+
+static int svsm_perform_msr_protocol(struct svsm_call *call)
+{
+ u8 pending = 0;
+ u64 val, resp;
+
+ /*
+ * When using the MSR protocol, be sure to save and restore
+ * the current MSR value.
+ */
+ val = sev_es_rd_ghcb_msr();
+
+ sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));
+
+ svsm_issue_call(call, &pending);
+
+ resp = sev_es_rd_ghcb_msr();
+
+ sev_es_wr_ghcb_msr(val);
+
+ if (pending)
+ return -EINVAL;
+
+ if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
+ return -EINVAL;
+
+ if (GHCB_MSR_VMPL_RESP_VAL(resp))
+ return -EINVAL;
+
+ return svsm_process_result_codes(call);
+}
+
+static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
+{
+ struct es_em_ctxt ctxt;
+ u8 pending = 0;
+
+ vc_ghcb_invalidate(ghcb);
+
+ /*
+ * Fill in protocol and format specifiers. This can be called very early
+ * in the boot, so use rip-relative references as needed.
+ */
+ ghcb->protocol_version = RIP_REL_REF(ghcb_version);
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+
+ svsm_issue_call(call, &pending);
+
+ if (pending)
+ return -EINVAL;
+
+ switch (verify_exception_info(ghcb, &ctxt)) {
+ case ES_OK:
+ break;
+ case ES_EXCEPTION:
+ vc_forward_exception(&ctxt);
+ fallthrough;
+ default:
+ return -EINVAL;
+ }
+
+ return svsm_process_result_codes(call);
+}
+
static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
struct es_em_ctxt *ctxt,
u64 exit_code, u64 exit_info_1,
@@ -1079,38 +1221,268 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
}
}
-static void pvalidate_pages(struct snp_psc_desc *desc)
+static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
+ int ret, u64 svsm_ret)
+{
+ WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
+ pfn, action, page_size, ret, svsm_ret);
+
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+}
+
+static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
+{
+ unsigned int page_size;
+ bool action;
+ u64 pfn;
+
+ pfn = pc->entry[pc->cur_index].pfn;
+ action = pc->entry[pc->cur_index].action;
+ page_size = pc->entry[pc->cur_index].page_size;
+
+ __pval_terminate(pfn, action, page_size, ret, svsm_ret);
+}
+
+static void svsm_pval_4k_page(unsigned long paddr, bool validate)
+{
+ struct svsm_pvalidate_call *pc;
+ struct svsm_call call = {};
+ unsigned long flags;
+ u64 pc_pa;
+ int ret;
+
+ /*
+ * This can be called very early in the boot, so use native functions
+ * in order to avoid paravirt issues.
+ */
+ flags = native_local_irq_save();
+
+ call.caa = svsm_get_caa();
+
+ pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
+ pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
+
+ pc->num_entries = 1;
+ pc->cur_index = 0;
+ pc->entry[0].page_size = RMP_PG_SIZE_4K;
+ pc->entry[0].action = validate;
+ pc->entry[0].ignore_cf = 0;
+ pc->entry[0].pfn = paddr >> PAGE_SHIFT;
+
+ /* Protocol 0, Call ID 1 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
+ call.rcx = pc_pa;
+
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
+
+ native_local_irq_restore(flags);
+}
+
+static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, bool validate)
+{
+ int ret;
+
+ /*
+ * This can be called very early during boot, so use rIP-relative
+ * references as needed.
+ */
+ if (RIP_REL_REF(snp_vmpl)) {
+ svsm_pval_4k_page(paddr, validate);
+ } else {
+ ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+ if (ret)
+ __pval_terminate(PHYS_PFN(paddr), validate, RMP_PG_SIZE_4K, ret, 0);
+ }
+}
+
+static void pval_pages(struct snp_psc_desc *desc)
{
struct psc_entry *e;
unsigned long vaddr;
unsigned int size;
unsigned int i;
bool validate;
+ u64 pfn;
int rc;
for (i = 0; i <= desc->hdr.end_entry; i++) {
e = &desc->entries[i];
- vaddr = (unsigned long)pfn_to_kaddr(e->gfn);
+ pfn = e->gfn;
+ vaddr = (unsigned long)pfn_to_kaddr(pfn);
size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
validate = e->operation == SNP_PAGE_STATE_PRIVATE;
rc = pvalidate(vaddr, size, validate);
+ if (!rc)
+ continue;
+
if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
unsigned long vaddr_end = vaddr + PMD_SIZE;
- for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
+ for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
if (rc)
- break;
+ __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
}
+ } else {
+ __pval_terminate(pfn, validate, size, rc, 0);
}
+ }
+}
+
+static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
+ struct svsm_pvalidate_call *pc)
+{
+ struct svsm_pvalidate_entry *pe;
+
+ /* Nothing in the CA yet */
+ pc->num_entries = 0;
+ pc->cur_index = 0;
+
+ pe = &pc->entry[0];
+
+ while (pfn < pfn_end) {
+ pe->page_size = RMP_PG_SIZE_4K;
+ pe->action = action;
+ pe->ignore_cf = 0;
+ pe->pfn = pfn;
+
+ pe++;
+ pfn++;
+
+ pc->num_entries++;
+ if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
+ break;
+ }
+
+ return pfn;
+}
+
+static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
+ struct svsm_pvalidate_call *pc)
+{
+ struct svsm_pvalidate_entry *pe;
+ struct psc_entry *e;
+
+ /* Nothing in the CA yet */
+ pc->num_entries = 0;
+ pc->cur_index = 0;
+
+ pe = &pc->entry[0];
+ e = &desc->entries[desc_entry];
+
+ while (desc_entry <= desc->hdr.end_entry) {
+ pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
+ pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
+ pe->ignore_cf = 0;
+ pe->pfn = e->gfn;
+
+ pe++;
+ e++;
+
+ desc_entry++;
+ pc->num_entries++;
+ if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
+ break;
+ }
+
+ return desc_entry;
+}
+
+static void svsm_pval_pages(struct snp_psc_desc *desc)
+{
+ struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
+ unsigned int i, pv_4k_count = 0;
+ struct svsm_pvalidate_call *pc;
+ struct svsm_call call = {};
+ unsigned long flags;
+ bool action;
+ u64 pc_pa;
+ int ret;
+
+ /*
+ * This can be called very early in the boot, so use native functions
+ * in order to avoid paravirt issues.
+ */
+ flags = native_local_irq_save();
+
+ /*
+ * The SVSM calling area (CA) can support processing 510 entries at a
+ * time. Loop through the Page State Change descriptor until the CA is
+ * full or the last entry in the descriptor is reached, at which time
+ * the SVSM is invoked. This repeats until all entries in the descriptor
+ * are processed.
+ */
+ call.caa = svsm_get_caa();
+
+ pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
+ pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
+
+ /* Protocol 0, Call ID 1 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
+ call.rcx = pc_pa;
+
+ for (i = 0; i <= desc->hdr.end_entry;) {
+ i = svsm_build_ca_from_psc_desc(desc, i, pc);
+
+ do {
+ ret = svsm_perform_call_protocol(&call);
+ if (!ret)
+ continue;
+
+ /*
+ * Check if the entry failed because of an RMP mismatch (a
+ * PVALIDATE at 2M was requested, but the page is mapped in
+ * the RMP as 4K).
+ */
- if (rc) {
- WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc);
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
+ pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
+ /* Save this entry for post-processing at 4K */
+ pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
+
+ /* Skip to the next one unless at the end of the list */
+ pc->cur_index++;
+ if (pc->cur_index < pc->num_entries)
+ ret = -EAGAIN;
+ else
+ ret = 0;
+ }
+ } while (ret == -EAGAIN);
+
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
+ }
+
+ /* Process any entries that failed to be validated at 2M and validate them at 4K */
+ for (i = 0; i < pv_4k_count; i++) {
+ u64 pfn, pfn_end;
+
+ action = pv_4k[i].action;
+ pfn = pv_4k[i].pfn;
+ pfn_end = pfn + 512;
+
+ while (pfn < pfn_end) {
+ pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
+
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
}
}
+
+ native_local_irq_restore(flags);
+}
+
+static void pvalidate_pages(struct snp_psc_desc *desc)
+{
+ if (snp_vmpl)
+ svsm_pval_pages(desc);
+ else
+ pval_pages(desc);
}
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
@@ -1269,3 +1641,77 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
return ES_UNSUPPORTED;
}
+
+/*
+ * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
+ * services needed when not running in VMPL0.
+ */
+static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
+{
+ struct snp_secrets_page *secrets_page;
+ struct snp_cpuid_table *cpuid_table;
+ unsigned int i;
+ u64 caa;
+
+ BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE);
+
+ /*
+ * Check if running at VMPL0.
+ *
+ * Use RMPADJUST (see the rmpadjust() function for a description of what
+ * the instruction does) to update the VMPL1 permissions of a page. If
+ * the guest is running at VMPL0, this will succeed and implies there is
+ * no SVSM. If the guest is running at any other VMPL, this will fail.
+ * Linux SNP guests only ever run at a single VMPL level so permission mask
+ * changes of a lesser-privileged VMPL are a don't-care.
+ *
+ * Use a rip-relative reference to obtain the proper address, since this
+ * routine is running identity mapped when called, both by the decompressor
+ * code and the early kernel code.
+ */
+ if (!rmpadjust((unsigned long)&RIP_REL_REF(boot_ghcb_page), RMP_PG_SIZE_4K, 1))
+ return false;
+
+ /*
+ * Not running at VMPL0, ensure everything has been properly supplied
+ * for running under an SVSM.
+ */
+ if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE);
+
+ secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys;
+ if (!secrets_page->svsm_size)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM);
+
+ if (!secrets_page->svsm_guest_vmpl)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);
+
+ RIP_REL_REF(snp_vmpl) = secrets_page->svsm_guest_vmpl;
+
+ caa = secrets_page->svsm_caa;
+
+ /*
+ * An open-coded PAGE_ALIGNED() in order to avoid including
+ * kernel-proper headers into the decompressor.
+ */
+ if (caa & (PAGE_SIZE - 1))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);
+
+ /*
+ * The CA is identity mapped when this routine is called, both by the
+ * decompressor code and the early kernel code.
+ */
+ RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)caa;
+ RIP_REL_REF(boot_svsm_caa_pa) = caa;
+
+ /* Advertise the SVSM presence via CPUID. */
+ cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table();
+ for (i = 0; i < cpuid_table->count; i++) {
+ struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
+
+ if (fn->eax_in == 0x8000001f)
+ fn->eax |= BIT(28);
+ }
+
+ return true;
+}
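
svsm_setup_ca() advertises the SVSM by setting bit 28 of leaf 0x8000001f EAX in the SNP CPUID table. The detection side is symmetric; a sketch using GCC's cpuid.h, assuming the bit-28 encoding from the SVSM specification:

#include <stdint.h>
#include <cpuid.h>

/*
 * Check for SVSM presence: bit 28 of CPUID 0x8000001f EAX, as patched
 * into the SNP CPUID table by svsm_setup_ca() above.
 */
static int svsm_present(void)
{
	uint32_t eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
		return 0;	/* leaf not supported */

	return !!(eax & (1u << 28));
}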
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index c1cb90369915..078e2bac2553 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -7,6 +7,7 @@
#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kexec.h>
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
@@ -14,6 +15,7 @@
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
+#include <asm/set_memory.h>
/* MMIO direction */
#define EPT_READ 0
@@ -38,6 +40,8 @@
#define TDREPORT_SUBTYPE_0 0
+static atomic_long_t nr_shared;
+
/* Called from __tdx_hypercall() for unrecoverable failure */
noinstr void __noreturn __tdx_hypercall_failed(void)
{
@@ -798,28 +802,124 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
return true;
}
-static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
- bool enc)
+static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
+ bool enc)
{
/*
* Only handle shared->private conversion here.
* See the comment in tdx_early_init().
*/
- if (enc)
- return tdx_enc_status_changed(vaddr, numpages, enc);
- return true;
+ if (enc && !tdx_enc_status_changed(vaddr, numpages, enc))
+ return -EIO;
+
+ return 0;
}
-static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
bool enc)
{
/*
* Only handle private->shared conversion here.
* See the comment in tdx_early_init().
*/
- if (!enc)
- return tdx_enc_status_changed(vaddr, numpages, enc);
- return true;
+ if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc))
+ return -EIO;
+
+ if (enc)
+ atomic_long_sub(numpages, &nr_shared);
+ else
+ atomic_long_add(numpages, &nr_shared);
+
+ return 0;
+}
+
+/* Stop new private<->shared conversions */
+static void tdx_kexec_begin(void)
+{
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ /*
+ * Crash kernel reaches here with interrupts disabled: can't wait for
+ * conversions to finish.
+ *
+ * If a race happened, just report and proceed.
+ */
+ if (!set_memory_enc_stop_conversion())
+ pr_warn("Failed to stop shared<->private conversions\n");
+}
+
+/* Walk direct mapping and convert all shared memory back to private */
+static void tdx_kexec_finish(void)
+{
+ unsigned long addr, end;
+ long found = 0, shared;
+
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ lockdep_assert_irqs_disabled();
+
+ addr = PAGE_OFFSET;
+ end = PAGE_OFFSET + get_max_mapped();
+
+ while (addr < end) {
+ unsigned long size;
+ unsigned int level;
+ pte_t *pte;
+
+ pte = lookup_address(addr, &level);
+ size = page_level_size(level);
+
+ if (pte && pte_decrypted(*pte)) {
+ int pages = size / PAGE_SIZE;
+
+ /*
+ * Touching memory with the shared bit set triggers an
+ * implicit conversion to shared.
+ *
+ * Make sure nobody touches the shared range from
+ * now on.
+ */
+ set_pte(pte, __pte(0));
+
+ /*
+ * Memory encryption state persists across kexec.
+ * If tdx_enc_status_changed() fails in the first
+ * kernel, it leaves memory in an unknown state.
+ *
+ * If that memory remains shared, accessing it in the
+ * *next* kernel through a private mapping will result
+ * in an unrecoverable guest shutdown.
+ *
+ * The kdump kernel boot is not impacted as it uses
+ * a pre-reserved memory range that is always private.
+ * However, gathering crash information could lead to
+ * a crash if it accesses unconverted memory through
+ * a private mapping which is possible when accessing
+ * that memory through /proc/vmcore, for example.
+ *
+ * In all cases, print error info in order to leave
+ * enough bread crumbs for debugging.
+ */
+ if (!tdx_enc_status_changed(addr, pages, true)) {
+ pr_err("Failed to unshare range %#lx-%#lx\n",
+ addr, addr + size);
+ }
+
+ found += pages;
+ }
+
+ addr += size;
+ }
+
+ __flush_tlb_all();
+
+ shared = atomic_long_read(&nr_shared);
+ if (shared != found) {
+ pr_err("shared page accounting is off\n");
+ pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found);
+ }
}
void __init tdx_early_init(void)
@@ -881,6 +981,9 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+ x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
+ x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+
/*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 11c9b8efdc4c..ed0a5f2dc129 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -89,10 +89,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
cld
- IBRS_ENTER
- UNTRAIN_RET
- CLEAR_BRANCH_HISTORY
-
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@@ -116,6 +112,16 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
+ /*
+ * CPU bugs mitigations mechanisms can call other functions. They
+ * should be invoked after making sure TF is cleared because
+ * single-step is ignored only for instructions inside the
+ * entry_SYSENTER_compat function.
+ */
+ IBRS_ENTER
+ UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
+
movq %rsp, %rdi
call do_SYSENTER_32
jmp sysret32_from_system_call
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index c2235bae17ef..8cc9950d7104 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -14,9 +14,12 @@
#endif
#define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
-
+#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __ia32_##sym(const struct pt_regs *);
#include <asm/syscalls_32.h>
-#undef __SYSCALL
+#undef __SYSCALL
+
+#undef __SYSCALL_NORETURN
+#define __SYSCALL_NORETURN __SYSCALL
/*
* The sys_call_table[] is no longer used for system calls, but
@@ -28,11 +31,10 @@
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
-#undef __SYSCALL
+#undef __SYSCALL
#endif
#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
-
long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
switch (nr) {
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 33b3f09e6f15..ba8354424860 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,8 +8,12 @@
#include <asm/syscall.h>
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
-#undef __SYSCALL
+#undef __SYSCALL
+
+#undef __SYSCALL_NORETURN
+#define __SYSCALL_NORETURN __SYSCALL
/*
* The sys_call_table[] is no longer used for system calls, but
@@ -20,10 +24,9 @@
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
-#undef __SYSCALL
+#undef __SYSCALL
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-
long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
switch (nr) {
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 03de4a932131..fb77908f44f3 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,11 +8,14 @@
#include <asm/syscall.h>
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_x32.h>
-#undef __SYSCALL
+#undef __SYSCALL
-#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+#undef __SYSCALL_NORETURN
+#define __SYSCALL_NORETURN __SYSCALL
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
switch (nr) {
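
The two-pass __SYSCALL/__SYSCALL_NORETURN dance above is easiest to see expanded. A toy, compilable restatement with hypothetical table entries: pass one emits declarations (noreturn entries annotated), pass two aliases __SYSCALL_NORETURN to __SYSCALL so those entries dispatch like any other:

struct pt_regs;	/* opaque here; the real one comes from asm/ptrace.h */

/* Toy syscalls header: one ordinary entry, one noreturn entry. */
#define TOY_SYSCALLS \
	__SYSCALL(0, sys_read) \
	__SYSCALL_NORETURN(60, sys_exit)

/* Pass 1: declarations; noreturn entries carry the attribute. */
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#define __SYSCALL_NORETURN(nr, sym) \
	extern long __attribute__((noreturn)) __x64_##sym(const struct pt_regs *);
TOY_SYSCALLS
#undef __SYSCALL

/* Pass 2: noreturn entries dispatch exactly like ordinary ones. */
#undef __SYSCALL_NORETURN
#define __SYSCALL_NORETURN __SYSCALL
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);

long toy_sys_call(const struct pt_regs *regs, unsigned int nr)
{
	switch (nr) {
	TOY_SYSCALLS
	default:
		return -1;	/* -ENOSYS in the real table */
	}
}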
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7fd1f57ad3d3..534c74b14fab 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# 32-bit system call numbers and entry vectors
#
# The format is:
-# <number> <abi> <name> <entry point> <compat entry point>
+# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]]
#
# The __ia32_sys and __ia32_compat_sys stubs are created on-the-fly for
# sys_*() system calls and compat_sys_*() compat system calls if
@@ -12,7 +13,7 @@
# The abi is always "i386" for this file.
#
0 i386 restart_syscall sys_restart_syscall
-1 i386 exit sys_exit
+1 i386 exit sys_exit - noreturn
2 i386 fork sys_fork
3 i386 read sys_read
4 i386 write sys_write
@@ -263,7 +264,7 @@
249 i386 io_cancel sys_io_cancel
250 i386 fadvise64 sys_ia32_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
-252 i386 exit_group sys_exit_group
+252 i386 exit_group sys_exit_group - noreturn
253 i386 lookup_dcookie
254 i386 epoll_create sys_epoll_create
255 i386 epoll_ctl sys_epoll_ctl
@@ -420,7 +421,7 @@
412 i386 utimensat_time64 sys_utimensat
413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 i386 io_pgetevents_time64 sys_io_pgetevents
+416 i386 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 i386 mq_timedsend_time64 sys_mq_timedsend
419 i386 mq_timedreceive_time64 sys_mq_timedreceive
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index a396f6e6ab5b..097e5a18db52 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# 64-bit system call numbers and entry vectors
#
# The format is:
-# <number> <abi> <name> <entry point>
+# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]]
#
# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
#
@@ -68,7 +69,7 @@
57 common fork sys_fork
58 common vfork sys_vfork
59 64 execve sys_execve
-60 common exit sys_exit
+60 common exit sys_exit - noreturn
61 common wait4 sys_wait4
62 common kill sys_kill
63 common uname sys_newuname
@@ -239,7 +240,7 @@
228 common clock_gettime sys_clock_gettime
229 common clock_getres sys_clock_getres
230 common clock_nanosleep sys_clock_nanosleep
-231 common exit_group sys_exit_group
+231 common exit_group sys_exit_group - noreturn
232 common epoll_wait sys_epoll_wait
233 common epoll_ctl sys_epoll_ctl
234 common tgkill sys_tgkill
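
For context, a hedged illustration of what the new noreturn column buys once the stub prototypes carry the attribute: the compiler can treat code after the call as unreachable, and tooling such as objtool can verify that the handler really never returns. C11 spelling shown; the kernel's __noreturn macro expands to the equivalent compiler attribute.

    #include <stdlib.h>

    static _Noreturn void die(int code)
    {
            exit(code);
    }

    static int handle(int code)
    {
            if (code)
                    die(code);      /* no return path needed past here */
            return 0;
    }

    int main(void)
    {
            return handle(0);
    }
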
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 38c1b1f1deaa..0e835dc134a5 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4698,8 +4698,8 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
static inline bool intel_pmu_broken_perf_cap(void)
{
/* The Perf Metric (Bit 15) is always cleared */
- if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) ||
- (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L))
+ if (boot_cpu_data.x86_vfm == INTEL_METEORLAKE ||
+ boot_cpu_data.x86_vfm == INTEL_METEORLAKE_L)
return true;
return false;
@@ -5187,35 +5187,35 @@ static __init void intel_clovertown_quirk(void)
}
static const struct x86_cpu_desc isolation_ucodes[] = {
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_HASWELL, 3, 0x0000001f),
+ INTEL_CPU_DESC(INTEL_HASWELL_L, 1, 0x0000001e),
+ INTEL_CPU_DESC(INTEL_HASWELL_G, 1, 0x00000015),
+ INTEL_CPU_DESC(INTEL_HASWELL_X, 2, 0x00000037),
+ INTEL_CPU_DESC(INTEL_HASWELL_X, 4, 0x0000000a),
+ INTEL_CPU_DESC(INTEL_BROADWELL, 4, 0x00000023),
+ INTEL_CPU_DESC(INTEL_BROADWELL_G, 1, 0x00000014),
+ INTEL_CPU_DESC(INTEL_BROADWELL_D, 2, 0x00000010),
+ INTEL_CPU_DESC(INTEL_BROADWELL_D, 3, 0x07000009),
+ INTEL_CPU_DESC(INTEL_BROADWELL_D, 4, 0x0f000009),
+ INTEL_CPU_DESC(INTEL_BROADWELL_D, 5, 0x0e000002),
+ INTEL_CPU_DESC(INTEL_BROADWELL_X, 1, 0x0b000014),
+ INTEL_CPU_DESC(INTEL_SKYLAKE_X, 3, 0x00000021),
+ INTEL_CPU_DESC(INTEL_SKYLAKE_X, 4, 0x00000000),
+ INTEL_CPU_DESC(INTEL_SKYLAKE_X, 5, 0x00000000),
+ INTEL_CPU_DESC(INTEL_SKYLAKE_X, 6, 0x00000000),
+ INTEL_CPU_DESC(INTEL_SKYLAKE_X, 7, 0x00000000),
+ INTEL_CPU_DESC(INTEL_SKYLAKE_X, 11, 0x00000000),
+ INTEL_CPU_DESC(INTEL_SKYLAKE_L, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_SKYLAKE, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_KABYLAKE, 9, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE_L, 9, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE_L, 10, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE_L, 11, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE_L, 12, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE, 10, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE, 11, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE, 12, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_KABYLAKE, 13, 0x0000004e),
{}
};
@@ -5232,9 +5232,9 @@ static __init void intel_pebs_isolation_quirk(void)
}
static const struct x86_cpu_desc pebs_ucodes[] = {
- INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028),
- INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618),
- INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c),
+ INTEL_CPU_DESC(INTEL_SANDYBRIDGE, 7, 0x00000028),
+ INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 6, 0x00000618),
+ INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 7, 0x0000070c),
{}
};
@@ -6238,19 +6238,19 @@ __init int intel_pmu_init(void)
/*
* Install the hw-cache-events table:
*/
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_CORE_YONAH:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_CORE_YONAH:
pr_cont("Core events, ");
name = "core";
break;
- case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
fallthrough;
- case INTEL_FAM6_CORE2_MEROM_L:
- case INTEL_FAM6_CORE2_PENRYN:
- case INTEL_FAM6_CORE2_DUNNINGTON:
+ case INTEL_CORE2_MEROM_L:
+ case INTEL_CORE2_PENRYN:
+ case INTEL_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6262,9 +6262,9 @@ __init int intel_pmu_init(void)
name = "core2";
break;
- case INTEL_FAM6_NEHALEM:
- case INTEL_FAM6_NEHALEM_EP:
- case INTEL_FAM6_NEHALEM_EX:
+ case INTEL_NEHALEM:
+ case INTEL_NEHALEM_EP:
+ case INTEL_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -6296,11 +6296,11 @@ __init int intel_pmu_init(void)
name = "nehalem";
break;
- case INTEL_FAM6_ATOM_BONNELL:
- case INTEL_FAM6_ATOM_BONNELL_MID:
- case INTEL_FAM6_ATOM_SALTWELL:
- case INTEL_FAM6_ATOM_SALTWELL_MID:
- case INTEL_FAM6_ATOM_SALTWELL_TABLET:
+ case INTEL_ATOM_BONNELL:
+ case INTEL_ATOM_BONNELL_MID:
+ case INTEL_ATOM_SALTWELL:
+ case INTEL_ATOM_SALTWELL_MID:
+ case INTEL_ATOM_SALTWELL_TABLET:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6313,11 +6313,11 @@ __init int intel_pmu_init(void)
name = "bonnell";
break;
- case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_D:
- case INTEL_FAM6_ATOM_SILVERMONT_MID:
- case INTEL_FAM6_ATOM_AIRMONT:
- case INTEL_FAM6_ATOM_AIRMONT_MID:
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_SILVERMONT_D:
+ case INTEL_ATOM_SILVERMONT_MID:
+ case INTEL_ATOM_AIRMONT:
+ case INTEL_ATOM_AIRMONT_MID:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -6335,8 +6335,8 @@ __init int intel_pmu_init(void)
name = "silvermont";
break;
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GOLDMONT_D:
+ case INTEL_ATOM_GOLDMONT:
+ case INTEL_ATOM_GOLDMONT_D:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -6362,7 +6362,7 @@ __init int intel_pmu_init(void)
name = "goldmont";
break;
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_ATOM_GOLDMONT_PLUS:
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@@ -6391,9 +6391,9 @@ __init int intel_pmu_init(void)
name = "goldmont_plus";
break;
- case INTEL_FAM6_ATOM_TREMONT_D:
- case INTEL_FAM6_ATOM_TREMONT:
- case INTEL_FAM6_ATOM_TREMONT_L:
+ case INTEL_ATOM_TREMONT_D:
+ case INTEL_ATOM_TREMONT:
+ case INTEL_ATOM_TREMONT_L:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6420,7 +6420,7 @@ __init int intel_pmu_init(void)
name = "Tremont";
break;
- case INTEL_FAM6_ATOM_GRACEMONT:
+ case INTEL_ATOM_GRACEMONT:
intel_pmu_init_grt(NULL);
intel_pmu_pebs_data_source_grt();
x86_pmu.pebs_latency_data = adl_latency_data_small;
@@ -6432,8 +6432,8 @@ __init int intel_pmu_init(void)
name = "gracemont";
break;
- case INTEL_FAM6_ATOM_CRESTMONT:
- case INTEL_FAM6_ATOM_CRESTMONT_X:
+ case INTEL_ATOM_CRESTMONT:
+ case INTEL_ATOM_CRESTMONT_X:
intel_pmu_init_grt(NULL);
x86_pmu.extra_regs = intel_cmt_extra_regs;
intel_pmu_pebs_data_source_cmt();
@@ -6446,9 +6446,9 @@ __init int intel_pmu_init(void)
name = "crestmont";
break;
- case INTEL_FAM6_WESTMERE:
- case INTEL_FAM6_WESTMERE_EP:
- case INTEL_FAM6_WESTMERE_EX:
+ case INTEL_WESTMERE:
+ case INTEL_WESTMERE_EP:
+ case INTEL_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -6477,8 +6477,8 @@ __init int intel_pmu_init(void)
name = "westmere";
break;
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_SANDYBRIDGE_X:
+ case INTEL_SANDYBRIDGE:
+ case INTEL_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -6491,7 +6491,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
+ if (boot_cpu_data.x86_vfm == INTEL_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -6517,8 +6517,8 @@ __init int intel_pmu_init(void)
name = "sandybridge";
break;
- case INTEL_FAM6_IVYBRIDGE:
- case INTEL_FAM6_IVYBRIDGE_X:
+ case INTEL_IVYBRIDGE:
+ case INTEL_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6534,7 +6534,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
+ if (boot_cpu_data.x86_vfm == INTEL_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -6556,10 +6556,10 @@ __init int intel_pmu_init(void)
break;
- case INTEL_FAM6_HASWELL:
- case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_HASWELL_L:
- case INTEL_FAM6_HASWELL_G:
+ case INTEL_HASWELL:
+ case INTEL_HASWELL_X:
+ case INTEL_HASWELL_L:
+ case INTEL_HASWELL_G:
x86_add_quirk(intel_ht_bug);
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
@@ -6589,10 +6589,10 @@ __init int intel_pmu_init(void)
name = "haswell";
break;
- case INTEL_FAM6_BROADWELL:
- case INTEL_FAM6_BROADWELL_D:
- case INTEL_FAM6_BROADWELL_G:
- case INTEL_FAM6_BROADWELL_X:
+ case INTEL_BROADWELL:
+ case INTEL_BROADWELL_D:
+ case INTEL_BROADWELL_G:
+ case INTEL_BROADWELL_X:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -6631,8 +6631,8 @@ __init int intel_pmu_init(void)
name = "broadwell";
break;
- case INTEL_FAM6_XEON_PHI_KNL:
- case INTEL_FAM6_XEON_PHI_KNM:
+ case INTEL_XEON_PHI_KNL:
+ case INTEL_XEON_PHI_KNM:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
@@ -6651,15 +6651,15 @@ __init int intel_pmu_init(void)
name = "knights-landing";
break;
- case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_SKYLAKE_X:
pmem = true;
fallthrough;
- case INTEL_FAM6_SKYLAKE_L:
- case INTEL_FAM6_SKYLAKE:
- case INTEL_FAM6_KABYLAKE_L:
- case INTEL_FAM6_KABYLAKE:
- case INTEL_FAM6_COMETLAKE_L:
- case INTEL_FAM6_COMETLAKE:
+ case INTEL_SKYLAKE_L:
+ case INTEL_SKYLAKE:
+ case INTEL_KABYLAKE_L:
+ case INTEL_KABYLAKE:
+ case INTEL_COMETLAKE_L:
+ case INTEL_COMETLAKE:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -6708,16 +6708,16 @@ __init int intel_pmu_init(void)
name = "skylake";
break;
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_ICELAKE_D:
+ case INTEL_ICELAKE_X:
+ case INTEL_ICELAKE_D:
x86_pmu.pebs_ept = 1;
pmem = true;
fallthrough;
- case INTEL_FAM6_ICELAKE_L:
- case INTEL_FAM6_ICELAKE:
- case INTEL_FAM6_TIGERLAKE_L:
- case INTEL_FAM6_TIGERLAKE:
- case INTEL_FAM6_ROCKETLAKE:
+ case INTEL_ICELAKE_L:
+ case INTEL_ICELAKE:
+ case INTEL_TIGERLAKE_L:
+ case INTEL_TIGERLAKE:
+ case INTEL_ROCKETLAKE:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -6752,13 +6752,13 @@ __init int intel_pmu_init(void)
name = "icelake";
break;
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_EMERALDRAPIDS_X:
+ case INTEL_SAPPHIRERAPIDS_X:
+ case INTEL_EMERALDRAPIDS_X:
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.extra_regs = intel_glc_extra_regs;
fallthrough;
- case INTEL_FAM6_GRANITERAPIDS_X:
- case INTEL_FAM6_GRANITERAPIDS_D:
+ case INTEL_GRANITERAPIDS_X:
+ case INTEL_GRANITERAPIDS_D:
intel_pmu_init_glc(NULL);
if (!x86_pmu.extra_regs)
x86_pmu.extra_regs = intel_rwc_extra_regs;
@@ -6776,11 +6776,11 @@ __init int intel_pmu_init(void)
name = "sapphire_rapids";
break;
- case INTEL_FAM6_ALDERLAKE:
- case INTEL_FAM6_ALDERLAKE_L:
- case INTEL_FAM6_RAPTORLAKE:
- case INTEL_FAM6_RAPTORLAKE_P:
- case INTEL_FAM6_RAPTORLAKE_S:
+ case INTEL_ALDERLAKE:
+ case INTEL_ALDERLAKE_L:
+ case INTEL_RAPTORLAKE:
+ case INTEL_RAPTORLAKE_P:
+ case INTEL_RAPTORLAKE_S:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
@@ -6838,8 +6838,8 @@ __init int intel_pmu_init(void)
name = "alderlake_hybrid";
break;
- case INTEL_FAM6_METEORLAKE:
- case INTEL_FAM6_METEORLAKE_L:
+ case INTEL_METEORLAKE:
+ case INTEL_METEORLAKE_L:
intel_pmu_init_hybrid(hybrid_big_small);
x86_pmu.pebs_latency_data = mtl_latency_data_small;
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 0c5e7a7c43ac..b985ca79cf97 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -765,51 +765,51 @@ static struct rapl_model model_amd_hygon = {
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
- X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon),
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb),
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
- X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_H, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &model_skl),
+ X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE, &model_snb),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &model_snbep),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE, &model_snb),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &model_snbep),
+ X86_MATCH_VFM(INTEL_HASWELL, &model_hsw),
+ X86_MATCH_VFM(INTEL_HASWELL_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_HASWELL_L, &model_hsw),
+ X86_MATCH_VFM(INTEL_HASWELL_G, &model_hsw),
+ X86_MATCH_VFM(INTEL_BROADWELL, &model_hsw),
+ X86_MATCH_VFM(INTEL_BROADWELL_G, &model_hsw),
+ X86_MATCH_VFM(INTEL_BROADWELL_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_BROADWELL_D, &model_hsx),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &model_knl),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &model_knl),
+ X86_MATCH_VFM(INTEL_SKYLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_SKYLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_KABYLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_KABYLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_CANNONLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &model_hsw),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &model_hsw),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &model_hsw),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ICELAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, &model_hsx),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_COMETLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_COMETLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_TIGERLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &model_skl),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &model_spr),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &model_spr),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &model_skl),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &model_skl),
+ X86_MATCH_VFM(INTEL_METEORLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl),
+ X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
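
A toy sketch of how such a match table is walked; the real code uses x86_match_cpu() over struct x86_cpu_id, which also compares vendor, family, model and feature fields, but the shape is the same: scan until the empty terminator and return the first matching entry's driver_data.

    #include <stdio.h>

    /* simplified stand-ins for struct x86_cpu_id / x86_match_cpu() */
    struct cpu_id {
            unsigned int vfm;               /* packed vendor-family-model */
            const void *driver_data;        /* e.g. a struct rapl_model * */
    };

    static const struct cpu_id *match_cpu(const struct cpu_id *table,
                                          unsigned int this_vfm)
    {
            for (; table->vfm || table->driver_data; table++)
                    if (table->vfm == this_vfm)
                            return table;
            return NULL;                    /* hit the { 0 } terminator */
    }

    int main(void)
    {
            static const struct cpu_id demo_table[] = {
                    { 0x062a, "model_snb" },        /* made-up packed key */
                    { 0 }
            };
            const struct cpu_id *id = match_cpu(demo_table, 0x062a);

            printf("%s\n", id ? (const char *)id->driver_data : "none");
            return 0;
    }
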
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 768d73de0d09..b4a851d27c7c 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -523,9 +523,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
* transition is complete, hv_vtom_set_host_visibility() marks the pages
* as "present" again.
*/
-static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
+static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
{
- return !set_memory_np(kbuffer, pagecount);
+ return set_memory_np(kbuffer, pagecount);
}
/*
@@ -536,20 +536,19 @@ static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
* with host. This function works as wrap of hv_mark_gpa_visibility()
* with memory base and size.
*/
-static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
+static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
{
enum hv_mem_host_visibility visibility = enc ?
VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
u64 *pfn_array;
phys_addr_t paddr;
+ int i, pfn, err;
void *vaddr;
int ret = 0;
- bool result = true;
- int i, pfn;
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
if (!pfn_array) {
- result = false;
+ ret = -ENOMEM;
goto err_set_memory_p;
}
@@ -568,10 +567,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
ret = hv_mark_gpa_visibility(pfn, pfn_array,
visibility);
- if (ret) {
- result = false;
+ if (ret)
goto err_free_pfn_array;
- }
pfn = 0;
}
}
@@ -586,10 +583,11 @@ err_set_memory_p:
* order to avoid leaving the memory range in a "broken" state. Setting
* the PRESENT bits shouldn't fail, but return an error if it does.
*/
- if (set_memory_p(kbuffer, pagecount))
- result = false;
+ err = set_memory_p(kbuffer, pagecount);
+ if (err && !ret)
+ ret = err;
- return result;
+ return ret;
}
static bool hv_vtom_tlb_flush_required(bool private)
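
The converted error handling above follows the common "first error wins" shape: the restore path runs unconditionally, but its own failure only becomes the return value when no earlier error is pending. A self-contained sketch with made-up step names:

    #include <errno.h>
    #include <stdio.h>

    static int do_transition(int fail)   { return fail ? -EIO : 0; }
    static int undo_transition(int fail) { return fail ? -EINVAL : 0; }

    static int set_visibility(void)
    {
            int ret, err;

            ret = do_transition(1);         /* primary operation fails */

            /* always try to restore a sane state ... */
            err = undo_transition(0);
            /* ... but never let cleanup overwrite the first error */
            if (err && !ret)
                    ret = err;

            return ret;
    }

    int main(void)
    {
            printf("%d\n", set_visibility());   /* -EIO, i.e. -5 on Linux */
            return 0;
    }
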
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 5af926c050f0..21bc53f5ed0c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -78,6 +78,13 @@ static inline bool acpi_skip_set_wakeup_address(void)
#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
+union acpi_subtable_headers;
+
+int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+ const unsigned long end);
+
+void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
+
/*
* Check if the CPU can handle C2 and deeper
*/
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ba99ef75f56c..ca9ae606aab9 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -156,102 +156,50 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define ALT_CALL_INSTR "call BUG_func"
-#define b_replacement(num) "664"#num
-#define e_replacement(num) "665"#num
+#define alt_slen "772b-771b"
+#define alt_total_slen "773b-771b"
+#define alt_rlen "775f-774f"
-#define alt_end_marker "663"
-#define alt_slen "662b-661b"
-#define alt_total_slen alt_end_marker"b-661b"
-#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
-
-#define OLDINSTR(oldinstr, num) \
- "# ALT: oldnstr\n" \
- "661:\n\t" oldinstr "\n662:\n" \
+#define OLDINSTR(oldinstr) \
+ "# ALT: oldinstr\n" \
+ "771:\n\t" oldinstr "\n772:\n" \
"# ALT: padding\n" \
- ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \
- alt_end_marker ":\n"
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
-
-/*
- * Pad the second replacement alternative with additional NOPs if it is
- * additionally longer than the first replacement alternative.
- */
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "# ALT: oldinstr2\n" \
- "661:\n\t" oldinstr "\n662:\n" \
- "# ALT: padding2\n" \
- ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define OLDINSTR_3(oldinsn, n1, n2, n3) \
- "# ALT: oldinstr3\n" \
- "661:\n\t" oldinsn "\n662:\n" \
- "# ALT: padding3\n" \
- ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define ALTINSTR_ENTRY(ft_flags, num) \
- " .long 661b - .\n" /* label */ \
- " .long " b_replacement(num)"f - .\n" /* new instruction */ \
+ ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
+ "773:\n"
+
+#define ALTINSTR_ENTRY(ft_flags) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ " .long 771b - .\n" /* label */ \
+ " .long 774f - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
" .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */
+ " .byte " alt_rlen "\n" /* replacement len */ \
+ ".popsection\n"
-#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
- "# ALT: replacement " #num "\n" \
- b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
+#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ "# ALT: replacement\n" \
+ "774:\n\t" newinstr "\n775:\n" \
+ ".popsection\n"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- OLDINSTR(oldinstr, 1) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags, 1) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, 1) \
- ".popsection\n"
+ OLDINSTR(oldinstr) \
+ ALTINSTR_ENTRY(ft_flags) \
+ ALTINSTR_REPLACEMENT(newinstr)
#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
- OLDINSTR_2(oldinstr, 1, 2) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, 2) \
- ".popsection\n"
+ ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2)
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
- ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
- newinstr_yes, ft_flags)
-
-#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \
- newinsn3, ft_flags3) \
- OLDINSTR_3(oldinsn, 1, 2, 3) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ALTINSTR_ENTRY(ft_flags3, 3) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinsn1, 1) \
- ALTINSTR_REPLACEMENT(newinsn2, 2) \
- ALTINSTR_REPLACEMENT(newinsn3, 3) \
- ".popsection\n"
+ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags)
+
+#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \
+ newinstr3, ft_flags3) \
+ ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \
+ newinstr3, ft_flags3)
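
The deleted alt_max_short() helper existed only because the old macros padded @oldinstr once, for the longest of all replacements; in the nested form each ALTERNATIVE() level pads for its own replacement, so the explicit max disappears. For reference, the bit trick it used, transcribed into C semantics (where a true relation is 1, so a single minus builds the all-ones mask; gas needed the extra '-' since its true value is already -1):

    /* branchless max: when a < b the mask is all ones and a ^ (a ^ b)
     * collapses to b; otherwise the mask is zero and a survives */
    static unsigned int bithack_max(unsigned int a, unsigned int b)
    {
            return a ^ ((a ^ b) & -(unsigned int)(a < b));
    }
    /* bithack_max(3, 7) == 7, bithack_max(9, 7) == 9 */
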
/*
* Alternative instructions for different CPU types or capabilities.
@@ -266,14 +214,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, ft_flags) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory")
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory")
#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory")
-#define alternative_ternary(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
- asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) ::: "memory")
-
/*
* Alternative inline assembly with input.
*
@@ -283,18 +228,28 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Leaving an unused argument 0 to keep API compatibility.
*/
#define alternative_input(oldinstr, newinstr, ft_flags, input...) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: output : "i" (0), ## input)
-/* Like alternative_io, but for replacing a direct call with another one. */
-#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \
- asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \
- : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+/*
+ * Like alternative_io, but for replacing a direct call with another one.
+ *
+ * Use the %c operand modifier which is the generic way to print a bare
+ * constant expression with all syntax-specific punctuation omitted. %P
+ * is the x86-specific variant which can handle constants too, for
+ * historical reasons, but it should be used primarily for PIC
+ * references: i.e., if used for a function, it would add the PLT
+ * suffix.
+ */
+#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \
+ asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
/*
* Like alternative_call, but there are two features and respective functions.
@@ -302,12 +257,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, if CPU has feature1, function1 is used.
* Otherwise, old function is used.
*/
-#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
- output, input...) \
- asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \
- "call %c[new2]", ft_flags2) \
- : output, ASM_CALL_CONSTRAINT \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
+ output, input...) \
+ asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \
+ "call %c[new2]", ft_flags2) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
[new2] "i" (newfunc2), ## input)
/*
@@ -322,6 +277,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__
+
/* Macro for creating assembler functions avoiding any C magic. */
#define DEFINE_ASM_FUNC(func, instr, sec) \
asm (".pushsection " #sec ", \"ax\"\n" \
@@ -388,22 +345,23 @@ void nop_func(void);
* @newinstr. ".skip" directive takes care of proper instruction padding
* in case @newinstr is longer than @oldinstr.
*/
-.macro ALTERNATIVE oldinstr, newinstr, ft_flags
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f
- .popsection
+#define __ALTERNATIVE(oldinst, newinst, flag) \
+740: \
+ oldinst ; \
+741: \
+ .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\
+742: \
+ .pushsection .altinstructions,"a" ; \
+ altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \
+ .popsection ; \
+ .pushsection .altinstr_replacement,"ax" ; \
+743: \
+ newinst ; \
+744: \
+ .popsection ;
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
+.macro ALTERNATIVE oldinstr, newinstr, ft_flags
+ __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
.endm
#define old_len 141b-140b
@@ -412,65 +370,18 @@ void nop_func(void);
#define new_len3 146f-145f
/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c))
-
-
-/*
* Same as ALTERNATIVE macro above but for two alternatives. If CPU
* has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
* @feature2, it replaces @oldinstr with @newinstr2.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
-140:
- \oldinstr
-141:
- .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_2(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
+ __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
+ \newinstr2, \ft_flags2)
.endm
.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3
-140:
- \oldinstr
-141:
- .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
- (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- \newinstr3
-146:
- .popsection
+ __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2),
+ \newinstr3, \ft_flags3)
.endm
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 5c37944c8a5e..6f3b6aef47ba 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -21,8 +21,8 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, unsigned long);
-extern int amd_smn_read(u16 node, u32 address, u32 *value);
-extern int amd_smn_write(u16 node, u32 address, u32 value);
+int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
+int __must_check amd_smn_write(u16 node, u32 address, u32 value);
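
__must_check is the kernel's spelling of the compiler's warn_unused_result attribute, so silently dropped SMN read/write failures now surface as build warnings. A user-space sketch of the same mechanism, with a made-up accessor:

    #define MUST_CHECK __attribute__((warn_unused_result))

    static MUST_CHECK int smn_read_demo(unsigned int addr, unsigned int *val)
    {
            *val = addr;            /* stand-in for the real SMN access */
            return 0;
    }

    int main(void)
    {
            unsigned int v;

            if (smn_read_demo(8, &v))       /* checked: no warning */
                    return 1;
            /* a bare smn_read_demo(8, &v); would trip -Wunused-result */
            return 0;
    }
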
struct amd_l3_cache {
unsigned indices;
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index ed2797f132ce..62cef2113ca7 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -93,10 +93,9 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp,
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
- _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
- : [ptr] "+m" (*(_ptr)), \
- "+a" (o.low), "+d" (o.high) \
- : "b" (n.low), "c" (n.high), "S" (_ptr) \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
o.full; \
@@ -122,12 +121,11 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
- _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
CC_SET(e) \
: CC_OUT(e) (ret), \
- [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
- : "b" (n.low), "c" (n.high), "S" (_ptr) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
if (unlikely(!ret)) \
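
A toy illustration of the 'a' operand modifier adopted above: with the pointer pinned in %esi/%rsi by the "S" constraint (which the cmpxchg8b_emu fallback expects), %a prints the register operand as an address, e.g. "(%rsi)", so the instruction still gets a memory operand without carrying a duplicate "+m" version of the same pointer.

    static inline unsigned long load_via_a(unsigned long *p)
    {
            unsigned long v;

            /* emits e.g. "mov (%rsi), %rax" */
            asm volatile("mov %a1, %0" : "=r" (v) : "S" (p) : "memory");
            return v;
    }
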
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index b6325ee30871..3831f612e89c 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -280,10 +280,10 @@ struct x86_cpu_desc {
u32 x86_microcode_rev;
};
-#define INTEL_CPU_DESC(model, stepping, revision) { \
- .x86_family = 6, \
- .x86_vendor = X86_VENDOR_INTEL, \
- .x86_model = (model), \
+#define INTEL_CPU_DESC(vfm, stepping, revision) { \
+ .x86_family = VFM_FAMILY(vfm), \
+ .x86_vendor = VFM_VENDOR(vfm), \
+ .x86_model = VFM_MODEL(vfm), \
.x86_stepping = (stepping), \
.x86_microcode_rev = (revision), \
}
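
A minimal sketch of the packed vendor-family-model idea behind the VFM_*() accessors; the bit layout below is illustrative (the authoritative one lives with the kernel's VFM_* definitions), but the point carries either way: one integer compare now checks all three identity fields, where a bare x86_model test could confuse equal model numbers across vendors or families.

    /* pack vendor, family and model into one comparable integer */
    #define VFM_MAKE_DEMO(vendor, family, model) \
            (((vendor) << 16) | ((family) << 8) | (model))

    #define VFM_VENDOR_DEMO(vfm)    (((vfm) >> 16) & 0xff)
    #define VFM_FAMILY_DEMO(vfm)    (((vfm) >>  8) & 0xff)
    #define VFM_MODEL_DEMO(vfm)     ( (vfm)        & 0xff)

    /* e.g. VFM_MAKE_DEMO(0, 6, 0x2a) == 0x062a; comparing the packed
     * value is equivalent to comparing vendor, family and model at once */
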
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 3c7434329661..b51e88d01b21 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -18,170 +18,170 @@
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature
+ * bit is not displayed in /proc/cpuinfo at all.
*
* When adding new features here that depend on other features,
* please update the table in kernel/cpu/cpuid-deps.c as well.
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */
+#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
-#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
+#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */
+#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */
+#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */
+#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
-#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */
+#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -189,93 +189,93 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
-#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
-#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
-#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
-#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
-#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
-#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
-#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
-#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
-#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
-#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
-#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
-#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */
+#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */
+#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */
+#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */
+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */
+#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */
/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
-#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
-#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
-#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
-#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
+#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
-#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions */
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */
+#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -283,181 +283,182 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
-#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
-#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
-#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
-#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
-#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
-#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
-#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
-#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
-#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
-#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
-#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
-#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
-#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
-#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
-#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
-#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
-#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
-#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
-#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
-#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
-#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
-#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
-#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
-#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
-#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
-#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
-#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */
+#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */
+#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */
+#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */
+#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */
+#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */
+#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */
+#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
-#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
-#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
-#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
-#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
-#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
-#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
-#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */
-#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
-#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */
-#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
-#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
-#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */
+#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */
+#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */
+#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
+#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
+#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
+#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
+#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
+#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
+#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
-#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
-#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
-#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
-#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
-#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
-#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
-#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
-#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
-#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
-#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
-#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
-#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
-#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
-#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
+#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
+#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
+#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */
+#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
+#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */
+#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
-#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
-#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
-#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
-#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */
+#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */
+#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
-#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
-#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */
-#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
-#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
-#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
-#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
-#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
-#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */
-#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
-#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
-#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
-#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */
-#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions */
+#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */
+#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */
+#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */
+#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
-#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */
-#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
-#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
-#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
-#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */
-#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
-#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
-#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
-#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
-#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
-#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
-#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
-#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
-#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
-#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
-#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */
+#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */
+#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */
+#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */
+#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */
+#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
-#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */
-#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
-#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
-#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
-#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
-#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
-#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
-#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
-#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
-#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
-#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
-#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
-#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */
+#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
+#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
+#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
+#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
-#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
-#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
-#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -465,59 +466,59 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
-#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
-#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
-#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
-#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FDP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
* to avoid confusion.
*/
-#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
-#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
-#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
-#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
-#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
-#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
-#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
-#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
-#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
-#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
-#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
-#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
-#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
-#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
-#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
-#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
+#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */
+#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */
+#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSBDS variant of BUG_MDS */
+#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
-#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
-#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
+#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
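
The rename pattern in the hunks above encodes the new /proc/cpuinfo display
convention: a flag is shown only when its comment carries a quoted "name",
and flags whose comment has no quoted string are Linux-internal and stay
hidden (previously an empty "" was needed to hide a flag). A hypothetical
pair illustrating the scheme (bit positions invented for the example):

	#define X86_FEATURE_EXAMPLE_SHOWN  (21*32+30) /* "example_shown" Displayed in /proc/cpuinfo */
	#define X86_FEATURE_EXAMPLE_HIDDEN (21*32+31) /* No quoted name, so never displayed */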
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7e523bb3d2d3..fb2809b20b0a 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#endif
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for x86 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 8 bits.
- *
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints (see cc_stack_align4/8 in
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints (see cc_stack_align4/8 in
* arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
* low bits from any entropy chosen here.
*
- * Therefore, final stack offset entropy will be 5 (x86_64) or
- * 6 (ia32) bits.
+ * Therefore, final stack offset entropy will be 7 (x86_64) or
+ * 8 (ia32) bits.
*/
- choose_random_kstack_offset(rdtsc() & 0xFF);
+ choose_random_kstack_offset(rdtsc());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
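
A minimal sketch of the arithmetic in the updated comment, assuming
KSTACK_OFFSET_MAX() masks its argument to 10 bits (0x3FF):

	unsigned long offset = rdtsc();
	offset &= 0x3FF;	/* KSTACK_OFFSET_MAX(): clamp to 10 bits */
	offset &= ~0x7UL;	/* x86_64 stack alignment drops the 3 low bits */
	/* 10 - 3 = 7 bits of per-syscall entropy (10 - 2 = 8 bits on ia32) */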
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index cc9ccf61b6bd..14d72727d7ee 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -6,6 +6,7 @@
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+ void (*free_pgt_page)(void *, void *); /* free buf for page table */
void *context; /* context for alloc_pgt_page */
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
@@ -16,4 +17,6 @@ struct x86_mapping_info {
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long pstart, unsigned long pend);
+void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
+
#endif /* _ASM_X86_INIT_H */
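
A sketch of how a caller might pair the new free hook with the existing
init path; the two callbacks are hypothetical, and only the struct fields
and the kernel_ident_mapping_*() functions come from this header:

	struct x86_mapping_info info = {
		.alloc_pgt_page	= my_alloc_pgt_page,	/* hypothetical allocator */
		.free_pgt_page	= my_free_pgt_page,	/* hypothetical free callback */
		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
	};

	if (kernel_ident_mapping_init(&info, pgd, pstart, pend) < 0)
		kernel_ident_mapping_free(&info, pgd);	/* unwind partial tables */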
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
deleted file mode 100644
index 994638ef171b..000000000000
--- a/arch/x86/include/asm/intel_pconfig.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_X86_INTEL_PCONFIG_H
-#define _ASM_X86_INTEL_PCONFIG_H
-
-#include <asm/asm.h>
-#include <asm/processor.h>
-
-enum pconfig_target {
- INVALID_TARGET = 0,
- MKTME_TARGET = 1,
- PCONFIG_TARGET_NR
-};
-
-int pconfig_target_supported(enum pconfig_target target);
-
-enum pconfig_leaf {
- MKTME_KEY_PROGRAM = 0,
- PCONFIG_LEAF_INVALID,
-};
-
-#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
-
-/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
-
-/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
-#define MKTME_KEYID_SET_KEY_DIRECT 0
-#define MKTME_KEYID_SET_KEY_RANDOM 1
-#define MKTME_KEYID_CLEAR_KEY 2
-#define MKTME_KEYID_NO_ENCRYPT 3
-
-/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
-#define MKTME_AES_XTS_128 (1 << 8)
-
-/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
-#define MKTME_PROG_SUCCESS 0
-#define MKTME_INVALID_PROG_CMD 1
-#define MKTME_ENTROPY_ERROR 2
-#define MKTME_INVALID_KEYID 3
-#define MKTME_INVALID_ENC_ALG 4
-#define MKTME_DEVICE_BUSY 5
-
-/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */
-struct mktme_key_program {
- u16 keyid;
- u32 keyid_ctrl;
- u8 __rsvd[58];
- u8 key_field_1[64];
- u8 key_field_2[64];
-} __packed __aligned(256);
-
-static inline int mktme_key_program(struct mktme_key_program *key_program)
-{
- unsigned long rax = MKTME_KEY_PROGRAM;
-
- if (!pconfig_target_supported(MKTME_TARGET))
- return -ENXIO;
-
- asm volatile(PCONFIG
- : "=a" (rax), "=b" (key_program)
- : "0" (rax), "1" (key_program)
- : "memory", "cc");
-
- return rax;
-}
-
-#endif /* _ASM_X86_INTEL_PCONFIG_H */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 8c5ae649d2df..cf7fc2b8e3ce 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -54,6 +54,26 @@ static __always_inline void native_halt(void)
asm volatile("hlt": : :"memory");
}
+static __always_inline int native_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & X86_EFLAGS_IF);
+}
+
+static __always_inline unsigned long native_local_irq_save(void)
+{
+ unsigned long flags = native_save_fl();
+
+ native_irq_disable();
+
+ return flags;
+}
+
+static __always_inline void native_local_irq_restore(unsigned long flags)
+{
+ if (!native_irqs_disabled_flags(flags))
+ native_irq_enable();
+}
+
#endif
#ifdef CONFIG_PARAVIRT_XXL
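
The three new helpers compose into the usual save/disable/restore idiom; a
minimal usage sketch with a placeholder critical section:

	unsigned long flags = native_local_irq_save();	/* disable, keep old IF */
	/* ... code that must not race with local interrupts ... */
	native_local_irq_restore(flags);	/* re-enable only if IF was set */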
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index dfd2e9699bd7..3ad29b128943 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -261,7 +261,8 @@ enum mcp_flags {
MCP_DONTLOG = BIT(2), /* only clear, don't log */
MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
};
-bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
int mce_notify_irq(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e022e6eb766c..01342963011e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -660,6 +660,8 @@
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_SVSM_CAA 0xc001f000
+
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
@@ -1164,6 +1166,7 @@
#define MSR_IA32_QM_CTR 0xc8e
#define MSR_IA32_PQR_ASSOC 0xc8f
#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
#define MSR_IA32_L2_CBM_BASE 0xd10
#define MSR_IA32_MBA_THRTL_BASE 0xd50
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index cc6b8e087192..af4302d79b59 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -54,7 +54,7 @@ static inline void clear_page(void *page)
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
"=D" (page),
- "0" (page)
+ "D" (page)
: "cc", "memory", "rax", "rcx");
}
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 65b8e5bb902c..e39311a89bf4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}
+static inline bool pte_decrypted(pte_t pte)
+{
+ return cc_mkdec(pte_val(pte)) == pte_val(pte);
+}
+
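
The helper relies on cc_mkdec() being idempotent: it converts a PTE value
to its decrypted (shared) encoding, so an already-decrypted PTE round-trips
unchanged. A sketch (ptep is a hypothetical PTE pointer):

	if (pte_decrypted(*ptep))
		; /* the mapping is shared with the host (unencrypted) */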
#define pmd_dirty pmd_dirty
static inline bool pmd_dirty(pmd_t pmd)
{
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b78644962626..2f321137736c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -549,6 +549,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
+ PG_LEVEL_256T,
PG_LEVEL_NUM
};
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cb4f6c513c48..a75a07f4931f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -692,7 +692,17 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_highest_perf(void);
-extern void amd_clear_divider(void);
+
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+static __always_inline void amd_clear_divider(void)
+{
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
+}
+
extern void amd_check_microcode(void);
#else
static inline u32 amd_get_highest_perf(void) { return 0; }
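
In the ALTERNATIVE() above the DIV is only patched in on parts flagged
with X86_BUG_DIV0; conceptually (a sketch, not the patched code):

	if (boot_cpu_has_bug(X86_BUG_DIV0))
		asm volatile("div %2" :: "a" (0), "d" (0), "r" (1));	/* 0/1 */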
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
new file mode 100644
index 000000000000..24e3a53ca255
--- /dev/null
+++ b/arch/x86/include/asm/runtime-const.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("mov %1,%0\n1:\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - %c2 - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"i" ((unsigned long)0x0123456789abcdefull), \
+ "i" (sizeof(long))); \
+ __ret; })
+
+// The 'typeof' will create at _least_ a 32-bit type, but
+// will happily also take a bigger type and the 'shrl' will
+// clear the upper bits
+#define runtime_const_shift_right_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("shrl $12,%k0\n1:\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - 1 - .\n\t" \
+ ".popsection" \
+ :"+r" (__ret)); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/*
+ * The text patching is trivial - you can only do this at init time,
+ * when the text section hasn't been marked RO, and before the text
+ * has ever been executed.
+ */
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ *(unsigned long *)where = val;
+}
+
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ *(unsigned char *)where = val;
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
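
A usage sketch for the machinery above (my_table and my_shift are hypothetical names; the corresponding RUNTIME_CONST() section entries would also have to be added to the linker script, which this hunk does not show):

struct bucket { void *head; };

static struct bucket *my_table;
static unsigned int my_shift;

static inline struct bucket *my_lookup(u32 hash)
{
        /* Both values were patched into the instruction stream at init. */
        struct bucket *b = runtime_const_ptr(my_table);

        return b + runtime_const_shift_right_32(hash, my_shift);
}

static void __init my_lookup_init(void)
{
        /* Fix up every recorded site before the text is marked read-only. */
        runtime_const_init(ptr, my_table);
        runtime_const_init(shift, my_shift);
}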
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 9aee31862b4a..4b2abce2e3e7 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
+
+bool set_memory_enc_stop_conversion(void);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
+
int set_memory_np_noalias(unsigned long addr, int numpages);
int set_memory_nonglobal(unsigned long addr, int numpages);
int set_memory_global(unsigned long addr, int numpages);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index e61e68d71cba..0667b2a88614 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -28,6 +28,8 @@
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
#ifndef __ASSEMBLY__
+#include <linux/cache.h>
+
#include <asm/bootparam.h>
#include <asm/x86_init.h>
@@ -133,6 +135,12 @@ asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data
#endif /* __i386__ */
#endif /* _SETUP */
+#ifdef CONFIG_CMDLINE_BOOL
+extern bool builtin_cmdline_added __ro_after_init;
+#else
+#define builtin_cmdline_added 0
+#endif
+
#else /* __ASSEMBLY */
.macro __RESERVE_BRK name, size
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 5a8246dd532f..e90d403f2068 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -98,6 +98,19 @@ enum psc_op {
/* GHCBData[63:32] */ \
(((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+/* GHCB Run at VMPL Request/Response */
+#define GHCB_MSR_VMPL_REQ 0x016
+#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
+ /* GHCBData[39:32] */ \
+ ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \
+ /* GHCBData[11:0] */ \
+ GHCB_MSR_VMPL_REQ)
+
+#define GHCB_MSR_VMPL_RESP 0x017
+#define GHCB_MSR_VMPL_RESP_VAL(v) \
+ /* GHCBData[63:32] */ \
+ (((u64)(v) & GENMASK_ULL(63, 32)) >> 32)
+
/* GHCB Hypervisor Feature Request/Response */
#define GHCB_MSR_HV_FT_REQ 0x080
#define GHCB_MSR_HV_FT_RESP 0x081
@@ -109,6 +122,7 @@ enum psc_op {
#define GHCB_HV_FT_SNP BIT_ULL(0)
#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
+#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5)
/*
* SNP Page State Change NAE event
@@ -163,6 +177,10 @@ struct snp_psc_desc {
#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */
#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */
#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */
+#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */
+#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */
+#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */
+#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
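
With the masking fixed as above, the VMPL request/response MSR values work out as follows (a worked example, assuming GHCB_MSR_INFO_MASK covers GHCBData[11:0]):

/*
 * GHCB_MSR_VMPL_REQ_LEVEL(0) == 0x0000000000000016    - run VMPL0
 * GHCB_MSR_VMPL_REQ_LEVEL(2) == 0x0000000200000016    - run VMPL2
 *
 * A response of 0x0000000000000017 decodes as:
 *   GHCB_RESP_CODE(...)         == GHCB_MSR_VMPL_RESP
 *   GHCB_MSR_VMPL_RESP_VAL(...) == 0                  - success
 */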
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index ca20cc4e5826..ac5886ce252e 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -152,10 +152,119 @@ struct snp_secrets_page {
u8 vmpck2[VMPCK_KEY_LEN];
u8 vmpck3[VMPCK_KEY_LEN];
struct secrets_os_area os_area;
- u8 rsvd3[3840];
+
+ u8 vmsa_tweak_bitmap[64];
+
+ /* SVSM fields */
+ u64 svsm_base;
+ u64 svsm_size;
+ u64 svsm_caa;
+ u32 svsm_max_version;
+ u8 svsm_guest_vmpl;
+ u8 rsvd3[3];
+
+ /* Remainder of page */
+ u8 rsvd4[3744];
} __packed;
+/*
+ * The SVSM Calling Area (CA) related structures.
+ */
+struct svsm_ca {
+ u8 call_pending;
+ u8 mem_available;
+ u8 rsvd1[6];
+
+ u8 svsm_buffer[PAGE_SIZE - 8];
+};
+
+#define SVSM_SUCCESS 0
+#define SVSM_ERR_INCOMPLETE 0x80000000
+#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001
+#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002
+#define SVSM_ERR_INVALID_ADDRESS 0x80000003
+#define SVSM_ERR_INVALID_FORMAT 0x80000004
+#define SVSM_ERR_INVALID_PARAMETER 0x80000005
+#define SVSM_ERR_INVALID_REQUEST 0x80000006
+#define SVSM_ERR_BUSY 0x80000007
+#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006
+
+/*
+ * The SVSM PVALIDATE related structures
+ */
+struct svsm_pvalidate_entry {
+ u64 page_size : 2,
+ action : 1,
+ ignore_cf : 1,
+ rsvd : 8,
+ pfn : 52;
+};
+
+struct svsm_pvalidate_call {
+ u16 num_entries;
+ u16 cur_index;
+
+ u8 rsvd1[4];
+
+ struct svsm_pvalidate_entry entry[];
+};
+
+#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \
+ offsetof(struct svsm_pvalidate_call, entry)) / \
+ sizeof(struct svsm_pvalidate_entry))
+
+/*
+ * The SVSM Attestation related structures
+ */
+struct svsm_loc_entry {
+ u64 pa;
+ u32 len;
+ u8 rsvd[4];
+};
+
+struct svsm_attest_call {
+ struct svsm_loc_entry report_buf;
+ struct svsm_loc_entry nonce;
+ struct svsm_loc_entry manifest_buf;
+ struct svsm_loc_entry certificates_buf;
+
+ /* For attesting a single service */
+ u8 service_guid[16];
+ u32 service_manifest_ver;
+ u8 rsvd[4];
+};
+
+/*
+ * SVSM protocol structure
+ */
+struct svsm_call {
+ struct svsm_ca *caa;
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 rax_out;
+ u64 rcx_out;
+ u64 rdx_out;
+ u64 r8_out;
+ u64 r9_out;
+};
+
+#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x))
+#define SVSM_CORE_REMAP_CA 0
+#define SVSM_CORE_PVALIDATE 1
+#define SVSM_CORE_CREATE_VCPU 2
+#define SVSM_CORE_DELETE_VCPU 3
+
+#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x))
+#define SVSM_ATTEST_SERVICES 0
+#define SVSM_ATTEST_SINGLE_SERVICE 1
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern u8 snp_vmpl;
+
extern void __sev_es_ist_enter(struct pt_regs *regs);
extern void __sev_es_ist_exit(void);
static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
@@ -181,6 +290,14 @@ static __always_inline void sev_es_nmi_complete(void)
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
extern void sev_enable(struct boot_params *bp);
+/*
+ * RMPADJUST modifies the RMP permissions of a page of a lesser-
+ * privileged (numerically higher) VMPL.
+ *
+ * If the guest is running at a higher privilege level than the one the
+ * instruction is targeting, the instruction will succeed; otherwise,
+ * it will fail.
+ */
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
int rc;
@@ -225,11 +342,16 @@ bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
void sev_show_status(void);
-#else
+void snp_update_svsm_ca(void);
+
+#else /* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define snp_vmpl 0
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
@@ -253,12 +375,17 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
{
return -ENOTTY;
}
-
+static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
+{
+ return -ENOTTY;
+}
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
-#endif
+static inline void snp_update_svsm_ca(void) { }
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
#ifdef CONFIG_KVM_AMD_SEV
bool snp_probe_rmptable_info(void);
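
A sketch of how the calling convention defined by struct svsm_call is exercised for a core-protocol request. svsm_perform_call_protocol() is assumed to be the guest-side dispatch helper living elsewhere in the SEV code, and the function name here is illustrative:

static int example_svsm_remap_ca(struct svsm_ca *caa, u64 new_caa_pa)
{
        struct svsm_call call = {};

        call.caa = caa;
        call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
        call.rcx = new_caa_pa;          /* must be page aligned */

        /* Assumed to retry on SVSM_ERR_INCOMPLETE internally. */
        return svsm_perform_call_protocol(&call);
}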
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a35936b512fe..ca073f40698f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -35,6 +35,7 @@ struct smp_ops {
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
+ void (*stop_this_cpu)(void);
void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 405efb3e4996..94408a784c8e 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -28,9 +28,6 @@ static inline cycles_t get_cycles(void)
}
#define get_cycles get_cycles
-extern struct system_counterval_t convert_art_to_tsc(u64 art);
-extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
-
extern void tsc_early_init(void);
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 0ef36190abe6..b2d2df026f6e 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -328,9 +328,8 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles,
* due to unsigned comparison.
*
* Due to the MSB/Sign-bit being used as invalid marker (see
- * arch_vdso_cycles_valid() above), the effective mask is S64_MAX,
- * but that case is also unlikely and will also take the unlikely path
- * here.
+ * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that
+ * case is also unlikely and will also take the unlikely path here.
*/
if (unlikely(delta > vd->max_cycles)) {
/*
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
index be199a9b2676..93226281b450 100644
--- a/arch/x86/include/asm/vdso/vsyscall.h
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -4,7 +4,6 @@
#ifndef __ASSEMBLY__
-#include <linux/hrtimer.h>
#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>
#include <asm/vgtod.h>
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 7aa38b2ad8a9..a0ce291abcae 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -14,11 +14,6 @@
#include <uapi/linux/time.h>
-#ifdef BUILD_VDSO32_64
-typedef u64 gtod_long_t;
-#else
-typedef unsigned long gtod_long_t;
-#endif
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index ac9fc51e2b18..c9cf43d5ef23 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -7,51 +7,321 @@
#include <linux/stringify.h>
/*
- * The hypercall definitions differ in the low word of the %edx argument
- * in the following way: the old port base interface uses the port
- * number to distinguish between high- and low bandwidth versions.
+ * VMware hypercall ABI.
+ *
+ * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall)
+ * have up to 6 input and 6 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg2 - Hypercall command
+ * arg3 bits [15:0] - Port number, LB and direction flags
+ *
+ * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB
+ *   hypercalls. They also have up to 6 input and 6 output arguments
+ *   passed and returned in registers, with a different
+ *   argument-to-register mapping:
+ * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3),
+ * %rsi (arg4), %rdi (arg5).
+ *
+ * - High bandwidth (HB) hypercalls are I/O port based only. They have
+ * up to 7 input and 7 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5), %ebp (arg6).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg1 - Hypercall command
+ * arg3 bits [15:0] - Port number, HB and direction flags
+ *
+ * For compatibility purposes, x86_64 systems use only the lower 32 bits
+ * for input and output arguments.
+ *
+ * The hypercall definitions differ in the low word of %edx (arg3)
+ * in the following way: the old I/O port based interface uses the port
+ * number to distinguish between high- and low bandwidth versions, and
+ * uses IN/OUT instructions to define transfer direction.
*
* The new vmcall interface instead uses a set of flags to select
* bandwidth mode and transfer direction. The flags should be loaded
- * into %dx by any user and are automatically replaced by the port
- * number if the VMWARE_HYPERVISOR_PORT method is used.
- *
- * In short, new driver code should strictly use the new definition of
- * %dx content.
+ * into arg3 by any user and are automatically replaced by the port
+ * number if the I/O port method is used.
*/
-/* Old port-based version */
-#define VMWARE_HYPERVISOR_PORT 0x5658
-#define VMWARE_HYPERVISOR_PORT_HB 0x5659
+#define VMWARE_HYPERVISOR_HB BIT(0)
+#define VMWARE_HYPERVISOR_OUT BIT(1)
-/* Current vmcall / vmmcall version */
-#define VMWARE_HYPERVISOR_HB BIT(0)
-#define VMWARE_HYPERVISOR_OUT BIT(1)
+#define VMWARE_HYPERVISOR_PORT 0x5658
+#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \
+ VMWARE_HYPERVISOR_HB)
-/* The low bandwidth call. The low word of edx is presumed clear. */
-#define VMWARE_HYPERCALL \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \
- "inl (%%dx), %%eax", \
- "vmcall", X86_FEATURE_VMCALL, \
- "vmmcall", X86_FEATURE_VMW_VMMCALL)
+#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U
+#define VMWARE_CMD_GETVERSION 10
+#define VMWARE_CMD_GETHZ 45
+#define VMWARE_CMD_GETVCPU_INFO 68
+#define VMWARE_CMD_STEALCLOCK 91
/*
- * The high bandwidth out call. The low word of edx is presumed to have the
- * HB and OUT bits set.
+ * Hypercall command mask:
+ * bits [6:0] command, range [0, 127]
+ * bits [19:16] sub-command, range [0, 15]
*/
-#define VMWARE_HYPERCALL_HB_OUT \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
- "rep outsb", \
- "vmcall", X86_FEATURE_VMCALL, \
- "vmmcall", X86_FEATURE_VMW_VMMCALL)
+#define VMWARE_CMD_MASK 0xf007fU
+
+#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0)
+#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1)
+
+extern unsigned long vmware_hypercall_slow(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
+
+#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL
+#define VMWARE_TDX_HCALL_FUNC 1
+
+extern unsigned long vmware_tdx_hypercall(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
/*
- * The high bandwidth in call. The low word of edx is presumed to have the
- * HB bit set.
+ * The low bandwidth call. The low word of %edx is presumed to have the OUT bit
+ * set. The high word of %edx may contain input data from the caller.
*/
-#define VMWARE_HYPERCALL_HB_IN \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
- "rep insb", \
- "vmcall", X86_FEATURE_VMCALL, \
+#define VMWARE_HYPERCALL \
+ ALTERNATIVE_2("movw %[port], %%dx\n\t" \
+ "inl (%%dx), %%eax", \
+ "vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
+
+static inline
+unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1,
+ unsigned long in3, u32 *out2,
+ u32 *out3, u32 *out4, u32 *out5)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4),
+ "=D" (*out5)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out1,
+ u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+#ifdef CONFIG_X86_64
+#define VMW_BP_CONSTRAINT "r"
+#else
+#define VMW_BP_CONSTRAINT "m"
+#endif
+
+/*
+ * High bandwidth calls are not supported on encrypted memory guests.
+ * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use
+ * the low bandwidth hypercall if memory encryption is enabled.
+ * This assumption simplifies the HB hypercall implementation to a plain
+ * I/O port based approach, without alternative patching.
+ */
+static inline
+unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep outsb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep insb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+#undef VMW_BP_CONSTRAINT
+#undef VMWARE_HYPERCALL
+
#endif
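
A sketch of how the low bandwidth wrappers above are meant to be used, mirroring the detection pattern in the VMware guest support code (illustrative only):

static bool example_vmware_present(void)
{
        u32 eax, ebx, ecx;

        eax = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &ebx, &ecx);

        /* The hypervisor echoes the magic back in %ebx on success. */
        return eax != (u32)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}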
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index 695f36664889..09b1d7e607c1 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -9,85 +9,85 @@
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature bit
+ * is not displayed in /proc/cpuinfo at all.
*/
/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */
-#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */
-#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */
+#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* VM-Exit on vectored interrupts */
+#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* VM-Exit on NMIs */
#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */
-#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */
-#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */
+#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* "preemption_timer" VMX Preemption Timer */
+#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* "posted_intr" Posted Interrupts */
/* EPT/VPID features, scattered to bits 16-23 */
-#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */
+#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* "invvpid" INVVPID is supported */
#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
-#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */
-#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */
-#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* 5-level EPT paging */
+#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* "ept_ad" EPT Accessed/Dirty bits */
+#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* "ept_1gb" 1GB EPT pages */
+#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* "ept_5level" 5-level EPT paging */
/* Aggregated APIC features 24-27 */
-#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */
-#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
+#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* "flexpriority" TPR shadow + virt APIC */
+#define VMX_FEATURE_APICV ( 0*32+ 25) /* "apicv" TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
/* VM-Functions, shifted to bits 28-31 */
-#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */
+#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* "eptp_switching" EPTP switching (in guest) */
/* Primary Processor-Based VM-Execution Controls, word 1 */
-#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */
+#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* VM-Exit if INTRs are unblocked in guest */
#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */
-#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */
-#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */
-#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */
-#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */
-#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */
-#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
-#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
-#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */
-#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
-#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
+#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* VM-Exit on HLT */
+#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* VM-Exit on INVLPG */
+#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* VM-Exit on MWAIT */
+#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* VM-Exit on RDPMC */
+#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* VM-Exit on RDTSC */
+#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* VM-Exit on writes to CR3 */
+#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* VM-Exit on reads from CR3 */
+#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* Enable Tertiary VM-Execution Controls */
+#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* VM-Exit on writes to CR8 */
+#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* VM-Exit on reads from CR8 */
#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
-#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */
-#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */
-#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/
-#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */
+#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* VM-Exit if NMIs are unblocked in guest */
+#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* VM-Exit on accesses to debug registers */
+#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* VM-Exit on *all* IN{S} and OUT{S} */
+#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* VM-Exit based on I/O port */
#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */
-#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */
-#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */
-#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */
-#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */
+#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* VM-Exit based on MSR index */
+#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* VM-Exit on MONITOR (MWAIT's accomplice) */
+#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* VM-Exit on PAUSE (unconditionally) */
+#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* Enable Secondary VM-Execution Controls */
/* Secondary Processor-Based VM-Execution Controls, word 2 */
#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */
-#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */
-#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */
-#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */
-#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */
-#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */
-#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */
-#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */
+#define VMX_FEATURE_EPT ( 2*32+ 1) /* "ept" Extended Page Tables, a.k.a. Two-Dimensional Paging */
+#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* VM-Exit on {S,L}*DT instructions */
+#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* Enable RDTSCP in guest */
+#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* Virtualize X2APIC for the guest */
+#define VMX_FEATURE_VPID ( 2*32+ 5) /* "vpid" Virtual Processor ID (TLB ASID modifier) */
+#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* VM-Exit on WBINVD */
+#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* "unrestricted_guest" Allow Big Real Mode and other "invalid" states */
#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */
#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */
#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */
-#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/
-#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */
-#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */
-#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */
-#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
-#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */
+#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* VM-Exit on RDRAND */
+#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* Enable INVPCID in guest */
+#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* Enable VM-Functions (leaf dependent) */
+#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* "shadow_vmcs" VMREAD/VMWRITE in guest can access shadow VMCS */
+#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */
+#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */
#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
-#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */
-#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
-#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
+#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */
+#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
-#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
-#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
-#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
-#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
-#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */
-#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */
+#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* "tsc_scaling" Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* "usr_wait_pause" Enable TPAUSE, UMONITOR, UMWAIT in guest */
+#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* VM-Exit on ENCLV (leaf dependent) */
+#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* VM-Exit when bus lock caused */
+#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* "notify_vm_exiting" VM-Exit when no event windows after notify window */
/* Tertiary Processor-Based VM-Execution Controls, word 3 */
-#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */
+#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* "ipi_virt" Enable IPI virtualization */
#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index e8d7d4941c4c..422a47746657 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -5,45 +5,12 @@
#include <linux/bitops.h>
#include <linux/wordpart.h>
-/*
- * This is largely generic for little-endian machines, but the
- * optimal byte mask counting is probably going to be something
- * that is architecture-specific. If you have a reliably fast
- * bit count instruction, that might be better than the multiply
- * and shift, for example.
- */
struct word_at_a_time {
const unsigned long one_bits, high_bits;
};
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
-#ifdef CONFIG_64BIT
-
-/*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
- * that works for the bytemasks without having to
- * mask them first.
- */
-static inline long count_masked_bytes(unsigned long mask)
-{
- return mask*0x0001020304050608ul >> 56;
-}
-
-#else /* 32-bit case */
-
-/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
-static inline long count_masked_bytes(long mask)
-{
- /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
- long a = (0x0ff0001+mask) >> 23;
- /* Fix the 1 for 00 case */
- return a & mask;
-}
-
-#endif
-
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
@@ -57,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
return bits;
}
+#ifdef CONFIG_64BIT
+
+/* Keep the initial has_zero() value for both bitmask and size calc */
+#define create_zero_mask(bits) (bits)
+
+static inline unsigned long zero_bytemask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+#define find_zero(bits) (__ffs(bits) >> 3)
+
+#else
+
+/* Create the final mask for both bytemask and size */
static inline unsigned long create_zero_mask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
@@ -66,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits)
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline unsigned long find_zero(unsigned long mask)
{
- return count_masked_bytes(mask);
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ long a = (0x0ff0001+mask) >> 23;
+ /* Fix the 1 for 00 case */
+ return a & mask;
}
+#endif
+
/*
* Load an unaligned word from kernel space.
*
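
The reshuffled helpers keep the usual calling pattern; an aligned strlen-style loop using them might look like this (a sketch mirroring the generic word-at-a-time users):

static size_t example_strlen_aligned(const unsigned long *p)
{
        const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
        size_t len = 0;

        for (;;) {
                unsigned long bits, word = *p++;

                if (has_zero(word, &bits, &constants)) {
                        bits = prep_zero_mask(word, bits, &constants);
                        /* A no-op on 64-bit after this change */
                        bits = create_zero_mask(bits);
                        return len + find_zero(bits);
                }
                len += sizeof(word);
        }
}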
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 6149eabe200f..213cf5379a5a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -149,12 +149,22 @@ struct x86_init_acpi {
* @enc_status_change_finish Notify HV after the encryption status of a range is changed
* @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status
* @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin Begin the two-step process of converting shared memory back
+ * to private. It stops new conversions from being started
+ * and waits for in-flight conversions to finish, if possible.
+ * @enc_kexec_finish Finish the two-step process of converting shared memory to
+ * private. All memory is private when the function returns.
+ * It is called on only one CPU while the others are shut down
+ * and with interrupts disabled.
*/
struct x86_guest {
- bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
- bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+ int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
bool (*enc_tlb_flush_required)(bool enc);
bool (*enc_cache_flush_required)(void);
+ void (*enc_kexec_begin)(void);
+ void (*enc_kexec_finish)(void);
};
/**
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 80e1df482337..1814b413fd57 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -115,6 +115,7 @@
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 20a0dd51700a..a847180836e4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,7 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
CFLAGS_REMOVE_head32.o = -pg
-CFLAGS_REMOVE_sev.o = -pg
CFLAGS_REMOVE_rethook.o = -pg
endif
@@ -26,19 +25,16 @@ KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
-KASAN_SANITIZE_sev.o := n
# With some compiler versions the generated code results in boot hangs, caused
# by several compilation units. To be safe, disable all instrumentation.
KCSAN_SANITIZE := n
KMSAN_SANITIZE_head$(BITS).o := n
KMSAN_SANITIZE_nmi.o := n
-KMSAN_SANITIZE_sev.o := n
# If instrumentation of the following files is enabled, boot hangs during
# the first second.
KCOV_INSTRUMENT_head$(BITS).o := n
-KCOV_INSTRUMENT_sev.o := n
CFLAGS_irq.o := -I $(src)/../include/asm/trace
@@ -142,8 +138,6 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
-obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
-
obj-$(CONFIG_CFI_CLANG) += cfi.o
obj-$(CONFIG_CALL_THUNKS) += callthunks.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index fc17b3f136fe..842a5f449404 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
obj-$(CONFIG_ACPI_APEI) += apei.o
obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o
+obj-$(CONFIG_ACPI_MADT_WAKEUP) += madt_wakeup.o madt_playdead.o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += cstate.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4bf82dbd2a6b..9f4618dcd704 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -67,13 +67,6 @@ static bool has_lapic_cpus __initdata;
static bool acpi_support_online_capable;
#endif
-#ifdef CONFIG_X86_64
-/* Physical address of the Multiprocessor Wakeup Structure mailbox */
-static u64 acpi_mp_wake_mailbox_paddr;
-/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
-static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
-#endif
-
#ifdef CONFIG_X86_IO_APIC
/*
* Locks related to IOAPIC hotplug
@@ -341,60 +334,6 @@ acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long e
return 0;
}
-
-#ifdef CONFIG_X86_64
-static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
-{
- /*
- * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
- *
- * Wakeup of secondary CPUs is fully serialized in the core code.
- * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
- */
- if (!acpi_mp_wake_mailbox) {
- acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
- sizeof(*acpi_mp_wake_mailbox),
- MEMREMAP_WB);
- }
-
- /*
- * Mailbox memory is shared between the firmware and OS. Firmware will
- * listen on mailbox command address, and once it receives the wakeup
- * command, the CPU associated with the given apicid will be booted.
- *
- * The value of 'apic_id' and 'wakeup_vector' must be visible to the
- * firmware before the wakeup command is visible. smp_store_release()
- * ensures ordering and visibility.
- */
- acpi_mp_wake_mailbox->apic_id = apicid;
- acpi_mp_wake_mailbox->wakeup_vector = start_ip;
- smp_store_release(&acpi_mp_wake_mailbox->command,
- ACPI_MP_WAKE_COMMAND_WAKEUP);
-
- /*
- * Wait for the CPU to wake up.
- *
- * The CPU being woken up is essentially in a spin loop waiting to be
- * woken up. It should not take long for it wake up and acknowledge by
- * zeroing out ->command.
- *
- * ACPI specification doesn't provide any guidance on how long kernel
- * has to wait for a wake up acknowledgement. It also doesn't provide
- * a way to cancel a wake up request if it takes too long.
- *
- * In TDX environment, the VMM has control over how long it takes to
- * wake up secondary. It can postpone scheduling secondary vCPU
- * indefinitely. Giving up on wake up request and reporting error opens
- * possible attack vector for VMM: it can wake up a secondary CPU when
- * kernel doesn't expect it. Wait until positive result of the wake up
- * request.
- */
- while (READ_ONCE(acpi_mp_wake_mailbox->command))
- cpu_relax();
-
- return 0;
-}
-#endif /* CONFIG_X86_64 */
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
@@ -1124,29 +1063,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
}
return 0;
}
-
-#ifdef CONFIG_X86_64
-static int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
- const unsigned long end)
-{
- struct acpi_madt_multiproc_wakeup *mp_wake;
-
- if (!IS_ENABLED(CONFIG_SMP))
- return -ENODEV;
-
- mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
- if (BAD_MADT_ENTRY(mp_wake, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(&header->common);
-
- acpi_mp_wake_mailbox_paddr = mp_wake->base_address;
-
- apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
-
- return 0;
-}
-#endif /* CONFIG_X86_64 */
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
@@ -1343,7 +1259,7 @@ static void __init acpi_process_madt(void)
smp_found_config = 1;
}
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_ACPI_MADT_WAKEUP
/*
* Parse MADT MP Wake entry.
*/
diff --git a/arch/x86/kernel/acpi/madt_playdead.S b/arch/x86/kernel/acpi/madt_playdead.S
new file mode 100644
index 000000000000..4e498d28cdc8
--- /dev/null
+++ b/arch/x86/kernel/acpi/madt_playdead.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+
+ .text
+ .align PAGE_SIZE
+
+/*
+ * asm_acpi_mp_play_dead() - Hand over control of the CPU to the BIOS
+ *
+ * rdi: Address of the ACPI MADT MPWK ResetVector
+ * rsi: PGD of the identity mapping
+ */
+SYM_FUNC_START(asm_acpi_mp_play_dead)
+ /* Turn off global entries. The following CR3 write will flush them. */
+ movq %cr4, %rdx
+ andq $~(X86_CR4_PGE), %rdx
+ movq %rdx, %cr4
+
+ /* Switch to identity mapping */
+ movq %rsi, %cr3
+
+ /* Jump to reset vector */
+ ANNOTATE_RETPOLINE_SAFE
+ jmp *%rdi
+SYM_FUNC_END(asm_acpi_mp_play_dead)
diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c
new file mode 100644
index 000000000000..6cfe762be28b
--- /dev/null
+++ b/arch/x86/kernel/acpi/madt_wakeup.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/acpi.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/pgtable.h>
+#include <linux/sched/hotplug.h>
+#include <asm/apic.h>
+#include <asm/barrier.h>
+#include <asm/init.h>
+#include <asm/intel_pt.h>
+#include <asm/nmi.h>
+#include <asm/processor.h>
+#include <asm/reboot.h>
+
+/* Physical address of the Multiprocessor Wakeup Structure mailbox */
+static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;
+
+/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
+static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init;
+
+static u64 acpi_mp_pgd __ro_after_init;
+static u64 acpi_mp_reset_vector_paddr __ro_after_init;
+
+static void acpi_mp_stop_this_cpu(void)
+{
+ asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
+}
+
+static void acpi_mp_play_dead(void)
+{
+ play_dead_common();
+ asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
+}
+
+static void acpi_mp_cpu_die(unsigned int cpu)
+{
+ u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ unsigned long timeout;
+
+ /*
+ * Use the TEST mailbox command to prove that the BIOS got control
+ * over the CPU before declaring it dead.
+ *
+ * The BIOS has to clear the 'command' field of the mailbox.
+ */
+ acpi_mp_wake_mailbox->apic_id = apicid;
+ smp_store_release(&acpi_mp_wake_mailbox->command,
+ ACPI_MP_WAKE_COMMAND_TEST);
+
+ /* Don't wait longer than a second. */
+ timeout = USEC_PER_SEC;
+ while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
+ udelay(1);
+
+ if (!timeout)
+ pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
+}
+
+/* The argument is required to match the type of x86_mapping_info::alloc_pgt_page */
+static void __init *alloc_pgt_page(void *dummy)
+{
+ return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static void __init free_pgt_page(void *pgt, void *dummy)
+{
+ return memblock_free(pgt, PAGE_SIZE);
+}
+
+/*
+ * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
+ * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches
+ * to the identity mapping and the function has to be present at the same spot in
+ * the virtual address space before and after switching page tables.
+ */
+static int __init init_transition_pgtable(pgd_t *pgd)
+{
+ pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
+ unsigned long vaddr, paddr;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ vaddr = (unsigned long)asm_acpi_mp_play_dead;
+ pgd += pgd_index(vaddr);
+ if (!pgd_present(*pgd)) {
+ p4d = (p4d_t *)alloc_pgt_page(NULL);
+ if (!p4d)
+ return -ENOMEM;
+ set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+ }
+ p4d = p4d_offset(pgd, vaddr);
+ if (!p4d_present(*p4d)) {
+ pud = (pud_t *)alloc_pgt_page(NULL);
+ if (!pud)
+ return -ENOMEM;
+ set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+ }
+ pud = pud_offset(p4d, vaddr);
+ if (!pud_present(*pud)) {
+ pmd = (pmd_t *)alloc_pgt_page(NULL);
+ if (!pmd)
+ return -ENOMEM;
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+ pmd = pmd_offset(pud, vaddr);
+ if (!pmd_present(*pmd)) {
+ pte = (pte_t *)alloc_pgt_page(NULL);
+ if (!pte)
+ return -ENOMEM;
+ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+ }
+ pte = pte_offset_kernel(pmd, vaddr);
+
+ paddr = __pa(vaddr);
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+
+ return 0;
+}
+
+static int __init acpi_mp_setup_reset(u64 reset_vector)
+{
+ struct x86_mapping_info info = {
+ .alloc_pgt_page = alloc_pgt_page,
+ .free_pgt_page = free_pgt_page,
+ .page_flag = __PAGE_KERNEL_LARGE_EXEC,
+ .kernpg_flag = _KERNPG_TABLE_NOENC,
+ };
+ pgd_t *pgd;
+
+ pgd = alloc_pgt_page(NULL);
+ if (!pgd)
+ return -ENOMEM;
+
+ for (int i = 0; i < nr_pfn_mapped; i++) {
+ unsigned long mstart, mend;
+
+ mstart = pfn_mapped[i].start << PAGE_SHIFT;
+ mend = pfn_mapped[i].end << PAGE_SHIFT;
+ if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
+ kernel_ident_mapping_free(&info, pgd);
+ return -ENOMEM;
+ }
+ }
+
+ if (kernel_ident_mapping_init(&info, pgd,
+ PAGE_ALIGN_DOWN(reset_vector),
+ PAGE_ALIGN(reset_vector + 1))) {
+ kernel_ident_mapping_free(&info, pgd);
+ return -ENOMEM;
+ }
+
+ if (init_transition_pgtable(pgd)) {
+ kernel_ident_mapping_free(&info, pgd);
+ return -ENOMEM;
+ }
+
+ smp_ops.play_dead = acpi_mp_play_dead;
+ smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
+ smp_ops.cpu_die = acpi_mp_cpu_die;
+
+ acpi_mp_reset_vector_paddr = reset_vector;
+ acpi_mp_pgd = __pa(pgd);
+
+ return 0;
+}
+
+static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
+{
+ if (!acpi_mp_wake_mailbox_paddr) {
+ pr_warn_once("No MADT mailbox: cannot bring up secondary CPUs. Booting with kexec?\n");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
+ *
+ * Wakeup of secondary CPUs is fully serialized in the core code.
+ * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
+ */
+ if (!acpi_mp_wake_mailbox) {
+ acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
+ sizeof(*acpi_mp_wake_mailbox),
+ MEMREMAP_WB);
+ }
+
+ /*
+ * Mailbox memory is shared between the firmware and the OS. The
+ * firmware listens on the mailbox command address and, once it
+ * receives the wakeup command, boots the CPU associated with the
+ * given apicid.
+ *
+ * The value of 'apic_id' and 'wakeup_vector' must be visible to the
+ * firmware before the wakeup command is visible. smp_store_release()
+ * ensures ordering and visibility.
+ */
+ acpi_mp_wake_mailbox->apic_id = apicid;
+ acpi_mp_wake_mailbox->wakeup_vector = start_ip;
+ smp_store_release(&acpi_mp_wake_mailbox->command,
+ ACPI_MP_WAKE_COMMAND_WAKEUP);
+
+ /*
+ * Wait for the CPU to wake up.
+ *
+ * The CPU being woken up is essentially in a spin loop waiting to be
+ * woken up. It should not take long for it to wake up and acknowledge by
+ * zeroing out ->command.
+ *
+ * The ACPI specification doesn't provide any guidance on how long the
+ * kernel has to wait for a wake up acknowledgment. It also doesn't
+ * provide a way to cancel a wake up request if it takes too long.
+ *
+ * In a TDX environment, the VMM has control over how long it takes to
+ * wake up a secondary CPU. It can postpone scheduling the secondary
+ * vCPU indefinitely. Giving up on the wake up request and reporting an
+ * error opens a possible attack vector for the VMM: it can wake up a
+ * secondary CPU when the kernel doesn't expect it. Wait for a positive
+ * result of the wake up request.
+ */
+ while (READ_ONCE(acpi_mp_wake_mailbox->command))
+ cpu_relax();
+
+ return 0;
+}
+
+static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake)
+{
+ cpu_hotplug_disable_offlining();
+
+ /*
+ * ACPI MADT doesn't allow offlining a CPU after it has been onlined.
+ * This limits kexec: the second kernel won't be able to use more than
+ * one CPU.
+ *
+ * To prevent a kexec kernel from onlining secondary CPUs, invalidate
+ * the mailbox address in the ACPI MADT wakeup structure, which
+ * prevents the kexec kernel from using it.
+ *
+ * This is safe as the booting kernel has the mailbox address cached
+ * already and acpi_wakeup_cpu() uses the cached value to bring up the
+ * secondary CPUs.
+ *
+ * Note: This is a Linux specific convention and not covered by the
+ * ACPI specification.
+ */
+ mp_wake->mailbox_address = 0;
+}
+
+int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_multiproc_wakeup *mp_wake;
+
+ mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
+
+ /*
+ * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake
+ * entry. 'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger
+ * than the actual size of the MP wakeup entry in the ACPI table because the
+ * 'reset_vector' is only available in the V1 MP wakeup structure.
+ */
+ if (!mp_wake)
+ return -EINVAL;
+ if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
+ return -EINVAL;
+ if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(&header->common);
+
+ acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;
+
+ if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
+ mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
+ if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
+ pr_warn("Failed to setup MADT reset vector\n");
+ acpi_mp_disable_offlining(mp_wake);
+ }
+ } else {
+ /*
+ * CPU offlining requires version 1 of the ACPI MADT wakeup
+ * structure.
+ */
+ acpi_mp_disable_offlining(mp_wake);
+ }
+
+ apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
+
+ return 0;
+}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 89de61243272..333b16181357 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -432,6 +432,11 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
return 5;
}
+static inline u8 * instr_va(struct alt_instr *i)
+{
+ return (u8 *)&i->instr_offset + i->instr_offset;
+}
+
/*
* Replace instructions with better alternatives for this CPU type. This runs
* before SMP is initialized to avoid SMP problems with self modifying code.
@@ -447,7 +452,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
{
u8 insn_buff[MAX_PATCH_LEN];
u8 *instr, *replacement;
- struct alt_instr *a;
+ struct alt_instr *a, *b;
DPRINTK(ALT, "alt table %px, -> %px", start, end);
@@ -473,7 +478,18 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
- instr = (u8 *)&a->instr_offset + a->instr_offset;
+ /*
+ * In case of nested ALTERNATIVE()s the outer alternative might
+ * add more padding. To ensure consistent patching find the max
+ * padding for all alt_instr entries for this site (nested
+ * alternatives result in consecutive entries).
+ */
+ for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
+ u8 len = max(a->instrlen, b->instrlen);
+ a->instrlen = b->instrlen = len;
+ }
+
+ instr = instr_va(a);
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
@@ -1641,7 +1657,7 @@ static noinline void __init alt_reloc_selftest(void)
*/
asm_inline volatile (
ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS)
- : /* output */
+ : ASM_CALL_CONSTRAINT
: [mem] "m" (__alt_reloc_selftest_addr)
: _ASM_ARG1
);
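
The switch to ASM_CALL_CONSTRAINT follows the general rule for inline asm that contains a call; a minimal illustration of the pattern (generic sketch, some_func is hypothetical):

extern void some_func(void);    /* hypothetical */

static void example_asm_call(void)
{
        /*
         * Listing ASM_CALL_CONSTRAINT as an output tells the compiler
         * the stack pointer is involved, so the asm cannot be scheduled
         * before the stack frame is set up.
         */
        asm volatile("call some_func"
                     : ASM_CALL_CONSTRAINT
                     :
                     : "memory");
}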
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 027a8c7a2c9e..059e5c16af05 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -180,6 +180,43 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev,
return dev;
}
+/*
+ * SMN accesses may fail in ways that are difficult to detect here in the called
+ * functions amd_smn_read() and amd_smn_write(). Therefore, callers must do
+ * their own checking based on what behavior they expect.
+ *
+ * For SMN reads, the returned value may be zero if the register is Read-as-Zero.
+ * Or it may be a "PCI Error Response", e.g. all 0xFFs. The "PCI Error Response"
+ * can be checked here, and a proper error code can be returned.
+ *
+ * But the Read-as-Zero response cannot be verified here. A value of 0 may be
+ * correct in some cases, so callers must check that this value is correct
+ * for the register/fields they need.
+ *
+ * For SMN writes, success can be determined through a "write and read back" check.
+ * However, this is not robust when done here.
+ *
+ * Possible issues:
+ *
+ * 1) Bits that are "Write-1-to-Clear". In this case, the read value should
+ * *not* match the write value.
+ *
+ * 2) Bits that are "Read-as-Zero"/"Writes-Ignored". This information cannot be
+ * known here.
+ *
+ * 3) Bits that are "Reserved / Set to 1". Ditto above.
+ *
+ * Callers of amd_smn_write() should do the "write and read back" check
+ * themselves, if needed.
+ *
+ * For #1, they can see if their target bits got cleared.
+ *
+ * For #2 and #3, they can check if their target bits got set as intended.
+ *
+ * This matches what is done for RDMSR/WRMSR. As long as there's no #GP,
+ * the operation is considered a success, and the caller does their own
+ * checking.
+ */
static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
{
struct pci_dev *root;
@@ -202,9 +239,6 @@ static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
err = (write ? pci_write_config_dword(root, 0x64, *value)
: pci_read_config_dword(root, 0x64, value));
- if (err)
- pr_warn("Error %s SMN address 0x%x.\n",
- (write ? "writing to" : "reading from"), address);
out_unlock:
mutex_unlock(&smn_mutex);
@@ -213,7 +247,7 @@ out:
return err;
}
-int amd_smn_read(u16 node, u32 address, u32 *value)
+int __must_check amd_smn_read(u16 node, u32 address, u32 *value)
{
int err = __amd_smn_rw(node, address, value, false);
@@ -226,7 +260,7 @@ int amd_smn_read(u16 node, u32 address, u32 *value)
}
EXPORT_SYMBOL_GPL(amd_smn_read);
-int amd_smn_write(u16 node, u32 address, u32 value)
+int __must_check amd_smn_write(u16 node, u32 address, u32 value)
{
return __amd_smn_rw(node, address, &value, true);
}
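Given the rules in the comment block above, a caller that needs a verified SMN write would look roughly like this (a sketch; node, address and mask are illustrative):

	/* Illustrative "write and read back" caller for amd_smn_write(). */
	static int smn_set_bits_checked(u16 node, u32 addr, u32 mask)
	{
		u32 val;
		int err;

		err = amd_smn_read(node, addr, &val);
		if (err)
			return err;

		err = amd_smn_write(node, addr, val | mask);
		if (err)
			return err;

		/* Read back and verify the target bits (cases #2/#3 above). */
		err = amd_smn_read(node, addr, &val);
		if (err)
			return err;

		return (val & mask) == mask ? 0 : -EIO;
	}

With __must_check added below, silently ignoring either helper's return value now triggers a compiler warning.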
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a02bba0ed6b9..5857a0f5d514 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -34,7 +34,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o
ifdef CONFIG_CPU_SUP_INTEL
-obj-y += intel.o intel_pconfig.o tsx.o
+obj-y += intel.o tsx.o
obj-$(CONFIG_PM) += intel_epb.o
endif
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 44df3f11e731..be5889bded49 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1220,14 +1220,3 @@ void amd_check_microcode(void)
on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
-
-/*
- * Issue a DIV 0/1 insn to clear any division data from previous DIV
- * operations.
- */
-void noinstr amd_clear_divider(void)
-{
- asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
- :: "a" (0), "d" (0), "r" (1));
-}
-EXPORT_SYMBOL_GPL(amd_clear_divider);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b6f927f6c567..45675da354f3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1625,6 +1625,7 @@ static bool __init spec_ctrl_bhi_dis(void)
enum bhi_mitigations {
BHI_MITIGATION_OFF,
BHI_MITIGATION_ON,
+ BHI_MITIGATION_VMEXIT_ONLY,
};
static enum bhi_mitigations bhi_mitigation __ro_after_init =
@@ -1639,6 +1640,8 @@ static int __init spectre_bhi_parse_cmdline(char *str)
bhi_mitigation = BHI_MITIGATION_OFF;
else if (!strcmp(str, "on"))
bhi_mitigation = BHI_MITIGATION_ON;
+ else if (!strcmp(str, "vmexit"))
+ bhi_mitigation = BHI_MITIGATION_VMEXIT_ONLY;
else
pr_err("Ignoring unknown spectre_bhi option (%s)", str);
@@ -1659,19 +1662,22 @@ static void __init bhi_select_mitigation(void)
return;
}
+ /* Mitigate in hardware if supported */
if (spec_ctrl_bhi_dis())
return;
if (!IS_ENABLED(CONFIG_X86_64))
return;
- /* Mitigate KVM by default */
- setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
- pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+ if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) {
+ pr_info("Spectre BHI mitigation: SW BHB clearing on VM exit only\n");
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ return;
+ }
- /* Mitigate syscalls when the mitigation is forced =on */
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall and VM exit\n");
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
- pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
}
static void __init spectre_v2_select_mitigation(void)
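With the new enumerator wired into the parser above, the spectre_bhi= boot parameter accepts three values (example command lines):

	spectre_bhi=off      # no BHI mitigation
	spectre_bhi=on       # SW BHB clearing on syscall and VM exit
	spectre_bhi=vmexit   # SW BHB clearing on VM exit only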
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fdf3489d92a4..08b95a35b5cb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -72,19 +72,19 @@ static bool cpu_model_supports_sld __ro_after_init;
*/
static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
{
- switch (c->x86_model) {
- case INTEL_FAM6_CORE_YONAH:
- case INTEL_FAM6_CORE2_MEROM:
- case INTEL_FAM6_CORE2_MEROM_L:
- case INTEL_FAM6_CORE2_PENRYN:
- case INTEL_FAM6_CORE2_DUNNINGTON:
- case INTEL_FAM6_NEHALEM:
- case INTEL_FAM6_NEHALEM_G:
- case INTEL_FAM6_NEHALEM_EP:
- case INTEL_FAM6_NEHALEM_EX:
- case INTEL_FAM6_WESTMERE:
- case INTEL_FAM6_WESTMERE_EP:
- case INTEL_FAM6_SANDYBRIDGE:
+ switch (c->x86_vfm) {
+ case INTEL_CORE_YONAH:
+ case INTEL_CORE2_MEROM:
+ case INTEL_CORE2_MEROM_L:
+ case INTEL_CORE2_PENRYN:
+ case INTEL_CORE2_DUNNINGTON:
+ case INTEL_NEHALEM:
+ case INTEL_NEHALEM_G:
+ case INTEL_NEHALEM_EP:
+ case INTEL_NEHALEM_EX:
+ case INTEL_WESTMERE:
+ case INTEL_WESTMERE_EP:
+ case INTEL_SANDYBRIDGE:
setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
}
}
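The x86_vfm conversions in this file work because vendor, family and model are packed into one comparable value. A rough sketch of the encoding (bit positions illustrative; see VFM_MAKE() in <asm/cpu_device_id.h> for the real layout):

	/* Illustrative packing: one switch() key instead of three fields. */
	#define EXAMPLE_VFM(vendor, family, model) \
		(((u32)(vendor) << 16) | ((u32)(family) << 8) | (u32)(model))

Constants such as INTEL_SANDYBRIDGE are pre-packed this way, which is why the separate c->x86 family checks can be dropped throughout.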
@@ -106,9 +106,9 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
*/
if (c->x86 != 6)
return;
- switch (c->x86_model) {
- case INTEL_FAM6_XEON_PHI_KNL:
- case INTEL_FAM6_XEON_PHI_KNM:
+ switch (c->x86_vfm) {
+ case INTEL_XEON_PHI_KNL:
+ case INTEL_XEON_PHI_KNM:
break;
default:
return;
@@ -134,32 +134,32 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
* - Release note from 20180108 microcode release
*/
struct sku_microcode {
- u8 model;
+ u32 vfm;
u8 stepping;
u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
- { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 },
- { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 },
- { INTEL_FAM6_KABYLAKE, 0x09, 0x80 },
- { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 },
- { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 },
- { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
- { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
- { INTEL_FAM6_BROADWELL, 0x04, 0x28 },
- { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b },
- { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 },
- { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 },
- { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
- { INTEL_FAM6_HASWELL_L, 0x01, 0x21 },
- { INTEL_FAM6_HASWELL_G, 0x01, 0x18 },
- { INTEL_FAM6_HASWELL, 0x03, 0x23 },
- { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
- { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
- { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
+ { INTEL_KABYLAKE, 0x0B, 0x80 },
+ { INTEL_KABYLAKE, 0x0A, 0x80 },
+ { INTEL_KABYLAKE, 0x09, 0x80 },
+ { INTEL_KABYLAKE_L, 0x0A, 0x80 },
+ { INTEL_KABYLAKE_L, 0x09, 0x80 },
+ { INTEL_SKYLAKE_X, 0x03, 0x0100013e },
+ { INTEL_SKYLAKE_X, 0x04, 0x0200003c },
+ { INTEL_BROADWELL, 0x04, 0x28 },
+ { INTEL_BROADWELL_G, 0x01, 0x1b },
+ { INTEL_BROADWELL_D, 0x02, 0x14 },
+ { INTEL_BROADWELL_D, 0x03, 0x07000011 },
+ { INTEL_BROADWELL_X, 0x01, 0x0b000025 },
+ { INTEL_HASWELL_L, 0x01, 0x21 },
+ { INTEL_HASWELL_G, 0x01, 0x18 },
+ { INTEL_HASWELL, 0x03, 0x23 },
+ { INTEL_HASWELL_X, 0x02, 0x3b },
+ { INTEL_HASWELL_X, 0x04, 0x10 },
+ { INTEL_IVYBRIDGE_X, 0x04, 0x42a },
/* Observed in the wild */
- { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
- { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
+ { INTEL_SANDYBRIDGE_X, 0x06, 0x61b },
+ { INTEL_SANDYBRIDGE_X, 0x07, 0x712 },
};
static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
@@ -173,11 +173,8 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
return false;
- if (c->x86 != 6)
- return false;
-
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
- if (c->x86_model == spectre_bad_microcodes[i].model &&
+ if (c->x86_vfm == spectre_bad_microcodes[i].vfm &&
c->x86_stepping == spectre_bad_microcodes[i].stepping)
return (c->microcode <= spectre_bad_microcodes[i].microcode);
}
@@ -190,83 +187,35 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
static void detect_tme_early(struct cpuinfo_x86 *c)
{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
+ u64 tme_activate;
+ int keyid_bits;
rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
clear_cpu_cap(c, X86_FEATURE_TME);
return;
}
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
+ pr_info_once("x86/tme: enabled by BIOS\n");
keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
+ if (!keyid_bits)
+ return;
/*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ * KeyID bits are set by BIOS and can be present regardless
+ * of whether the kernel is using them. They effectively lower
+ * the number of physical address bits.
+ *
+ * Update cpuinfo_x86::x86_phys_bits accordingly.
*/
c->x86_phys_bits -= keyid_bits;
+ pr_info_once("x86/mktme: BIOS enabled: x86_phys_bits reduced by %d\n",
+ keyid_bits);
}
void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
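A worked example of the simplified detect_tme_early() arithmetic (MSR value illustrative):

	u64 tme_activate = 0x600000003ULL;	/* locked, enabled, 6 KeyID bits */

	/* TME_ACTIVATE_LOCKED()  -> 0x1, TME_ACTIVATE_ENABLED() -> 0x2    */
	/* TME_ACTIVATE_KEYID_BITS() -> (tme_activate >> 32) & 0xf == 6    */

	/* So a CPU advertising 46 physical address bits keeps 46 - 6 = 40
	 * bits for ordinary physical addresses. */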
@@ -320,7 +269,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* need the microcode to have already been loaded... so if it is
* not, recommend a BIOS update and disable large pages.
*/
- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
+ if (c->x86_vfm == INTEL_ATOM_BONNELL && c->x86_stepping <= 2 &&
c->microcode < 0x20e) {
pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -352,17 +301,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
}
/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
- if (c->x86 == 6) {
- switch (c->x86_model) {
- case INTEL_FAM6_ATOM_SALTWELL_MID:
- case INTEL_FAM6_ATOM_SALTWELL_TABLET:
- case INTEL_FAM6_ATOM_SILVERMONT_MID:
- case INTEL_FAM6_ATOM_AIRMONT_NP:
- set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
- break;
- default:
- break;
- }
+ switch (c->x86_vfm) {
+ case INTEL_ATOM_SALTWELL_MID:
+ case INTEL_ATOM_SALTWELL_TABLET:
+ case INTEL_ATOM_SILVERMONT_MID:
+ case INTEL_ATOM_AIRMONT_NP:
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
+ break;
}
/*
@@ -401,7 +346,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
* to be modified.
*/
- if (c->x86 == 5 && c->x86_model == 9) {
+ if (c->x86_vfm == INTEL_QUARK_X1000) {
pr_info("Disabling PGE capability bit\n");
setup_clear_cpu_cap(X86_FEATURE_PGE);
}
@@ -633,12 +578,13 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_PEBS);
}
- if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
- (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
+ if (boot_cpu_has(X86_FEATURE_CLFLUSH) &&
+ (c->x86_vfm == INTEL_CORE2_DUNNINGTON ||
+ c->x86_vfm == INTEL_NEHALEM_EX ||
+ c->x86_vfm == INTEL_WESTMERE_EX))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
- if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
- ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
+ if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
set_cpu_bug(c, X86_BUG_MONITOR);
#ifdef CONFIG_X86_64
@@ -1254,9 +1200,9 @@ void handle_bus_lock(struct pt_regs *regs)
* feature even though they do not enumerate IA32_CORE_CAPABILITIES.
*/
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
{}
};
diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c
deleted file mode 100644
index 5be2b1790282..000000000000
--- a/arch/x86/kernel/cpu/intel_pconfig.c
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Intel PCONFIG instruction support.
- *
- * Copyright (C) 2017 Intel Corporation
- *
- * Author:
- * Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
- */
-#include <linux/bug.h>
-#include <linux/limits.h>
-
-#include <asm/cpufeature.h>
-#include <asm/intel_pconfig.h>
-
-#define PCONFIG_CPUID 0x1b
-
-#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1)
-
-/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */
-enum {
- PCONFIG_CPUID_SUBLEAF_INVALID = 0,
- PCONFIG_CPUID_SUBLEAF_TARGETID = 1,
-};
-
-/* Bitmask of supported targets */
-static u64 targets_supported __read_mostly;
-
-int pconfig_target_supported(enum pconfig_target target)
-{
- /*
- * We would need to re-think the implementation once we get > 64
- * PCONFIG targets. Spec allows up to 2^32 targets.
- */
- BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64);
-
- if (WARN_ON_ONCE(target >= 64))
- return 0;
- return targets_supported & (1ULL << target);
-}
-
-static int __init intel_pconfig_init(void)
-{
- int subleaf;
-
- if (!boot_cpu_has(X86_FEATURE_PCONFIG))
- return 0;
-
- /*
- * Scan subleafs of PCONFIG CPUID leaf.
- *
- * Subleafs of the same type need not to be consecutive.
- *
- * Stop on the first invalid subleaf type. All subleafs after the first
- * invalid are invalid too.
- */
- for (subleaf = 0; subleaf < INT_MAX; subleaf++) {
- struct cpuid_regs regs;
-
- cpuid_count(PCONFIG_CPUID, subleaf,
- &regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
-
- switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) {
- case PCONFIG_CPUID_SUBLEAF_INVALID:
- /* Stop on the first invalid subleaf */
- goto out;
- case PCONFIG_CPUID_SUBLEAF_TARGETID:
- /* Mark supported PCONFIG targets */
- if (regs.ebx < 64)
- targets_supported |= (1ULL << regs.ebx);
- if (regs.ecx < 64)
- targets_supported |= (1ULL << regs.ecx);
- if (regs.edx < 64)
- targets_supported |= (1ULL << regs.edx);
- break;
- default:
- /* Unknown CPUID.PCONFIG subleaf: ignore */
- break;
- }
- }
-out:
- return 0;
-}
-arch_initcall(intel_pconfig_init);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index ad0623b659ed..b85ec7a4ec9e 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -677,10 +677,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
 * is already totally confused. In this case it's likely it will
* not fully execute the machine check handler either.
*/
-bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
- bool error_seen = false;
struct mce m;
int i;
@@ -754,8 +753,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
continue;
log_it:
- error_seen = true;
-
if (flags & MCP_DONTLOG)
goto clear_it;
@@ -787,8 +784,6 @@ clear_it:
*/
sync_core();
-
- return error_seen;
}
EXPORT_SYMBOL_GPL(machine_check_poll);
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 94953d749475..49ed3428785d 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -487,12 +487,16 @@ static void prepare_msrs(void *info)
wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
}
- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
+
+ if (m.misc)
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
} else {
wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
- wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
+
+ if (m.misc)
+ wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
}
}
@@ -795,4 +799,5 @@ static void __exit inject_exit(void)
module_init(inject_init);
module_exit(inject_exit);
+MODULE_DESCRIPTION("Machine check injection support");
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 1db560ed2ca3..68f537347466 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -30,8 +30,7 @@ dump_array()
# If the /* comment */ starts with a quote string, grab that.
VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')"
- [ -z "$VALUE" ] && VALUE="\"$NAME\""
- [ "$VALUE" = '""' ] && continue
+ [ ! "$VALUE" ] && continue
# Name is uppercase, VALUE is all lowercase
VALUE="$(echo "$VALUE" | tr A-Z a-z)"
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index a113d9aba553..1930fce9dfe9 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -19,7 +19,6 @@
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
-#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>
#include <asm/cpu_device_id.h>
@@ -60,7 +59,8 @@ static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);
-#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
+#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
+#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)
struct rdt_hw_resource rdt_resources_all[] = {
[RDT_RESOURCE_L3] =
@@ -68,8 +68,10 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
- .cache_level = 3,
- .domains = domain_init(RDT_RESOURCE_L3),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .mon_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3),
+ .mon_domains = mon_domain_init(RDT_RESOURCE_L3),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
@@ -82,8 +84,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
- .cache_level = 2,
- .domains = domain_init(RDT_RESOURCE_L2),
+ .ctrl_scope = RESCTRL_L2_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
@@ -96,8 +98,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_MBA,
.name = "MB",
- .cache_level = 3,
- .domains = domain_init(RDT_RESOURCE_MBA),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
@@ -108,8 +110,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_SMBA,
.name = "SMBA",
- .cache_level = 3,
- .domains = domain_init(RDT_RESOURCE_SMBA),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
@@ -306,8 +308,8 @@ static void rdt_get_cdp_l2_config(void)
static void mba_wrmsr_amd(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
for (i = m->low; i < m->high; i++)
@@ -330,8 +332,8 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
static void mba_wrmsr_intel(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
/* Write the delay values for mba. */
@@ -341,23 +343,38 @@ static void mba_wrmsr_intel(struct msr_param *m)
static void cat_wrmsr(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
-struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
+struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
{
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
lockdep_assert_cpus_held();
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
/* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->cpu_mask))
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
+struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
+{
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
return d;
}
@@ -379,24 +396,21 @@ void rdt_ctrl_update(void *arg)
}
/*
- * rdt_find_domain - Find a domain in a resource that matches input resource id
+ * rdt_find_domain - Search for a domain id in a resource domain list.
*
- * Search resource r's domain list to find the resource id. If the resource
- * id is found in a domain, return the domain. Otherwise, if requested by
- * caller, return the first domain whose id is bigger than the input id.
- * The domain list is sorted by id in ascending order.
+ * Search the domain list to find the domain id. If the domain id is
+ * found, return the domain. NULL otherwise. If the domain id is not
+ * found (and NULL returned) then the first domain with id bigger than
+ * the input id can be returned to the caller via @pos.
*/
-struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos)
+struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
+ struct list_head **pos)
{
- struct rdt_domain *d;
+ struct rdt_domain_hdr *d;
struct list_head *l;
- if (id < 0)
- return ERR_PTR(-ENODEV);
-
- list_for_each(l, &r->domains) {
- d = list_entry(l, struct rdt_domain, list);
+ list_for_each(l, h) {
+ d = list_entry(l, struct rdt_domain_hdr, list);
/* When id is found, return its domain. */
if (id == d->id)
return d;
@@ -425,18 +439,23 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
*dc = r->default_ctrl;
}
-static void domain_free(struct rdt_hw_domain *hw_dom)
+static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
+{
+ kfree(hw_dom->ctrl_val);
+ kfree(hw_dom);
+}
+
+static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
kfree(hw_dom->arch_mbm_total);
kfree(hw_dom->arch_mbm_local);
- kfree(hw_dom->ctrl_val);
kfree(hw_dom);
}
-static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
+static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct msr_param m;
u32 *dc;
@@ -461,7 +480,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
* @num_rmid: The size of the MBM counter array
* @hw_dom: The domain that owns the allocated arrays
*/
-static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
+static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
size_t tsize;
@@ -484,37 +503,45 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
return 0;
}
-/*
- * domain_add_cpu - Add a cpu to a resource's domain list.
- *
- * If an existing domain in the resource r's domain list matches the cpu's
- * resource id, add the cpu in the domain.
- *
- * Otherwise, a new domain is allocated and inserted into the right position
- * in the domain list sorted by id in ascending order.
- *
- * The order in the domain list is visible to users when we print entries
- * in the schemata file and schemata input is validated to have the same order
- * as this list.
- */
-static void domain_add_cpu(int cpu, struct rdt_resource *r)
+static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ switch (scope) {
+ case RESCTRL_L2_CACHE:
+ case RESCTRL_L3_CACHE:
+ return get_cpu_cacheinfo_id(cpu, scope);
+ case RESCTRL_L3_NODE:
+ return cpu_to_node(cpu);
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
+static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
+ struct rdt_hw_ctrl_domain *hw_dom;
struct list_head *add_pos = NULL;
- struct rdt_hw_domain *hw_dom;
- struct rdt_domain *d;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_ctrl_domain *d;
int err;
lockdep_assert_held(&domain_list_lock);
- d = rdt_find_domain(r, id, &add_pos);
- if (IS_ERR(d)) {
- pr_warn("Couldn't find cache id for CPU %d\n", cpu);
+ if (id < 0) {
+ pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->ctrl_scope, r->name);
return;
}
- if (d) {
- cpumask_set_cpu(cpu, &d->cpu_mask);
+ hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos);
+ if (hdr) {
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+ return;
+ d = container_of(hdr, struct rdt_ctrl_domain, hdr);
+
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
if (r->cache.arch_has_per_cpu_cfg)
rdt_domain_reconfigure_cdp(r);
return;
@@ -525,64 +552,187 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
d = &hw_dom->d_resctrl;
- d->id = id;
- cpumask_set_cpu(cpu, &d->cpu_mask);
+ d->hdr.id = id;
+ d->hdr.type = RESCTRL_CTRL_DOMAIN;
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
rdt_domain_reconfigure_cdp(r);
- if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
- domain_free(hw_dom);
+ if (domain_setup_ctrlval(r, d)) {
+ ctrl_domain_free(hw_dom);
return;
}
- if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
- domain_free(hw_dom);
+ list_add_tail_rcu(&d->hdr.list, add_pos);
+
+ err = resctrl_online_ctrl_domain(r, d);
+ if (err) {
+ list_del_rcu(&d->hdr.list);
+ synchronize_rcu();
+ ctrl_domain_free(hw_dom);
+ }
+}
+
+static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->mon_scope);
+ struct list_head *add_pos = NULL;
+ struct rdt_hw_mon_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_mon_domain *d;
+ int err;
+
+ lockdep_assert_held(&domain_list_lock);
+
+ if (id < 0) {
+ pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->mon_scope, r->name);
return;
}
- list_add_tail_rcu(&d->list, add_pos);
+ hdr = rdt_find_domain(&r->mon_domains, id, &add_pos);
+ if (hdr) {
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+ return;
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ return;
+ }
- err = resctrl_online_domain(r, d);
+ hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
+ if (!hw_dom)
+ return;
+
+ d = &hw_dom->d_resctrl;
+ d->hdr.id = id;
+ d->hdr.type = RESCTRL_MON_DOMAIN;
+ d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!d->ci) {
+ pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
+ mon_domain_free(hw_dom);
+ return;
+ }
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+
+ arch_mon_domain_online(r, d);
+
+ if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
+ mon_domain_free(hw_dom);
+ return;
+ }
+
+ list_add_tail_rcu(&d->hdr.list, add_pos);
+
+ err = resctrl_online_mon_domain(r, d);
if (err) {
- list_del_rcu(&d->list);
+ list_del_rcu(&d->hdr.list);
synchronize_rcu();
- domain_free(hw_dom);
+ mon_domain_free(hw_dom);
}
}
-static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+static void domain_add_cpu(int cpu, struct rdt_resource *r)
+{
+ if (r->alloc_capable)
+ domain_add_cpu_ctrl(cpu, r);
+ if (r->mon_capable)
+ domain_add_cpu_mon(cpu, r);
+}
+
+static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
- struct rdt_hw_domain *hw_dom;
- struct rdt_domain *d;
+ int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
+ struct rdt_hw_ctrl_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_ctrl_domain *d;
lockdep_assert_held(&domain_list_lock);
- d = rdt_find_domain(r, id, NULL);
- if (IS_ERR_OR_NULL(d)) {
- pr_warn("Couldn't find cache id for CPU %d\n", cpu);
+ if (id < 0) {
+ pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->ctrl_scope, r->name);
+ return;
+ }
+
+ hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);
+ if (!hdr) {
+ pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
+ id, cpu, r->name);
return;
}
- hw_dom = resctrl_to_arch_dom(d);
- cpumask_clear_cpu(cpu, &d->cpu_mask);
- if (cpumask_empty(&d->cpu_mask)) {
- resctrl_offline_domain(r, d);
- list_del_rcu(&d->list);
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+ return;
+
+ d = container_of(hdr, struct rdt_ctrl_domain, hdr);
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
+
+ cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+ if (cpumask_empty(&d->hdr.cpu_mask)) {
+ resctrl_offline_ctrl_domain(r, d);
+ list_del_rcu(&d->hdr.list);
synchronize_rcu();
/*
- * rdt_domain "d" is going to be freed below, so clear
+ * rdt_ctrl_domain "d" is going to be freed below, so clear
* its pointer from pseudo_lock_region struct.
*/
if (d->plr)
d->plr->d = NULL;
- domain_free(hw_dom);
+ ctrl_domain_free(hw_dom);
return;
}
}
+static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->mon_scope);
+ struct rdt_hw_mon_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_held(&domain_list_lock);
+
+ if (id < 0) {
+ pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->mon_scope, r->name);
+ return;
+ }
+
+ hdr = rdt_find_domain(&r->mon_domains, id, NULL);
+ if (!hdr) {
+ pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
+ id, cpu, r->name);
+ return;
+ }
+
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+ return;
+
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ hw_dom = resctrl_to_arch_mon_dom(d);
+
+ cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+ if (cpumask_empty(&d->hdr.cpu_mask)) {
+ resctrl_offline_mon_domain(r, d);
+ list_del_rcu(&d->hdr.list);
+ synchronize_rcu();
+ mon_domain_free(hw_dom);
+
+ return;
+ }
+}
+
+static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+{
+ if (r->alloc_capable)
+ domain_remove_cpu_ctrl(cpu, r);
+ if (r->mon_capable)
+ domain_remove_cpu_mon(cpu, r);
+}
+
static void clear_closid_rmid(int cpu)
{
struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
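To see why control and monitor domains now carry separate scopes, consider a Sub-NUMA Cluster layout (illustrative; SNC support can set mon_scope to RESCTRL_L3_NODE while ctrl_scope stays RESCTRL_L3_CACHE):

	/*
	 * CPUs 0-31 share one L3 (cache id 0) that SNC splits across
	 * NUMA nodes 0 and 1:
	 *
	 *   get_domain_id_from_scope(cpu, RESCTRL_L3_CACHE) == 0 for all CPUs
	 *   get_domain_id_from_scope(cpu, RESCTRL_L3_NODE)  == 0 or 1
	 *
	 * Result: one control domain, two monitor domains.
	 */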
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index b7291f60399c..50fa1fe9a073 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -60,7 +60,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
}
int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
struct resctrl_staged_config *cfg;
u32 closid = data->rdtgrp->closid;
@@ -69,7 +69,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}
@@ -139,7 +139,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
* resource type.
*/
int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
struct rdtgroup *rdtgrp = data->rdtgrp;
struct resctrl_staged_config *cfg;
@@ -148,7 +148,7 @@ int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}
@@ -208,8 +208,8 @@ static int parse_line(char *line, struct resctrl_schema *s,
struct resctrl_staged_config *cfg;
struct rdt_resource *r = s->res;
struct rdt_parse_data data;
+ struct rdt_ctrl_domain *d;
char *dom = NULL, *id;
- struct rdt_domain *d;
unsigned long dom_id;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -231,8 +231,8 @@ next:
return -EINVAL;
}
dom = strim(dom);
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
if (r->parse_ctrlval(&data, s, d))
@@ -272,15 +272,15 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
}
}
-int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
u32 idx = get_config_index(closid, t);
struct msr_param msr_param;
- if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
+ if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask))
return -EINVAL;
hw_dom->ctrl_val[idx] = cfg_val;
@@ -297,17 +297,17 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
{
struct resctrl_staged_config *cfg;
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct msr_param msr_param;
+ struct rdt_ctrl_domain *d;
enum resctrl_conf_type t;
- struct rdt_domain *d;
u32 idx;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- list_for_each_entry(d, &r->domains, list) {
- hw_dom = resctrl_to_arch_dom(d);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
msr_param.res = NULL;
for (t = 0; t < CDP_NUM_TYPES; t++) {
cfg = &hw_dom->d_resctrl.staged_config[t];
@@ -330,7 +330,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
}
}
if (msr_param.res)
- smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
return 0;
@@ -430,10 +430,10 @@ out:
return ret ?: nbytes;
}
-u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
u32 idx = get_config_index(closid, type);
return hw_dom->ctrl_val[idx];
@@ -442,7 +442,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
{
struct rdt_resource *r = schema->res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
bool sep = false;
u32 ctrl_val;
@@ -450,7 +450,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
lockdep_assert_cpus_held();
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_puts(s, ";");
@@ -460,7 +460,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
ctrl_val = resctrl_arch_get_config(r, dom, closid,
schema->conf_type);
- seq_printf(s, r->format_str, dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->hdr.id, max_data_width,
ctrl_val);
sep = true;
}
@@ -489,7 +489,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
} else {
seq_printf(s, "%s:%d=%x\n",
rdtgrp->plr->s->res->name,
- rdtgrp->plr->d->id,
+ rdtgrp->plr->d->hdr.id,
rdtgrp->plr->cbm);
}
} else {
@@ -514,8 +514,8 @@ static int smp_mon_event_count(void *arg)
}
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_domain *d, struct rdtgroup *rdtgrp,
- int evtid, int first)
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ cpumask_t *cpumask, int evtid, int first)
{
int cpu;
@@ -529,7 +529,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
rr->evtid = evtid;
rr->r = r;
rr->d = d;
- rr->val = 0;
rr->first = first;
rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
if (IS_ERR(rr->arch_mon_ctx)) {
@@ -537,7 +536,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
return;
}
- cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
+ cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
/*
* cpumask_any_housekeeping() prefers housekeeping CPUs, but
@@ -546,7 +545,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
	 * counters on some platforms if it's called in IRQ context.
*/
if (tick_nohz_full_cpu(cpu))
- smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+ smp_call_function_any(cpumask, mon_event_count, rr, 1);
else
smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
@@ -556,12 +555,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
+ struct rdt_domain_hdr *hdr;
+ struct rmid_read rr = {0};
+ struct rdt_mon_domain *d;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
union mon_data_bits md;
- struct rdt_domain *d;
- struct rmid_read rr;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
@@ -574,15 +574,40 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
resid = md.u.rid;
domid = md.u.domid;
evtid = md.u.evtid;
-
r = &rdt_resources_all[resid].r_resctrl;
- d = rdt_find_domain(r, domid, NULL);
- if (IS_ERR_OR_NULL(d)) {
+
+ if (md.u.sum) {
+ /*
+ * This file requires summing across all domains that share
+ * the L3 cache id that was provided in the "domid" field of the
+ * mon_data_bits union. Search all domains in the resource for
+ * one that matches this cache id.
+ */
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->ci->id == domid) {
+ rr.ci = d->ci;
+ mon_event_read(&rr, r, NULL, rdtgrp,
+ &d->ci->shared_cpu_map, evtid, false);
+ goto checkresult;
+ }
+ }
ret = -ENOENT;
goto out;
+ } else {
+ /*
+ * This file provides data from a single domain. Search
+ * the resource to find the domain with "domid".
+ */
+ hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
+ if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
+ ret = -ENOENT;
+ goto out;
+ }
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
}
- mon_event_read(&rr, r, d, rdtgrp, evtid, false);
+checkresult:
if (rr.err == -EIO)
seq_puts(m, "Error\n");
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index f1d926832ec8..955999aecfca 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -127,29 +127,54 @@ struct mon_evt {
};
/**
- * union mon_data_bits - Monitoring details for each event file
+ * union mon_data_bits - Monitoring details for each event file.
* @priv: Used to store monitoring event data in @u
- * as kernfs private data
- * @rid: Resource id associated with the event file
- * @evtid: Event id associated with the event file
- * @domid: The domain to which the event file belongs
- * @u: Name of the bit fields struct
+ * as kernfs private data.
+ * @u.rid: Resource id associated with the event file.
+ * @u.evtid: Event id associated with the event file.
+ * @u.sum: Set when event must be summed across multiple
+ * domains.
+ * @u.domid: When @u.sum is zero this is the domain to which
+ *				the event file belongs. When @u.sum is one this
+ * is the id of the L3 cache that all domains to be
+ * summed share.
+ * @u: Name of the bit fields struct.
*/
union mon_data_bits {
void *priv;
struct {
unsigned int rid : 10;
- enum resctrl_event_id evtid : 8;
+ enum resctrl_event_id evtid : 7;
+ unsigned int sum : 1;
unsigned int domid : 14;
} u;
};
+/**
+ * struct rmid_read - Data passed across smp_call*() to read event count.
+ * @rgrp: Resource group for which the counter is being read. If it is a parent
+ * resource group then its event count is summed with the count from all
+ * its child resource groups.
+ * @r: Resource describing the properties of the event being read.
+ * @d: Domain that the counter should be read from. If NULL then sum all
+ *	domains in @r sharing L3 @ci.id.
+ * @evtid: Which monitor event to read.
+ * @first: Initialize MBM counter when true.
+ * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @err: Error encountered when reading counter.
+ * @val: Returned value of event counter. If @rgrp is a parent resource group,
+ * @val includes the sum of event counts from its child resource groups.
+ *	If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id
+ * (summed across child resource groups if @rgrp is a parent resource group).
+ * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
+ */
struct rmid_read {
struct rdtgroup *rgrp;
struct rdt_resource *r;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
enum resctrl_event_id evtid;
bool first;
+ struct cacheinfo *ci;
int err;
u64 val;
void *arch_mon_ctx;
@@ -232,7 +257,7 @@ struct mongroup {
*/
struct pseudo_lock_region {
struct resctrl_schema *s;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
u32 cbm;
wait_queue_head_t lock_thread_wq;
int thread_done;
@@ -355,25 +380,41 @@ struct arch_mbm_state {
};
/**
- * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
- * a resource
+ * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
+ * a resource for a control function
* @d_resctrl: Properties exposed to the resctrl file system
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ *
+ * Members of this structure are accessed via helpers that provide abstraction.
+ */
+struct rdt_hw_ctrl_domain {
+ struct rdt_ctrl_domain d_resctrl;
+ u32 *ctrl_val;
+};
+
+/**
+ * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
+ * a resource for a monitor function
+ * @d_resctrl: Properties exposed to the resctrl file system
* @arch_mbm_total: arch private state for MBM total bandwidth
* @arch_mbm_local: arch private state for MBM local bandwidth
*
* Members of this structure are accessed via helpers that provide abstraction.
*/
-struct rdt_hw_domain {
- struct rdt_domain d_resctrl;
- u32 *ctrl_val;
+struct rdt_hw_mon_domain {
+ struct rdt_mon_domain d_resctrl;
struct arch_mbm_state *arch_mbm_total;
struct arch_mbm_state *arch_mbm_local;
};
-static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
+static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
{
- return container_of(r, struct rdt_hw_domain, d_resctrl);
+ return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl);
+}
+
+static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r)
+{
+ return container_of(r, struct rdt_hw_mon_domain, d_resctrl);
}
/**
@@ -385,7 +426,7 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
*/
struct msr_param {
struct rdt_resource *res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
u32 low;
u32 high;
};
@@ -458,9 +499,9 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r
}
int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d);
+ struct rdt_ctrl_domain *d);
int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d);
+ struct rdt_ctrl_domain *d);
extern struct mutex rdtgroup_mutex;
@@ -493,6 +534,8 @@ static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d);
+
/*
* To return the common struct rdt_resource, which is contained in struct
* rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
@@ -558,27 +601,28 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn);
int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
umode_t mask);
-struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos);
+struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
+ struct list_head **pos);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive);
-unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
+unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
-struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(u32 closid);
@@ -589,19 +633,19 @@ bool __init rdt_cpu_has(int flag);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_domain *d, struct rdtgroup *rdtgrp,
- int evtid, int first);
-void mbm_setup_overflow_handler(struct rdt_domain *dom,
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ cpumask_t *cpumask, int evtid, int first);
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
unsigned long delay_ms,
int exclude_cpu);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu);
void cqm_handle_limbo(struct work_struct *work);
-bool has_busy_rmid(struct rdt_domain *d);
-void __check_limbo(struct rdt_domain *d, bool force_free);
+bool has_busy_rmid(struct rdt_mon_domain *d);
+void __check_limbo(struct rdt_mon_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void __init thread_throttle_mode_init(void);
void __init mbm_config_rftype_init(const char *config);
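A sketch of how the widened mon_data_bits union encodes a summing event file (field values illustrative):

	union mon_data_bits md = { 0 };

	md.u.rid   = RDT_RESOURCE_L3;
	md.u.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;	/* fits the now 7-bit field */
	md.u.sum   = 1;	/* sum every domain sharing the L3 named below */
	md.u.domid = 3;	/* L3 cache id, not a domain id, since sum == 1 */

	/* md.priv is then stored as the kernfs private data of the file. */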
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 366f496ca3ce..851b561850e0 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -15,6 +15,8 @@
* Software Developer Manual June 2016, volume 3, section 17.17.
*/
+#define pr_fmt(fmt) "resctrl: " fmt
+
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/sizes.h>
@@ -97,6 +99,8 @@ unsigned int resctrl_rmid_realloc_limit;
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
+static int snc_nodes_per_l3_cache = 1;
+
/*
* The correction factor table is documented in Documentation/arch/x86/resctrl.rst.
* If rmid > rmid threshold, MBM total and local values should be multiplied
@@ -185,7 +189,43 @@ static inline struct rmid_entry *__rmid_entry(u32 idx)
return entry;
}
-static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
+/*
+ * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by
+ * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is
+ * needed. The physical RMID is the same as the logical RMID.
+ *
+ * On a platform with SNC mode enabled, Linux enables RMID sharing mode
+ * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel
+ * Resource Director Technology Architecture Specification" for a full
+ * description of RMID sharing mode).
+ *
+ * In RMID sharing mode there are fewer "logical RMID" values available
+ * to accumulate data ("physical RMIDs" are divided evenly between SNC
+ * nodes that share an L3 cache). Linux creates an rdt_mon_domain for
+ * each SNC node.
+ *
+ * The value loaded into IA32_PQR_ASSOC is the "logical RMID".
+ *
+ * Data is collected independently on each SNC node and can be retrieved
+ * using the "physical RMID" value computed by this function and loaded
+ * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node.
+ *
+ * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3
+ * cache. So a "physical RMID" may be read from any CPU that shares
+ * the L3 cache with the desired SNC node, not just from a CPU in
+ * the specific SNC node.
+ */
+static int logical_rmid_to_physical_rmid(int cpu, int lrmid)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+ if (snc_nodes_per_l3_cache == 1)
+ return lrmid;
+
+ return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid;
+}
+
+static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
{
u64 msr_val;
@@ -197,7 +237,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
* IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
* are error bits.
*/
- wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid);
rdmsrl(MSR_IA32_QM_CTR, msr_val);
if (msr_val & RMID_VAL_ERROR)
@@ -209,7 +249,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
return 0;
}
-static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
+static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
u32 rmid,
enum resctrl_event_id eventid)
{
@@ -228,19 +268,22 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
return NULL;
}
-void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid,
enum resctrl_event_id eventid)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
struct arch_mbm_state *am;
+ u32 prmid;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
if (am) {
memset(am, 0, sizeof(*am));
+ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
/* Record any initial, non-zero count value. */
- __rmid_read(rmid, eventid, &am->prev_msr);
+ __rmid_read_phys(prmid, eventid, &am->prev_msr);
}
}
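A worked example of the logical-to-physical RMID translation above (numbers illustrative):

	/*
	 * With snc_nodes_per_l3_cache == 2 and r->num_rmid == 256, a CPU
	 * on NUMA node 1 reading logical RMID 5 programs:
	 *
	 *   prmid = 5 + (1 % 2) * 256 = 261
	 *
	 * into IA32_QM_EVTSEL. Any CPU sharing that L3 may perform the read.
	 */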
@@ -248,9 +291,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
* Assumes that hardware counters are also reset and thus that there is
* no need to record initial non-zero counts.
*/
-void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
if (is_mbm_total_enabled())
memset(hw_dom->arch_mbm_total, 0,
@@ -269,22 +312,22 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >> shift;
}
-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
struct arch_mbm_state *am;
u64 msr_val, chunks;
+ u32 prmid;
int ret;
resctrl_arch_rmid_read_context_check();
- if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
- return -EINVAL;
-
- ret = __rmid_read(rmid, eventid, &msr_val);
+ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+ ret = __rmid_read_phys(prmid, eventid, &msr_val);
if (ret)
return ret;
@@ -320,7 +363,7 @@ static void limbo_release_entry(struct rmid_entry *entry)
* decrement the count. If the busy count gets to zero on an RMID, we
* free the RMID
*/
-void __check_limbo(struct rdt_domain *d, bool force_free)
+void __check_limbo(struct rdt_mon_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
@@ -364,7 +407,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
* CLOSID and RMID because there may be dependencies between them
* on some architectures.
*/
- trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->id, val);
+ trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
}
if (force_free || !rmid_dirty) {
@@ -378,7 +421,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}
-bool has_busy_rmid(struct rdt_domain *d)
+bool has_busy_rmid(struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
@@ -479,7 +522,7 @@ int alloc_rmid(u32 closid)
static void add_rmid_to_limbo(struct rmid_entry *entry)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
u32 idx;
lockdep_assert_held(&rdtgroup_mutex);
@@ -490,7 +533,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
entry->busy = 0;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
/*
* For the first limbo RMID in the domain,
		 * set up the limbo worker.
@@ -532,7 +575,7 @@ void free_rmid(u32 closid, u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
-static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
+static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
u32 rmid, enum resctrl_event_id evtid)
{
u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
@@ -549,7 +592,10 @@ static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
+ int cpu = smp_processor_id();
+ struct rdt_mon_domain *d;
struct mbm_state *m;
+ int err, ret;
u64 tval = 0;
if (rr->first) {
@@ -560,14 +606,47 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
return 0;
}
- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
- &tval, rr->arch_mon_ctx);
- if (rr->err)
- return rr->err;
+ if (rr->d) {
+		/* Reading a single domain: must run on a CPU in that domain. */
+ if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
+ return -EINVAL;
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (rr->err)
+ return rr->err;
- rr->val += tval;
+ rr->val += tval;
- return 0;
+ return 0;
+ }
+
+	/* Summing domains that share a cache: must run on a CPU of that cache. */
+ if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
+ return -EINVAL;
+
+ /*
+ * Legacy files must report the sum of an event across all
+ * domains that share the same L3 cache instance.
+ * Report success if a read from any domain succeeds, -EINVAL
+	 * (translated to "Unavailable" for user space) if reads from
+	 * all domains fail for any reason.
+ */
+ ret = -EINVAL;
+ list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
+ if (d->ci->id != rr->ci->id)
+ continue;
+ err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (!err) {
+ rr->val += tval;
+ ret = 0;
+ }
+ }
+
+ if (ret)
+ rr->err = ret;
+
+ return ret;
}
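In effect the legacy mon_data files gain sum semantics on SNC systems: reading, say, mon_L3_00/mbm_total_bytes (an illustrative path) adds up the counts from every SNC sub-domain of that L3 cache, and reports "Unavailable" only when every sub-domain read fails.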
/*
@@ -668,12 +747,12 @@ void mon_event_count(void *info)
* throttle MSRs already have low percentage values. To avoid
* unnecessarily restricting such rdtgroups, we also increase the bandwidth.
*/
-static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
+ struct rdt_ctrl_domain *dom_mba;
struct rdt_resource *r_mba;
- struct rdt_domain *dom_mba;
u32 cur_bw, user_bw, idx;
struct list_head *head;
struct rdtgroup *entry;
@@ -688,7 +767,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
idx = resctrl_arch_rmid_idx_encode(closid, rmid);
pmbm_data = &dom_mbm->mbm_local[idx];
- dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+ dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
pr_warn_once("Failure to get domain for MBA update\n");
return;
@@ -734,12 +813,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}
-static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
+static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid)
{
- struct rmid_read rr;
+ struct rmid_read rr = {0};
- rr.first = false;
rr.r = r;
rr.d = d;
@@ -792,17 +870,17 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
void cqm_handle_limbo(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- d = container_of(work, struct rdt_domain, cqm_limbo.work);
+ d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);
__check_limbo(d, false);
if (has_busy_rmid(d)) {
- d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
RESCTRL_PICK_ANY_CPU);
schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
delay);
@@ -820,13 +898,13 @@ void cqm_handle_limbo(struct work_struct *work)
* @exclude_cpu: Which CPU the handler should not run on,
* RESCTRL_PICK_ANY_CPU to pick any CPU.
*/
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
dom->cqm_work_cpu = cpu;
if (cpu < nr_cpu_ids)
@@ -837,9 +915,9 @@ void mbm_handle_overflow(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
struct rdtgroup *prgrp, *crgrp;
+ struct rdt_mon_domain *d;
struct list_head *head;
struct rdt_resource *r;
- struct rdt_domain *d;
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
@@ -852,7 +930,7 @@ void mbm_handle_overflow(struct work_struct *work)
goto out_unlock;
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- d = container_of(work, struct rdt_domain, mbm_over.work);
+ d = container_of(work, struct rdt_mon_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
@@ -869,7 +947,7 @@ void mbm_handle_overflow(struct work_struct *work)
* Re-check for housekeeping CPUs. This allows the overflow handler to
* move off a nohz_full CPU quickly.
*/
- d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
RESCTRL_PICK_ANY_CPU);
schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
@@ -886,7 +964,7 @@ out_unlock:
* @exclude_cpu: Which CPU the handler should not run on,
* RESCTRL_PICK_ANY_CPU to pick any CPU.
*/
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
@@ -898,7 +976,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
*/
if (!resctrl_mounted || !resctrl_arch_mon_capable())
return;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
dom->mbm_work_cpu = cpu;
if (cpu < nr_cpu_ids)
@@ -1015,6 +1093,88 @@ static void l3_mon_evt_init(struct rdt_resource *r)
list_add_tail(&mbm_local_event.list, &r->evt_list);
}
+/*
+ * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1
+ * which indicates that RMIDs are configured in legacy mode.
+ * This mode is incompatible with Linux resctrl semantics
+ * as RMIDs are partitioned between SNC nodes, which requires
+ * a user to know which RMID is allocated to a task.
+ * Clearing bit 0 reconfigures the RMID counters for use
+ * in RMID sharing mode. This mode is better for Linux.
+ * The RMID space is divided between all SNC nodes with the
+ * RMIDs renumbered to start from zero in each node when
+ * counting operations from tasks. Code to read the counters
+ * must adjust RMID counter numbers based on SNC node. See
+ * logical_rmid_to_physical_rmid() for code that does this.
+ */
+void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ if (snc_nodes_per_l3_cache > 1)
+ msr_clear_bit(MSR_RMID_SNC_CONFIG, 0);
+}
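The comment above points at logical_rmid_to_physical_rmid(), which lies outside this hunk. A minimal sketch of the mapping it performs, assuming the shape used elsewhere in this series (each SNC node owns a contiguous slice of the per-node RMID space):

static int logical_rmid_to_physical_rmid(int cpu, int lrmid)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	/* Without SNC, the logical and physical RMID spaces coincide. */
	if (snc_nodes_per_l3_cache == 1)
		return lrmid;

	/* Offset into the slice of counters owned by this CPU's SNC node. */
	return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid;
}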
+
+/* CPU models that support MSR_RMID_SNC_CONFIG */
+static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
+ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
+ {}
+};
+
+/*
+ * There isn't a simple hardware bit that indicates whether a CPU is running
+ * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
+ * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
+ * the same NUMA node as CPU0.
+ * It is not possible to accurately determine SNC state if the system is
+ * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
+ * to L3 caches. It will be OK if the system is booted with hyperthreading
+ * disabled (since this doesn't affect the ratio).
+ */
+static __init int snc_get_config(void)
+{
+ struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE);
+ const cpumask_t *node0_cpumask;
+ int cpus_per_node, cpus_per_l3;
+ int ret;
+
+ if (!x86_match_cpu(snc_cpu_ids) || !ci)
+ return 1;
+
+ cpus_read_lock();
+ if (num_online_cpus() != num_present_cpus())
+ pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
+ cpus_read_unlock();
+
+ node0_cpumask = cpumask_of_node(cpu_to_node(0));
+
+ cpus_per_node = cpumask_weight(node0_cpumask);
+ cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map);
+
+ if (!cpus_per_node || !cpus_per_l3)
+ return 1;
+
+ ret = cpus_per_l3 / cpus_per_node;
+
+	/* Sanity check: only valid results are 1, 2, 3, 4 */
+ switch (ret) {
+ case 1:
+ break;
+ case 2 ... 4:
+ pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret);
+ rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE;
+ break;
+ default:
+ pr_warn("Ignore improbable SNC node count %d\n", ret);
+ ret = 1;
+ break;
+ }
+
+ return ret;
+}
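A worked example with hypothetical counts: on an SNC-2 part where 96 CPUs share CPU0's L3 cache and CPU0's NUMA node holds 48 of them, cpus_per_l3 / cpus_per_node = 96 / 48 = 2, so two SNC nodes per L3 cache are reported and the L3 monitoring scope is switched to RESCTRL_L3_NODE.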
+
int __init rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
@@ -1022,9 +1182,11 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
unsigned int threshold;
int ret;
+ snc_nodes_per_l3_cache = snc_get_config();
+
resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
- hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
- r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+ hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
+ r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index aacf236dfe3b..e69489d48625 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -11,7 +11,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
@@ -221,7 +220,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
int cpu;
int ret;
- for_each_cpu(cpu, &plr->d->cpu_mask) {
+ for_each_cpu(cpu, &plr->d->hdr.cpu_mask) {
pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
if (!pm_req) {
rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
@@ -292,12 +291,15 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
*/
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
- struct cpu_cacheinfo *ci;
+ enum resctrl_scope scope = plr->s->res->ctrl_scope;
+ struct cacheinfo *ci;
int ret;
- int i;
+
+ if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
+ return -ENODEV;
/* Pick the first cpu we find that is associated with the cache. */
- plr->cpu = cpumask_first(&plr->d->cpu_mask);
+ plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask);
if (!cpu_online(plr->cpu)) {
rdt_last_cmd_printf("CPU %u associated with cache not online\n",
@@ -306,15 +308,11 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
goto out_region;
}
- ci = get_cpu_cacheinfo(plr->cpu);
-
- plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
-
- for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == plr->s->res->cache_level) {
- plr->line_size = ci->info_list[i].coherency_line_size;
- return 0;
- }
+ ci = get_cpu_cacheinfo_level(plr->cpu, scope);
+ if (ci) {
+ plr->line_size = ci->coherency_line_size;
+ plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
+ return 0;
}
ret = -1;
@@ -810,7 +808,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
* Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
{
unsigned int cbm_len;
unsigned long cbm_b;
@@ -837,11 +835,11 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm
* if it is not possible to test due to memory allocation issue,
* false otherwise.
*/
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
{
+ struct rdt_ctrl_domain *d_i;
cpumask_var_t cpu_with_psl;
struct rdt_resource *r;
- struct rdt_domain *d_i;
bool ret = false;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -855,10 +853,10 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* associated with them.
*/
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(d_i, &r->domains, list) {
+ list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) {
if (d_i->plr)
cpumask_or(cpu_with_psl, cpu_with_psl,
- &d_i->cpu_mask);
+ &d_i->hdr.cpu_mask);
}
}
@@ -866,7 +864,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* Next test if new pseudo-locked region would intersect with
* existing region.
*/
- if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
+ if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl))
ret = true;
free_cpumask_var(cpu_with_psl);
@@ -1198,7 +1196,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
}
plr->thread_done = 0;
- cpu = cpumask_first(&plr->d->cpu_mask);
+ cpu = cpumask_first(&plr->d->hdr.cpu_mask);
if (!cpu_online(cpu)) {
ret = -ENODEV;
goto out;
@@ -1528,7 +1526,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 02f213f1c51c..d7163b764c62 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -12,7 +12,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
@@ -92,13 +91,13 @@ void rdt_last_cmd_printf(const char *fmt, ...)
void rdt_staged_configs_clear(void)
{
+ struct rdt_ctrl_domain *dom;
struct rdt_resource *r;
- struct rdt_domain *dom;
lockdep_assert_held(&rdtgroup_mutex);
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
}
@@ -317,7 +316,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
- mask = &rdtgrp->plr->d->cpu_mask;
+ mask = &rdtgrp->plr->d->hdr.cpu_mask;
seq_printf(s, is_cpu_list(of) ?
"%*pbl\n" : "%*pb\n",
cpumask_pr_args(mask));
@@ -1012,7 +1011,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
unsigned long sw_shareable = 0, hw_shareable = 0;
unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_resource *r = s->res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
bool sep = false;
@@ -1021,12 +1020,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(seq, ';');
sw_shareable = 0;
exclusive = 0;
- seq_printf(seq, "%d=", dom->id);
+ seq_printf(seq, "%d=", dom->hdr.id);
for (i = 0; i < closids_supported(); i++) {
if (!closid_allocated(i))
continue;
@@ -1243,7 +1242,7 @@ static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
*
* Return: false if CBM does not overlap, true if it does.
*/
-static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid,
enum resctrl_conf_type type, bool exclusive)
{
@@ -1298,7 +1297,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -1329,10 +1328,10 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
+ struct rdt_ctrl_domain *d;
struct resctrl_schema *s;
struct rdt_resource *r;
bool has_cache = false;
- struct rdt_domain *d;
u32 ctrl;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -1343,7 +1342,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
continue;
has_cache = true;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
ctrl = resctrl_arch_get_config(r, d, closid,
s->conf_type);
if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
@@ -1448,20 +1447,19 @@ out:
* bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_domain *d, unsigned long cbm)
+ struct rdt_ctrl_domain *d, unsigned long cbm)
{
- struct cpu_cacheinfo *ci;
unsigned int size = 0;
- int num_b, i;
+ struct cacheinfo *ci;
+ int num_b;
+
+ if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
+ return size;
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
- ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
- for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == r->cache_level) {
- size = ci->info_list[i].size / r->cache.cbm_len * num_b;
- break;
- }
- }
+ ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
+ if (ci)
+ size = ci->size / r->cache.cbm_len * num_b;
return size;
}
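This function and pseudo_lock_region_init() above both lean on get_cpu_cacheinfo_level(), which is introduced outside this diff. A plausible sketch, assuming it simply scans the CPU's cacheinfo leaves for the requested cache level:

static inline struct cacheinfo *get_cpu_cacheinfo_level(int cpu, int level)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
	int i;

	/* Return the first leaf matching the requested level, if any. */
	for (i = 0; i < ci->num_leaves; i++) {
		if (ci->info_list[i].level == level)
			return &ci->info_list[i];
	}

	return NULL;
}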
@@ -1477,9 +1475,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
{
struct resctrl_schema *schema;
enum resctrl_conf_type type;
+ struct rdt_ctrl_domain *d;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
- struct rdt_domain *d;
unsigned int size;
int ret = 0;
u32 closid;
@@ -1503,7 +1501,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
}
goto out;
}
@@ -1515,7 +1513,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
type = schema->conf_type;
sep = false;
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1533,7 +1531,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
else
size = rdtgroup_cbm_to_size(r, d, ctrl);
}
- seq_printf(s, "%d=%u", d->id, size);
+ seq_printf(s, "%d=%u", d->hdr.id, size);
sep = true;
}
seq_putc(s, '\n');
@@ -1591,21 +1589,21 @@ static void mon_event_config_read(void *info)
mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
}
-static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
+static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
{
- smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
}
static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
struct mon_config_info mon_info = {0};
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
bool sep = false;
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
if (sep)
seq_puts(s, ";");
@@ -1613,7 +1611,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
mon_info.evtid = evtid;
mondata_config_read(dom, &mon_info);
- seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
+ seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
sep = true;
}
seq_puts(s, "\n");
@@ -1658,7 +1656,7 @@ static void mon_event_config_write(void *info)
}
static void mbm_config_write_domain(struct rdt_resource *r,
- struct rdt_domain *d, u32 evtid, u32 val)
+ struct rdt_mon_domain *d, u32 evtid, u32 val)
{
struct mon_config_info mon_info = {0};
@@ -1679,7 +1677,7 @@ static void mbm_config_write_domain(struct rdt_resource *r,
* are scoped at the domain level. Writing any of these MSRs
* on one CPU is observed by all the CPUs in the domain.
*/
- smp_call_function_any(&d->cpu_mask, mon_event_config_write,
+ smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
&mon_info, 1);
/*
@@ -1699,7 +1697,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
@@ -1729,8 +1727,8 @@ next:
return -EINVAL;
}
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
mbm_config_write_domain(r, d, evtid, val);
goto next;
}
@@ -2258,9 +2256,9 @@ static inline bool is_mba_linear(void)
static int set_cache_qos_cfg(int level, bool enable)
{
void (*update)(void *arg);
+ struct rdt_ctrl_domain *d;
struct rdt_resource *r_l;
cpumask_var_t cpu_mask;
- struct rdt_domain *d;
int cpu;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -2277,14 +2275,14 @@ static int set_cache_qos_cfg(int level, bool enable)
return -ENOMEM;
r_l = &rdt_resources_all[level].r_resctrl;
- list_for_each_entry(d, &r_l->domains, list) {
+ list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
- for_each_cpu(cpu, &d->cpu_mask)
+ for_each_cpu(cpu, &d->hdr.cpu_mask)
cpumask_set_cpu(cpu, cpu_mask);
else
/* Pick one CPU from each domain instance to update MSR */
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
}
/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
@@ -2310,10 +2308,10 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
l3_qos_cfg_update(&hw_res->cdp_enabled);
}
-static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
+static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
u32 num_closid = resctrl_arch_get_num_closid(r);
- int cpu = cpumask_any(&d->cpu_mask);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
int i;
d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
@@ -2328,7 +2326,7 @@ static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
}
static void mba_sc_domain_destroy(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
kfree(d->mbps_val);
d->mbps_val = NULL;
@@ -2336,14 +2334,18 @@ static void mba_sc_domain_destroy(struct rdt_resource *r,
/*
* MBA software controller is supported only if
- * MBM is supported and MBA is in linear scale.
+ * MBM is supported and MBA is in linear scale,
+ * and the MBM monitor scope is the same as MBA
+ * control scope.
*/
static bool supports_mba_mbps(void)
{
+ struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
return (is_mbm_local_enabled() &&
- r->alloc_capable && is_mba_linear());
+ r->alloc_capable && is_mba_linear() &&
+ r->ctrl_scope == rmbm->mon_scope);
}
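Under SNC this check fails by design: snc_get_config() moves the L3 monitoring scope to RESCTRL_L3_NODE while MBA control stays at L3 cache scope, so the mba_MBps software controller, which feeds per-domain MBM data into MBA control, is refused (see the mount-option message below).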
/*
@@ -2354,7 +2356,7 @@ static int set_mba_sc(bool mba_sc)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
u32 num_closid = resctrl_arch_get_num_closid(r);
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int i;
if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
@@ -2362,7 +2364,7 @@ static int set_mba_sc(bool mba_sc)
r->membw.mba_sc = mba_sc;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
@@ -2626,7 +2628,7 @@ static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
unsigned long flags = RFTYPE_CTRL_BASE;
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
struct rdt_resource *r;
int ret;
@@ -2701,7 +2703,7 @@ static int rdt_get_tree(struct fs_context *fc)
if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->mon_domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
}
@@ -2751,6 +2753,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct fs_parse_result result;
+ const char *msg;
int opt;
opt = fs_parse(fc, rdt_fs_parameters, param, &result);
@@ -2765,8 +2768,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mbps:
+ msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope";
if (!supports_mba_mbps())
- return -EINVAL;
+ return invalfc(fc, msg);
ctx->enable_mba_mbps = true;
return 0;
case Opt_debug:
@@ -2811,9 +2815,9 @@ static int rdt_init_fs_context(struct fs_context *fc)
static int reset_all_ctrls(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct msr_param msr_param;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int i;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -2825,16 +2829,16 @@ static int reset_all_ctrls(struct rdt_resource *r)
/*
* Disable resource control for this resource by setting all
- * CBMs in all domains to the maximum mask value. Pick one CPU
+ * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
* from each domain to update the MSRs below.
*/
- list_for_each_entry(d, &r->domains, list) {
- hw_dom = resctrl_to_arch_dom(d);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
for (i = 0; i < hw_res->num_closid; i++)
hw_dom->ctrl_val[i] = r->default_ctrl;
msr_param.dom = d;
- smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
return 0;
@@ -3002,62 +3006,126 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
return ret;
}
+static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
+{
+ struct kernfs_node *kn;
+
+ kn = kernfs_find_and_get(pkn, name);
+ if (!kn)
+ return;
+ kernfs_put(kn);
+
+ if (kn->dir.subdirs <= 1)
+ kernfs_remove(kn);
+ else
+ kernfs_remove_by_name(kn, subname);
+}
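So when the last SNC sub-domain goes offline, the parent mon_<res>_* directory, whose only remaining child is that sub-domain, is removed as a whole; otherwise only the departing mon_sub_* directory is pruned.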
+
/*
* Remove all subdirectories of mon_data of ctrl_mon groups
- * and monitor groups with given domain id.
+ * and monitor groups for the given domain.
+ * Remove files and directories containing "sum" of domain data
+ * when the last domain being summed is removed.
*/
static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- unsigned int dom_id)
+ struct rdt_mon_domain *d)
{
struct rdtgroup *prgrp, *crgrp;
+ char subname[32];
+ bool snc_mode;
char name[32];
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ if (snc_mode)
+ sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
+
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- sprintf(name, "mon_%s_%02d", r->name, dom_id);
- kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
+ mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
- kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
+ mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
}
}
-static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
- struct rdt_domain *d,
- struct rdt_resource *r, struct rdtgroup *prgrp)
+static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp,
+ bool do_sum)
{
+ struct rmid_read rr = {0};
union mon_data_bits priv;
- struct kernfs_node *kn;
struct mon_evt *mevt;
- struct rmid_read rr;
- char name[32];
int ret;
- sprintf(name, "mon_%s_%02d", r->name, d->id);
- /* create the directory */
- kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
- if (IS_ERR(kn))
- return PTR_ERR(kn);
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret)
- goto out_destroy;
-
- if (WARN_ON(list_empty(&r->evt_list))) {
- ret = -EPERM;
- goto out_destroy;
- }
+ if (WARN_ON(list_empty(&r->evt_list)))
+ return -EPERM;
priv.u.rid = r->rid;
- priv.u.domid = d->id;
+ priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
+ priv.u.sum = do_sum;
list_for_each_entry(mevt, &r->evt_list, list) {
priv.u.evtid = mevt->evtid;
ret = mon_addfile(kn, mevt->name, priv.priv);
if (ret)
+ return ret;
+
+ if (!do_sum && is_mbm_event(mevt->evtid))
+ mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
+ }
+
+ return 0;
+}
+
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
+ struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp)
+{
+ struct kernfs_node *kn, *ckn;
+ char name[32];
+ bool snc_mode;
+ int ret = 0;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ kn = kernfs_find_and_get(parent_kn, name);
+ if (kn) {
+ /*
+ * rdtgroup_mutex will prevent this directory from being
+		 * removed. No need to keep the reference.
+ */
+ kernfs_put(kn);
+ } else {
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+ ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
+ if (ret)
+ goto out_destroy;
+ }
+
+ if (snc_mode) {
+ sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
+ ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(ckn)) {
+ ret = -EINVAL;
+ goto out_destroy;
+ }
+
+ ret = rdtgroup_kn_set_ugid(ckn);
+ if (ret)
goto out_destroy;
- if (is_mbm_event(mevt->evtid))
- mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
+ ret = mon_add_all_files(ckn, d, r, prgrp, false);
+ if (ret)
+ goto out_destroy;
}
+
kernfs_activate(kn);
return 0;
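With SNC active, mkdir_mondata_subdir() therefore produces a two-level hierarchy; an illustrative layout for one L3 cache split into two SNC nodes (directory numbers are hypothetical):

	mon_data/
		mon_L3_00/		files report sums across both SNC nodes
			mon_sub_L3_00/	files for SNC node 0 only
			mon_sub_L3_01/	files for SNC node 1 only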
@@ -3071,7 +3139,7 @@ out_destroy:
* and "monitor" groups with given domain id.
*/
static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_mon_domain *d)
{
struct kernfs_node *parent_kn;
struct rdtgroup *prgrp, *crgrp;
@@ -3093,13 +3161,13 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_resource *r,
struct rdtgroup *prgrp)
{
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
int ret;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
return ret;
@@ -3198,7 +3266,7 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
* Set the RDT domain up to start off with all usable allocations. That is,
* all shareable and unused bits. All-zero CBM is invalid.
*/
-static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
+static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
u32 closid)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -3258,7 +3326,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
tmp_cbm = cfg->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
return -ENOSPC;
}
cfg->have_new_ctrl = true;
@@ -3278,10 +3346,10 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
{
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int ret;
- list_for_each_entry(d, &s->res->domains, list) {
+ list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
@@ -3294,9 +3362,9 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
{
struct resctrl_staged_config *cfg;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (is_mba_sc(r)) {
d->mbps_val[closid] = MBA_MAX_MBPS;
continue;
@@ -3920,29 +3988,33 @@ static void __init rdtgroup_setup_default(void)
mutex_unlock(&rdtgroup_mutex);
}
-static void domain_destroy_mon_state(struct rdt_domain *d)
+static void domain_destroy_mon_state(struct rdt_mon_domain *d)
{
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
}
-void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
mutex_lock(&rdtgroup_mutex);
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
mba_sc_domain_destroy(r, d);
- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ mutex_lock(&rdtgroup_mutex);
/*
* If resctrl is mounted, remove all the
* per domain monitor data directories.
*/
if (resctrl_mounted && resctrl_arch_mon_capable())
- rmdir_mondata_subdir_allrdtgrp(r, d->id);
+ rmdir_mondata_subdir_allrdtgrp(r, d);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
@@ -3961,11 +4033,10 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
domain_destroy_mon_state(d);
-out_unlock:
mutex_unlock(&rdtgroup_mutex);
}
-static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
@@ -3996,7 +4067,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
return 0;
}
-int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
int err = 0;
@@ -4005,11 +4076,18 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
/* RDT_RESOURCE_MBA is never mon_capable */
err = mba_sc_domain_allocate(r, d);
- goto out_unlock;
}
- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ int err;
+
+ mutex_lock(&rdtgroup_mutex);
err = domain_setup_mon_state(r, d);
if (err)
@@ -4060,8 +4138,8 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
void resctrl_offline_cpu(unsigned int cpu)
{
struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
- struct rdt_domain *d;
mutex_lock(&rdtgroup_mutex);
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
@@ -4074,7 +4152,7 @@ void resctrl_offline_cpu(unsigned int cpu)
if (!l3->mon_capable)
goto out_unlock;
- d = get_domain_from_cpu(cpu, l3);
+ d = get_mon_domain_from_cpu(cpu, l3);
if (d) {
if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 11f83d07925e..00189cdeb775 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -41,80 +41,97 @@
#define CPUID_VMWARE_INFO_LEAF 0x40000000
#define CPUID_VMWARE_FEATURES_LEAF 0x40000010
-#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0)
-#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1)
-#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
-
-#define VMWARE_CMD_GETVERSION 10
-#define VMWARE_CMD_GETHZ 45
-#define VMWARE_CMD_GETVCPU_INFO 68
-#define VMWARE_CMD_LEGACY_X2APIC 3
-#define VMWARE_CMD_VCPU_RESERVED 31
-#define VMWARE_CMD_STEALCLOCK 91
+#define GETVCPU_INFO_LEGACY_X2APIC BIT(3)
+#define GETVCPU_INFO_VCPU_RESERVED BIT(31)
#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED 0
#define STEALCLOCK_ENABLED 1
-#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
- __asm__("inl (%%dx), %%eax" : \
- "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
- "a"(VMWARE_HYPERVISOR_MAGIC), \
- "c"(VMWARE_CMD_##cmd), \
- "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) : \
- "memory")
-
-#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx) \
- __asm__("vmcall" : \
- "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
- "a"(VMWARE_HYPERVISOR_MAGIC), \
- "c"(VMWARE_CMD_##cmd), \
- "d"(0), "b"(UINT_MAX) : \
- "memory")
-
-#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx) \
- __asm__("vmmcall" : \
- "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
- "a"(VMWARE_HYPERVISOR_MAGIC), \
- "c"(VMWARE_CMD_##cmd), \
- "d"(0), "b"(UINT_MAX) : \
- "memory")
-
-#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do { \
- switch (vmware_hypercall_mode) { \
- case CPUID_VMWARE_FEATURES_ECX_VMCALL: \
- VMWARE_VMCALL(cmd, eax, ebx, ecx, edx); \
- break; \
- case CPUID_VMWARE_FEATURES_ECX_VMMCALL: \
- VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx); \
- break; \
- default: \
- VMWARE_PORT(cmd, eax, ebx, ecx, edx); \
- break; \
- } \
- } while (0)
-
struct vmware_steal_time {
union {
- uint64_t clock; /* stolen time counter in units of vtsc */
+ u64 clock; /* stolen time counter in units of vtsc */
struct {
/* only for little-endian */
- uint32_t clock_low;
- uint32_t clock_high;
+ u32 clock_low;
+ u32 clock_high;
};
};
- uint64_t reserved[7];
+ u64 reserved[7];
};
static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode __ro_after_init;
+unsigned long vmware_hypercall_slow(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5)
+{
+ unsigned long out0, rbx, rcx, rdx, rsi, rdi;
+
+ switch (vmware_hypercall_mode) {
+ case CPUID_VMWARE_FEATURES_ECX_VMCALL:
+ asm_inline volatile ("vmcall"
+ : "=a" (out0), "=b" (rbx), "=c" (rcx),
+ "=d" (rdx), "=S" (rsi), "=D" (rdi)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ break;
+ case CPUID_VMWARE_FEATURES_ECX_VMMCALL:
+ asm_inline volatile ("vmmcall"
+ : "=a" (out0), "=b" (rbx), "=c" (rcx),
+ "=d" (rdx), "=S" (rsi), "=D" (rdi)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ break;
+ default:
+ asm_inline volatile ("movw %[port], %%dx; inl (%%dx), %%eax"
+ : "=a" (out0), "=b" (rbx), "=c" (rcx),
+ "=d" (rdx), "=S" (rsi), "=D" (rdi)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ break;
+ }
+
+ if (out1)
+ *out1 = rbx;
+ if (out2)
+ *out2 = rcx;
+ if (out3)
+ *out3 = rdx;
+ if (out4)
+ *out4 = rsi;
+ if (out5)
+ *out5 = rdi;
+
+ return out0;
+}
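Callers below use thin wrappers such as vmware_hypercall1/3/5 from asm/vmware.h, which this diff does not show. A simplified sketch of one wrapper, assuming it just forwards to the slow path above (the real header also open-codes a faster hypercall path):

static inline unsigned long vmware_hypercall3(unsigned long cmd,
					      unsigned long in1,
					      u32 *out1, u32 *out2)
{
	/* Unused inputs are zero; unused outputs are NULL. */
	return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
				     out1, out2, NULL, NULL, NULL);
}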
+
static inline int __vmware_platform(void)
{
- uint32_t eax, ebx, ecx, edx;
- VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
- return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
+ u32 eax, ebx, ecx;
+
+ eax = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &ebx, &ecx);
+ return eax != UINT_MAX && ebx == VMWARE_HYPERVISOR_MAGIC;
}
static unsigned long vmware_get_tsc_khz(void)
@@ -166,21 +183,12 @@ static void __init vmware_cyc2ns_setup(void)
pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}
-static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
+static int vmware_cmd_stealclock(u32 addr_hi, u32 addr_lo)
{
- uint32_t result, info;
-
- asm volatile (VMWARE_HYPERCALL :
- "=a"(result),
- "=c"(info) :
- "a"(VMWARE_HYPERVISOR_MAGIC),
- "b"(0),
- "c"(VMWARE_CMD_STEALCLOCK),
- "d"(0),
- "S"(arg1),
- "D"(arg2) :
- "memory");
- return result;
+ u32 info;
+
+ return vmware_hypercall5(VMWARE_CMD_STEALCLOCK, 0, 0, addr_hi, addr_lo,
+ &info);
}
static bool stealclock_enable(phys_addr_t pa)
@@ -215,15 +223,15 @@ static bool vmware_is_stealclock_available(void)
* Return:
* The steal clock reading in ns.
*/
-static uint64_t vmware_steal_clock(int cpu)
+static u64 vmware_steal_clock(int cpu)
{
struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
- uint64_t clock;
+ u64 clock;
if (IS_ENABLED(CONFIG_64BIT))
clock = READ_ONCE(steal->clock);
else {
- uint32_t initial_high, low, high;
+ u32 initial_high, low, high;
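+		/*
+		 * Torn-read guard: a 32-bit kernel cannot read the
+		 * 64-bit counter atomically, so re-read until the
+		 * high word is stable across the low-word read.
+		 */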
do {
initial_high = READ_ONCE(steal->clock_high);
@@ -235,7 +243,7 @@ static uint64_t vmware_steal_clock(int cpu)
high = READ_ONCE(steal->clock_high);
} while (initial_high != high);
- clock = ((uint64_t)high << 32) | low;
+ clock = ((u64)high << 32) | low;
}
return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
@@ -389,13 +397,13 @@ static void __init vmware_set_capabilities(void)
static void __init vmware_platform_setup(void)
{
- uint32_t eax, ebx, ecx, edx;
- uint64_t lpj, tsc_khz;
+ u32 eax, ebx, ecx;
+ u64 lpj, tsc_khz;
- VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);
+ eax = vmware_hypercall3(VMWARE_CMD_GETHZ, UINT_MAX, &ebx, &ecx);
if (ebx != UINT_MAX) {
- lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
+ lpj = tsc_khz = eax | (((u64)ebx) << 32);
do_div(tsc_khz, 1000);
WARN_ON(tsc_khz >> 32);
pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
@@ -446,7 +454,7 @@ static u8 __init vmware_select_hypercall(void)
* If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
* intentionally defaults to 0.
*/
-static uint32_t __init vmware_platform(void)
+static u32 __init vmware_platform(void)
{
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
unsigned int eax;
@@ -474,12 +482,65 @@ static uint32_t __init vmware_platform(void)
/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void)
{
- uint32_t eax, ebx, ecx, edx;
- VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
- return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
- (eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
+ u32 eax;
+
+ eax = vmware_hypercall1(VMWARE_CMD_GETVCPU_INFO, 0);
+ return !(eax & GETVCPU_INFO_VCPU_RESERVED) &&
+ (eax & GETVCPU_INFO_LEGACY_X2APIC);
}
+#ifdef CONFIG_INTEL_TDX_GUEST
+/*
+ * TDCALL[TDG.VP.VMCALL] uses %rax (arg0) and %rcx (arg2). Therefore,
+ * we remap those registers to %r12 and %r13, respectively.
+ */
+unsigned long vmware_tdx_hypercall(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5)
+{
+ struct tdx_module_args args = {};
+
+ if (!hypervisor_is_type(X86_HYPER_VMWARE)) {
+ pr_warn_once("Incorrect usage\n");
+ return ULONG_MAX;
+ }
+
+ if (cmd & ~VMWARE_CMD_MASK) {
+ pr_warn_once("Out of range command %lx\n", cmd);
+ return ULONG_MAX;
+ }
+
+ args.rbx = in1;
+ args.rdx = in3;
+ args.rsi = in4;
+ args.rdi = in5;
+ args.r10 = VMWARE_TDX_VENDOR_LEAF;
+ args.r11 = VMWARE_TDX_HCALL_FUNC;
+ args.r12 = VMWARE_HYPERVISOR_MAGIC;
+ args.r13 = cmd;
+ /* CPL */
+ args.r15 = 0;
+
+ __tdx_hypercall(&args);
+
+ if (out1)
+ *out1 = args.rbx;
+ if (out2)
+ *out2 = args.r13;
+ if (out3)
+ *out3 = args.rdx;
+ if (out4)
+ *out4 = args.rsi;
+ if (out5)
+ *out5 = args.rdi;
+
+ return args.r12;
+}
+EXPORT_SYMBOL_GPL(vmware_tdx_hypercall);
+#endif
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
struct pt_regs *regs)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f06501445cd9..340af8155658 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -128,6 +128,18 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
+
+ /*
+ * Non-crash kexec calls enc_kexec_begin() while scheduling is still
+ * active. This allows the callback to wait until all in-flight
+ * shared<->private conversions are complete. In a crash scenario,
+ * enc_kexec_begin() gets called after all but one CPU have been shut
+ * down and interrupts have been disabled. This allows the callback to
+ * detect a race with the conversion and report it.
+ */
+ x86_platform.guest.enc_kexec_begin();
+ x86_platform.guest.enc_kexec_finish();
+
crash_save_cpu(regs, safe_smp_processor_id());
}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 8e3c53b4d070..64280879c68c 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -83,7 +83,7 @@ static int x86_of_pci_irq_enable(struct pci_dev *dev)
ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (ret)
- return ret;
+ return pcibios_err_to_errno(ret);
if (!pin)
return 0;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 68b09f718f10..4893d30ce438 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -828,7 +828,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
/*
* Find the highest page frame number we have available
*/
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type type)
+static unsigned long __init e820__end_ram_pfn(unsigned long limit_pfn)
{
int i;
unsigned long last_pfn = 0;
@@ -839,7 +839,8 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type
unsigned long start_pfn;
unsigned long end_pfn;
- if (entry->type != type)
+ if (entry->type != E820_TYPE_RAM &&
+ entry->type != E820_TYPE_ACPI)
continue;
start_pfn = entry->addr >> PAGE_SHIFT;
@@ -865,12 +866,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type
unsigned long __init e820__end_of_ram_pfn(void)
{
- return e820_end_pfn(MAX_ARCH_PFN, E820_TYPE_RAM);
+ return e820__end_ram_pfn(MAX_ARCH_PFN);
}
unsigned long __init e820__end_of_low_ram_pfn(void)
{
- return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_TYPE_RAM);
+ return e820__end_ram_pfn(1UL << (32 - PAGE_SHIFT));
}
static void __init early_panic(char *msg)
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 05df04f39628..2ee0b9c53dcc 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -106,21 +106,17 @@ static inline u64 xfeatures_mask_independent(void)
* Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
* supports modified optimization which is not supported by XSAVE.
*
- * We use XSAVE as a fallback.
- *
- * The 661 label is defined in the ALTERNATIVE* macros as the address of the
- * original instruction which gets replaced. We need to use it here as the
- * address of the instruction where we might get an exception at.
+ * Use XSAVE as a fallback.
*/
#define XSTATE_XSAVE(st, lmask, hmask, err) \
- asm volatile(ALTERNATIVE_3(XSAVE, \
+ asm volatile("1: " ALTERNATIVE_3(XSAVE, \
XSAVEOPT, X86_FEATURE_XSAVEOPT, \
XSAVEC, X86_FEATURE_XSAVEC, \
XSAVES, X86_FEATURE_XSAVES) \
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
+ _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -130,11 +126,11 @@ static inline u64 xfeatures_mask_independent(void)
* XSAVE area format.
*/
#define XSTATE_XRESTORE(st, lmask, hmask) \
- asm volatile(ALTERNATIVE(XRSTOR, \
+ asm volatile("1: " ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
+ _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FPU_RESTORE) \
: \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b8441147eb5e..f63f8fd00a91 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -835,6 +835,13 @@ void __noreturn stop_this_cpu(void *dummy)
*/
cpumask_clear_cpu(cpu, &cpus_stop_mask);
+#ifdef CONFIG_SMP
+ if (smp_ops.stop_this_cpu) {
+ smp_ops.stop_this_cpu();
+ unreachable();
+ }
+#endif
+
for (;;) {
/*
* Use native_halt() so that memory contents don't change
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f3130f762784..0e0a4cf6b5eb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
#include <linux/delay.h>
#include <linux/objtool.h>
#include <linux/pgtable.h>
+#include <linux/kexec.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
@@ -716,6 +717,14 @@ static void native_machine_emergency_restart(void)
void native_machine_shutdown(void)
{
+ /*
+ * Call enc_kexec_begin() while all CPUs are still active and
+ * interrupts are enabled. This will allow all in-flight memory
+ * conversions to finish cleanly.
+ */
+ if (kexec_in_progress)
+ x86_platform.guest.enc_kexec_begin();
+
/* Stop the cpus and apics */
#ifdef CONFIG_X86_IO_APIC
/*
@@ -752,6 +761,9 @@ void native_machine_shutdown(void)
#ifdef CONFIG_X86_64
x86_platform.iommu_shutdown();
#endif
+
+ if (kexec_in_progress)
+ x86_platform.guest.enc_kexec_finish();
}
static void __machine_emergency_restart(int emergency)
@@ -868,6 +880,12 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
+
+ if (smp_ops.stop_this_cpu) {
+ smp_ops.stop_this_cpu();
+ unreachable();
+ }
+
/* Assume hlt works */
halt();
for (;;)
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 56cab1bb25f5..042c9a0334e9 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -5,6 +5,8 @@
*/
#include <linux/linkage.h>
+#include <linux/stringify.h>
+#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
@@ -145,16 +147,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
* Set cr4 to a known state:
* - physical address extension enabled
* - 5-level paging, if it was enabled before
+ * - Machine check exception on TDX guest, if it was enabled before.
+ * Clearing MCE might not be allowed in TDX guests, depending on setup.
+ *
+	 * Use R13, which contains the original CR4 value read in relocate_kernel().
+ * PAE is always set in the original CR4.
*/
- movl $X86_CR4_PAE, %eax
- testq $X86_CR4_LA57, %r13
- jz 1f
- orl $X86_CR4_LA57, %eax
-1:
- movq %rax, %cr4
-
- jmp 1f
-1:
+ andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d
+ ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
+ movq %r13, %cr4
/* Flush the TLB (needed?) */
movq %r9, %cr3
@@ -165,9 +166,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
* used by kexec. Flush the caches before copying the kernel.
*/
testq %r12, %r12
- jz 1f
+ jz .Lsme_off
wbinvd
-1:
+.Lsme_off:
movq %rcx, %r11
call swap_pages
@@ -187,7 +188,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
*/
testq %r11, %r11
- jnz 1f
+ jnz .Lrelocate
xorl %eax, %eax
xorl %ebx, %ebx
xorl %ecx, %ecx
@@ -208,7 +209,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
ret
int3
-1:
+.Lrelocate:
popq %rdx
leaq PAGE_SIZE(%r10), %rsp
ANNOTATE_RETPOLINE_SAFE
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 05c5aa951da7..728927e4ba51 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -165,6 +165,7 @@ unsigned long saved_video_mode;
static char __initdata command_line[COMMAND_LINE_SIZE];
#ifdef CONFIG_CMDLINE_BOOL
static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+bool builtin_cmdline_added __ro_after_init;
#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
@@ -765,6 +766,7 @@ void __init setup_arch(char **cmdline_p)
strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
}
#endif
+ builtin_cmdline_added = true;
#endif
strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e42faa792c07..52e1f3f0b361 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -27,25 +27,7 @@
unsigned long profile_pc(struct pt_regs *regs)
{
- unsigned long pc = instruction_pointer(regs);
-
- if (!user_mode(regs) && in_lock_functions(pc)) {
-#ifdef CONFIG_FRAME_POINTER
- return *(unsigned long *)(regs->bp + sizeof(long));
-#else
- unsigned long *sp = (unsigned long *)regs->sp;
- /*
- * Return address is either directly at stack pointer
- * or above a saved flags. Eflags has bits 22-31 zero,
- * kernel addresses don't.
- */
- if (sp[0] >> 22)
- return sp[0];
- if (sp[1] >> 22)
- return sp[1];
-#endif
- }
- return pc;
+ return instruction_pointer(regs);
}
EXPORT_SYMBOL(profile_pc);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 06b170759e5b..d4462fb26299 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -50,9 +50,9 @@ int tsc_clocksource_reliable;
static int __read_mostly tsc_force_recalibrate;
-static u32 art_to_tsc_numerator;
-static u32 art_to_tsc_denominator;
-static u64 art_to_tsc_offset;
+static struct clocksource_base art_base_clk = {
+ .id = CSID_X86_ART,
+};
static bool have_art;
struct cyc2ns {
@@ -1074,7 +1074,7 @@ core_initcall(cpufreq_register_tsc_scaling);
*/
static void __init detect_art(void)
{
- unsigned int unused[2];
+ unsigned int unused;
if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
return;
@@ -1089,13 +1089,14 @@ static void __init detect_art(void)
tsc_async_resets)
return;
- cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
- &art_to_tsc_numerator, unused, unused+1);
+ cpuid(ART_CPUID_LEAF, &art_base_clk.denominator,
+ &art_base_clk.numerator, &art_base_clk.freq_khz, &unused);
- if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
+ art_base_clk.freq_khz /= KHZ;
+ if (art_base_clk.denominator < ART_MIN_DENOMINATOR)
return;
- rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);
+ rdmsrl(MSR_IA32_TSC_ADJUST, art_base_clk.offset);
/* Make this sticky over multiple CPU init calls */
setup_force_cpu_cap(X86_FEATURE_ART);
@@ -1296,67 +1297,6 @@ int unsynchronized_tsc(void)
return 0;
}
-/*
- * Convert ART to TSC given numerator/denominator found in detect_art()
- */
-struct system_counterval_t convert_art_to_tsc(u64 art)
-{
- u64 tmp, res, rem;
-
- rem = do_div(art, art_to_tsc_denominator);
-
- res = art * art_to_tsc_numerator;
- tmp = rem * art_to_tsc_numerator;
-
- do_div(tmp, art_to_tsc_denominator);
- res += tmp + art_to_tsc_offset;
-
- return (struct system_counterval_t) {
- .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC,
- .cycles = res,
- };
-}
-EXPORT_SYMBOL(convert_art_to_tsc);
-
-/**
- * convert_art_ns_to_tsc() - Convert ART in nanoseconds to TSC.
- * @art_ns: ART (Always Running Timer) in unit of nanoseconds
- *
- * PTM requires all timestamps to be in units of nanoseconds. When user
- * software requests a cross-timestamp, this function converts system timestamp
- * to TSC.
- *
- * This is valid when CPU feature flag X86_FEATURE_TSC_KNOWN_FREQ is set
- * indicating the tsc_khz is derived from CPUID[15H]. Drivers should check
- * that this flag is set before conversion to TSC is attempted.
- *
- * Return:
- * struct system_counterval_t - system counter value with the ID of the
- * corresponding clocksource:
- * cycles: System counter value
- * cs_id: The clocksource ID for validating comparability
- */
-
-struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
-{
- u64 tmp, res, rem;
-
- rem = do_div(art_ns, USEC_PER_SEC);
-
- res = art_ns * tsc_khz;
- tmp = rem * tsc_khz;
-
- do_div(tmp, USEC_PER_SEC);
- res += tmp;
-
- return (struct system_counterval_t) {
- .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC,
- .cycles = res,
- };
-}
-EXPORT_SYMBOL(convert_art_ns_to_tsc);
-
-
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
/**
@@ -1458,8 +1398,10 @@ out:
if (tsc_unstable)
goto unreg;
- if (boot_cpu_has(X86_FEATURE_ART))
+ if (boot_cpu_has(X86_FEATURE_ART)) {
have_art = true;
+ clocksource_tsc.base = &art_base_clk;
+ }
clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
clocksource_unregister(&clocksource_tsc_early);
@@ -1484,8 +1426,10 @@ static int __init init_tsc_clocksource(void)
* the refined calibration and directly register it as a clocksource.
*/
if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
- if (boot_cpu_has(X86_FEATURE_ART))
+ if (boot_cpu_has(X86_FEATURE_ART)) {
have_art = true;
+ clocksource_tsc.base = &art_base_clk;
+ }
clocksource_register_khz(&clocksource_tsc, tsc_khz);
clocksource_unregister(&clocksource_tsc_early);
@@ -1509,10 +1453,12 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
if (early) {
cpu_khz = x86_platform.calibrate_cpu();
- if (tsc_early_khz)
+ if (tsc_early_khz) {
tsc_khz = tsc_early_khz;
- else
+ } else {
tsc_khz = x86_platform.calibrate_tsc();
+ clocksource_tsc.freq_khz = tsc_khz;
+ }
} else {
/* We should not be here with non-native cpu calibration */
WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3509afc6a672..6e73403e874f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -357,6 +357,9 @@ SECTIONS
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
#endif
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d5dc5a92635a..82b128d3f309 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -134,10 +134,12 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { };
-static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
-static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
+static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
+static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
static bool enc_cache_flush_required_noop(void) { return false; }
+static void enc_kexec_begin_noop(void) {}
+static void enc_kexec_finish_noop(void) {}
static bool is_private_mmio_noop(u64 addr) {return false; }
struct x86_platform_ops x86_platform __ro_after_init = {
@@ -161,6 +163,8 @@ struct x86_platform_ops x86_platform __ro_after_init = {
.enc_status_change_finish = enc_status_change_finish_noop,
.enc_tlb_flush_required = enc_tlb_flush_required_noop,
.enc_cache_flush_required = enc_cache_flush_required_noop,
+ .enc_kexec_begin = enc_kexec_begin_noop,
+ .enc_kexec_finish = enc_kexec_finish_noop,
},
};
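
A note on the pattern above: x86_platform keeps every hook populated with a noop default so call sites never have to check for NULL, and the encryption-status hooks now report success as 0 and failure as -errno rather than returning bool. A minimal sketch of the idea (the names here are illustrative, not the kernel's):

	#include <stdbool.h>

	struct guest_ops {
		int  (*enc_prepare)(unsigned long vaddr, int npages, bool enc);
		void (*kexec_begin)(void);
	};

	static int  enc_prepare_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
	static void kexec_begin_noop(void) { }

	/* Safe defaults: every hook can be invoked unconditionally. */
	static struct guest_ops guest_ops = {
		.enc_prepare = enc_prepare_noop,
		.kexec_begin = kexec_begin_noop,
	};
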
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5d4c86133453..c8cc578646d0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1069,7 +1069,7 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
asm("push %[flags]; popf; " CALL_NOSPEC
- : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
+ : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags));
return rc;
}
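
For context on ASM_CALL_CONSTRAINT: inline asm that contains a call must tell the compiler the stack pointer is live, otherwise the frame may not be fully set up when the asm executes. Paraphrasing the x86 headers (treat the exact spelling as an assumption of this sketch), the constraint is an in/out operand on the stack-pointer register:

	register unsigned long current_stack_pointer asm("rsp");
	#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)

Listing it among the outputs, as the hunk does, is required for any asm statement that performs a call.
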
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 80570eb3c89b..384da1fdd5c6 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -6,8 +6,10 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/ctype.h>
+
#include <asm/setup.h>
#include <asm/cmdline.h>
+#include <asm/bug.h>
static inline int myisspace(u8 c)
{
@@ -205,12 +207,18 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size,
int cmdline_find_option_bool(const char *cmdline, const char *option)
{
+ if (IS_ENABLED(CONFIG_CMDLINE_BOOL))
+ WARN_ON_ONCE(!builtin_cmdline_added);
+
return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
}
int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
int bufsize)
{
+ if (IS_ENABLED(CONFIG_CMDLINE_BOOL))
+ WARN_ON_ONCE(!builtin_cmdline_added);
+
return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
buffer, bufsize);
}
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index a1cb3a4e6742..a314622aa093 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -44,21 +44,23 @@
or %rdx, %rax
.else
cmp $TASK_SIZE_MAX-\size+1, %eax
-.if \size != 8
jae .Lbad_get_user
-.else
- jae .Lbad_get_user_8
-.endif
sbb %edx, %edx /* array_index_mask_nospec() */
and %edx, %eax
.endif
.endm
+.macro UACCESS op src dst
+1: \op \src,\dst
+ _ASM_EXTABLE_UA(1b, __get_user_handle_exception)
+.endm
+
+
.text
SYM_FUNC_START(__get_user_1)
check_range size=1
ASM_STAC
-1: movzbl (%_ASM_AX),%edx
+ UACCESS movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
@@ -68,7 +70,7 @@ EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2)
check_range size=2
ASM_STAC
-2: movzwl (%_ASM_AX),%edx
+ UACCESS movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
@@ -78,7 +80,7 @@ EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4)
check_range size=4
ASM_STAC
-3: movl (%_ASM_AX),%edx
+ UACCESS movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
@@ -89,10 +91,11 @@ SYM_FUNC_START(__get_user_8)
check_range size=8
ASM_STAC
#ifdef CONFIG_X86_64
-4: movq (%_ASM_AX),%rdx
+ UACCESS movq (%_ASM_AX),%rdx
#else
-4: movl (%_ASM_AX),%edx
-5: movl 4(%_ASM_AX),%ecx
+ xor %ecx,%ecx
+ UACCESS movl (%_ASM_AX),%edx
+ UACCESS movl 4(%_ASM_AX),%ecx
#endif
xor %eax,%eax
ASM_CLAC
@@ -104,7 +107,7 @@ EXPORT_SYMBOL(__get_user_8)
SYM_FUNC_START(__get_user_nocheck_1)
ASM_STAC
ASM_BARRIER_NOSPEC
-6: movzbl (%_ASM_AX),%edx
+ UACCESS movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
@@ -114,7 +117,7 @@ EXPORT_SYMBOL(__get_user_nocheck_1)
SYM_FUNC_START(__get_user_nocheck_2)
ASM_STAC
ASM_BARRIER_NOSPEC
-7: movzwl (%_ASM_AX),%edx
+ UACCESS movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
@@ -124,7 +127,7 @@ EXPORT_SYMBOL(__get_user_nocheck_2)
SYM_FUNC_START(__get_user_nocheck_4)
ASM_STAC
ASM_BARRIER_NOSPEC
-8: movl (%_ASM_AX),%edx
+ UACCESS movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
@@ -135,10 +138,11 @@ SYM_FUNC_START(__get_user_nocheck_8)
ASM_STAC
ASM_BARRIER_NOSPEC
#ifdef CONFIG_X86_64
-9: movq (%_ASM_AX),%rdx
+ UACCESS movq (%_ASM_AX),%rdx
#else
-9: movl (%_ASM_AX),%edx
-10: movl 4(%_ASM_AX),%ecx
+ xor %ecx,%ecx
+ UACCESS movl (%_ASM_AX),%edx
+ UACCESS movl 4(%_ASM_AX),%ecx
#endif
xor %eax,%eax
ASM_CLAC
@@ -154,36 +158,3 @@ SYM_CODE_START_LOCAL(__get_user_handle_exception)
mov $(-EFAULT),%_ASM_AX
RET
SYM_CODE_END(__get_user_handle_exception)
-
-#ifdef CONFIG_X86_32
-SYM_CODE_START_LOCAL(__get_user_8_handle_exception)
- ASM_CLAC
-.Lbad_get_user_8:
- xor %edx,%edx
- xor %ecx,%ecx
- mov $(-EFAULT),%_ASM_AX
- RET
-SYM_CODE_END(__get_user_8_handle_exception)
-#endif
-
-/* get_user */
- _ASM_EXTABLE_UA(1b, __get_user_handle_exception)
- _ASM_EXTABLE_UA(2b, __get_user_handle_exception)
- _ASM_EXTABLE_UA(3b, __get_user_handle_exception)
-#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(4b, __get_user_handle_exception)
-#else
- _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception)
- _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception)
-#endif
-
-/* __get_user */
- _ASM_EXTABLE_UA(6b, __get_user_handle_exception)
- _ASM_EXTABLE_UA(7b, __get_user_handle_exception)
- _ASM_EXTABLE_UA(8b, __get_user_handle_exception)
-#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(9b, __get_user_handle_exception)
-#else
- _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception)
- _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception)
-#endif
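
The rewrite above replaces the numbered labels and the trailing _ASM_EXTABLE_UA lists with a UACCESS macro that emits the exception-table entry right next to each access; the 32-bit-only __get_user_8 handler goes away because %ecx is now pre-zeroed before the two loads. Conceptually, each entry maps a possibly-faulting instruction to its fixup. A sketch of the entry shape (the real x86 entries store 32-bit PC-relative offsets rather than pointers):

	struct exception_table_entry {
		int insn;	/* offset of the instruction that may fault */
		int fixup;	/* offset of the recovery code to jump to */
		int data;	/* handler class, e.g. EX_TYPE_UACCESS */
	};
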
diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c
index e0411a3774d4..5eecb45d05d5 100644
--- a/arch/x86/lib/iomem.c
+++ b/arch/x86/lib/iomem.c
@@ -25,6 +25,9 @@ static __always_inline void rep_movs(void *to, const void *from, size_t n)
static void string_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
{
+ const void *orig_to = to;
+ const size_t orig_n = n;
+
if (unlikely(!n))
return;
@@ -39,7 +42,7 @@ static void string_memcpy_fromio(void *to, const volatile void __iomem *from, si
}
rep_movs(to, (const void *)from, n);
/* KMSAN must treat values read from devices as initialized. */
- kmsan_unpoison_memory(to, n);
+ kmsan_unpoison_memory(orig_to, orig_n);
}
static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
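
The fix above matters because the alignment pre-loop earlier in string_memcpy_fromio() advances 'to' and shrinks 'n' before the final rep_movs(), so unpoisoning with the live values covered only the tail of the buffer. The general save-the-originals pattern as a standalone sketch (mark_initialized() is a hypothetical stand-in for the KMSAN hook):

	#include <stddef.h>

	static void mark_initialized(void *p, size_t n) { (void)p; (void)n; }

	static void copy_and_mark(char *to, const char *from, size_t n)
	{
		char *orig_to = to;		/* save before the cursors move */
		const size_t orig_n = n;

		while (n--)
			*to++ = *from++;

		mark_initialized(orig_to, orig_n);	/* whole buffer, not the tail */
	}
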
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..c45127265f2f 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -4,6 +4,79 @@
* included by both the compressed kernel and the regular kernel.
*/
+static void free_pte(struct x86_mapping_info *info, pmd_t *pmd)
+{
+ pte_t *pte = pte_offset_kernel(pmd, 0);
+
+ info->free_pgt_page(pte, info->context);
+}
+
+static void free_pmd(struct x86_mapping_info *info, pud_t *pud)
+{
+ pmd_t *pmd = pmd_offset(pud, 0);
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_present(pmd[i]))
+ continue;
+
+ if (pmd_leaf(pmd[i]))
+ continue;
+
+ free_pte(info, &pmd[i]);
+ }
+
+ info->free_pgt_page(pmd, info->context);
+}
+
+static void free_pud(struct x86_mapping_info *info, p4d_t *p4d)
+{
+ pud_t *pud = pud_offset(p4d, 0);
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ if (!pud_present(pud[i]))
+ continue;
+
+ if (pud_leaf(pud[i]))
+ continue;
+
+ free_pmd(info, &pud[i]);
+ }
+
+ info->free_pgt_page(pud, info->context);
+}
+
+static void free_p4d(struct x86_mapping_info *info, pgd_t *pgd)
+{
+ p4d_t *p4d = p4d_offset(pgd, 0);
+ int i;
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ if (!p4d_present(p4d[i]))
+ continue;
+
+ free_pud(info, &p4d[i]);
+ }
+
+ if (pgtable_l5_enabled())
+ info->free_pgt_page(p4d, info->context);
+}
+
+void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd)
+{
+ int i;
+
+ for (i = 0; i < PTRS_PER_PGD; i++) {
+ if (!pgd_present(pgd[i]))
+ continue;
+
+ free_p4d(info, &pgd[i]);
+ }
+
+ info->free_pgt_page(pgd, info->context);
+}
+
static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
unsigned long addr, unsigned long end)
{
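
kernel_ident_mapping_free() is the teardown counterpart to kernel_ident_mapping_init(): it walks each level top-down, skips leaf entries, and hands every table page back through the caller's free_pgt_page() hook, the field this series adds to struct x86_mapping_info. A hypothetical hook for tables drawn from the page allocator:

	/* Assumes the matching alloc_pgt_page() used the page allocator;
	 * context is unused in this sketch. */
	static void free_pgt_page(void *pgt, void *context)
	{
		free_page((unsigned long)pgt);
	}

With that hook in place, kernel_ident_mapping_free(&info, pgd) releases the whole hierarchy, including the pgd page itself.
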
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7e177856ee4f..28002cc7a37d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -469,7 +469,9 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
- E820_TYPE_RESERVED_KERN))
+ E820_TYPE_RESERVED_KERN) &&
+ !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
+ E820_TYPE_ACPI))
set_pte_init(pte, __pte(0), init);
continue;
}
@@ -524,7 +526,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
- E820_TYPE_RESERVED_KERN))
+ E820_TYPE_RESERVED_KERN) &&
+ !e820__mapped_any(paddr & PMD_MASK, paddr_next,
+ E820_TYPE_ACPI))
set_pmd_init(pmd, __pmd(0), init);
continue;
}
@@ -611,7 +615,9 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
- E820_TYPE_RESERVED_KERN))
+ E820_TYPE_RESERVED_KERN) &&
+ !e820__mapped_any(paddr & PUD_MASK, paddr_next,
+ E820_TYPE_ACPI))
set_pud_init(pud, __pud(0), init);
continue;
}
@@ -698,7 +704,9 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
- E820_TYPE_RESERVED_KERN))
+ E820_TYPE_RESERVED_KERN) &&
+ !e820__mapped_any(paddr & P4D_MASK, paddr_next,
+ E820_TYPE_ACPI))
set_p4d_init(p4d, __p4d(0), init);
continue;
}
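
All four hunks above extend the same predicate: a mapping is kept if any part of the range is RAM, kernel-reserved, or now ACPI, so ACPI tables remain accessible after boot. A hypothetical helper naming that repeated predicate:

	static bool range_must_stay_mapped(u64 start, u64 end)
	{
		return e820__mapped_any(start, end, E820_TYPE_RAM) ||
		       e820__mapped_any(start, end, E820_TYPE_RESERVED_KERN) ||
		       e820__mapped_any(start, end, E820_TYPE_ACPI);
	}
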
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 422602f6039b..86a476a426c2 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -2,7 +2,7 @@
/*
* AMD Memory Encryption Support
*
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
@@ -283,7 +283,7 @@ static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
#endif
}
-static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
/*
* To maintain the security guarantees of SEV-SNP guests, make sure
@@ -292,11 +292,11 @@ static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
snp_set_memory_shared(vaddr, npages);
- return true;
+ return 0;
}
/* Return 0 unconditionally: the return value doesn't matter for the SEV side */
-static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
+static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
/*
* After memory is mapped encrypted in the page table, validate it
@@ -308,7 +308,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e
if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);
- return true;
+ return 0;
}
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
@@ -510,6 +510,12 @@ void __init sme_early_init(void)
*/
x86_init.resources.dmi_setup = snp_dmi_setup;
}
+
+ /*
+ * Switch the SVSM CA mapping (if active) from identity mapped to
+ * kernel mapped.
+ */
+ snp_update_svsm_ca();
}
void __init mem_encrypt_free_decrypted_mem(void)
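
The signature change feeds the conversion protocol in set_memory.c further down: notify the hypervisor before the page tables change, flip the attributes, then validate afterwards, with either hook now able to report a real -errno. The sequence as a sketch (change_page_attrs() is a hypothetical stand-in for __change_page_attr_set_clr()):

	static int convert(unsigned long addr, int npages, bool enc)
	{
		int ret;

		ret = x86_platform.guest.enc_status_change_prepare(addr, npages, enc);
		if (ret)
			return ret;

		ret = change_page_attrs(addr, npages, enc);	/* hypothetical */
		if (ret)
			return ret;

		return x86_platform.guest.enc_status_change_finish(addr, npages, enc);
	}
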
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 19fdfbb171ed..443a97e515c0 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -662,8 +662,9 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
/*
* Lookup the page table entry for a virtual address in a specific pgd.
- * Return a pointer to the entry, the level of the mapping, and the effective
- * NX and RW bits of all page table levels.
+ * Return a pointer to the entry (or NULL if the entry does not exist),
+ * the level of the entry, and the effective NX and RW bits of all
+ * page table levels.
*/
pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
unsigned int *level, bool *nx, bool *rw)
@@ -672,13 +673,14 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
pud_t *pud;
pmd_t *pmd;
- *level = PG_LEVEL_NONE;
+ *level = PG_LEVEL_256T;
*nx = false;
*rw = true;
if (pgd_none(*pgd))
return NULL;
+ *level = PG_LEVEL_512G;
*nx |= pgd_flags(*pgd) & _PAGE_NX;
*rw &= pgd_flags(*pgd) & _PAGE_RW;
@@ -686,10 +688,10 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
if (p4d_none(*p4d))
return NULL;
- *level = PG_LEVEL_512G;
if (p4d_leaf(*p4d) || !p4d_present(*p4d))
return (pte_t *)p4d;
+ *level = PG_LEVEL_1G;
*nx |= p4d_flags(*p4d) & _PAGE_NX;
*rw &= p4d_flags(*p4d) & _PAGE_RW;
@@ -697,10 +699,10 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
if (pud_none(*pud))
return NULL;
- *level = PG_LEVEL_1G;
if (pud_leaf(*pud) || !pud_present(*pud))
return (pte_t *)pud;
+ *level = PG_LEVEL_2M;
*nx |= pud_flags(*pud) & _PAGE_NX;
*rw &= pud_flags(*pud) & _PAGE_RW;
@@ -708,15 +710,13 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
if (pmd_none(*pmd))
return NULL;
- *level = PG_LEVEL_2M;
if (pmd_leaf(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
+ *level = PG_LEVEL_4K;
*nx |= pmd_flags(*pmd) & _PAGE_NX;
*rw &= pmd_flags(*pmd) & _PAGE_RW;
- *level = PG_LEVEL_4K;
-
return pte_offset_kernel(pmd, address);
}
@@ -736,9 +736,8 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
* Lookup the page table entry for a virtual address. Return a pointer
* to the entry and the level of the mapping.
*
- * Note: We return pud and pmd either when the entry is marked large
- * or when the present bit is not set. Otherwise we would return a
- * pointer to a nonexisting mapping.
+ * Note: the function returns a p4d, pud or pmd pointer either when the entry
+ * is marked large or when the present bit is not set; it returns NULL when
+ * the entry does not exist, and a pointer to the PTE otherwise.
*/
pte_t *lookup_address(unsigned long address, unsigned int *level)
{
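
With the reordering above, *level is set to the level of the entry about to be examined before any early return, so it is meaningful even when NULL comes back: it says how deep the walk got. A small usage sketch:

	static bool mapped_as_4k(unsigned long addr)
	{
		unsigned int level;
		pte_t *pte = lookup_address(addr, &level);

		return pte && pte_present(*pte) && level == PG_LEVEL_4K;
	}
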
@@ -2196,7 +2195,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
/* Notify hypervisor that we are about to set/clr encryption attribute. */
- if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
+ ret = x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
+ if (ret)
goto vmm_fail;
ret = __change_page_attr_set_clr(&cpa, 1);
@@ -2214,24 +2214,61 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
return ret;
/* Notify hypervisor that we have successfully set/clr encryption attribute. */
- if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
+ ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc);
+ if (ret)
goto vmm_fail;
return 0;
vmm_fail:
- WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s.\n",
- (void *)addr, numpages, enc ? "private" : "shared");
+ WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s: %d\n",
+ (void *)addr, numpages, enc ? "private" : "shared", ret);
+
+ return ret;
+}
+
+/*
+ * The lock serializes conversions between private and shared memory.
+ *
+ * It is taken for read on conversion. A write lock guarantees that no
+ * concurrent conversions are in progress.
+ */
+static DECLARE_RWSEM(mem_enc_lock);
+
+/*
+ * Stop new private<->shared conversions.
+ *
+ * Taking the exclusive mem_enc_lock waits for in-flight conversions to complete.
+ * The lock is not released to prevent new conversions from being started.
+ */
+bool set_memory_enc_stop_conversion(void)
+{
+ /*
+ * In a crash scenario, sleep is not allowed. Try to take the lock.
+ * Failure indicates that there is a race with the conversion.
+ */
+ if (oops_in_progress)
+ return down_write_trylock(&mem_enc_lock);
+
+ down_write(&mem_enc_lock);
- return -EIO;
+ return true;
}
static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return __set_memory_enc_pgtable(addr, numpages, enc);
+ int ret = 0;
- return 0;
+ if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
+ if (!down_read_trylock(&mem_enc_lock))
+ return -EBUSY;
+
+ ret = __set_memory_enc_pgtable(addr, numpages, enc);
+
+ up_read(&mem_enc_lock);
+ }
+
+ return ret;
}
int set_memory_encrypted(unsigned long addr, int numpages)
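
The mem_enc_lock scheme is the classic "readers do the work, one writer fences it off for good" pattern: conversions take the lock shared, and the stop path takes it exclusive and deliberately never releases it. A runnable userspace analogue with POSIX rwlocks:

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_rwlock_t conv_lock = PTHREAD_RWLOCK_INITIALIZER;

	static bool do_conversion(void)
	{
		if (pthread_rwlock_tryrdlock(&conv_lock) != 0)
			return false;		/* conversions already fenced off */
		/* ... flip the memory attributes here ... */
		pthread_rwlock_unlock(&conv_lock);
		return true;
	}

	/* Waits for in-flight conversions, then never unlocks: nothing
	 * new can start afterwards. */
	static void stop_conversions(void)
	{
		pthread_rwlock_wrlock(&conv_lock);
	}
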
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 8edd62206604..b433b1753016 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -216,7 +216,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
}
static const struct x86_cpu_id intel_mid_cpu_ids[] = {
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, NULL),
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, NULL),
{}
};
@@ -233,9 +233,9 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
return 0;
ret = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
- if (ret < 0) {
+ if (ret) {
dev_warn(&dev->dev, "Failed to read interrupt line: %d\n", ret);
- return ret;
+ return pcibios_err_to_errno(ret);
}
id = x86_match_cpu(intel_mid_cpu_ids);
@@ -243,7 +243,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
model = id->model;
switch (model) {
- case INTEL_FAM6_ATOM_SILVERMONT_MID:
+ case VFM_MODEL(INTEL_ATOM_SILVERMONT_MID):
polarity_low = false;
/* Special treatment for IRQ0 */
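
This hunk, and the xen.c and iosf_mbi.c hunks below, fix the same bug: the PCI config accessors return positive PCIBIOS_* codes, so the old "ret < 0" test never fired and a raw PCIBIOS code could leak to callers expecting -errno. The corrected pattern as a hypothetical helper:

	static int read_irq_line(struct pci_dev *dev, u8 *gsi)
	{
		int ret = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, gsi);

		/* PCIBIOS_SUCCESSFUL is 0; anything else maps to -errno. */
		return ret ? pcibios_err_to_errno(ret) : 0;
	}
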
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 652cd53e77f6..0f2fe524f60d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -38,10 +38,10 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
u8 gsi;
rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
- if (rc < 0) {
+ if (rc) {
dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
rc);
- return rc;
+ return pcibios_err_to_errno(rc);
}
/* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
pirq = gsi;
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 7be71c2cdc83..f83bbe0acd4a 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -22,6 +22,7 @@
#include <asm/mpspec_def.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
+#include <asm/cpu_device_id.h>
#include <asm/io_apic.h>
#include <asm/intel-mid.h>
#include <asm/io.h>
@@ -55,9 +56,8 @@ static void __init intel_mid_time_init(void)
static void intel_mid_arch_setup(void)
{
- switch (boot_cpu_data.x86_model) {
- case 0x3C:
- case 0x4A:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_ATOM_SILVERMONT_MID:
x86_platform.legacy.rtc = 1;
break;
default:
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index fdd49d70b437..c81cea208c2c 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -62,7 +62,7 @@ static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr)
fail_read:
dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
- return result;
+ return pcibios_err_to_errno(result);
}
static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr)
@@ -91,7 +91,7 @@ static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr)
fail_write:
dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
- return result;
+ return pcibios_err_to_errno(result);
}
int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 89df5d89d664..51655133eee3 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -9,6 +9,10 @@
#include <linux/cache.h>
#include <asm/syscall.h>
+extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long,
+ unsigned long, unsigned long,
+ unsigned long, unsigned long);
+
/*
* Below you can see, in terms of #define's, the differences between the x86-64
* and the UML syscall table.
@@ -22,15 +26,13 @@
#define sys_vm86 sys_ni_syscall
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+#define __SYSCALL_NORETURN __SYSCALL
#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_32.h>
+#undef __SYSCALL
-#undef __SYSCALL
#define __SYSCALL(nr, sym) sym,
-
-extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b0b4cfd2308c..943d414f2109 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -9,6 +9,10 @@
#include <linux/cache.h>
#include <asm/syscall.h>
+extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long,
+ unsigned long, unsigned long,
+ unsigned long, unsigned long);
+
/*
* Below you can see, in terms of #define's, the differences between the x86-64
* and the UML syscall table.
@@ -18,14 +22,13 @@
#define sys_iopl sys_ni_syscall
#define sys_ioperm sys_ni_syscall
+#define __SYSCALL_NORETURN __SYSCALL
+
#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
+#undef __SYSCALL
-#undef __SYSCALL
#define __SYSCALL(nr, sym) sym,
-
-extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
#include <asm/syscalls_64.h>
};
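
Both UML tables rely on the same double-expansion trick, now with __SYSCALL_NORETURN aliased to __SYSCALL so the no-return annotations in the generated headers collapse away: include the syscall list once to emit prototypes, redefine the macro, then include it again to emit the table. In miniature (syscall_list.h is a hypothetical stand-in for asm/syscalls_64.h):

	/* First pass: one extern prototype per entry. */
	#define __SYSCALL(nr, sym) \
		extern long sym(long, long, long, long, long, long);
	#include "syscall_list.h"
	#undef __SYSCALL

	/* Second pass: the same list becomes the pointer table. */
	typedef long (*syscall_fn)(long, long, long, long, long, long);

	#define __SYSCALL(nr, sym) sym,
	static const syscall_fn sys_call_table[] = {
	#include "syscall_list.h"
	};
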
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 0ae10535c699..0ce17766c0e5 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -120,7 +120,7 @@ static __init void snp_enable(void *arg)
bool snp_probe_rmptable_info(void)
{
- u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
+ u64 rmp_sz, rmp_base, rmp_end;
rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
rdmsrl(MSR_AMD64_RMP_END, rmp_end);
@@ -137,28 +137,11 @@ bool snp_probe_rmptable_info(void)
rmp_sz = rmp_end - rmp_base + 1;
- /*
- * Calculate the amount the memory that must be reserved by the BIOS to
- * address the whole RAM, including the bookkeeping area. The RMP itself
- * must also be covered.
- */
- max_rmp_pfn = max_pfn;
- if (PHYS_PFN(rmp_end) > max_pfn)
- max_rmp_pfn = PHYS_PFN(rmp_end);
-
- calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
-
- if (calc_rmp_sz > rmp_sz) {
- pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
- calc_rmp_sz, rmp_sz);
- return false;
- }
-
probed_rmp_base = rmp_base;
probed_rmp_size = rmp_sz;
pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
- probed_rmp_base, probed_rmp_base + probed_rmp_size - 1);
+ rmp_base, rmp_end);
return true;
}
@@ -206,9 +189,8 @@ void __init snp_fixup_e820_tables(void)
*/
static int __init snp_rmptable_init(void)
{
+ u64 max_rmp_pfn, calc_rmp_sz, rmptable_size, rmp_end, val;
void *rmptable_start;
- u64 rmptable_size;
- u64 val;
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
@@ -219,10 +201,28 @@ static int __init snp_rmptable_init(void)
if (!probed_rmp_size)
goto nosnp;
+ rmp_end = probed_rmp_base + probed_rmp_size - 1;
+
+ /*
+	 * Calculate the amount of memory that must be reserved by the BIOS to
+ * address the whole RAM, including the bookkeeping area. The RMP itself
+ * must also be covered.
+ */
+ max_rmp_pfn = max_pfn;
+ if (PFN_UP(rmp_end) > max_pfn)
+ max_rmp_pfn = PFN_UP(rmp_end);
+
+ calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
+ if (calc_rmp_sz > probed_rmp_size) {
+ pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+ calc_rmp_sz, probed_rmp_size);
+ goto nosnp;
+ }
+
rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
if (!rmptable_start) {
pr_err("Failed to map RMP table\n");
- return 1;
+ goto nosnp;
}
/*
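
Two things happen in the sev.c hunks: the RAM-coverage check moves out of the probe (which now only records base and size) into snp_rmptable_init(), and it switches from PHYS_PFN() to PFN_UP(). The latter is the subtle fix; paraphrasing <linux/pfn.h>:

	/* PHYS_PFN rounds down; PFN_UP rounds up, so an RMP end address
	 * inside a page still counts that page as needing coverage. */
	#define PHYS_PFN(x)	((unsigned long)((x) >> PAGE_SHIFT))
	#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
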
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 49a1c6890b55..4e2b2e2ac9f9 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -33,7 +33,7 @@
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/tdx.h>
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include "tdx.h"
@@ -1426,9 +1426,9 @@ static void __init check_tdx_erratum(void)
* private memory poisons that memory, and a subsequent read of
* that memory triggers #MC.
*/
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_EMERALDRAPIDS_X:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_SAPPHIRERAPIDS_X:
+ case INTEL_EMERALDRAPIDS_X:
setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
}
}
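
The tdx.c switch (and intel-mid.c above) works because x86_vfm packs vendor, family and model into one integer, so a single comparison replaces separate vendor/family/model checks. A sketch of the packing; the bit positions follow <asm/cpu_device_id.h> but should be treated as an assumption here:

	/* model in bits 0-7, family in 8-15, vendor in 16-23 (assumed). */
	#define VFM_MAKE(vendor, family, model) \
		(((vendor) << 16) | ((family) << 8) | (model))
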
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 08010dbf5e09..df275d554788 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -19,7 +19,7 @@
struct task_struct;
-static inline struct task_struct *get_current(void)
+static __always_inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}
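
Forcing these to inline matters because current_thread_info() derives its result from the live a1 stack-pointer register, which is only correct in the frame that asks; an out-of-line copy would compute it from its own stack. The same stack-masking idea as a generic C sketch (not xtensa asm):

	/* Assumes THREAD_SIZE-aligned stacks with thread_info at the base. */
	static __always_inline struct thread_info *ti_from_sp(unsigned long sp)
	{
		return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
	}
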
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 326db1c1d5d8..e0dffcc43b9e 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -91,7 +91,7 @@ struct thread_info {
}
/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
+static __always_inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
__asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t"
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index defc67909a9c..d6d2b533a574 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -263,6 +263,9 @@ static const struct proc_ops simdisk_proc_ops = {
static int __init simdisk_setup(struct simdisk *dev, int which,
struct proc_dir_entry *procdir)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
char tmp[2] = { '0' + which, 0 };
int err;
@@ -271,7 +274,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
spin_lock_init(&dev->lock);
dev->users = 0;
- dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE);
+ dev->gd = blk_alloc_disk(&lim, NUMA_NO_NODE);
if (IS_ERR(dev->gd)) {
err = PTR_ERR(dev->gd);
goto out;
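
The simdisk conversion follows the block-layer model where queue limits and features are supplied at allocation time rather than toggled afterwards; passing NULL previously meant all-default limits. The call pattern in isolation (simdisk_alloc_disk() is a hypothetical wrapper):

	static struct gendisk *simdisk_alloc_disk(void)
	{
		struct queue_limits lim = {
			.features = BLK_FEAT_ROTATIONAL,	/* spinning-disk semantics */
		};

		return blk_alloc_disk(&lim, NUMA_NO_NODE);
	}
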