Diffstat (limited to 'arch')
243 files changed, 4909 insertions, 6105 deletions
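The headline functional change is in the arch/arm/include/asm/cmpxchg.h hunk below: plain ARMv6 lacks the byte-wide exclusive accessors (ldrexb/strexb only arrived with ARMv6K), so the 1-byte __cmpxchg() case is now routed through the generic cmpxchg_emu_u8() helper, pulled in via the new ARCH_NEED_CMPXCHG_1_EMU Kconfig select. The helper's approach is to emulate a one-byte compare-and-exchange with a word-wide one on the aligned 32-bit word containing the byte. Below is a minimal userspace sketch of that idea, not part of the patch: it assumes little-endian byte order and uses the GCC/Clang __atomic builtin in place of the kernel's try_cmpxchg().

#include <stdint.h>

static uint8_t cmpxchg_u8_sketch(volatile uint8_t *ptr, uint8_t old, uint8_t new)
{
	/* Round down to the containing 32-bit word and locate the byte in it. */
	volatile uint32_t *word = (volatile uint32_t *)((uintptr_t)ptr & ~(uintptr_t)3);
	unsigned int shift = ((uintptr_t)ptr & 3) * 8;	/* little-endian layout */
	uint32_t mask = (uint32_t)0xff << shift;
	uint32_t cur = *word;

	for (;;) {
		uint8_t cur_byte = (uint8_t)((cur & mask) >> shift);

		/* The target byte no longer holds 'old': fail like cmpxchg does. */
		if (cur_byte != old)
			return cur_byte;

		uint32_t desired = (cur & ~mask) | ((uint32_t)new << shift);

		/* 32-bit CAS; on failure 'cur' is refreshed and the loop retries. */
		if (__atomic_compare_exchange_n(word, &cur, desired, 0,
						__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
			return old;
	}
}

Note that the word-wide CAS can fail merely because a neighbouring byte in the same word changed; in that case the loop retries, and only a change of the target byte itself terminates with a mismatch.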
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ee5115252aac..a867a7d967aa 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -34,6 +34,7 @@ config ARM select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 + select ARCH_NEED_CMPXCHG_1_EMU if CPU_V6 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE diff --git a/arch/arm/boot/dts/rockchip/rk3066a.dtsi b/arch/arm/boot/dts/rockchip/rk3066a.dtsi index a4e815cd6707..5e0750547ab5 100644 --- a/arch/arm/boot/dts/rockchip/rk3066a.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3066a.dtsi @@ -144,6 +144,7 @@ pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>; power-domains = <&power RK3066_PD_VIO>; rockchip,grf = <&grf>; + #sound-dai-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index 44667bdb4707..9beb64d30586 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -5,6 +5,7 @@ #include <linux/irqflags.h> #include <linux/prefetch.h> #include <asm/barrier.h> +#include <linux/cmpxchg-emu.h> #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) /* @@ -162,7 +163,11 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, prefetchw((const void *)ptr); switch (size) { -#ifndef CONFIG_CPU_V6 /* min ARCH >= ARMv6K */ +#ifdef CONFIG_CPU_V6 /* ARCH == ARMv6 */ + case 1: + oldval = cmpxchg_emu_u8((volatile u8 *)ptr, old, new); + break; +#else /* min ARCH > ARMv6 */ case 1: do { asm volatile("@ __cmpxchg1\n" diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 89a77e3f51d2..aaae31b8c4a5 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -78,8 +78,6 @@ obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \ - perf_event_v7.o AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_VDSO) += vdso.o diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c deleted file mode 100644 index d9fd53841591..000000000000 --- a/arch/arm/kernel/perf_event_v6.c +++ /dev/null @@ -1,448 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARMv6 Performance counter handling code. - * - * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. 
- * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) - -#include <asm/cputype.h> -#include <asm/irq_regs.h> - -#include <linux/of.h> -#include <linux/perf/arm_pmu.h> -#include <linux/platform_device.h> - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 0, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. - */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - - /* - * The ARM performance counters can count micro DTLB misses, micro ITLB - * misses and main TLB misses. There isn't an event for TLB misses, so - * use the micro misses here and if users want the main TLB misses they - * can use a raw counter. 
- */ - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u64 armv6pmu_read_counter(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void armv6pmu_write_counter(struct perf_event *event, u64 value) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -static void armv6pmu_enable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - 
WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); -} - -static irqreturn_t -armv6pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - /* Ignore if we don't have an event. */ - if (!event) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv6pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); -} - -static void armv6pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); -} - -static int -armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) - return ARMV6_COUNTER1; - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) - return ARMV6_COUNTER0; - - /* The counters are all in use. 
*/ - return -EAGAIN; - } -} - -static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - clear_bit(event->hw.idx, cpuc->used_mask); -} - -static void armv6pmu_disable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); -} - -static int armv6_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv6_perf_map, - &armv6_perf_cache_map, 0xFF); -} - -static void armv6pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->handle_irq = armv6pmu_handle_irq; - cpu_pmu->enable = armv6pmu_enable_event; - cpu_pmu->disable = armv6pmu_disable_event; - cpu_pmu->read_counter = armv6pmu_read_counter; - cpu_pmu->write_counter = armv6pmu_write_counter; - cpu_pmu->get_event_idx = armv6pmu_get_event_idx; - cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; - cpu_pmu->start = armv6pmu_start; - cpu_pmu->stop = armv6pmu_stop; - cpu_pmu->map_event = armv6_map_event; - cpu_pmu->num_events = 3; -} - -static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv6pmu_init(cpu_pmu); - cpu_pmu->name = "armv6_1136"; - return 0; -} - -static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv6pmu_init(cpu_pmu); - cpu_pmu->name = "armv6_1156"; - return 0; -} - -static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv6pmu_init(cpu_pmu); - cpu_pmu->name = "armv6_1176"; - return 0; -} - -static const struct of_device_id armv6_pmu_of_device_ids[] = { - {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, - { /* sentinel value */ } -}; - -static const struct pmu_probe_info armv6_pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), - { /* sentinel value */ } -}; - -static int armv6_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, - armv6_pmu_probe_table); -} - -static struct platform_driver armv6_pmu_driver = { - .driver = { - .name = "armv6-pmu", - .of_match_table = armv6_pmu_of_device_ids, - }, - .probe = armv6_pmu_device_probe, -}; - -builtin_platform_driver(armv6_pmu_driver); -#endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c deleted file mode 100644 index a3322e2b3ea4..000000000000 --- a/arch/arm/kernel/perf_event_v7.c +++ /dev/null @@ -1,2005 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * ARMv7 support: Jean Pihet <jpihet@mvista.com> - * 2010 (c) MontaVista Software, LLC. 
- * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -#ifdef CONFIG_CPU_V7 - -#include <asm/cp15.h> -#include <asm/cputype.h> -#include <asm/irq_regs.h> -#include <asm/vfp.h> -#include "../vfp/vfpinstr.h" - -#include <linux/of.h> -#include <linux/perf/arm_pmu.h> -#include <linux/platform_device.h> - -/* - * Common ARMv7 event types - * - * Note: An implementation may not be able to count all of these events - * but the encodings are considered to be `reserved' in the case that - * they are not available. - */ -#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00 -#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01 -#define ARMV7_PERFCTR_ITLB_REFILL 0x02 -#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03 -#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04 -#define ARMV7_PERFCTR_DTLB_REFILL 0x05 -#define ARMV7_PERFCTR_MEM_READ 0x06 -#define ARMV7_PERFCTR_MEM_WRITE 0x07 -#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08 -#define ARMV7_PERFCTR_EXC_TAKEN 0x09 -#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A -#define ARMV7_PERFCTR_CID_WRITE 0x0B - -/* - * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all (taken) branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ -#define ARMV7_PERFCTR_PC_WRITE 0x0C -#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D -#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E -#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F -#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10 -#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11 -#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12 - -/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). 
*/ -#define ARMV7_PERFCTR_MEM_ACCESS 0x13 -#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14 -#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15 -#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16 -#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17 -#define ARMV7_PERFCTR_L2_CACHE_WB 0x18 -#define ARMV7_PERFCTR_BUS_ACCESS 0x19 -#define ARMV7_PERFCTR_MEM_ERROR 0x1A -#define ARMV7_PERFCTR_INSTR_SPEC 0x1B -#define ARMV7_PERFCTR_TTBR_WRITE 0x1C -#define ARMV7_PERFCTR_BUS_CYCLES 0x1D - -#define ARMV7_PERFCTR_CPU_CYCLES 0xFF - -/* ARMv7 Cortex-A8 specific event types */ -#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43 -#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44 -#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50 -#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56 - -/* ARMv7 Cortex-A9 specific event types */ -#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68 -#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60 -#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66 - -/* ARMv7 Cortex-A5 specific event types */ -#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2 -#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3 - -/* ARMv7 Cortex-A15 specific event types */ -#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 -#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 -#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42 -#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43 - -#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C -#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D - -#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50 -#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 -#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52 -#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53 - -#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76 - -/* ARMv7 Cortex-A12 specific event types */ -#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 -#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 - -#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50 -#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 - -#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76 - -#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7 - -/* ARMv7 Krait specific event types */ -#define KRAIT_PMRESR0_GROUP0 0xcc -#define KRAIT_PMRESR1_GROUP0 0xd0 -#define KRAIT_PMRESR2_GROUP0 0xd4 -#define KRAIT_VPMRESR0_GROUP0 0xd8 - -#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011 -#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010 - -#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222 -#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210 - -/* ARMv7 Scorpion specific event types */ -#define SCORPION_LPM0_GROUP0 0x4c -#define SCORPION_LPM1_GROUP0 0x50 -#define SCORPION_LPM2_GROUP0 0x54 -#define SCORPION_L2LPM_GROUP0 0x58 -#define SCORPION_VLPM_GROUP0 0x5c - -#define SCORPION_ICACHE_ACCESS 0x10053 -#define SCORPION_ICACHE_MISS 0x10052 - -#define SCORPION_DTLB_ACCESS 0x12013 -#define SCORPION_DTLB_MISS 0x12012 - -#define SCORPION_ITLB_MISS 0x12021 - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. 
- */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. - */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. 
- */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A5 HW events mapping - */ -static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - /* - * The prefetch counters don't differentiate between the I side and the - * D side. 
- */ - [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, - [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A15 HW events mapping - */ -static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE, - - /* - * Not all performance counters differentiate between read and write - * accesses/misses so we're not always strictly correct, but it's the - * best we can do. Writes and reads get combined in these cases. 
- */ - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A7 HW events mapping - */ -static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. 
- */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Cortex-A12 HW events mapping - */ -static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - /* - * Not all performance counters differentiate between read and write - * accesses/misses so we're not always strictly correct, but it's the - * best we can do. Writes and reads get combined in these cases. 
- */ - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Krait HW events mapping - */ -static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. 
- */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS, - - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, - - [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, - [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* - * Scorpion HW events mapping - */ -static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - /* - * The performance counters don't differentiate between read and write - * accesses/misses so this isn't strictly correct, but it's the best we - * can do. Writes and reads get combined. - */ - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS, - /* - * Only ITLB misses and DTLB refills are supported. If users want the - * DTLB refills misses a raw counter must be used. 
- */ - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -PMU_FORMAT_ATTR(event, "config:0-7"); - -static struct attribute *armv7_pmu_format_attrs[] = { - &format_attr_event.attr, - NULL, -}; - -static struct attribute_group armv7_pmu_format_attr_group = { - .name = "format", - .attrs = armv7_pmu_format_attrs, -}; - -#define ARMV7_EVENT_ATTR_RESOLVE(m) #m -#define ARMV7_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \ - "event=" ARMV7_EVENT_ATTR_RESOLVE(config)) - -ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR); -ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL); -ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL); -ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL); -ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS); -ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL); -ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ); -ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE); -ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED); -ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN); -ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED); -ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE); -ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE); -ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH); -ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN); -ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS); -ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED); -ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES); -ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED); - -static struct attribute *armv7_pmuv1_event_attrs[] = { - &armv7_event_attr_sw_incr.attr.attr, - &armv7_event_attr_l1i_cache_refill.attr.attr, - &armv7_event_attr_l1i_tlb_refill.attr.attr, - &armv7_event_attr_l1d_cache_refill.attr.attr, - &armv7_event_attr_l1d_cache.attr.attr, - &armv7_event_attr_l1d_tlb_refill.attr.attr, - &armv7_event_attr_ld_retired.attr.attr, - &armv7_event_attr_st_retired.attr.attr, - &armv7_event_attr_inst_retired.attr.attr, - &armv7_event_attr_exc_taken.attr.attr, - &armv7_event_attr_exc_return.attr.attr, - &armv7_event_attr_cid_write_retired.attr.attr, - &armv7_event_attr_pc_write_retired.attr.attr, - &armv7_event_attr_br_immed_retired.attr.attr, - &armv7_event_attr_br_return_retired.attr.attr, - &armv7_event_attr_unaligned_ldst_retired.attr.attr, - &armv7_event_attr_br_mis_pred.attr.attr, - &armv7_event_attr_cpu_cycles.attr.attr, - &armv7_event_attr_br_pred.attr.attr, - NULL, -}; - -static struct attribute_group armv7_pmuv1_events_attr_group = { - .name = "events", - .attrs = armv7_pmuv1_event_attrs, -}; - -ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); -ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); -ARMV7_EVENT_ATTR(l1d_cache_wb, 
ARMV7_PERFCTR_L1_DCACHE_WB); -ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS); -ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL); -ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB); -ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS); -ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR); -ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); -ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); -ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); - -static struct attribute *armv7_pmuv2_event_attrs[] = { - &armv7_event_attr_sw_incr.attr.attr, - &armv7_event_attr_l1i_cache_refill.attr.attr, - &armv7_event_attr_l1i_tlb_refill.attr.attr, - &armv7_event_attr_l1d_cache_refill.attr.attr, - &armv7_event_attr_l1d_cache.attr.attr, - &armv7_event_attr_l1d_tlb_refill.attr.attr, - &armv7_event_attr_ld_retired.attr.attr, - &armv7_event_attr_st_retired.attr.attr, - &armv7_event_attr_inst_retired.attr.attr, - &armv7_event_attr_exc_taken.attr.attr, - &armv7_event_attr_exc_return.attr.attr, - &armv7_event_attr_cid_write_retired.attr.attr, - &armv7_event_attr_pc_write_retired.attr.attr, - &armv7_event_attr_br_immed_retired.attr.attr, - &armv7_event_attr_br_return_retired.attr.attr, - &armv7_event_attr_unaligned_ldst_retired.attr.attr, - &armv7_event_attr_br_mis_pred.attr.attr, - &armv7_event_attr_cpu_cycles.attr.attr, - &armv7_event_attr_br_pred.attr.attr, - &armv7_event_attr_mem_access.attr.attr, - &armv7_event_attr_l1i_cache.attr.attr, - &armv7_event_attr_l1d_cache_wb.attr.attr, - &armv7_event_attr_l2d_cache.attr.attr, - &armv7_event_attr_l2d_cache_refill.attr.attr, - &armv7_event_attr_l2d_cache_wb.attr.attr, - &armv7_event_attr_bus_access.attr.attr, - &armv7_event_attr_memory_error.attr.attr, - &armv7_event_attr_inst_spec.attr.attr, - &armv7_event_attr_ttbr_write_retired.attr.attr, - &armv7_event_attr_bus_cycles.attr.attr, - NULL, -}; - -static struct attribute_group armv7_pmuv2_events_attr_group = { - .name = "events", - .attrs = armv7_pmuv2_event_attrs, -}; - -/* - * Perf Events' indices - */ -#define ARMV7_IDX_CYCLE_COUNTER 0 -#define ARMV7_IDX_COUNTER0 1 -#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) - -#define ARMV7_MAX_COUNTERS 32 -#define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Perf Event to low level counters mapping - */ -#define ARMV7_IDX_TO_COUNTER(x) \ - (((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK) - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -/* - * PMXEVTYPER: Event selection reg - */ -#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ -#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ - -/* - * Event filters for PMUv2 - */ -#define ARMV7_EXCLUDE_PL1 BIT(31) -#define ARMV7_EXCLUDE_USER BIT(30) -#define ARMV7_INCLUDE_HYP BIT(27) - 
-/* - * Secure debug enable reg - */ -#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */ - -static inline u32 armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(u32 val) -{ - val &= ARMV7_PMNC_MASK; - isb(); - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(u32 pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV7_IDX_CYCLE_COUNTER && - idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); -} - -static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) -{ - return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); -} - -static inline void armv7_pmnc_select_counter(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); - isb(); -} - -static inline u64 armv7pmu_read_counter(struct perf_event *event) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - u32 value = 0; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - } else { - armv7_pmnc_select_counter(idx); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); - } - - return value; -} - -static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value)); - } else { - armv7_pmnc_select_counter(idx); - asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value)); - } -} - -static inline void armv7_pmnc_write_evtsel(int idx, u32 val) -{ - armv7_pmnc_select_counter(idx); - val &= ARMV7_EVTYPE_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); -} - -static inline void armv7_pmnc_enable_counter(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); -} - -static inline void armv7_pmnc_disable_counter(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); -} - -static inline void armv7_pmnc_enable_intens(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); -} - -static inline void armv7_pmnc_disable_intens(int idx) -{ - u32 counter = ARMV7_IDX_TO_COUNTER(idx); - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); - isb(); - /* Clear the overflow flag in case an interrupt is pending. 
*/ - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); - isb(); -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) -{ - u32 val; - unsigned int cnt; - - pr_info("PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - pr_info("PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - pr_info("CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - pr_info("INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - pr_info("FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - pr_info("SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - pr_info("CCNT =0x%08x\n", val); - - for (cnt = ARMV7_IDX_COUNTER0; - cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - pr_info("CNT[%d] count =0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - pr_info("CNT[%d] evtsel=0x%08x\n", - ARMV7_IDX_TO_COUNTER(cnt), val); - } -} -#endif - -static void armv7pmu_enable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - int idx = hwc->idx; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return; - } - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We only need to set the event for the cycle counter if we - * have the ability to perform event filtering. - */ - if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); -} - -static void armv7pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - int idx = hwc->idx; - - if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { - pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return; - } - - /* - * Disable counter and interrupt - */ - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); -} - -static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - u32 pmnc; - struct perf_sample_data data; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? 
- */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - /* Ignore if we don't have an event. */ - if (!event) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(struct arm_pmu *cpu_pmu) -{ - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); -} - -static void armv7pmu_stop(struct arm_pmu *cpu_pmu) -{ - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); -} - -static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; - - /* Always place a cycle counter into the cycle counter. */ - if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_IDX_CYCLE_COUNTER; - } - - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; -} - -static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - clear_bit(event->hw.idx, cpuc->used_mask); -} - -/* - * Add an event filter to a given event. This will only work for PMUv2 PMUs. - */ -static int armv7pmu_set_event_filter(struct hw_perf_event *event, - struct perf_event_attr *attr) -{ - unsigned long config_base = 0; - - if (attr->exclude_idle) { - pr_debug("ARM performance counters do not support mode exclusion\n"); - return -EOPNOTSUPP; - } - if (attr->exclude_user) - config_base |= ARMV7_EXCLUDE_USER; - if (attr->exclude_kernel) - config_base |= ARMV7_EXCLUDE_PL1; - if (!attr->exclude_hv) - config_base |= ARMV7_INCLUDE_HYP; - - /* - * Install the filter into config_base as this is used to - * construct the event type. - */ - event->config_base = config_base; - - return 0; -} - -static void armv7pmu_reset(void *info) -{ - struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events, val; - - if (cpu_pmu->secure_access) { - asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val)); - val |= ARMV7_SDER_SUNIDEN; - asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val)); - } - - /* The counter and interrupt enable registers are unknown at reset. 
*/ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { - armv7_pmnc_disable_counter(idx); - armv7_pmnc_disable_intens(idx); - } - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); -} - -static int armv7_a8_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a8_perf_map, - &armv7_a8_perf_cache_map, 0xFF); -} - -static int armv7_a9_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a9_perf_map, - &armv7_a9_perf_cache_map, 0xFF); -} - -static int armv7_a5_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a5_perf_map, - &armv7_a5_perf_cache_map, 0xFF); -} - -static int armv7_a15_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a15_perf_map, - &armv7_a15_perf_cache_map, 0xFF); -} - -static int armv7_a7_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a7_perf_map, - &armv7_a7_perf_cache_map, 0xFF); -} - -static int armv7_a12_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv7_a12_perf_map, - &armv7_a12_perf_cache_map, 0xFF); -} - -static int krait_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &krait_perf_map, - &krait_perf_cache_map, 0xFFFFF); -} - -static int krait_map_event_no_branch(struct perf_event *event) -{ - return armpmu_map_event(event, &krait_perf_map_no_branch, - &krait_perf_cache_map, 0xFFFFF); -} - -static int scorpion_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &scorpion_perf_map, - &scorpion_perf_cache_map, 0xFFFFF); -} - -static void armv7pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->handle_irq = armv7pmu_handle_irq; - cpu_pmu->enable = armv7pmu_enable_event; - cpu_pmu->disable = armv7pmu_disable_event; - cpu_pmu->read_counter = armv7pmu_read_counter; - cpu_pmu->write_counter = armv7pmu_write_counter; - cpu_pmu->get_event_idx = armv7pmu_get_event_idx; - cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; - cpu_pmu->start = armv7pmu_start; - cpu_pmu->stop = armv7pmu_stop; - cpu_pmu->reset = armv7pmu_reset; -}; - -static void armv7_read_num_pmnc_events(void *info) -{ - int *nb_cnt = info; - - /* Read the nb of CNTx counters supported from PMNC */ - *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter */ - *nb_cnt += 1; -} - -static int armv7_probe_num_events(struct arm_pmu *arm_pmu) -{ - return smp_call_function_any(&arm_pmu->supported_cpus, - armv7_read_num_pmnc_events, - &arm_pmu->num_events, 1); -} - -static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a8"; - cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv1_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a9"; - cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv1_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a5"; - cpu_pmu->map_event = armv7_a5_map_event; - 
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv1_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a15"; - cpu_pmu->map_event = armv7_a15_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a7"; - cpu_pmu->map_event = armv7_a7_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a12"; - cpu_pmu->map_event = armv7_a12_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return armv7_probe_num_events(cpu_pmu); -} - -static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) -{ - int ret = armv7_a12_pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_cortex_a17"; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv7_pmuv2_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv7_pmu_format_attr_group; - return ret; -} - -/* - * Krait Performance Monitor Region Event Selection Register (PMRESRn) - * - * 31 30 24 16 8 0 - * +--------------------------------+ - * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0 - * +--------------------------------+ - * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1 - * +--------------------------------+ - * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2 - * +--------------------------------+ - * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ? - * +--------------------------------+ - * EN | G=3 | G=2 | G=1 | G=0 - * - * Event Encoding: - * - * hwc->config_base = 0xNRCCG - * - * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR) - * R = region register - * CC = class of events the group G is choosing from - * G = group or particular event - * - * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2 - * - * A region (R) corresponds to a piece of the CPU (execution unit, instruction - * unit, etc.) while the event code (CC) corresponds to a particular class of - * events (interrupts for example). An event code is broken down into - * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for - * example). 
- */ - -#define KRAIT_EVENT (1 << 16) -#define VENUM_EVENT (2 << 16) -#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) -#define PMRESRn_EN BIT(31) - -#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ -#define EVENT_GROUP(event) ((event) & 0xf) /* G */ -#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ -#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ -#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ - -static u32 krait_read_pmresrn(int n) -{ - u32 val; - - switch (n) { - case 0: - asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val)); - break; - case 1: - asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val)); - break; - case 2: - asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val)); - break; - default: - BUG(); /* Should be validated in krait_pmu_get_event_idx() */ - } - - return val; -} - -static void krait_write_pmresrn(int n, u32 val) -{ - switch (n) { - case 0: - asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val)); - break; - case 1: - asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val)); - break; - case 2: - asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val)); - break; - default: - BUG(); /* Should be validated in krait_pmu_get_event_idx() */ - } -} - -static u32 venum_read_pmresr(void) -{ - u32 val; - asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); - return val; -} - -static void venum_write_pmresr(u32 val) -{ - asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); -} - -static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) -{ - u32 venum_new_val; - u32 fp_new_val; - - BUG_ON(preemptible()); - /* CPACR Enable CP10 and CP11 access */ - *venum_orig_val = get_copro_access(); - venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11); - set_copro_access(venum_new_val); - - /* Enable FPEXC */ - *fp_orig_val = fmrx(FPEXC); - fp_new_val = *fp_orig_val | FPEXC_EN; - fmxr(FPEXC, fp_new_val); -} - -static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) -{ - BUG_ON(preemptible()); - /* Restore FPEXC */ - fmxr(FPEXC, fp_orig_val); - isb(); - /* Restore CPACR */ - set_copro_access(venum_orig_val); -} - -static u32 krait_get_pmresrn_event(unsigned int region) -{ - static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0, - KRAIT_PMRESR1_GROUP0, - KRAIT_PMRESR2_GROUP0 }; - return pmresrn_table[region]; -} - -static void krait_evt_setup(int idx, u32 config_base) -{ - u32 val; - u32 mask; - u32 vval, fval; - unsigned int region = EVENT_REGION(config_base); - unsigned int group = EVENT_GROUP(config_base); - unsigned int code = EVENT_CODE(config_base); - unsigned int group_shift; - bool venum_event = EVENT_VENUM(config_base); - - group_shift = group * 8; - mask = 0xff << group_shift; - - /* Configure evtsel for the region and group */ - if (venum_event) - val = KRAIT_VPMRESR0_GROUP0; - else - val = krait_get_pmresrn_event(region); - val += group; - /* Mix in mode-exclusion bits */ - val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); - armv7_pmnc_write_evtsel(idx, val); - - if (venum_event) { - venum_pre_pmresr(&vval, &fval); - val = venum_read_pmresr(); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - venum_write_pmresr(val); - venum_post_pmresr(vval, fval); - } else { - val = krait_read_pmresrn(region); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - krait_write_pmresrn(region, val); - } -} - -static u32 clear_pmresrn_group(u32 val, int group) -{ - u32 mask; - int group_shift; - - group_shift = group * 8; - mask = 0xff << group_shift; 
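/*
 * To make the 0xNRCCG encoding concrete, a standalone decoder built from
 * the same shifts as the EVENT_*() macros above (illustrative sketch
 * only, not part of the driver):
 */
#include <stdio.h>

static void krait_decode(unsigned int event)
{
	unsigned int prefix = (event >> 16) & 0xf;	/* N  */
	unsigned int region = (event >> 12) & 0xf;	/* R  */
	unsigned int code   = (event >> 4) & 0xff;	/* CC */
	unsigned int group  = event & 0xf;		/* G  */

	printf("%s: region %u, code 0x%x, group %u\n",
	       prefix == 2 ? "venum" : "krait", region, code, group);
}

int main(void)
{
	/* matches the worked example above: PMRESR2, group 1, code 2 */
	krait_decode(0x12021);
	return 0;
}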
-	val &= ~mask;
-
-	/* Don't clear enable bit if entire region isn't disabled */
-	if (val & ~PMRESRn_EN)
-		return val |= PMRESRn_EN;
-
-	return 0;
-}
-
-static void krait_clearpmu(u32 config_base)
-{
-	u32 val;
-	u32 vval, fval;
-	unsigned int region = EVENT_REGION(config_base);
-	unsigned int group = EVENT_GROUP(config_base);
-	bool venum_event = EVENT_VENUM(config_base);
-
-	if (venum_event) {
-		venum_pre_pmresr(&vval, &fval);
-		val = venum_read_pmresr();
-		val = clear_pmresrn_group(val, group);
-		venum_write_pmresr(val);
-		venum_post_pmresr(vval, fval);
-	} else {
-		val = krait_read_pmresrn(region);
-		val = clear_pmresrn_group(val, group);
-		krait_write_pmresrn(region, val);
-	}
-}
-
-static void krait_pmu_disable_event(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	/* Disable counter and interrupt */
-
-	/* Disable counter */
-	armv7_pmnc_disable_counter(idx);
-
-	/*
-	 * Clear pmresr code (if destined for PMNx counters)
-	 */
-	if (hwc->config_base & KRAIT_EVENT_MASK)
-		krait_clearpmu(hwc->config_base);
-
-	/* Disable interrupt for this counter */
-	armv7_pmnc_disable_intens(idx);
-}
-
-static void krait_pmu_enable_event(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	/*
-	 * Enable counter and interrupt, and set the counter to count
-	 * the event that we're interested in.
-	 */
-
-	/* Disable counter */
-	armv7_pmnc_disable_counter(idx);
-
-	/*
-	 * Set event (if destined for PMNx counters)
-	 * We set the event for the cycle counter because we
-	 * have the ability to perform event filtering.
-	 */
-	if (hwc->config_base & KRAIT_EVENT_MASK)
-		krait_evt_setup(idx, hwc->config_base);
-	else
-		armv7_pmnc_write_evtsel(idx, hwc->config_base);
-
-	/* Enable interrupt for this counter */
-	armv7_pmnc_enable_intens(idx);
-
-	/* Enable counter */
-	armv7_pmnc_enable_counter(idx);
-}
-
-static void krait_pmu_reset(void *info)
-{
-	u32 vval, fval;
-	struct arm_pmu *cpu_pmu = info;
-	u32 idx, nb_cnt = cpu_pmu->num_events;
-
-	armv7pmu_reset(info);
-
-	/* Clear all pmresrs */
-	krait_write_pmresrn(0, 0);
-	krait_write_pmresrn(1, 0);
-	krait_write_pmresrn(2, 0);
-
-	venum_pre_pmresr(&vval, &fval);
-	venum_write_pmresr(0);
-	venum_post_pmresr(vval, fval);
-
-	/* Reset PMxEVCNTCR to sane default */
-	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
-		armv7_pmnc_select_counter(idx);
-		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
-	}
-
-}
-
-static int krait_event_to_bit(struct perf_event *event, unsigned int region,
-			      unsigned int group)
-{
-	int bit;
-	struct hw_perf_event *hwc = &event->hw;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-
-	if (hwc->config_base & VENUM_EVENT)
-		bit = KRAIT_VPMRESR0_GROUP0;
-	else
-		bit = krait_get_pmresrn_event(region);
-	bit -= krait_get_pmresrn_event(0);
-	bit += group;
-	/*
-	 * Lower bits are reserved for use by the counters (see
-	 * armv7pmu_get_event_idx() for more info)
-	 */
-	bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
-
-	return bit;
-}
-
-/*
- * We check for column exclusion constraints here.
- * Two events can't use the same group within a pmresr register.
- */ -static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - int bit = -1; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int code = EVENT_CODE(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool krait_event = EVENT_CPU(hwc->config_base); - - if (venum_event || krait_event) { - /* Ignore invalid events */ - if (group > 3 || region > 2) - return -EINVAL; - if (venum_event && (code & 0xe0)) - return -EINVAL; - - bit = krait_event_to_bit(event, region, group); - if (test_and_set_bit(bit, cpuc->used_mask)) - return -EAGAIN; - } - - idx = armv7pmu_get_event_idx(cpuc, event); - if (idx < 0 && bit >= 0) - clear_bit(bit, cpuc->used_mask); - - return idx; -} - -static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int bit; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool krait_event = EVENT_CPU(hwc->config_base); - - armv7pmu_clear_event_idx(cpuc, event); - if (venum_event || krait_event) { - bit = krait_event_to_bit(event, region, group); - clear_bit(bit, cpuc->used_mask); - } -} - -static int krait_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_krait"; - /* Some early versions of Krait don't support PC write events */ - if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node, - "qcom,no-pc-write")) - cpu_pmu->map_event = krait_map_event_no_branch; - else - cpu_pmu->map_event = krait_map_event; - cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->reset = krait_pmu_reset; - cpu_pmu->enable = krait_pmu_enable_event; - cpu_pmu->disable = krait_pmu_disable_event; - cpu_pmu->get_event_idx = krait_pmu_get_event_idx; - cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; - return armv7_probe_num_events(cpu_pmu); -} - -/* - * Scorpion Local Performance Monitor Register (LPMn) - * - * 31 30 24 16 8 0 - * +--------------------------------+ - * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 - * +--------------------------------+ - * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 - * +--------------------------------+ - * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 - * +--------------------------------+ - * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 - * +--------------------------------+ - * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? - * +--------------------------------+ - * EN | G=3 | G=2 | G=1 | G=0 - * - * - * Event Encoding: - * - * hwc->config_base = 0xNRCCG - * - * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) - * R = region register - * CC = class of events the group G is choosing from - * G = group or particular event - * - * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 - * - * A region (R) corresponds to a piece of the CPU (execution unit, instruction - * unit, etc.) while the event code (CC) corresponds to a particular class of - * events (interrupts for example). An event code is broken down into - * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for - * example). 
- */ - -static u32 scorpion_read_pmresrn(int n) -{ - u32 val; - - switch (n) { - case 0: - asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); - break; - case 1: - asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); - break; - case 2: - asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); - break; - case 3: - asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); - break; - default: - BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ - } - - return val; -} - -static void scorpion_write_pmresrn(int n, u32 val) -{ - switch (n) { - case 0: - asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); - break; - case 1: - asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); - break; - case 2: - asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); - break; - case 3: - asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); - break; - default: - BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ - } -} - -static u32 scorpion_get_pmresrn_event(unsigned int region) -{ - static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0, - SCORPION_LPM1_GROUP0, - SCORPION_LPM2_GROUP0, - SCORPION_L2LPM_GROUP0 }; - return pmresrn_table[region]; -} - -static void scorpion_evt_setup(int idx, u32 config_base) -{ - u32 val; - u32 mask; - u32 vval, fval; - unsigned int region = EVENT_REGION(config_base); - unsigned int group = EVENT_GROUP(config_base); - unsigned int code = EVENT_CODE(config_base); - unsigned int group_shift; - bool venum_event = EVENT_VENUM(config_base); - - group_shift = group * 8; - mask = 0xff << group_shift; - - /* Configure evtsel for the region and group */ - if (venum_event) - val = SCORPION_VLPM_GROUP0; - else - val = scorpion_get_pmresrn_event(region); - val += group; - /* Mix in mode-exclusion bits */ - val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); - armv7_pmnc_write_evtsel(idx, val); - - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - - if (venum_event) { - venum_pre_pmresr(&vval, &fval); - val = venum_read_pmresr(); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - venum_write_pmresr(val); - venum_post_pmresr(vval, fval); - } else { - val = scorpion_read_pmresrn(region); - val &= ~mask; - val |= code << group_shift; - val |= PMRESRn_EN; - scorpion_write_pmresrn(region, val); - } -} - -static void scorpion_clearpmu(u32 config_base) -{ - u32 val; - u32 vval, fval; - unsigned int region = EVENT_REGION(config_base); - unsigned int group = EVENT_GROUP(config_base); - bool venum_event = EVENT_VENUM(config_base); - - if (venum_event) { - venum_pre_pmresr(&vval, &fval); - val = venum_read_pmresr(); - val = clear_pmresrn_group(val, group); - venum_write_pmresr(val); - venum_post_pmresr(vval, fval); - } else { - val = scorpion_read_pmresrn(region); - val = clear_pmresrn_group(val, group); - scorpion_write_pmresrn(region, val); - } -} - -static void scorpion_pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - /* Disable counter and interrupt */ - - /* Disable counter */ - armv7_pmnc_disable_counter(idx); - - /* - * Clear pmresr code (if destined for PMNx counters) - */ - if (hwc->config_base & KRAIT_EVENT_MASK) - scorpion_clearpmu(hwc->config_base); - - /* Disable interrupt for this counter */ - armv7_pmnc_disable_intens(idx); -} - -static void scorpion_pmu_enable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - /* - * Enable counter and interrupt, and set the counter to count - * 
the event that we're interested in.
-	 */
-
-	/* Disable counter */
-	armv7_pmnc_disable_counter(idx);
-
-	/*
-	 * Set event (if destined for PMNx counters)
-	 * We don't set the event for the cycle counter because we
-	 * don't have the ability to perform event filtering.
-	 */
-	if (hwc->config_base & KRAIT_EVENT_MASK)
-		scorpion_evt_setup(idx, hwc->config_base);
-	else if (idx != ARMV7_IDX_CYCLE_COUNTER)
-		armv7_pmnc_write_evtsel(idx, hwc->config_base);
-
-	/* Enable interrupt for this counter */
-	armv7_pmnc_enable_intens(idx);
-
-	/* Enable counter */
-	armv7_pmnc_enable_counter(idx);
-}
-
-static void scorpion_pmu_reset(void *info)
-{
-	u32 vval, fval;
-	struct arm_pmu *cpu_pmu = info;
-	u32 idx, nb_cnt = cpu_pmu->num_events;
-
-	armv7pmu_reset(info);
-
-	/* Clear all pmresrs */
-	scorpion_write_pmresrn(0, 0);
-	scorpion_write_pmresrn(1, 0);
-	scorpion_write_pmresrn(2, 0);
-	scorpion_write_pmresrn(3, 0);
-
-	venum_pre_pmresr(&vval, &fval);
-	venum_write_pmresr(0);
-	venum_post_pmresr(vval, fval);
-
-	/* Reset PMxEVCNTCR to sane default */
-	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
-		armv7_pmnc_select_counter(idx);
-		asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0));
-	}
-}
-
-static int scorpion_event_to_bit(struct perf_event *event, unsigned int region,
-				 unsigned int group)
-{
-	int bit;
-	struct hw_perf_event *hwc = &event->hw;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-
-	if (hwc->config_base & VENUM_EVENT)
-		bit = SCORPION_VLPM_GROUP0;
-	else
-		bit = scorpion_get_pmresrn_event(region);
-	bit -= scorpion_get_pmresrn_event(0);
-	bit += group;
-	/*
-	 * Lower bits are reserved for use by the counters (see
-	 * armv7pmu_get_event_idx() for more info)
-	 */
-	bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1;
-
-	return bit;
-}
-
-/*
- * We check for column exclusion constraints here.
- * Two events can't use the same group within a pmresr register.
- */ -static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - int bit = -1; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool scorpion_event = EVENT_CPU(hwc->config_base); - - if (venum_event || scorpion_event) { - /* Ignore invalid events */ - if (group > 3 || region > 3) - return -EINVAL; - - bit = scorpion_event_to_bit(event, region, group); - if (test_and_set_bit(bit, cpuc->used_mask)) - return -EAGAIN; - } - - idx = armv7pmu_get_event_idx(cpuc, event); - if (idx < 0 && bit >= 0) - clear_bit(bit, cpuc->used_mask); - - return idx; -} - -static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int bit; - struct hw_perf_event *hwc = &event->hw; - unsigned int region = EVENT_REGION(hwc->config_base); - unsigned int group = EVENT_GROUP(hwc->config_base); - bool venum_event = EVENT_VENUM(hwc->config_base); - bool scorpion_event = EVENT_CPU(hwc->config_base); - - armv7pmu_clear_event_idx(cpuc, event); - if (venum_event || scorpion_event) { - bit = scorpion_event_to_bit(event, region, group); - clear_bit(bit, cpuc->used_mask); - } -} - -static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_scorpion"; - cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->reset = scorpion_pmu_reset; - cpu_pmu->enable = scorpion_pmu_enable_event; - cpu_pmu->disable = scorpion_pmu_disable_event; - cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; - cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return armv7_probe_num_events(cpu_pmu); -} - -static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv7pmu_init(cpu_pmu); - cpu_pmu->name = "armv7_scorpion_mp"; - cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->reset = scorpion_pmu_reset; - cpu_pmu->enable = scorpion_pmu_enable_event; - cpu_pmu->disable = scorpion_pmu_disable_event; - cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; - cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return armv7_probe_num_events(cpu_pmu); -} - -static const struct of_device_id armv7_pmu_of_device_ids[] = { - {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, - {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, - {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, - {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, - {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, - {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, - {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, - {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, - {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, - {}, -}; - -static const struct pmu_probe_info armv7_pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - { /* sentinel value */ } -}; - - -static int armv7_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, - armv7_pmu_probe_table); -} - -static struct platform_driver armv7_pmu_driver = { - .driver = { - .name = "armv7-pmu", - .of_match_table = armv7_pmu_of_device_ids, - .suppress_bind_attrs = true, - }, - .probe = 
armv7_pmu_device_probe, -}; - -builtin_platform_driver(armv7_pmu_driver); -#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c deleted file mode 100644 index 7a2ba1c689a7..000000000000 --- a/arch/arm/kernel/perf_event_xscale.c +++ /dev/null @@ -1,748 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com> - * - * Based on the previous xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -#ifdef CONFIG_CPU_XSCALE - -#include <asm/cputype.h> -#include <asm/irq_regs.h> - -#include <linux/of.h> -#include <linux/perf/arm_pmu.h> -#include <linux/platform_device.h> - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 0, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 
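/*
 * A rough sketch of how armpmu_map_event() (drivers/perf/arm_pmu.c)
 * consumes the tables above: index by (cache, op, result) and treat the
 * PERF_CACHE_MAP_ALL_UNSUPPORTED filler as "no such event". Simplified
 * for illustration; the real helper also applies the 0xFF event mask
 * passed in by xscale_map_event() below.
 */
static int example_map_cache_event(unsigned int cache, unsigned int op,
				   unsigned int result)
{
	unsigned int ev;

	if (cache >= PERF_COUNT_HW_CACHE_MAX ||
	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ev = xscale_perf_cache_map[cache][op][result];
	if (ev == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ev;
}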
-#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!event) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - irq_work_run(); - - /* - * Re-enable the PMU. 
- */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void xscale1pmu_enable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); -} - -static void xscale1pmu_disable_event(struct perf_event *event) -{ - unsigned long val, mask, evt; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); -} - -static int -xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - if (XSCALE_PERFCTR_CCNT == hwc->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) - return XSCALE_COUNTER1; - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) - return XSCALE_COUNTER0; - - return -EAGAIN; - } -} - -static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - clear_bit(event->hw.idx, cpuc->used_mask); -} - -static void xscale1pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); -} - -static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); -} - -static inline u64 xscale1pmu_read_counter(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - 
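/*
 * Putting the xscale1 fields together: a standalone sketch (userspace,
 * illustration only) of the PMNC word that xscale1pmu_enable_event()
 * composes for counter 0 counting D-cache misses, using the same values
 * as the #defines above.
 */
#include <assert.h>
#include <stdint.h>

#define PMU_ENABLE	0x001	/* XSCALE_PMU_ENABLE */
#define COUNT0_INT_EN	0x010	/* XSCALE1_COUNT0_INT_EN */
#define COUNT0_EVT_SHFT	12	/* XSCALE1_COUNT0_EVT_SHFT */
#define DCACHE_MISS	0x0B	/* XSCALE_PERFCTR_DCACHE_MISS */

int main(void)
{
	uint32_t pmnc = 0;

	pmnc &= ~(0xffu << COUNT0_EVT_SHFT);	/* clear the old event code */
	pmnc |= (uint32_t)DCACHE_MISS << COUNT0_EVT_SHFT;
	pmnc |= COUNT0_INT_EN | PMU_ENABLE;

	assert(pmnc == 0xb011);
	return 0;
}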
-static int xscale_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &xscale_perf_map, - &xscale_perf_cache_map, 0xFF); -} - -static int xscale1pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->name = "armv5_xscale1"; - cpu_pmu->handle_irq = xscale1pmu_handle_irq; - cpu_pmu->enable = xscale1pmu_enable_event; - cpu_pmu->disable = xscale1pmu_disable_event; - cpu_pmu->read_counter = xscale1pmu_read_counter; - cpu_pmu->write_counter = xscale1pmu_write_counter; - cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; - cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; - cpu_pmu->start = xscale1pmu_start; - cpu_pmu->stop = xscale1pmu_stop; - cpu_pmu->map_event = xscale_map_event; - cpu_pmu->num_events = 3; - - return 0; -} - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; 
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!event) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void xscale2pmu_enable_event(struct perf_event *event) -{ - unsigned long ien, evtsel; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); -} - -static void xscale2pmu_disable_event(struct perf_event *event) -{ - unsigned long ien, evtsel, of_flags; - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - of_flags = XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - of_flags = XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - of_flags = XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - of_flags = XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - of_flags = XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - 
xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - xscale2pmu_write_overflow_flags(of_flags); -} - -static int -xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void xscale2pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); -} - -static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long val; - - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); -} - -static inline u64 xscale2pmu_read_counter(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val) -{ - struct hw_perf_event *hwc = &event->hw; - int counter = hwc->idx; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static int xscale2pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->name = "armv5_xscale2"; - cpu_pmu->handle_irq = xscale2pmu_handle_irq; - cpu_pmu->enable = xscale2pmu_enable_event; - cpu_pmu->disable = xscale2pmu_disable_event; - cpu_pmu->read_counter = xscale2pmu_read_counter; - cpu_pmu->write_counter = xscale2pmu_write_counter; - cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; - cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; - cpu_pmu->start = xscale2pmu_start; - cpu_pmu->stop = xscale2pmu_stop; - cpu_pmu->map_event = xscale_map_event; - cpu_pmu->num_events = 5; - - return 0; -} - -static const struct pmu_probe_info xscale_pmu_probe_table[] = { - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), - { /* sentinel value */ } -}; - -static int xscale_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); -} - -static struct platform_driver xscale_pmu_driver = { - .driver = { - .name = "xscale-pmu", - }, - .probe = xscale_pmu_device_probe, -}; - -builtin_platform_driver(xscale_pmu_driver); -#endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c index 8aa39db095d7..2c5155bd376b 100644 --- a/arch/arm/mach-davinci/pm.c +++ b/arch/arm/mach-davinci/pm.c @@ -61,7 +61,7 @@ 
static void davinci_pm_suspend(void)
 
 	/* Configure sleep count in deep sleep register */
 	val = __raw_readl(pm_config.deepsleep_reg);
-	val &= ~DEEPSLEEP_SLEEPCOUNT_MASK,
+	val &= ~DEEPSLEEP_SLEEPCOUNT_MASK;
 	val |= pm_config.sleepcount;
 	__raw_writel(val, pm_config.deepsleep_reg);
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 2ed7d229c8f9..23c98203c40f 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
 # Linux system call numbers and entry vectors
 #
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..79a656a62cbc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -5,6 +5,7 @@ config ARM64
 	select ACPI_CCA_REQUIRED if ACPI
 	select ACPI_GENERIC_GSI if ACPI
 	select ACPI_GTDT if ACPI
+	select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU
 	select ACPI_IORT if ACPI
 	select ACPI_REDUCED_HARDWARE_ONLY if ACPI
 	select ACPI_MCFG if (ACPI && PCI)
@@ -381,7 +382,7 @@ config BROKEN_GAS_INST
 config BUILTIN_RETURN_ADDRESS_STRIPS_PAC
 	bool
-	# Clang's __builtin_return_adddress() strips the PAC since 12.0.0
+	# Clang's __builtin_return_address() strips the PAC since 12.0.0
 	# https://github.com/llvm/llvm-project/commit/2a96f47c5ffca84cd774ad402cacd137f4bf45e2
 	default y if CC_IS_CLANG
 	# GCC's __builtin_return_address() strips the PAC since 11.1.0,
@@ -1067,34 +1068,21 @@ config ARM64_ERRATUM_3117295
 	  If unsure, say Y.
 
-config ARM64_WORKAROUND_SPECULATIVE_SSBS
-	bool
-
 config ARM64_ERRATUM_3194386
-	bool "Cortex-X4: 3194386: workaround for MSR SSBS not self-synchronizing"
-	select ARM64_WORKAROUND_SPECULATIVE_SSBS
+	bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing"
 	default y
 	help
-	  This option adds the workaround for ARM Cortex-X4 erratum 3194386.
+	  This option adds the workaround for the following errata:
 
-	  On affected cores "MSR SSBS, #0" instructions may not affect
-	  subsequent speculative instructions, which may permit unexpected
-	  speculative store bypassing.
-
-	  Work around this problem by placing a speculation barrier after
-	  kernel changes to SSBS. The presence of the SSBS special-purpose
-	  register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such
-	  that userspace will use the PR_SPEC_STORE_BYPASS prctl to change
-	  SSBS.
-
-	  If unsure, say Y.
-
-config ARM64_ERRATUM_3312417
-	bool "Neoverse-V3: 3312417: workaround for MSR SSBS not self-synchronizing"
-	select ARM64_WORKAROUND_SPECULATIVE_SSBS
-	default y
-	help
-	  This option adds the workaround for ARM Neoverse-V3 erratum 3312417.
+	  * ARM Cortex-A710 erratum 3324338
+	  * ARM Cortex-A720 erratum 3456091
+	  * ARM Cortex-X2 erratum 3324338
+	  * ARM Cortex-X3 erratum 3324335
+	  * ARM Cortex-X4 erratum 3194386
+	  * ARM Cortex-X925 erratum 3324334
+	  * ARM Neoverse N2 erratum 3324339
+	  * ARM Neoverse V2 erratum 3324336
+	  * ARM Neoverse-V3 erratum 3312417
 
 	  On affected cores "MSR SSBS, #0" instructions may not affect
 	  subsequent speculative instructions, which may permit unexpected
@@ -1108,7 +1096,6 @@ config ARM64_ERRATUM_3312417
 	  If unsure, say Y.
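Worth a note on the davinci_pm_suspend() hunk above: the removed line ended in a comma rather than a semicolon, so the two writes were silently fused into one comma expression. The standalone fragment below (hypothetical values, for illustration) shows why such a slip compiles cleanly and, in this simple case, even behaves the same, which is exactly what lets it hide.

#include <assert.h>

int main(void)
{
	unsigned int val = 0x3f;

	/* as in the old davinci code: the ',' sequences the two
	 * assignments into a single expression statement; evaluation is
	 * still left to right, so there is no warning and no visible
	 * misbehaviour here */
	val &= ~0x0fu,
	val |= 0x80u;

	assert(val == 0xb0);
	return 0;
}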
- config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -1649,6 +1636,7 @@ config RODATA_FULL_DEFAULT_ENABLED config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + depends on !KCSAN help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index a52618073de2..3738a9fc2d6c 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -312,6 +312,8 @@ config ARCH_STM32 select STM32_EXTI select ARM_SMC_MBOX select ARM_SCMI_PROTOCOL + select REGULATOR + select REGULATOR_ARM_SCMI select COMMON_CLK_SCMI select STM32_FIREWALL help diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts index c204dd43c726..ce90327e1b2e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts @@ -191,7 +191,7 @@ compatible = "x-powers,axp803"; reg = <0x3a3>; interrupt-parent = <&r_intc>; - interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_LOW>; x-powers,drive-vbus-en; vin1-supply = <®_vcc5v>; diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi index 31329cbe0976..642ca8f0236b 100644 --- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi +++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi @@ -1581,9 +1581,23 @@ system-cache-controller@19200000 { compatible = "qcom,qdu1000-llcc"; - reg = <0 0x19200000 0 0xd80000>, + reg = <0 0x19200000 0 0x80000>, + <0 0x19300000 0 0x80000>, + <0 0x19600000 0 0x80000>, + <0 0x19700000 0 0x80000>, + <0 0x19a00000 0 0x80000>, + <0 0x19b00000 0 0x80000>, + <0 0x19e00000 0 0x80000>, + <0 0x19f00000 0 0x80000>, <0 0x1a200000 0 0x80000>; reg-names = "llcc0_base", + "llcc1_base", + "llcc2_base", + "llcc3_base", + "llcc4_base", + "llcc5_base", + "llcc6_base", + "llcc7_base", "llcc_broadcast_base"; interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index a2627ab4db9a..b98b2f7752b5 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -977,8 +977,7 @@ reset-n-pins { pins = "gpio99"; function = "gpio"; - output-high; - drive-strength = <16>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 40da95b202da..b27143f81867 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -676,15 +676,16 @@ status = "okay"; - /* FIXME: verify */ touchscreen@10 { - compatible = "hid-over-i2c"; + compatible = "elan,ekth5015m", "elan,ekth6915"; reg = <0x10>; - hid-descr-addr = <0x1>; interrupts-extended = <&tlmm 175 IRQ_TYPE_LEVEL_LOW>; - vdd-supply = <&vreg_misc_3p3>; - vddl-supply = <&vreg_s10b>; + reset-gpios = <&tlmm 99 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>; + no-reset-on-power-off; + + vcc33-supply = <&vreg_misc_3p3>; + vccio-supply = <&vreg_misc_3p3>; pinctrl-names = "default"; pinctrl-0 = <&ts0_default>; @@ -1619,8 +1620,8 @@ reset-n-pins { pins = "gpio99"; function = "gpio"; - output-high; - drive-strength = <16>; + drive-strength = <2>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi 
index aec6ca5941c2..e374733f3b85 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -1092,6 +1092,7 @@ power-domains = <&rpmpd SM6115_VDDCX>; operating-points-v2 = <&sdhc1_opp_table>; + iommus = <&apps_smmu 0x00c0 0x0>; interconnects = <&system_noc MASTER_SDCC_1 RPM_ALWAYS_TAG &bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>, <&bimc MASTER_AMPSS_M0 RPM_ALWAYS_TAG diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 96f51850644b..6152bcd0bc1f 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -200,7 +200,7 @@ }; codec { - sound-dai = <&wcd938x 1>, <&swr2 0>, <&lpass_txmacro 0>; + sound-dai = <&wcd938x 1>, <&swr2 1>, <&lpass_txmacro 0>; }; platform { @@ -873,7 +873,7 @@ wcd_tx: codec@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; - qcom,tx-port-mapping = <1 1 2 3>; + qcom,tx-port-mapping = <2 2 3 4>; }; }; diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi index b7a3e6caa386..b34855956ae0 100644 --- a/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi +++ b/arch/arm64/boot/dts/renesas/rz-smarc-common.dtsi @@ -54,14 +54,6 @@ }; }; - usb0_vbus_otg: regulator-usb0-vbus-otg { - compatible = "regulator-fixed"; - - regulator-name = "USB0_VBUS_OTG"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - }; - vccq_sdhi1: regulator-vccq-sdhi1 { compatible = "regulator-gpio"; regulator-name = "SDHI1 VccQ"; @@ -139,6 +131,9 @@ &phyrst { status = "okay"; + usb0_vbus_otg: regulator-vbus { + regulator-name = "vbus"; + }; }; &scif0 { diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts index 0b973c1a5ef9..62d18ca769a1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts @@ -5,6 +5,8 @@ */ /dts-v1/; + +#include <dt-bindings/leds/common.h> #include "rk3308.dtsi" / { @@ -25,17 +27,21 @@ leds { compatible = "gpio-leds"; pinctrl-names = "default"; - pinctrl-0 = <&green_led_gio>, <&heartbeat_led_gpio>; + pinctrl-0 = <&green_led>, <&heartbeat_led>; green-led { + color = <LED_COLOR_ID_GREEN>; default-state = "on"; + function = LED_FUNCTION_POWER; gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; label = "rockpis:green:power"; linux,default-trigger = "default-on"; }; blue-led { + color = <LED_COLOR_ID_BLUE>; default-state = "on"; + function = LED_FUNCTION_HEARTBEAT; gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>; label = "rockpis:blue:user"; linux,default-trigger = "heartbeat"; @@ -127,10 +133,12 @@ }; &emmc { - bus-width = <4>; cap-mmc-highspeed; - mmc-hs200-1_8v; + cap-sd-highspeed; + no-sdio; non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; vmmc-supply = <&vcc_io>; status = "okay"; }; @@ -259,11 +267,11 @@ }; leds { - green_led_gio: green-led-gpio { + green_led: green-led { rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>; }; - heartbeat_led_gpio: heartbeat-led-gpio { + heartbeat_led: heartbeat-led { rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi index 9996e022fef8..31c25de2d689 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi @@ -840,7 +840,7 @@ clocks = <&cru SCLK_I2S2_8CH_TX_OUT>, <&cru SCLK_I2S2_8CH_RX_OUT>, <&cru PCLK_ACODEC>; - reset-names = "codec-reset"; + 
reset-names = "codec"; resets = <&cru SRST_ACODEC_P>; #sound-dai-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts index f09d60bbe6c4..a608a219543e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts @@ -241,8 +241,8 @@ rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; - interrupt-parent = <&gpio2>; - interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&gpio0>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <1>; clock-output-names = "xin32k", "rk805-clkout2"; gpio-controller; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index 734f87db4d11..73618df7a889 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -793,6 +793,7 @@ dma-names = "tx"; pinctrl-names = "default"; pinctrl-0 = <&spdif_tx>; + #sound-dai-cells = <0>; status = "disabled"; }; @@ -804,6 +805,7 @@ clocks = <&cru SCLK_I2S_2CH>, <&cru HCLK_I2S_2CH>; dmas = <&dmac_bus 6>, <&dmac_bus 7>; dma-names = "tx", "rx"; + #sound-dai-cells = <0>; status = "disabled"; }; @@ -817,6 +819,7 @@ dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&i2s_8ch_bus>; + #sound-dai-cells = <0>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 789fd0dcc88b..3cd63d1e8f15 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -450,7 +450,7 @@ ap_i2c_audio: &i2c8 { dlg,btn-cfg = <50>; dlg,mic-det-thr = <500>; dlg,jack-ins-deb = <20>; - dlg,jack-det-rate = "32ms_64ms"; + dlg,jack-det-rate = "32_64"; dlg,jack-rem-deb = <1>; dlg,a-d-btn-thr = <0xa>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index a9dd60464b3f..13e599a85eb8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -289,7 +289,7 @@ regulator-name = "vdd_gpu"; regulator-always-on; regulator-boot-on; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; regulator-ramp-delay = <6001>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index 1a604429fb26..e74871491ef5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -444,6 +444,7 @@ &sdmmc { bus-width = <4>; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts index baeb08d665c7..e4a20cda65ed 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts @@ -435,6 +435,7 @@ &sdmmc { bus-width = <4>; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts index d0ba3bf16fc6..966bbc582d89 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -420,6 +420,7 @@ bus-width = <4>; cap-mmc-highspeed; 
cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; sd-uhs-sdr104; vmmc-supply = <&vcc_3v3_s3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi index aebe1fedd2d8..615094bb8ba3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi @@ -344,6 +344,11 @@ }; }; +&pwm0 { + pinctrl-0 = <&pwm0m1_pins>; + pinctrl-names = "default"; +}; + &saradc { vref-supply = <&vcc_1v8_s0>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index 3b2ec1d0c542..074c316a9a69 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -288,9 +288,9 @@ pinctrl-0 = <&i2c7m0_xfer>; status = "okay"; - es8316: audio-codec@11 { + es8316: audio-codec@10 { compatible = "everest,es8316"; - reg = <0x11>; + reg = <0x10>; assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; assigned-clock-rates = <12288000>; clocks = <&cru I2S0_8CH_MCLKOUT>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts index 4cc1790ebd08..03ed48246d36 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts @@ -366,6 +366,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; @@ -406,6 +407,7 @@ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>, <&rk806_dvs2_null>, <&rk806_dvs3_null>; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 57a9abe78ee4..2c7bf4da0b80 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1036,6 +1036,7 @@ CONFIG_SND_AUDIO_GRAPH_CARD2=m CONFIG_HID_MULTITOUCH=m CONFIG_I2C_HID_ACPI=m CONFIG_I2C_HID_OF=m +CONFIG_I2C_HID_OF_ELAN=m CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 6792a1f83f2a..a407f9cd549e 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -119,6 +119,18 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) return acpi_cpu_get_madt_gicc(cpu)->uid; } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (acpi_cpu_get_madt_gicc(cpu) && + uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); int apei_claim_sea(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 5f172611654b..9e96f024b2f1 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -175,21 +175,6 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF | - GIC_PRIO_PSR_I_SET)); - BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); - /* - * Need to make sure IRQON allows IRQs when SCR_EL3.FIQ is cleared - * and non-secure PMR accesses are not subject to the shifts that - * are applied to IRQ priorities - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); - /* - * Same situation as above, but now we make 
sure that we can mask - * regular interrupts. - */ - BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS | - GIC_PRIO_PSR_I_SET)); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 934c658ee947..f5794d50f51d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -15,7 +15,7 @@ #include <linux/bug.h> #include <linux/init.h> #include <linux/jump_label.h> -#include <linux/smp.h> +#include <linux/percpu.h> #include <linux/types.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index c27404fa4418..a4697a0b6835 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -6,7 +6,7 @@ #ifndef __ASM_PMUV3_H #define __ASM_PMUV3_H -#include <linux/kvm_host.h> +#include <asm/kvm_host.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 980d1dd8e1a3..b8a5861dc7b7 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -112,6 +112,9 @@ #define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr) +#define _ASM_EXTABLE_KACCESS(insn, fixup) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ __ASM_EXTABLE_RAW(#insn, #fixup, \ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 7529c0263933..a6e5b07b64fd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -59,7 +59,7 @@ cpucap_is_possible(const unsigned int cap) case ARM64_WORKAROUND_REPEAT_TLBI: return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI); case ARM64_WORKAROUND_SPECULATIVE_SSBS: - return IS_ENABLED(CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS); + return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386); } return true; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8b904a757bd3..558434267271 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -588,14 +588,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT); - return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_EL1_AARCH32; } static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT); - return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT; + return val == ID_AA64PFR0_EL1_EL0_AARCH32; } static inline bool id_aa64pfr0_sve(u64 pfr0) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7b32b99023a2..1cb0704c6163 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,9 +86,12 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -162,9 +165,12 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define 
MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 7abf09df7033..3f482500f71f 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -121,6 +121,14 @@ #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +/* Status codes for individual page table levels */ +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + n) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + n) + +#define ESR_ELx_FSC_FAULT_nL (0x2C) +#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \ + ESR_ELx_FSC_FAULT) + (n)) + /* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV_SHIFT (24) #define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT) @@ -388,20 +396,33 @@ static inline bool esr_is_data_abort(unsigned long esr) static inline bool esr_fsc_is_translation_fault(unsigned long esr) { - /* Translation fault, level -1 */ - if ((esr & ESR_ELx_FSC) == 0b101011) - return true; - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_FAULT_L(3)) || + (esr == ESR_ELx_FSC_FAULT_L(2)) || + (esr == ESR_ELx_FSC_FAULT_L(1)) || + (esr == ESR_ELx_FSC_FAULT_L(0)) || + (esr == ESR_ELx_FSC_FAULT_L(-1)); } static inline bool esr_fsc_is_permission_fault(unsigned long esr) { - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_PERM_L(3)) || + (esr == ESR_ELx_FSC_PERM_L(2)) || + (esr == ESR_ELx_FSC_PERM_L(1)) || + (esr == ESR_ELx_FSC_PERM_L(0)); } static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) { - return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ACCESS_L(3)) || + (esr == ESR_ELx_FSC_ACCESS_L(2)) || + (esr == ESR_ELx_FSC_ACCESS_L(1)) || + (esr == ESR_ELx_FSC_ACCESS_L(0)); } /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index c768d16b81a4..bd19f4c758b7 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -72,11 +72,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr = read_sysreg(tcr_el1); - if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz) + if ((tcr & TCR_T0SZ_MASK) == t0sz) return; tcr &= ~TCR_T0SZ_MASK; - tcr |= t0sz << TCR_T0SZ_OFFSET; + tcr |= t0sz; write_sysreg(tcr, tcr_el1); isb(); } diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 91fbd5c8a391..0f84518632b4 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h 
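An aside on the esr.h rework above: the new ESR_ELx_FSC_FAULT_L()/ESR_ELx_FSC_ACCESS_L()/ESR_ELx_FSC_PERM_L() helpers encode "base code plus page-table level", with 0x2c reserved for the level -1 translation fault. The standalone C sketch below shows the same classification; the base encodings (0x04 for translation faults, field mask 0x3f) match mainline esr.h as far as I know, and the range test is equivalent to the explicit per-level comparisons in the hunk. Toy code, not kernel code:

#include <stdio.h>

#define ESR_ELx_FSC          0x3f   /* fault status code field */
#define ESR_ELx_FSC_FAULT    0x04   /* translation fault, level 0 */
#define ESR_ELx_FSC_FAULT_nL 0x2c   /* translation fault, level -1 */

static int is_translation_fault(unsigned long esr)
{
        unsigned long fsc = esr & ESR_ELx_FSC;

        /* levels 0..3 are consecutive codes; level -1 is the outlier */
        return fsc == ESR_ELx_FSC_FAULT_nL ||
               (fsc >= ESR_ELx_FSC_FAULT && fsc <= ESR_ELx_FSC_FAULT + 3);
}

int main(void)
{
        printf("%d\n", is_translation_fault(0x05)); /* level 1 -> 1 */
        printf("%d\n", is_translation_fault(0x2c)); /* level -1 -> 1 */
        printf("%d\n", is_translation_fault(0x0d)); /* permission fault -> 0 */
        return 0;
}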
@@ -182,7 +182,7 @@ void mte_check_tfsr_el1(void); static inline void mte_check_tfsr_entry(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; mte_check_tfsr_el1(); @@ -190,7 +190,7 @@ static inline void mte_check_tfsr_entry(void) static inline void mte_check_tfsr_exit(void) { - if (!system_supports_mte()) + if (!kasan_hw_tags_enabled()) return; /* diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 9943ff0af4c9..1f60aa1bc750 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -170,6 +170,7 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55))) #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 47ec58031f11..0abe975d68a8 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -21,35 +21,12 @@ #define INIT_PSTATE_EL2 \ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h) -/* - * PMR values used to mask/unmask interrupts. - * - * GIC priority masking works as follows: if an IRQ's priority is a higher value - * than the value held in PMR, that IRQ is masked. Lowering the value of PMR - * means masking more IRQs (or at least that the same IRQs remain masked). - * - * To mask interrupts, we clear the most significant bit of PMR. - * - * Some code sections either automatically switch back to PSR.I or explicitly - * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included - * in the priority mask, it indicates that PSR.I should be set and - * interrupt disabling temporarily does not rely on IRQ priorities. - */ -#define GIC_PRIO_IRQON 0xe0 -#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) -#define __GIC_PRIO_IRQOFF_NS 0xa0 -#define GIC_PRIO_PSR_I_SET (1 << 4) - -#define GIC_PRIO_IRQOFF \ - ({ \ - extern struct static_key_false gic_nonsecure_priorities;\ - u8 __prio = __GIC_PRIO_IRQOFF; \ - \ - if (static_branch_unlikely(&gic_nonsecure_priorities)) \ - __prio = __GIC_PRIO_IRQOFF_NS; \ - \ - __prio; \ - }) +#include <linux/irqchip/arm-gic-v3-prio.h> + +#define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED +#define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ + +#define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET /* Additional SPSR bits not exposed in the UABI */ #define PSR_MODE_THREAD_BIT (1 << 0) diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h new file mode 100644 index 000000000000..be5915669d23 --- /dev/null +++ b/arch/arm64/include/asm/runtime-const.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#include <asm/cacheflush.h> + +/* Sigh. 
You can still run arm64 in BE mode */ +#include <asm/byteorder.h> + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("1:\t" \ + "movz %0, #0xcdef\n\t" \ + "movk %0, #0x89ab, lsl #16\n\t" \ + "movk %0, #0x4567, lsl #32\n\t" \ + "movk %0, #0x0123, lsl #48\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret)); \ + __ret; }) + +#define runtime_const_shift_right_32(val, sym) ({ \ + unsigned long __ret; \ + asm_inline("1:\t" \ + "lsr %w0,%w1,#12\n\t" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + :"=r" (__ret) \ + :"r" (0u+(val))); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */ +static inline void __runtime_fixup_16(__le32 *p, unsigned int val) +{ + u32 insn = le32_to_cpu(*p); + insn &= 0xffe0001f; + insn |= (val & 0xffff) << 5; + *p = cpu_to_le32(insn); +} + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + unsigned long va = (unsigned long)where; + caches_clean_inval_pou(va, va + 4*insns); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + __runtime_fixup_16(p, val); + __runtime_fixup_16(p+1, val >> 16); + __runtime_fixup_16(p+2, val >> 32); + __runtime_fixup_16(p+3, val >> 48); + __runtime_fixup_caches(where, 4); +} + +/* Immediate value is 6 bits starting at bit #16 */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + u32 insn = le32_to_cpu(*p); + insn &= 0xffc0ffff; + insn |= (val & 63) << 16; + *p = cpu_to_le32(insn); + __runtime_fixup_caches(where, 1); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index efb13112b408..2510eec026f7 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -25,22 +25,11 @@ #ifndef __ASSEMBLY__ -#include <asm/percpu.h> - #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/thread_info.h> -DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); - -/* - * We don't use this_cpu_read(cpu_number) as that has implicit writes to - * preempt_count, and associated (compiler) barriers, that we'd like to avoid - * the expense of. If we're preemptible, the value can be stale at use anyway. - * And we can't use this_cpu_ptr() either, as that winds up recursing back - * here under CONFIG_DEBUG_PREEMPT=y. - */ -#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number)) +#define raw_smp_processor_id() (current_thread_info()->cpu) /* * Logical CPU mapping. 
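To see what the new runtime-const machinery actually patches, consider the four-instruction movz/movk placeholder emitted by runtime_const_ptr() above: each __runtime_fixup_16() call rewrites only the imm16 field in bits 5..20 of one instruction word. A host-side toy in plain C (0xd299bde0 is the encoding of movz x0, #0xcdef, i.e. the first placeholder instruction; computed by hand, worth double-checking with an assembler):

#include <stdint.h>
#include <stdio.h>

static uint32_t fixup_16(uint32_t insn, unsigned int val)
{
        insn &= 0xffe0001fu;            /* clear imm16 (bits 5..20) */
        insn |= (val & 0xffffu) << 5;   /* insert the new immediate */
        return insn;
}

int main(void)
{
        uint32_t patched = fixup_16(0xd299bde0u, 0x1234);

        printf("0x%08x\n", patched);    /* 0xd2824680: movz x0, #0x1234 */
        return 0;
}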
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index af3b206fa423..1b6e436dbb55 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -872,10 +872,6 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64pfr0 */ -#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 -#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 - /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 14be5000c5a0..28f665e0975a 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -184,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_mem_asm(load, reg, x, addr, err, type) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_mem_asm(load, reg, x, addr, label, type) \ + asm_goto_output( \ + "1: " load " " reg "0, [%1]\n" \ + _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + : "=r" (x) \ + : "r" (addr) : : label) +#else +#define __get_mem_asm(load, reg, x, addr, label, type) do { \ + int __gma_err = 0; \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ - : "+r" (err), "=r" (x) \ - : "r" (addr)) + : "+r" (__gma_err), "=r" (x) \ + : "r" (addr)); \ + if (__gma_err) goto label; } while (0) +#endif -#define __raw_get_mem(ldr, x, ptr, err, type) \ +#define __raw_get_mem(ldr, x, ptr, label, type) \ do { \ unsigned long __gu_val; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \ break; \ case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \ break; \ case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \ break; \ case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -219,27 +230,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. 
*/ -#define __raw_get_user(x, ptr, err) \ +#define __raw_get_user(x, ptr, label) \ do { \ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \ __typeof__(x) __rgu_val; \ __chk_user_ptr(ptr); \ - \ - uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ - \ - (x) = __rgu_val; \ + do { \ + __label__ __rgu_failed; \ + uaccess_ttbr0_enable(); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \ + uaccess_ttbr0_disable(); \ + (x) = __rgu_val; \ + break; \ + __rgu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __get_user_error(x, ptr, err) \ do { \ + __label__ __gu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_get_user((x), __p, (err)); \ + __raw_get_user((x), __p, __gu_failed); \ } else { \ + __gu_failed: \ (x) = (__force __typeof__(x))0; (err) = -EFAULT; \ } \ } while (0) @@ -262,40 +280,42 @@ do { \ do { \ __typeof__(dst) __gkn_dst = (dst); \ __typeof__(src) __gkn_src = (src); \ - int __gkn_err = 0; \ - \ - __mte_enable_tco_async(); \ - __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err, K); \ - __mte_disable_tco_async(); \ + do { \ + __label__ __gkn_label; \ \ - if (unlikely(__gkn_err)) \ + __mte_enable_tco_async(); \ + __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ + (__force type *)(__gkn_src), __gkn_label, K); \ + __mte_disable_tco_async(); \ + break; \ + __gkn_label: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while (0) -#define __put_mem_asm(store, reg, x, addr, err, type) \ - asm volatile( \ - "1: " store " " reg "1, [%2]\n" \ +#define __put_mem_asm(store, reg, x, addr, label, type) \ + asm goto( \ + "1: " store " " reg "0, [%1]\n" \ "2:\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ - : "+r" (err) \ - : "rZ" (x), "r" (addr)) + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ + : : "rZ" (x), "r" (addr) : : label) -#define __raw_put_mem(str, x, ptr, err, type) \ +#define __raw_put_mem(str, x, ptr, label, type) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \ break; \ case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \ break; \ case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \ break; \ case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -307,25 +327,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. 
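The shape of this conversion is easier to see outside kernel context: a caller-supplied goto label replaces the error argument, and a local __label__ guarantees the teardown half of the critical section runs exactly once on either path. A standalone sketch using the same GNU local-label extension; lock()/unlock() are hypothetical stand-ins for the uaccess_ttbr0_enable()/uaccess_ttbr0_disable() pair:

#include <stdio.h>

static void lock(void)   { puts("enable");  }
static void unlock(void) { puts("disable"); }

#define READ_OR_FAIL(x, p, label) do {                          \
        __label__ __failed;                                     \
        lock();                                                 \
        if (!(p))                                               \
                goto __failed;                                  \
        (x) = *(p);                                             \
        unlock();                                               \
        break;          /* success: leave the do-while */      \
__failed:                                                       \
        unlock();       /* cleanup before leaving */            \
        goto label;                                             \
} while (0)

int main(void)
{
        int v = 0, src = 42;

        READ_OR_FAIL(v, &src, efault);
        printf("v=%d\n", v);
        return 0;
efault:
        return 1;
}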
*/ -#define __raw_put_user(x, ptr, err) \ +#define __raw_put_user(x, ptr, label) \ do { \ + __label__ __rpu_failed; \ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \ __typeof__(*(ptr)) __rpu_val = (x); \ __chk_user_ptr(__rpu_ptr); \ \ - uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ + do { \ + uaccess_ttbr0_enable(); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \ + uaccess_ttbr0_disable(); \ + break; \ + __rpu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0) #define __put_user_error(x, ptr, err) \ do { \ + __label__ __pu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_put_user((x), __p, (err)); \ + __raw_put_user((x), __p, __pu_failed); \ } else { \ + __pu_failed: \ (err) = -EFAULT; \ } \ } while (0) @@ -348,15 +377,18 @@ do { \ do { \ __typeof__(dst) __pkn_dst = (dst); \ __typeof__(src) __pkn_src = (src); \ - int __pkn_err = 0; \ \ - __mte_enable_tco_async(); \ - __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err, K); \ - __mte_disable_tco_async(); \ - \ - if (unlikely(__pkn_err)) \ + do { \ + __label__ __pkn_err; \ + __mte_enable_tco_async(); \ + __raw_put_mem("str", *((type *)(__pkn_src)), \ + (__force type *)(__pkn_dst), __pkn_err, K); \ + __mte_disable_tco_async(); \ + break; \ + __pkn_err: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while(0) extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); @@ -381,6 +413,51 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi __actu_ret; \ }) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr,len))) + return 0; + uaccess_ttbr0_enable(); + return 1; +} +#define user_access_begin(a,b) user_access_begin(a,b) +#define user_access_end() uaccess_ttbr0_disable() +#define unsafe_put_user(x, ptr, label) \ + __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U) +#define unsafe_get_user(x, ptr, label) \ + __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U) + +/* + * KCSAN uses these to save and restore ttbr state. + * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so + * they are no-ops. + */ +static inline unsigned long user_access_save(void) { return 0; } +static inline void user_access_restore(unsigned long enabled) { } + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. 
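With user_access_begin()/user_access_end() and the unsafe_{get,put}_user() helpers now defined for arm64, callers can batch several user accesses under one fault label. A hedged kernel-context sketch of the calling convention (fill_user_pair() is an invented example, and the u32/__user types come from kernel headers, so this compiles only in-tree):

static int fill_user_pair(u32 __user *p, u32 a, u32 b)
{
        if (!user_access_begin(p, 2 * sizeof(u32)))
                return -EFAULT;
        unsafe_put_user(a, p, efault);          /* faults jump to efault */
        unsafe_put_user(b, p + 1, efault);
        user_access_end();
        return 0;
efault:
        user_access_end();                      /* must close on both paths */
        return -EFAULT;
}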
+ */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 266b96acc014..1386e8e751f2 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -840,7 +840,7 @@ __SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SYSCALL(__NR_io_pgetevents_time64, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 14251abee23c..824ca6987a51 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -27,20 +27,15 @@ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, } #define prep_zero_mask(a, bits, c) (bits) +#define create_zero_mask(bits) (bits) +#define find_zero(bits) (__ffs(bits) >> 3) -static inline unsigned long create_zero_mask(unsigned long bits) +static inline unsigned long zero_bytemask(unsigned long bits) { bits = (bits - 1) & ~bits; return bits >> 7; } -static inline unsigned long find_zero(unsigned long mask) -{ - return fls64(mask) >> 3; -} - -#define zero_bytemask(mask) (mask) - #else /* __AARCH64EB__ */ #include <asm-generic/word-at-a-time.h> #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 763824963ed1..2b112f3b7510 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -46,7 +46,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e0e7b93c16cc..e6f66491fbe9 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -30,6 +30,7 @@ #include <linux/pgtable.h> #include <acpi/ghes.h> +#include <acpi/processor.h> #include <asm/cputype.h> #include <asm/cpu_ops.h> #include <asm/daifflags.h> @@ -45,6 +46,7 @@ EXPORT_SYMBOL(acpi_pci_disabled); static bool param_acpi_off __initdata; static bool param_acpi_on __initdata; static bool param_acpi_force __initdata; +static bool param_acpi_nospcr __initdata; static int __init parse_acpi(char *arg) { @@ -58,6 +60,8 @@ static int __init parse_acpi(char *arg) param_acpi_on = true; else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */ param_acpi_force = 
true; + else if (strcmp(arg, "nospcr") == 0) /* disable SPCR as default console */ + param_acpi_nospcr = true; else return -EINVAL; /* Core will print when we return error */ @@ -237,7 +241,20 @@ done: acpi_put_table(facs); } #endif - acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + + /* + * For varying privacy and security reasons, we sometimes need + * to completely silence the serial console output, and only + * enable it when needed. + * But many existing systems depend on the old behaviour; use + * acpi=nospcr to keep the console described by the ACPI SPCR + * table from becoming the default serial console. + */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, + !param_acpi_nospcr); + pr_info("Use ACPI SPCR as default console: %s\n", + param_acpi_nospcr ? "No" : "Yes"); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); } @@ -423,107 +440,23 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size) memblock_mark_nomap(addr, size); } -#ifdef CONFIG_ACPI_FFH -/* - * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as - * specified in https://developer.arm.com/docs/den0048/latest - */ -struct acpi_ffh_data { - struct acpi_ffh_info info; - void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - unsigned long a4, unsigned long a5, - unsigned long a6, unsigned long a7, - struct arm_smccc_res *args, - struct arm_smccc_quirk *res); - void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args, - struct arm_smccc_1_2_regs *res); -}; - -int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt) +#ifdef CONFIG_ACPI_HOTPLUG_CPU +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 apci_id, + int *pcpu) { - enum arm_smccc_conduit conduit; - struct acpi_ffh_data *ffh_ctxt; - - if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2) - return -EOPNOTSUPP; - - conduit = arm_smccc_1_1_get_conduit(); - if (conduit == SMCCC_CONDUIT_NONE) { - pr_err("%s: invalid SMCCC conduit\n", __func__); - return -EOPNOTSUPP; + /* If an error code is passed in this stub can't fix it */ + if (*pcpu < 0) { + pr_warn_once("Unable to map CPU to valid ID\n"); + return *pcpu; } - ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL); - if (!ffh_ctxt) - return -ENOMEM; - - if (conduit == SMCCC_CONDUIT_SMC) { - ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc; - ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc; - } else { - ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc; - ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc; - } - - memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info)); - - *region_ctxt = ffh_ctxt; - return AE_OK; -} - -static bool acpi_ffh_smccc_owner_allowed(u32 fid) -{ - int owner = ARM_SMCCC_OWNER_NUM(fid); - - if (owner == ARM_SMCCC_OWNER_STANDARD || - owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM) - return true; - - return false; + return 0; } +EXPORT_SYMBOL(acpi_map_cpu); -int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context) +int acpi_unmap_cpu(int cpu) { - int ret = 0; - struct acpi_ffh_data *ffh_ctxt = region_context; - - if (ffh_ctxt->info.offset == 0) { - /* SMC/HVC 32bit call */ - struct arm_smccc_res res; - u32 a[8] = { 0 }, *ptr = (u32 *)value; - - if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) || - !acpi_ffh_smccc_owner_allowed(*ptr) || - ffh_ctxt->info.length > 32) { - ret = AE_ERROR; - } else { - int idx, len = ffh_ctxt->info.length >> 2; - - for (idx = 0; idx < len; idx++) - a[idx] = *(ptr + idx); - - ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2],
a[3], a[4], - a[5], a[6], a[7], &res, NULL); - memcpy(value, &res, sizeof(res)); - } - - } else if (ffh_ctxt->info.offset == 1) { - /* SMC/HVC 64bit call */ - struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value; - - if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) || - !acpi_ffh_smccc_owner_allowed(r->a0) || - ffh_ctxt->info.length > sizeof(*r)) { - ret = AE_ERROR; - } else { - ffh_ctxt->invoke_ffh64_fn(r, r); - memcpy(value, r, ffh_ctxt->info.length); - } - } else { - ret = AE_ERROR; - } - - return ret; + return 0; } -#endif /* CONFIG_ACPI_FFH */ +EXPORT_SYMBOL(acpi_unmap_cpu); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index e51535a5f939..0c036a9a3c33 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -34,17 +34,6 @@ int __init acpi_numa_get_nid(unsigned int cpu) return acpi_early_node_map[cpu]; } -static inline int get_cpu_for_acpi_id(u32 uid) -{ - int cpu; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) - return cpu; - - return -EINVAL; -} - static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 828be635e7e1..617424b73f8c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -432,14 +432,17 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { }; #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS -static const struct midr_range erratum_spec_ssbs_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 +static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), -#endif -#ifdef CONFIG_ARM64_ERRATUM_3312417 + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), -#endif + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), {} }; #endif @@ -741,9 +744,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)), }, #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS +#ifdef CONFIG_ARM64_ERRATUM_3194386 { - .desc = "ARM errata 3194386, 3312417", + .desc = "SSBS not fully self-synchronizing", .capability = ARM64_WORKAROUND_SPECULATIVE_SSBS, ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list), }, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 48e7029f1054..646ecd3069fd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -285,8 +285,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_EL1_IMP), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, 
ID_AA64PFR0_EL1_EL0_IMP), ARM64_FTR_END, }; @@ -429,6 +429,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0), diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c deleted file mode 100644 index f372295207fb..000000000000 --- a/arch/arm64/kernel/cpuidle.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ARM64 CPU idle arch support - * - * Copyright (C) 2014 ARM Ltd. - * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> - */ - -#include <linux/acpi.h> -#include <linux/cpuidle.h> -#include <linux/cpu_pm.h> -#include <linux/psci.h> - -#ifdef CONFIG_ACPI_PROCESSOR_IDLE - -#include <acpi/processor.h> - -#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags)) - -static int psci_acpi_cpu_init_idle(unsigned int cpu) -{ - int i, count; - struct acpi_lpi_state *lpi; - struct acpi_processor *pr = per_cpu(processors, cpu); - - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - count = pr->power.count - 1; - if (count <= 0) - return -ENODEV; - - for (i = 0; i < count; i++) { - u32 state; - - lpi = &pr->power.lpi_states[i + 1]; - /* - * Only bits[31:0] represent a PSCI power_state while - * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification - */ - state = lpi->address; - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - return -EINVAL; - } - } - - return 0; -} - -int acpi_processor_ffh_lpi_probe(unsigned int cpu) -{ - return psci_acpi_cpu_init_idle(cpu); -} - -__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) -{ - u32 state = lpi->address; - - if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter, - lpi->index, state); - else - return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, - lpi->index, state); -} -#endif diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index ba4f8f7d6a91..8f5422ed1b75 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -105,11 +105,6 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); -#ifdef CONFIG_ARM64_PSEUDO_NMI -/* Static key checked in GIC_PRIO_IRQOFF. 
*/ -KVM_NVHE_ALIAS(gic_nonsecure_priorities); -#endif - /* EL2 exception handling */ KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index dcdcccd40891..6174671be7c1 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -582,12 +582,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl); size_t mte_probe_user_range(const char __user *uaddr, size_t size) { const char __user *end = uaddr + size; - int err = 0; char val; - __raw_get_user(val, uaddr, err); - if (err) - return size; + __raw_get_user(val, uaddr, efault); uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE); while (uaddr < end) { @@ -595,12 +592,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size) * A read is sufficient for mte, the caller should have probed * for the pte write permission if required. */ - __raw_get_user(val, uaddr, err); - if (err) - return end - uaddr; + __raw_get_user(val, uaddr, efault); uaddr += MTE_GRANULE_SIZE; } (void)val; return 0; + +efault: + return end - uaddr; } diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index 5fa08e13e17e..f374a3e5a5fe 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -173,7 +173,7 @@ static void __init remap_idmap_for_lpa2(void) * Don't bother with the FDT, we no longer need it after this. */ memset(init_idmap_pg_dir, 0, - (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end); + (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir); create_init_idmap(init_idmap_pg_dir, mask); dsb(ishst); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index baca47bd443c..da53722f95d4 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -567,7 +567,7 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void) * Mitigate this with an unconditional speculation barrier, as CPUs * could mis-speculate branches and bypass a conditional barrier. 
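Stepping back to the pi/map_kernel.c hunk a few lines up, the bug class is worth spelling out: a region size is end minus start, and in unsigned 64-bit arithmetic the reversed subtraction silently wraps to an enormous value instead of going negative. A two-line demonstration in plain C:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t start = 0x1000, end = 0x3000;

        printf("%llu\n", (unsigned long long)(start - end)); /* wraps: 18446744073709543424 */
        printf("%llu\n", (unsigned long long)(end - start)); /* correct: 8192 */
        return 0;
}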
*/ - if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS)) + if (IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386)) spec_bar(); return SPECTRE_MITIGATED; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 29a8e444db83..fabd732d0a2d 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu) { phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry); int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry); - if (err) + if (err && err != -EPERM) pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index 99f2ffe9fc05..5b0891146054 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++ b/arch/arm64/kernel/reloc_test_core.c @@ -74,4 +74,5 @@ static void __exit reloc_test_exit(void) module_init(reloc_test_init); module_exit(reloc_test_exit); +MODULE_DESCRIPTION("Relocation testing module"); MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 31c8b3094dd7..5e18fbcee9a2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -55,9 +55,6 @@ #include <trace/events/ipi.h> -DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -132,7 +129,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); if (ret) { - pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + if (ret != -EPERM) + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); return ret; } @@ -510,6 +508,59 @@ static int __init smp_cpu_setup(int cpu) static bool bootcpu_valid __initdata; static unsigned int cpu_count = 1; +int arch_register_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + + if (!acpi_disabled && !acpi_handle && + IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU)) + return -EPROBE_DEFER; + +#ifdef CONFIG_ACPI_HOTPLUG_CPU + /* For now block anything that looks like physical CPU Hotplug */ + if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return -ENODEV; + } +#endif + + /* + * Availability of the acpi handle is sufficient to establish + * that _STA has already been checked. No need to recheck here.
+ */ + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); + + return register_cpu(c, cpu); +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +void arch_unregister_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + acpi_status status; + unsigned long long sta; + + if (!acpi_handle) { + pr_err_once("Removing a CPU without associated ACPI handle\n"); + return; + } + + status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return; + + /* For now do not allow anything that looks like physical CPU HP */ + if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return; + } + + unregister_cpu(c); +} +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + #ifdef CONFIG_ACPI static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS]; @@ -530,7 +581,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) { u64 hwid = processor->arm_mpidr; - if (!acpi_gicc_is_usable(processor)) { + if (!(processor->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) { pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); return; } @@ -749,8 +801,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) */ for_each_possible_cpu(cpu) { - per_cpu(cpu_number, cpu) = cpu; - if (cpu == smp_processor_id()) continue; @@ -767,13 +817,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } -static const char *ipi_types[NR_IPI] __tracepoint_string = { +static const char *ipi_types[MAX_IPI] __tracepoint_string = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_TIMER] = "Timer broadcast interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); @@ -784,7 +836,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) { unsigned int cpu, i; - for (i = 0; i < NR_IPI; i++) { + for (i = 0; i < MAX_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) @@ -1028,12 +1080,12 @@ void __init set_smp_ipi_range(int ipi_base, int n) if (ipi_should_be_nmi(i)) { err = request_percpu_nmi(ipi_base + i, ipi_handler, - "IPI", &cpu_number); + "IPI", &irq_stat); WARN(err, "Could not request IPI %d as NMI, err=%d\n", i, err); } else { err = request_percpu_irq(ipi_base + i, ipi_handler, - "IPI", &cpu_number); + "IPI", &irq_stat); WARN(err, "Could not request IPI %d as IRQ, err=%d\n", i, err); } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index ad198262b981..7230f6e20ab8 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, syscall_set_return_value(current, regs, 0, ret); /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for arm64 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 9 bits. + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. 
The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints: the AAPCS mandates a + * 16-byte aligned SP at function boundaries, which will remove the + * 4 low bits from any entropy chosen here. * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints: the AAPCS mandates a - * 16-byte (i.e. 4-bit) aligned SP at function boundaries. - * - * The resulting 5 bits of entropy is seen in SP[8:4]. + * The resulting 6 bits of entropy is seen in SP[9:4]. */ - choose_random_kstack_offset(get_random_u16() & 0x1FF); + choose_random_kstack_offset(get_random_u16()); } static inline bool has_syscall_work(unsigned long flags) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 755a22d4f840..55a8e310ea12 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -264,6 +264,9 @@ SECTIONS EXIT_DATA } + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 51f043649146..f957890c7e38 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -52,11 +52,11 @@ * Supported by KVM */ #define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \ + SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \ + SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \ + SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \ + SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \ + SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \ ) /* diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 95cf18574251..187a5f4d56c0 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -33,9 +33,9 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) /* Protected KVM does not support AArch32 guests. */ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP); BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP); /* * Linux guests assume support for floating-point and Advanced SIMD. Do diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index edd969a1f36b..2860548d4250 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -276,7 +276,7 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, * of AArch32 feature id registers. 
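To make the entropy arithmetic in the syscall.c comment above concrete: KSTACK_OFFSET_MAX() caps the random value at 10 bits (its generic definition is ((x) & 0x3FF), stated here from memory), and the AAPCS 16-byte stack alignment then discards the low 4 bits, leaving 6 bits of placement entropy visible in SP[9:4]. A toy model in C:

#include <stdint.h>
#include <stdio.h>

#define KSTACK_OFFSET_MAX(x) ((x) & 0x3ff)      /* cap at 10 bits */

int main(void)
{
        uint16_t rnd = 0xabcd;                  /* get_random_u16() stand-in */
        uint32_t off = KSTACK_OFFSET_MAX(rnd);  /* 0x3cd */
        uint32_t sp_delta = off & ~0xfu;        /* after 16-byte alignment: 0x3c0 */

        printf("offset=0x%03x aligned=0x%03x\n", off, sp_delta);
        return 0;
}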
*/ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_EL1_IMP); return pvm_access_raz_wi(vcpu, p, r); } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index a35ce10e0a9f..d1a476b08f54 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -14,7 +14,6 @@ #include <asm/kvm_emulate.h> #include <kvm/arm_pmu.h> #include <kvm/arm_vgic.h> -#include <asm/arm_pmuv3.h> #define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c927e9312f10..353ea5dc32b8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -124,7 +124,8 @@ bool pgattr_change_is_safe(u64 old, u64 new) * The following mapping attributes may be updated in live * kernel mappings without the need for break-before-make. */ - pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; + pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG | + PTE_SWBITS_MASK; /* creating or taking down mappings is always safe */ if (!pte_valid(__pte(old)) || !pte_valid(__pte(new))) diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h index 7ff6a2466af1..e0594b6370a6 100644 --- a/arch/csky/include/uapi/asm/unistd.h +++ b/arch/csky/include/uapi/asm/unistd.h @@ -6,6 +6,7 @@ #define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYNC_FILE_RANGE2 #include <asm-generic/unistd.h> #define __NR_set_thread_area (__NR_arch_specific_syscall + 0) diff --git a/arch/csky/kernel/syscall.c b/arch/csky/kernel/syscall.c index 3d30e58a45d2..4540a271ee39 100644 --- a/arch/csky/kernel/syscall.c +++ b/arch/csky/kernel/syscall.c @@ -20,7 +20,7 @@ SYSCALL_DEFINE6(mmap2, unsigned long, prot, unsigned long, flags, unsigned long, fd, - off_t, offset) + unsigned long, offset) { if (unlikely(offset & (~PAGE_MASK >> 12))) return -EINVAL; diff --git a/arch/hexagon/include/asm/syscalls.h b/arch/hexagon/include/asm/syscalls.h new file mode 100644 index 000000000000..40f2d08bec92 --- /dev/null +++ b/arch/hexagon/include/asm/syscalls.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <asm-generic/syscalls.h> + +asmlinkage long sys_hexagon_fadvise64_64(int fd, int advice, + u32 a2, u32 a3, u32 a4, u32 a5); diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h index 432c4db1b623..21ae22306b5d 100644 --- a/arch/hexagon/include/uapi/asm/unistd.h +++ b/arch/hexagon/include/uapi/asm/unistd.h @@ -36,5 +36,6 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYNC_FILE_RANGE2 #include <asm-generic/unistd.h> diff --git a/arch/hexagon/kernel/syscalltab.c b/arch/hexagon/kernel/syscalltab.c index 0fadd582cfc7..5d98bdc494ec 100644 --- a/arch/hexagon/kernel/syscalltab.c +++ b/arch/hexagon/kernel/syscalltab.c @@ -14,6 +14,13 @@ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), +SYSCALL_DEFINE6(hexagon_fadvise64_64, int, fd, int, advice, + SC_ARG64(offset), SC_ARG64(len)) +{ + return ksys_fadvise64_64(fd, SC_VAL64(loff_t, offset), SC_VAL64(loff_t, len), advice); +} +#define sys_fadvise64_64 sys_hexagon_fadvise64_64 + void *sys_call_table[__NR_syscalls] = { #include <asm/unistd.h> }; diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index b4c5acd7aa3b..8801611143ab 100644 --- a/arch/loongarch/kernel/syscall.c +++ 
b/arch/loongarch/kernel/syscall.c @@ -22,7 +22,7 @@ #define __SYSCALL(nr, call) [nr] = (call), SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, - prot, unsigned long, flags, unsigned long, fd, off_t, offset) + prot, unsigned long, flags, unsigned long, fd, unsigned long, offset) { if (offset & ~PAGE_MASK) return -EINVAL; diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index d4b170c861bf..0147130dc34e 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -180,6 +180,15 @@ int __init amiga_parse_bootinfo(const struct bi_record *record) dev->slotsize = be16_to_cpu(cd->cd_SlotSize); dev->boardaddr = be32_to_cpu(cd->cd_BoardAddr); dev->boardsize = be32_to_cpu(cd->cd_BoardSize); + + /* CS-LAB Warp 1260 workaround */ + if (be16_to_cpu(dev->rom.er_Manufacturer) == ZORRO_MANUF(ZORRO_PROD_CSLAB_WARP_1260) && + dev->rom.er_Product == ZORRO_PROD(ZORRO_PROD_CSLAB_WARP_1260)) { + + /* turn off all interrupts */ + pr_info("Warp 1260 card detected: applying interrupt storm workaround\n"); + *(uint32_t *)(dev->boardaddr + 0x1000) = 0xfff; + } } else pr_warn("amiga_parse_bootinfo: too many AutoConfig devices\n"); #endif /* CONFIG_ZORRO */ diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c index 23256434191c..0465444ceb21 100644 --- a/arch/m68k/atari/ataints.c +++ b/arch/m68k/atari/ataints.c @@ -301,11 +301,7 @@ void __init atari_init_IRQ(void) if (ATARIHW_PRESENT(SCU)) { /* init the SCU if present */ - tt_scu.sys_mask = 0x10; /* enable VBL (for the cursor) and - * disable HSYNC interrupts (who - * needs them?) MFP and SCC are - * enabled in VME mask - */ + tt_scu.sys_mask = 0x0; /* disable all interrupts */ tt_scu.vme_mask = 0x60; /* enable MFP and SCC ints */ } else { /* If no SCU and no Hades, the HSYNC interrupt needs to be diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 6bb202d03d34..9a084943a68a 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -373,6 +373,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 3598e0cc66b0..58ec725bf392 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -353,6 +353,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 3fbb8a9a307d..63ab7e892e59 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -368,6 +368,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 9a2b0c7871ce..ca40744eec60 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -351,6 +351,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 2408785e0e48..77bcc6faf468 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -352,6 +352,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m 
CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 6789d03b7b52..e73d4032b659 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -364,6 +364,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b57af39db3b4..638df8442c98 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -407,6 +407,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 29b70f27aedd..2248db426081 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -350,6 +350,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 757c582ffe84..2975b66521f6 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -351,6 +351,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index f15d1009108a..0a0e32344033 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -357,6 +357,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5218c1e614b5..f16f1a99c2ba 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -347,6 +347,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index acdf4eb7af28..667bd61ae9b3 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -348,6 +348,7 @@ CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_BAREUDP=m CONFIG_GTP=m +CONFIG_PFCP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 642fb80c5c4e..83410f8184ec 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -71,7 +71,7 @@ static void nfhd_submit_bio(struct bio *bio) len = bvec.bv_len; len >>= 9; nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift, - page_to_phys(bvec.bv_page) + bvec.bv_offset); + bvec_phys(&bvec)); sec += len; } bio_endio(bio); @@ -98,6 +98,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) { struct queue_limits lim = { .logical_block_size = bsize, + .features = BLK_FEAT_ROTATIONAL, }; struct nfhd_device *dev; int dev_id = id - NFHD_DEV_OFFSET; @@ -193,4 +194,5 @@ static void __exit nfhd_exit(void) module_init(nfhd_init); module_exit(nfhd_exit); +MODULE_DESCRIPTION("Atari NatFeat block device driver"); MODULE_LICENSE("GPL"); diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c index 
17b2987c2bf5..d41260672e24 100644 --- a/arch/m68k/emu/nfcon.c +++ b/arch/m68k/emu/nfcon.c @@ -173,4 +173,5 @@ static void __exit nfcon_exit(void) module_init(nfcon_init); module_exit(nfcon_exit); +MODULE_DESCRIPTION("Atari NatFeat console driver"); MODULE_LICENSE("GPL"); diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h index d7f3de9c5d6f..4ba14f3535fc 100644 --- a/arch/m68k/include/asm/cmpxchg.h +++ b/arch/m68k/include/asm/cmpxchg.h @@ -32,7 +32,7 @@ static inline unsigned long __arch_xchg(unsigned long x, volatile void * ptr, in x = tmp; break; default: - tmp = __invalid_xchg_size(x, ptr, size); + x = __invalid_xchg_size(x, ptr, size); break; } diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index ed9f34da1a2a..0850b099f300 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -35,7 +35,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, - off_t, pgoff) + unsigned long, pgoff) { if (pgoff & ~PAGE_MASK) return -EINVAL; diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index cc869f5d5693..953f5b7dc723 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -354,7 +354,7 @@ 412 n32 utimensat_time64 sys_utimensat 413 n32 pselect6_time64 compat_sys_pselect6_time64 414 n32 ppoll_time64 compat_sys_ppoll_time64 -416 n32 io_pgetevents_time64 sys_io_pgetevents +416 n32 io_pgetevents_time64 compat_sys_io_pgetevents_time64 417 n32 recvmmsg_time64 compat_sys_recvmmsg_time64 418 n32 mq_timedsend_time64 sys_mq_timedsend 419 n32 mq_timedreceive_time64 sys_mq_timedreceive diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 81428a2eb660..2439a2491cff 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -403,7 +403,7 @@ 412 o32 utimensat_time64 sys_utimensat sys_utimensat 413 o32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 o32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 o32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 o32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 o32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 o32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 o32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index daafeb20f993..dc9b902de8ea 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,6 +16,7 @@ config PARISC select ARCH_HAS_UBSAN select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN + select ARCH_SPLIT_ARG64 if !64BIT select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 2a12a547b447..826c8e51b585 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -23,12 +23,3 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, current->comm, current->pid, r20); return -ENOSYS; } - -asmlinkage long sys32_fanotify_mark(compat_int_t fanotify_fd, compat_uint_t flags, - compat_uint_t mask0, compat_uint_t mask1, compat_int_t dfd, - const char __user * pathname) -{ - return sys_fanotify_mark(fanotify_fd, flags, - ((__u64)mask1 << 32) | mask0, - dfd, 
pathname); -} diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index b13c21373974..66dc406b12e4 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -108,7 +108,7 @@ 95 common fchown sys_fchown 96 common getpriority sys_getpriority 97 common setpriority sys_setpriority -98 common recv sys_recv +98 common recv sys_recv compat_sys_recv 99 common statfs sys_statfs compat_sys_statfs 100 common fstatfs sys_fstatfs compat_sys_fstatfs 101 common stat64 sys_stat64 @@ -135,7 +135,7 @@ 120 common clone sys_clone_wrapper 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile -123 common recvfrom sys_recvfrom +123 common recvfrom sys_recvfrom compat_sys_recvfrom 124 32 adjtimex sys_adjtimex_time32 124 64 adjtimex sys_adjtimex 125 common mprotect sys_mprotect @@ -364,7 +364,7 @@ 320 common accept4 sys_accept4 321 common prlimit64 sys_prlimit64 322 common fanotify_init sys_fanotify_init -323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark +323 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 324 32 clock_adjtime sys_clock_adjtime32 324 64 clock_adjtime sys_clock_adjtime 325 common name_to_handle_at sys_name_to_handle_at diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d1030bc52564..d283d281d28e 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { struct eeh_dev *edev; struct pci_dev *pdev; + struct pci_bus *bus = NULL; if (pe->type & EEH_PE_PHB) return pe->phb->bus; @@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) - return pdev->bus; + bus = pdev->bus; + pci_unlock_rescan_remove(); - return NULL; + return bus; } diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4690c219bfa4..63432a33ec49 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -647,8 +647,9 @@ __after_prom_start: * Note: This process overwrites the OF exception vectors. */ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) - mr. r4,r26 /* In some cases the loader may */ - beq 9f /* have already put us at zero */ + mr r4,r26 /* Load the virtual source address into r4 */ + cmpld r3,r4 /* Check if source == dest */ + beq 9f /* If so skip the copy */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. 
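The removed parisc sys32_fanotify_mark() just above is now handled by the generic compat_sys_fanotify_mark(), which performs the same reassembly of a 64-bit mask passed as two 32-bit registers. The core operation as a standalone sketch; parisc treats mask1 as the high half, as the deleted helper shows, but the half ordering is ABI-specific:

    #include <stdint.h>
    #include <stdio.h>

    /* Reassemble a 64-bit syscall argument that arrived as two 32-bit
     * registers, high half first (as in the removed parisc helper). */
    static uint64_t merge_u64(uint32_t high, uint32_t low)
    {
        return ((uint64_t)high << 32) | low;
    }

    int main(void)
    {
        uint32_t mask0 = 0x00000008; /* low word, e.g. a fanotify event bit */
        uint32_t mask1 = 0x00000001; /* high word */

        printf("mask = %#llx\n", (unsigned long long)merge_u64(mask1, mask0));
        return 0;
    }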
*/ diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 3656f1ca7a21..ebae8415dfbb 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 85050be08a23..72b12bc10f90 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -27,6 +27,7 @@ #include <asm/paca.h> #include <asm/mmu.h> #include <asm/sections.h> /* _end */ +#include <asm/setup.h> #include <asm/smp.h> #include <asm/hw_breakpoint.h> #include <asm/svm.h> @@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image) if (!kdump_in_progress()) kexec_prepare_cpus(); +#ifdef CONFIG_PPC_PSERIES + /* + * This must be done after other CPUs have shut down, otherwise they + * could execute the 'scv' instruction, which is not supported with + * reloc disabled (see configure_exceptions()). + */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + pseries_disable_reloc_on_exc(); +#endif + printk("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. 
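The eeh_pe_bus_get() change earlier in this block is a standard teardown-race fix: dereference the pci_dev only while holding pci_lock_rescan_remove(), copy the result into a local, and unlock before returning so no path leaks the lock. The same shape in a standalone sketch, with a pthread mutex standing in for the PCI rescan/remove lock and toy dev/bus types standing in for struct pci_dev and struct pci_bus:

    #include <pthread.h>
    #include <stdio.h>

    struct bus { const char *name; };
    struct dev { struct bus *bus; };

    static pthread_mutex_t rescan_remove_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Copy the pointer while holding the lock; every path unlocks before
     * returning, which is why the result goes through a local. */
    static struct bus *dev_get_bus(struct dev *d)
    {
        struct bus *bus = NULL;

        pthread_mutex_lock(&rescan_remove_lock);
        if (d)
            bus = d->bus;
        pthread_mutex_unlock(&rescan_remove_lock);

        return bus;
    }

    int main(void)
    {
        struct bus b = { "pci0000:00" };
        struct dev d = { &b };

        printf("%s\n", dev_get_bus(&d)->name);
        return 0;
    }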
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 096d09ed89f6..431be156ca9b 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } else xics_kexec_teardown_cpu(secondary); } - -void pseries_machine_kexec(struct kimage *image) -{ - if (firmware_has_feature(FW_FEATURE_SET_MODE)) - pseries_disable_reloc_on_exc(); - - default_machine_kexec(image); -} diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index bba4ad192b0f..3968a6970fa8 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { } #endif extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); -void pseries_machine_kexec(struct kimage *image); extern void pSeries_final_fixup(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 284a6fa04b0c..b10a25325238 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void) { void (*ctor)(void *) = get_dtl_cache_ctor(); - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, - DISPATCH_LOG_BYTES, 0, ctor); + dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor); if (!dtl_cache) { pr_warn("Failed to create dispatch trace log buffer cache\n"); pr_warn("Stolen time statistics will be unreliable\n"); @@ -1159,7 +1159,6 @@ define_machine(pseries) { .machine_check_exception = pSeries_machine_check_exception, .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE - .machine_kexec = pseries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts index 8df4cf3656f2..a7d753b6fdfd 100644 --- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts +++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts @@ -15,6 +15,10 @@ model = "Kendryte KD233"; compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -46,7 +50,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -118,6 +121,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &spi0 { @@ -125,6 +129,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "canaan,kd233-tft", "ilitek,ili9341"; diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi index f87c5164d9cf..4f5d40fa1e77 100644 --- a/arch/riscv/boot/dts/canaan/k210.dtsi +++ b/arch/riscv/boot/dts/canaan/k210.dtsi @@ -16,13 +16,6 @@ #size-cells = <1>; compatible = "canaan,kendryte-k210"; - aliases { - serial0 = &uarths0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - }; - /* * The K210 has an sv39 MMU following the privileged specification v1.9. 
* Since this is a non-ratified draft specification, the kernel does not @@ -137,6 +130,7 @@ reg = <0x38000000 0x1000>; interrupts = <33>; clocks = <&sysclk K210_CLK_CPU>; + status = "disabled"; }; gpio0: gpio-controller@38001000 { @@ -152,6 +146,7 @@ <62>, <63>, <64>, <65>; gpio-controller; ngpios = <32>; + status = "disabled"; }; dmac0: dma-controller@50000000 { @@ -187,6 +182,7 @@ <&sysclk K210_CLK_GPIO>; clock-names = "bus", "db"; resets = <&sysrst K210_RST_GPIO>; + status = "disabled"; gpio1_0: gpio-port@0 { #gpio-cells = <2>; @@ -214,6 +210,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; uart2: serial@50220000 { @@ -230,6 +227,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; uart3: serial@50230000 { @@ -246,6 +244,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; spi2: spi@50240000 { @@ -259,6 +258,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ssi_clk", "pclk"; resets = <&sysrst K210_RST_SPI2>; + status = "disabled"; }; i2s0: i2s@50250000 { @@ -268,6 +268,7 @@ clocks = <&sysclk K210_CLK_I2S0>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S0>; + status = "disabled"; }; i2s1: i2s@50260000 { @@ -277,6 +278,7 @@ clocks = <&sysclk K210_CLK_I2S1>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S1>; + status = "disabled"; }; i2s2: i2s@50270000 { @@ -286,6 +288,7 @@ clocks = <&sysclk K210_CLK_I2S2>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S2>; + status = "disabled"; }; i2c0: i2c@50280000 { @@ -296,6 +299,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C0>; + status = "disabled"; }; i2c1: i2c@50290000 { @@ -306,6 +310,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C1>; + status = "disabled"; }; i2c2: i2c@502a0000 { @@ -316,6 +321,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C2>; + status = "disabled"; }; fpioa: pinmux@502b0000 { @@ -464,6 +470,7 @@ reset-names = "spi"; num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; spi1: spi@53000000 { @@ -479,6 +486,7 @@ reset-names = "spi"; num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; spi3: spi@54000000 { @@ -495,6 +503,7 @@ num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; }; }; diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts index 396c8ca4d24d..5734cc03753b 100644 --- a/arch/riscv/boot/dts/canaan/k210_generic.dts +++ b/arch/riscv/boot/dts/canaan/k210_generic.dts @@ -15,6 +15,10 @@ model = "Kendryte K210 generic"; compatible = "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -24,7 +28,6 @@ &fpioa { pinctrl-0 = <&jtag_pins>; pinctrl-names = "default"; - status = "okay"; jtag_pins: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index 6d25bf07481a..2ab376d609d2 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -17,6 +17,10 @@ compatible = "sipeed,maix-bit", "sipeed,maix-bitm", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -58,7 +62,6 @@ &fpioa { pinctrl-names = "default"; pinctrl-0 = <&jtag_pinctrl>; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = 
<K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -156,6 +159,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -170,6 +174,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -199,6 +204,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index f4f4d8d5e8b8..d98e20775c07 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -17,6 +17,10 @@ compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -63,7 +67,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -159,6 +162,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -173,6 +177,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index 0d86df47e1ed..79ecd549700a 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -16,6 +16,10 @@ model = "SiPeed MAIX GO"; compatible = "sipeed,maix-go", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -69,7 +73,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>, @@ -167,6 +170,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -181,6 +185,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -209,6 +214,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 5c05c498e2b8..019c03ae51f6 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -15,6 +15,10 @@ model = "SiPeed MAIXDUINO"; compatible = "sipeed,maixduino", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -39,8 +43,6 @@ }; &fpioa { - status = "okay"; - uarths_pinctrl: uarths-pinmux { pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */ @@ -132,6 +134,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -146,6 +149,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -174,6 +178,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git 
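The K210 devicetree churn above follows the usual convention: the SoC .dtsi marks peripherals status = "disabled" and each board .dts enables only what it actually wires up, while the per-board aliases node pins serial0 to the console UART. At runtime the OF core simply skips unavailable nodes. A standalone sketch of the availability rule that of_device_is_available() implements (toy node type, names invented for illustration):

    #include <stdio.h>
    #include <string.h>

    struct node { const char *name; const char *status; /* NULL = absent */ };

    /* Mirrors of_device_is_available(): a missing status property or a
     * value of "okay"/"ok" counts as available; anything else is skipped. */
    static int is_available(const struct node *n)
    {
        return !n->status || !strcmp(n->status, "okay") ||
               !strcmp(n->status, "ok");
    }

    int main(void)
    {
        struct node soc[] = {
            { "uarths0", "okay" },     /* enabled by the board .dts */
            { "spi2",    "disabled" }, /* left at the .dtsi default */
            { "i2s0",    NULL },       /* no status property at all */
        };

        for (unsigned int i = 0; i < 3; i++)
            if (is_available(&soc[i]))
                printf("would probe %s\n", soc[i].name);
        return 0;
    }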
a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index 20bc8c03b821..ca2d44d59d48 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -244,7 +244,7 @@ regulator-boot-on; regulator-always-on; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; regulator-name = "emmc_vdd"; }; }; diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h index 06e439eeef9a..09fde95a5e8f 100644 --- a/arch/riscv/include/asm/insn.h +++ b/arch/riscv/include/asm/insn.h @@ -145,7 +145,7 @@ /* parts of opcode for RVF, RVD and RVQ */ #define RVFDQ_FL_FS_WIDTH_OFF 12 -#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0) +#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0) #define RVFDQ_FL_FS_WIDTH_W 2 #define RVFDQ_FL_FS_WIDTH_D 3 #define RVFDQ_LS_FS_WIDTH_Q 4 diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 87cbd86576b2..4b95c574fd04 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -120,9 +120,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); mutex_unlock(&text_mutex); - if (!mod) - local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE); - return out; } @@ -156,9 +153,9 @@ static int __ftrace_modify_code(void *data) } else { while (atomic_read(&param->cpu_count) <= num_online_cpus()) cpu_relax(); - } - local_flush_icache_all(); + local_flush_icache_all(); + } return 0; } diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ed9cad20c039..3c830a6f7ef4 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void) for_each_irq_desc(i, desc) { struct irq_chip *chip; - int ret; chip = irq_desc_get_chip(desc); if (!chip) continue; - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) chip->irq_eoi(&desc->irq_data); if (chip->irq_mask) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 4007563fb607..ab03732d06c4 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -89,6 +89,14 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) memset(waddr, c, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages @@ -135,6 +143,14 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) ret = copy_to_kernel_nofault(waddr, insn, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. 
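The insn.h one-liner above narrows the load/store width mask: the width lives in the 3-bit funct3 field at bits 14:12, so GENMASK(3, 0) read one bit too many. A standalone sketch of the corrected decode; GENMASK is redefined locally in simplified form so the example compiles outside the kernel:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified GENMASK, enough for small fields. */
    #define GENMASK(h, l) (((1u << ((h) - (l) + 1)) - 1) << (l))

    #define RVFDQ_FL_FS_WIDTH_OFF  12
    #define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0) /* funct3 is only 3 bits */

    static unsigned int fp_width(uint32_t insn)
    {
        return (insn >> RVFDQ_FL_FS_WIDTH_OFF) & RVFDQ_FL_FS_WIDTH_MASK;
    }

    int main(void)
    {
        uint32_t fld = 0x0002b787; /* fld fa5, 0(t0): width field == 3 */

        /* Prints 3 (double). With GENMASK(3, 0) the mask would also pull
         * in bit 15, the low bit of rs1, and report 11 here. */
        printf("width = %u\n", fp_width(fld));
        return 0;
    }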
+ */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) @@ -189,9 +205,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) ret = patch_insn_set(tp, c, len); - if (!ret) - flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len); - return ret; } NOKPROBE_SYMBOL(patch_text_set_nosync); @@ -224,9 +237,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) ret = patch_insn_write(tp, insns, len); - if (!ret) - flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); - return ret; } NOKPROBE_SYMBOL(patch_text_nosync); @@ -253,9 +263,9 @@ static int patch_text_cb(void *data) } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); - } - local_flush_icache_all(); + local_flush_icache_all(); + } return ret; } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 528ec7cc9a62..10e311b2759d 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int graph_idx = 0; int level = 0; if (regs) { @@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, pc = regs->ra; } else { fp = frame->fp; - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); if (pc == (unsigned long)ret_from_exception) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) @@ -156,7 +157,7 @@ unsigned long __get_wchan(struct task_struct *task) return pc; } -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 64155323cc92..d77afe05578f 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -23,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, #ifdef CONFIG_64BIT SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); } @@ -32,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { /* * Note that the shift for mmap2 is constant (12), diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 04db1f993c47..bcf41d6e0df0 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 
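The stacktrace.c fix above threads a single graph_idx cursor through the whole unwind: ftrace_graph_ret_addr() consumes entries from the task's saved return-address stack, so passing NULL (a fresh cursor) for every frame kept resolving the same first entry whenever several graph-traced frames were live. A toy model of why the cursor must persist; graph_ret_addr() and the TRAMPOLINE constant are simplifications, not the kernel API:

    #include <stdio.h>

    #define TRAMPOLINE 0xdeadUL

    /* Real return addresses that the graph tracer stashed away when it
     * replaced them on the stack with its trampoline. */
    static unsigned long ret_stack[] = { 0x1111, 0x2222, 0x3333 };

    static unsigned long graph_ret_addr(int *idx, unsigned long ra)
    {
        if (ra != TRAMPOLINE)
            return ra;
        return ret_stack[(*idx)++]; /* consume the next saved entry */
    }

    int main(void)
    {
        unsigned long frames[] = { TRAMPOLINE, 0x4444, TRAMPOLINE, TRAMPOLINE };
        int graph_idx = 0; /* one cursor for the whole walk, as in the fix */

        /* Prints 0x1111, 0x4444, 0x2222, 0x3333. Resetting graph_idx per
         * frame would print 0x1111 for every trampoline hit instead. */
        for (unsigned int i = 0; i < 4; i++)
            printf("frame %u -> %#lx\n", i, graph_ret_addr(&graph_idx, frames[i]));
        return 0;
    }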
48ef5fe5c08a..5a36d5538dae 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -170,11 +170,14 @@ static void kaslr_adjust_got(unsigned long offset) u64 *entry; /* - * Even without -fPIE, Clang still uses a global offset table for some - * reason. Adjust the GOT entries. + * Adjust GOT entries, except for ones for undefined weak symbols + * that resolved to zero. This also skips the first three reserved + * entries on s390x that are zero. */ - for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) - *entry += offset - __START_KERNEL; + for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) { + if (*entry) + *entry += offset - __START_KERNEL; + } } /* diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 8c4adece8911..f3602414a961 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -601,17 +601,16 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y # CONFIG_FB_DEVICE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m -CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_MLX5_VFIO_PCI=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 6dd11d3b6aaa..d0d8925fdf09 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -592,17 +592,16 @@ CONFIG_WATCHDOG_CORE=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y # CONFIG_FB_DEVICE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m -CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_MLX5_VFIO_PCI=m diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 7f5004065e8a..35555c944630 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void) static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, unsigned long ti_work) { - choose_random_kstack_offset(get_tod_clock_fast() & 0xff); + choose_random_kstack_offset(get_tod_clock_fast()); } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 95990461888f..9281063636a7 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -427,6 +427,7 @@ struct kvm_vcpu_stat { u64 instruction_io_other; u64 instruction_lpsw; u64 instruction_lpswe; + u64 instruction_lpswey; u64 instruction_pfmf; u64 instruction_ptff; u64 instruction_sck; diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index dc2355c623d6..50cbcbbaa03d 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -38,33 +38,6 @@ #include "entry.h" -/* - * Perform the mmap() system call. Linux for S/390 isn't able to handle more - * than 5 system call parameters, so this system call uses a memory block - * for parameter passing. 
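In the s390 startup.c hunk above, the KASLR GOT pass now skips entries that are zero: undefined weak symbols resolve to 0, the first three s390x GOT slots are reserved, and blindly adding the offset would turn those zeros into dangling pointers. The shape of the fix as a standalone sketch:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Slide non-zero GOT entries by 'delta'; zero means an unresolved weak
     * symbol (or a reserved slot) and must stay zero. */
    static void adjust_got(uint64_t *got, size_t n, uint64_t delta)
    {
        for (size_t i = 0; i < n; i++)
            if (got[i])
                got[i] += delta;
    }

    int main(void)
    {
        uint64_t got[] = { 0, 0, 0, /* reserved slots */
                           0x1000, 0 /* weak, unresolved */, 0x2000 };

        adjust_got(got, sizeof(got) / sizeof(got[0]), 0x100000);
        for (size_t i = 0; i < sizeof(got) / sizeof(got[0]); i++)
            printf("got[%zu] = %#llx\n", i, (unsigned long long)got[i]);
        return 0;
    }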
- */ - -struct s390_mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) -{ - struct s390_mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - #ifdef CONFIG_SYSVIPC /* * sys_ipc() is the de-multiplexer for the SysV IPC calls. diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 82e9631cd9ef..54b5b2565df8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_io_other), STATS_DESC_COUNTER(VCPU, instruction_lpsw), STATS_DESC_COUNTER(VCPU, instruction_lpswe), + STATS_DESC_COUNTER(VCPU, instruction_lpswey), STATS_DESC_COUNTER(VCPU, instruction_pfmf), STATS_DESC_COUNTER(VCPU, instruction_ptff), STATS_DESC_COUNTER(VCPU, instruction_sck), diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 111eb5c74784..bf8534218af3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar) +{ + u32 base1 = vcpu->arch.sie_block->ipb >> 28; + s64 disp1; + + /* The displacement is a 20bit _SIGNED_ value */ + disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19); + + if (ar) + *ar = base1; + + return (base1 ? 
vcpu->run->s.regs.gprs[base1] : 0) + disp1; +} + static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, u64 *address1, u64 *address2, u8 *ar_b1, u8 *ar_b2) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1be19cc9d73c..1a49b89706f8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) return 0; } +static int handle_lpswey(struct kvm_vcpu *vcpu) +{ + psw_t new_psw; + u64 addr; + int rc; + u8 ar; + + vcpu->stat.instruction_lpswey++; + + if (!test_kvm_facility(vcpu->kvm, 193)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_siy(vcpu, &ar); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + return 0; +} + static int handle_stidp(struct kvm_vcpu *vcpu) { u64 stidp_data = vcpu->kvm->arch.model.cpuid; @@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) case 0x61: case 0x62: return handle_ri(vcpu); + case 0x71: + return handle_lpswey(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index abb629d7e131..7e3e767ab87d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) void crst_table_free(struct mm_struct *mm, unsigned long *table) { + if (!table) + return; pagetable_free(virt_to_ptdesc(table)); } @@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val) static void base_crst_free(unsigned long *table) { + if (!table) + return; pagetable_free(virt_to_ptdesc(table)); } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index ff8f24854c64..0ef83b6ac0db 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -410,7 +410,7 @@ static void __init cpu_enable_directed_irq(void *unused) union zpci_sic_iib iib = {{0}}; union zpci_sic_iib ziib = {{0}}; - iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector; + iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector); zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib); zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib); diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index 9dca568509a5..d6f4afcb0e87 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -59,3 +59,14 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, (u64)len0 << 32 | len1, advice); #endif } + +/* + * swap the arguments the way that libc wants them instead of + * moving flags ahead of the 64-bit nbytes argument + */ +SYSCALL_DEFINE6(sh_sync_file_range6, int, fd, SC_ARG64(offset), + SC_ARG64(nbytes), unsigned int, flags) +{ + return ksys_sync_file_range(fd, SC_VAL64(loff_t, offset), + SC_VAL64(loff_t, nbytes), flags); +} diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index bbf83a2db986..c55fd7696d40 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -321,7 +321,7 @@ 311 common set_robust_list sys_set_robust_list 312 common get_robust_list sys_get_robust_list 313 
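The new sys_sh_sync_file_range6() above exists because 32-bit sh passes each 64-bit value in a register pair, so flags must come after both pairs rather than in the middle, where the generic sync_file_range slot put it. SC_ARG64/SC_VAL64 hide which half arrives first; a standalone sketch of the reassembly, with big-endian half ordering assumed and local stand-in names rather than the kernel's macros:

    #include <stdint.h>
    #include <stdio.h>

    /* Conceptual stand-in for SC_VAL64 on a big-endian 32-bit ABI: the
     * high half of a 64-bit argument arrives in the first register. */
    static int64_t sc_val64_be(uint32_t hi, uint32_t lo)
    {
        return (int64_t)(((uint64_t)hi << 32) | lo);
    }

    /* sync_file_range6(fd, off_hi, off_lo, nbytes_hi, nbytes_lo, flags) */
    static long sync_file_range6(int fd, uint32_t off_hi, uint32_t off_lo,
                                 uint32_t nb_hi, uint32_t nb_lo,
                                 unsigned int flags)
    {
        int64_t offset = sc_val64_be(off_hi, off_lo);
        int64_t nbytes = sc_val64_be(nb_hi, nb_lo);

        printf("fd=%d offset=%lld nbytes=%lld flags=%#x\n",
               fd, (long long)offset, (long long)nbytes, flags);
        return 0;
    }

    int main(void)
    {
        return (int)sync_file_range6(3, 0, 4096, 0, 8192, 0);
    }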
common splice sys_splice -314 common sync_file_range sys_sync_file_range +314 common sync_file_range sys_sh_sync_file_range6 315 common tee sys_tee 316 common vmsplice sys_vmsplice 317 common move_pages sys_move_pages @@ -395,6 +395,7 @@ 385 common pkey_alloc sys_pkey_alloc 386 common pkey_free sys_pkey_free 387 common rseq sys_rseq +388 common sync_file_range2 sys_sync_file_range2 # room for arch specific syscalls 393 common semget sys_semget 394 common semctl sys_semctl diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index a45f0f31fe51..a3d308f2043e 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -18,224 +18,3 @@ sys32_mmap2: sethi %hi(sys_mmap), %g1 jmpl %g1 + %lo(sys_mmap), %g0 sllx %o5, 12, %o5 - - .align 32 - .globl sys32_socketcall -sys32_socketcall: /* %o0=call, %o1=args */ - cmp %o0, 1 - bl,pn %xcc, do_einval - cmp %o0, 18 - bg,pn %xcc, do_einval - sub %o0, 1, %o0 - sllx %o0, 5, %o0 - sethi %hi(__socketcall_table_begin), %g2 - or %g2, %lo(__socketcall_table_begin), %g2 - jmpl %g2 + %o0, %g0 - nop -do_einval: - retl - mov -EINVAL, %o0 - - .align 32 -__socketcall_table_begin: - - /* Each entry is exactly 32 bytes. */ -do_sys_socket: /* sys_socket(int, int, int) */ -1: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_socket), %g1 -2: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_socket), %g0 -3: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */ -4: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_bind), %g1 -5: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_bind), %g0 -6: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */ -7: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_connect), %g1 -8: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_connect), %g0 -9: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_listen: /* sys_listen(int, int) */ -10: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_listen), %g1 - jmpl %g1 + %lo(sys_listen), %g0 -11: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop - nop -do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */ -12: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_accept), %g1 -13: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_accept), %g0 -14: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */ -15: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getsockname), %g1 -16: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_getsockname), %g0 -17: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */ -18: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getpeername), %g1 -19: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_getpeername), %g0 -20: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */ -21: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_socketpair), %g1 -22: ldswa [%o1 + 0x8] %asi, %o2 -23: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_socketpair), %g0 -24: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */ -25: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_send), %g1 -26: lduwa [%o1 + 0x8] %asi, %o2 -27: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_send), %g0 -28: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */ -29: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_recv), %g1 -30: lduwa [%o1 + 0x8] %asi, 
%o2 -31: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_recv), %g0 -32: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */ -33: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_sendto), %g1 -34: lduwa [%o1 + 0x8] %asi, %o2 -35: lduwa [%o1 + 0xc] %asi, %o3 -36: lduwa [%o1 + 0x10] %asi, %o4 -37: ldswa [%o1 + 0x14] %asi, %o5 - jmpl %g1 + %lo(sys_sendto), %g0 -38: lduwa [%o1 + 0x4] %asi, %o1 -do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */ -39: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_recvfrom), %g1 -40: lduwa [%o1 + 0x8] %asi, %o2 -41: lduwa [%o1 + 0xc] %asi, %o3 -42: lduwa [%o1 + 0x10] %asi, %o4 -43: lduwa [%o1 + 0x14] %asi, %o5 - jmpl %g1 + %lo(sys_recvfrom), %g0 -44: lduwa [%o1 + 0x4] %asi, %o1 -do_sys_shutdown: /* sys_shutdown(int, int) */ -45: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_shutdown), %g1 - jmpl %g1 + %lo(sys_shutdown), %g0 -46: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop - nop -do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */ -47: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_setsockopt), %g1 -48: ldswa [%o1 + 0x8] %asi, %o2 -49: lduwa [%o1 + 0xc] %asi, %o3 -50: ldswa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(sys_setsockopt), %g0 -51: ldswa [%o1 + 0x4] %asi, %o1 - nop -do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */ -52: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getsockopt), %g1 -53: ldswa [%o1 + 0x8] %asi, %o2 -54: lduwa [%o1 + 0xc] %asi, %o3 -55: lduwa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(sys_getsockopt), %g0 -56: ldswa [%o1 + 0x4] %asi, %o1 - nop -do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */ -57: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_sendmsg), %g1 -58: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(compat_sys_sendmsg), %g0 -59: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */ -60: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_recvmsg), %g1 -61: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(compat_sys_recvmsg), %g0 -62: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */ -63: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_accept4), %g1 -64: lduwa [%o1 + 0x8] %asi, %o2 -65: ldswa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_accept4), %g0 -66: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - - .section __ex_table,"a" - .align 4 - .word 1b, __retl_efault, 2b, __retl_efault - .word 3b, __retl_efault, 4b, __retl_efault - .word 5b, __retl_efault, 6b, __retl_efault - .word 7b, __retl_efault, 8b, __retl_efault - .word 9b, __retl_efault, 10b, __retl_efault - .word 11b, __retl_efault, 12b, __retl_efault - .word 13b, __retl_efault, 14b, __retl_efault - .word 15b, __retl_efault, 16b, __retl_efault - .word 17b, __retl_efault, 18b, __retl_efault - .word 19b, __retl_efault, 20b, __retl_efault - .word 21b, __retl_efault, 22b, __retl_efault - .word 23b, __retl_efault, 24b, __retl_efault - .word 25b, __retl_efault, 26b, __retl_efault - .word 27b, __retl_efault, 28b, __retl_efault - .word 29b, __retl_efault, 30b, __retl_efault - .word 31b, __retl_efault, 32b, __retl_efault - .word 33b, __retl_efault, 34b, __retl_efault - .word 35b, __retl_efault, 36b, __retl_efault - .word 37b, __retl_efault, 38b, __retl_efault - .word 39b, __retl_efault, 40b, __retl_efault - .word 41b, __retl_efault, 42b, __retl_efault - .word 43b, __retl_efault, 44b, __retl_efault - .word 
45b, __retl_efault, 46b, __retl_efault - .word 47b, __retl_efault, 48b, __retl_efault - .word 49b, __retl_efault, 50b, __retl_efault - .word 51b, __retl_efault, 52b, __retl_efault - .word 53b, __retl_efault, 54b, __retl_efault - .word 55b, __retl_efault, 56b, __retl_efault - .word 57b, __retl_efault, 58b, __retl_efault - .word 59b, __retl_efault, 60b, __retl_efault - .word 61b, __retl_efault, 62b, __retl_efault - .word 63b, __retl_efault, 64b, __retl_efault - .word 65b, __retl_efault, 66b, __retl_efault - .previous diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index ac6c281ccfe0..cfdfb3707c16 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -117,7 +117,7 @@ 90 common dup2 sys_dup2 91 32 setfsuid32 sys_setfsuid 92 common fcntl sys_fcntl compat_sys_fcntl -93 common select sys_select +93 common select sys_select compat_sys_select 94 32 setfsgid32 sys_setfsgid 95 common fsync sys_fsync 96 common setpriority sys_setpriority @@ -155,7 +155,7 @@ 123 32 fchown sys_fchown16 123 64 fchown sys_fchown 124 common fchmod sys_fchmod -125 common recvfrom sys_recvfrom +125 common recvfrom sys_recvfrom compat_sys_recvfrom 126 32 setreuid sys_setreuid16 126 64 setreuid sys_setreuid 127 32 setregid sys_setregid16 @@ -247,7 +247,7 @@ 204 32 readdir sys_old_readdir compat_sys_old_readdir 204 64 readdir sys_nis_syscall 205 common readahead sys_readahead compat_sys_readahead -206 common socketcall sys_socketcall sys32_socketcall +206 common socketcall sys_socketcall compat_sys_socketcall 207 common syslog sys_syslog 208 common lookup_dcookie sys_ni_syscall 209 common fadvise64 sys_fadvise64 compat_sys_fadvise64 @@ -461,7 +461,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index ef805eaa9e01..9f1e76ddda5a 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -447,43 +447,31 @@ static int bulk_req_safe_read( return n; } -/* Called without dev->lock held, and only in interrupt context. 
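The deleted sparc32 socketcall assembly above (one 32-byte stub per socket call, every user load covered by an exception-table fixup) is replaced by the generic C demultiplexer, compat_sys_socketcall. Its essential logic as a heavily truncated standalone sketch: only two calls are shown, and the real code validates per-call argument counts and uses copy_from_user() where memcpy() appears here.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    enum { SYS_SOCKET = 1, SYS_BIND = 2 }; /* the real table runs to 18 */

    /* 32-bit argument count per call, as the generic demultiplexer keeps. */
    static const unsigned char nargs[] = { 0, 3, 3 };

    static long compat_socketcall(int call, const uint32_t *uargs)
    {
        uint32_t a[6];

        if (call < SYS_SOCKET || call > SYS_BIND)
            return -22; /* -EINVAL, matching the old do_einval stub */

        /* The kernel uses copy_from_user() here; memcpy stands in. */
        memcpy(a, uargs, nargs[call] * sizeof(uint32_t));

        switch (call) {
        case SYS_SOCKET:
            printf("socket(%d, %d, %d)\n", (int)a[0], (int)a[1], (int)a[2]);
            return 0;
        default: /* SYS_BIND etc. would dispatch similarly */
            return -22;
        }
    }

    int main(void)
    {
        uint32_t args[3] = { 2 /* AF_INET */, 1 /* SOCK_STREAM */, 0 };

        return (int)compat_socketcall(SYS_SOCKET, args);
    }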
*/ -static void ubd_handler(void) +static void ubd_end_request(struct io_thread_req *io_req) { - int n; - int count; - - while(1){ - n = bulk_req_safe_read( - thread_fd, - irq_req_buffer, - &irq_remainder, - &irq_remainder_size, - UBD_REQ_BUFFER_SIZE - ); - if (n < 0) { - if(n == -EAGAIN) - break; - printk(KERN_ERR "spurious interrupt in ubd_handler, " - "err = %d\n", -n); - return; - } - for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - struct io_thread_req *io_req = (*irq_req_buffer)[count]; - - if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { - blk_queue_max_discard_sectors(io_req->req->q, 0); - blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); - } - blk_mq_end_request(io_req->req, io_req->error); - kfree(io_req); - } + if (io_req->error == BLK_STS_NOTSUPP) { + if (req_op(io_req->req) == REQ_OP_DISCARD) + blk_queue_disable_discard(io_req->req->q); + else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES) + blk_queue_disable_write_zeroes(io_req->req->q); } + blk_mq_end_request(io_req->req, io_req->error); + kfree(io_req); } static irqreturn_t ubd_intr(int irq, void *dev) { - ubd_handler(); + int len, i; + + while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer, + &irq_remainder, &irq_remainder_size, + UBD_REQ_BUFFER_SIZE)) >= 0) { + for (i = 0; i < len / sizeof(struct io_thread_req *); i++) + ubd_end_request((*irq_req_buffer)[i]); + } + + if (len < 0 && len != -EAGAIN) + pr_err("spurious interrupt in %s, err = %d\n", __func__, len); return IRQ_HANDLED; } @@ -847,6 +835,7 @@ static int ubd_add(int n, char **error_out) struct queue_limits lim = { .max_segments = MAX_SG, .seg_boundary_mask = PAGE_SIZE - 1, + .features = BLK_FEAT_WRITE_CACHE, }; struct gendisk *disk; int err = 0; @@ -893,8 +882,6 @@ static int ubd_add(int n, char **error_out) goto out_cleanup_tags; } - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_write_cache(disk->queue, true, false); disk->major = UBD_MAJOR; disk->first_minor = n << UBD_SHIFT; disk->minors = 1 << UBD_SHIFT; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d7122a1883e..125914536825 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1118,6 +1118,13 @@ config X86_LOCAL_APIC depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI select IRQ_DOMAIN_HIERARCHY +config ACPI_MADT_WAKEUP + def_bool y + depends on X86_64 + depends on ACPI + depends on SMP + depends on X86_LOCAL_APIC + config X86_IO_APIC def_bool y depends on X86_LOCAL_APIC || X86_UP_IOAPIC diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 59aedf32c4ea..6d20a6ce0507 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -36,6 +36,6 @@ config AS_VPCLMULQDQ Supported by binutils >= 2.30 and LLVM integrated assembler config AS_WRUSS - def_bool $(as-instr,wrussq %rax$(comma)(%rbx)) + def_bool $(as-instr64,wrussq %rax$(comma)(%rbx)) help Supported by binutils >= 2.31 and LLVM integrated assembler diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b70e4a21c15f..944454306ef4 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -531,8 +531,3 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) return output + entry_offset; } - -void __fortify_panic(const u8 reason, size_t avail, size_t size) -{ - error("detected buffer overflow"); -} diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 0457a9d7e515..cd44e120fe53 100644 --- 
a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -127,7 +127,35 @@ static bool fault_in_kernel_space(unsigned long address) #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-shared.c" +#include "../../coco/sev/shared.c" + +static struct svsm_ca *svsm_get_caa(void) +{ + return boot_svsm_caa; +} + +static u64 svsm_get_caa_pa(void) +{ + return boot_svsm_caa_pa; +} + +static int svsm_perform_call_protocol(struct svsm_call *call) +{ + struct ghcb *ghcb; + int ret; + + if (boot_ghcb) + ghcb = boot_ghcb; + else + ghcb = NULL; + + do { + ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) + : svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + return ret; +} bool sev_snp_enabled(void) { @@ -145,8 +173,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * If private -> shared then invalidate the page before requesting the * state change in the RMP table. */ - if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_4k_page(paddr, paddr, false); /* Issue VMGEXIT to change the page state in RMP table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); @@ -161,8 +189,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. */ - if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_4k_page(paddr, paddr, true); } void snp_set_page_private(unsigned long paddr) @@ -256,6 +284,16 @@ void sev_es_shutdown_ghcb(void) error("SEV-ES CPU Features missing."); /* + * This denotes whether to use the GHCB MSR protocol or the GHCB + * shared page to perform a GHCB request. Since the GHCB page is + * being changed to encrypted, it can't be used to perform GHCB + * requests. Clear the boot_ghcb variable so that the GHCB MSR + * protocol is used to change the GHCB page over to an encrypted + * page. + */ + boot_ghcb = NULL; + + /* * GHCB Page must be flushed from the cache and mapped encrypted again. * Otherwise the running kernel will see strange cache effects when * trying to use that page. @@ -463,6 +501,13 @@ static bool early_snp_init(struct boot_params *bp) setup_cpuid_table(cc_info); /* + * Record the SVSM Calling Area (CA) address if the guest is not + * running at VMPL0. The CA will be used to communicate with the + * SVSM and request its services. + */ + svsm_setup_ca(cc_info); + + /* * Pass run-time kernel a pointer to CC info via boot_params so EFI * config table doesn't need to be searched again during early startup * phase. @@ -565,22 +610,31 @@ void sev_enable(struct boot_params *bp) * features. */ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { - if (!(get_hv_features() & GHCB_HV_FT_SNP)) + u64 hv_features; + int ret; + + hv_features = get_hv_features(); + if (!(hv_features & GHCB_HV_FT_SNP)) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); /* - * Enforce running at VMPL0. - * - * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically - * higher) privilege level. Here, clear the VMPL1 permission mask of the - * GHCB page. If the guest is not running at VMPL0, this will fail. + * Enforce running at VMPL0 or with an SVSM. * - * If the guest is running at VMPL0, it will succeed. 
Even if that operation - * modifies permission bits, it is still ok to do so currently because Linux - * SNP guests running at VMPL0 only run at VMPL0, so VMPL1 or higher - * permission mask changes are a don't-care. + * Use RMPADJUST (see the rmpadjust() function for a description of + * what the instruction does) to update the VMPL1 permissions of a + * page. If the guest is running at VMPL0, this will succeed. If the + * guest is running at any other VMPL, this will fail. Linux SNP guests + * only ever run at a single VMPL level so permission mask changes of a + * lesser-privileged VMPL are a don't-care. + */ + ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1); + + /* + * Running at VMPL0 is not required if an SVSM is present and the hypervisor + * supports the required SVSM GHCB events. */ - if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1)) + if (ret && + !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL))) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); } diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index fed8d13ce252..0aae4d4ed615 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -203,7 +203,7 @@ int check_knl_erratum(void) */ if (!is_intel() || cpu.family != 6 || - cpu.model != INTEL_FAM6_XEON_PHI_KNL) + cpu.model != 0x57 /*INTEL_XEON_PHI_KNL*/) return 0; /* diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 9049f390d834..9d0fea18d3c8 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -27,34 +27,32 @@ char *heap_end = _end; /* Default end of heap = no heap */ * screws up the old-style command line protocol, adjust by * filling in the new-style command line pointer instead. */ - static void copy_boot_params(void) { struct old_cmdline { u16 cl_magic; u16 cl_offset; }; - const struct old_cmdline * const oldcmd = - absolute_pointer(OLD_CL_ADDRESS); + const struct old_cmdline * const oldcmd = absolute_pointer(OLD_CL_ADDRESS); BUILD_BUG_ON(sizeof(boot_params) != 4096); memcpy(&boot_params.hdr, &hdr, sizeof(hdr)); - if (!boot_params.hdr.cmd_line_ptr && - oldcmd->cl_magic == OLD_CL_MAGIC) { - /* Old-style command line protocol. */ + if (!boot_params.hdr.cmd_line_ptr && oldcmd->cl_magic == OLD_CL_MAGIC) { + /* Old-style command line protocol */ u16 cmdline_seg; - /* Figure out if the command line falls in the region - of memory that an old kernel would have copied up - to 0x90000... */ + /* + * Figure out if the command line falls in the region + * of memory that an old kernel would have copied up + * to 0x90000... + */ if (oldcmd->cl_offset < boot_params.hdr.setup_move_size) cmdline_seg = ds(); else cmdline_seg = 0x9000; - boot_params.hdr.cmd_line_ptr = - (cmdline_seg << 4) + oldcmd->cl_offset; + boot_params.hdr.cmd_line_ptr = (cmdline_seg << 4) + oldcmd->cl_offset; } } @@ -66,6 +64,7 @@ static void copy_boot_params(void) static void keyboard_init(void) { struct biosregs ireg, oreg; + initregs(&ireg); ireg.ah = 0x02; /* Get keyboard status */ @@ -83,8 +82,10 @@ static void query_ist(void) { struct biosregs ireg, oreg; - /* Some older BIOSes apparently crash on this call, so filter - it from machines too old to have SpeedStep at all. */ + /* + * Some older BIOSes apparently crash on this call, so filter + * it from machines too old to have SpeedStep at all. 
+ */ if (cpu.level < 6) return; @@ -119,17 +120,13 @@ static void init_heap(void) char *stack_end; if (boot_params.hdr.loadflags & CAN_USE_HEAP) { - asm("leal %n1(%%esp),%0" - : "=r" (stack_end) : "i" (STACK_SIZE)); - - heap_end = (char *) - ((size_t)boot_params.hdr.heap_end_ptr + 0x200); + stack_end = (char *) (current_stack_pointer - STACK_SIZE); + heap_end = (char *) ((size_t)boot_params.hdr.heap_end_ptr + 0x200); if (heap_end > stack_end) heap_end = stack_end; } else { /* Boot protocol 2.00 only, no heap available */ - puts("WARNING: Ancient bootloader, some functionality " - "may be limited!\n"); + puts("WARNING: Ancient bootloader, some functionality may be limited!\n"); } } @@ -150,12 +147,11 @@ void main(void) /* Make sure we have all the proper CPU support */ if (validate_cpu()) { - puts("Unable to boot - please use a kernel appropriate " - "for your CPU.\n"); + puts("Unable to boot - please use a kernel appropriate for your CPU.\n"); die(); } - /* Tell the BIOS what CPU mode we intend to run in. */ + /* Tell the BIOS what CPU mode we intend to run in */ set_bios_mode(); /* Detect memory layout */ diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile index c816acf78b6a..eabdc7486538 100644 --- a/arch/x86/coco/Makefile +++ b/arch/x86/coco/Makefile @@ -6,3 +6,4 @@ CFLAGS_core.o += -fno-stack-protector obj-y += core.o obj-$(CONFIG_INTEL_TDX_GUEST) += tdx/ +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev/ diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index b31ef2424d19..0f81f70aca82 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -29,7 +29,6 @@ static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { case CC_ATTR_GUEST_UNROLL_STRING_IO: - case CC_ATTR_HOTPLUG_DISABLED: case CC_ATTR_GUEST_MEM_ENCRYPT: case CC_ATTR_MEM_ENCRYPT: return true; diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile new file mode 100644 index 000000000000..4e375e7305ac --- /dev/null +++ b/arch/x86/coco/sev/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += core.o + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_core.o = -pg +endif + +KASAN_SANITIZE_core.o := n +KMSAN_SANITIZE_core.o := n +KCOV_INSTRUMENT_core.o := n + +# With some compiler versions the generated code results in boot hangs, caused +# by several compilation units. To be safe, disable all instrumentation. +KCSAN_SANITIZE := n diff --git a/arch/x86/kernel/sev.c b/arch/x86/coco/sev/core.c index 3342ed58e168..082d61d85dfc 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/coco/sev/core.c @@ -133,16 +133,20 @@ struct ghcb_state { struct ghcb *ghcb; }; +/* For early boot SVSM communication */ +static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); + static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); +static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); +static DEFINE_PER_CPU(u64, svsm_caa_pa); struct sev_config { __u64 debug : 1, /* - * A flag used by __set_pages_state() that indicates when the - * per-CPU GHCB has been created and registered and thus can be - * used by the BSP instead of the early boot GHCB. + * Indicates when the per-CPU GHCB has been created and registered + * and thus can be used by the BSP instead of the early boot GHCB. 
* * For APs, the per-CPU GHCB is created before they are started * and registered upon startup, so this flag can be used globally @@ -150,6 +154,15 @@ struct sev_config { */ ghcbs_initialized : 1, + /* + * Indicates when the per-CPU SVSM CA is to be used instead of the + * boot SVSM CA. + * + * For APs, the per-CPU SVSM CA is created as part of the AP + * bringup, so this flag can be used globally for the BSP and APs. + */ + use_cas : 1, + __reserved : 62; }; @@ -572,8 +585,61 @@ fault: return ES_EXCEPTION; } +static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) +{ + long error_code = ctxt->fi.error_code; + int trapnr = ctxt->fi.vector; + + ctxt->regs->orig_ax = ctxt->fi.error_code; + + switch (trapnr) { + case X86_TRAP_GP: + exc_general_protection(ctxt->regs, error_code); + break; + case X86_TRAP_UD: + exc_invalid_op(ctxt->regs); + break; + case X86_TRAP_PF: + write_cr2(ctxt->fi.cr2); + exc_page_fault(ctxt->regs, error_code); + break; + case X86_TRAP_AC: + exc_alignment_check(ctxt->regs, error_code); + break; + default: + pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); + BUG(); + } +} + /* Include code shared with pre-decompression boot stage */ -#include "sev-shared.c" +#include "shared.c" + +static inline struct svsm_ca *svsm_get_caa(void) +{ + /* + * Use rIP-relative references when called early in the boot. If + * ->use_cas is set, then it is late in the boot and no need + * to worry about rIP-relative references. + */ + if (RIP_REL_REF(sev_cfg).use_cas) + return this_cpu_read(svsm_caa); + else + return RIP_REL_REF(boot_svsm_caa); +} + +static u64 svsm_get_caa_pa(void) +{ + /* + * Use rIP-relative references when called early in the boot. If + * ->use_cas is set, then it is late in the boot and no need + * to worry about rIP-relative references. + */ + if (RIP_REL_REF(sev_cfg).use_cas) + return this_cpu_read(svsm_caa_pa); + else + return RIP_REL_REF(boot_svsm_caa_pa); +} static noinstr void __sev_put_ghcb(struct ghcb_state *state) { @@ -600,6 +666,44 @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state) } } +static int svsm_perform_call_protocol(struct svsm_call *call) +{ + struct ghcb_state state; + unsigned long flags; + struct ghcb *ghcb; + int ret; + + /* + * This can be called very early in the boot, use native functions in + * order to avoid paravirt issues. + */ + flags = native_local_irq_save(); + + /* + * Use rip-relative references when called early in the boot. If + * ghcbs_initialized is set, then it is late in the boot and no need + * to worry about rip-relative references in called functions. + */ + if (RIP_REL_REF(sev_cfg).ghcbs_initialized) + ghcb = __sev_get_ghcb(&state); + else if (RIP_REL_REF(boot_ghcb)) + ghcb = RIP_REL_REF(boot_ghcb); + else + ghcb = NULL; + + do { + ret = ghcb ? 
svsm_perform_ghcb_protocol(ghcb, call) + : svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + if (RIP_REL_REF(sev_cfg).ghcbs_initialized) + __sev_put_ghcb(&state); + + native_local_irq_restore(flags); + + return ret; +} + void noinstr __sev_es_nmi_complete(void) { struct ghcb_state state; @@ -709,7 +813,6 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr, { unsigned long paddr_end; u64 val; - int ret; vaddr = vaddr & PAGE_MASK; @@ -717,12 +820,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr, paddr_end = paddr + (npages << PAGE_SHIFT); while (paddr < paddr_end) { - if (op == SNP_PAGE_STATE_SHARED) { - /* Page validation must be rescinded before changing to shared */ - ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false); - if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) - goto e_term; - } + /* Page validation must be rescinded before changing to shared */ + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_4k_page(vaddr, paddr, false); /* * Use the MSR protocol because this function can be called before @@ -744,12 +844,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr, paddr, GHCB_MSR_PSC_RESP_VAL(val))) goto e_term; - if (op == SNP_PAGE_STATE_PRIVATE) { - /* Page validation must be performed after changing to private */ - ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true); - if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) - goto e_term; - } + /* Page validation must be performed after changing to private */ + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_4k_page(vaddr, paddr, true); vaddr += PAGE_SIZE; paddr += PAGE_SIZE; @@ -913,22 +1010,49 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end) set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); } -static int snp_set_vmsa(void *va, bool vmsa) +static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) { - u64 attrs; + int ret; - /* - * Running at VMPL0 allows the kernel to change the VMSA bit for a page - * using the RMPADJUST instruction. However, for the instruction to - * succeed it must target the permissions of a lesser privileged - * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST - * instruction in the AMD64 APM Volume 3). - */ - attrs = 1; - if (vmsa) - attrs |= RMPADJUST_VMSA_PAGE_BIT; + if (snp_vmpl) { + struct svsm_call call = {}; + unsigned long flags; + + local_irq_save(flags); - return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); + call.caa = this_cpu_read(svsm_caa); + call.rcx = __pa(va); + + if (make_vmsa) { + /* Protocol 0, Call ID 2 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); + call.rdx = __pa(caa); + call.r8 = apic_id; + } else { + /* Protocol 0, Call ID 3 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); + } + + ret = svsm_perform_call_protocol(&call); + + local_irq_restore(flags); + } else { + /* + * If the kernel runs at VMPL0, it can change the VMSA + * bit for a page using the RMPADJUST instruction. + * However, for the instruction to succeed it must + * target the permissions of a lesser privileged (higher + * numbered) VMPL level, so use VMPL1. 
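As a side note on the RMPADJUST path above: the attribute operand packs the target VMPL into the low bits and the VMSA marker into a higher bit. A minimal standalone sketch of that encoding follows; the bit position and helper name are illustrative assumptions, not the kernel's definitions.

#include <stdbool.h>
#include <stdint.h>

#define RMPADJUST_VMSA_PAGE_BIT (1ULL << 16)	/* assumed bit position */

uint64_t vmsa_rmpadjust_attrs(bool make_vmsa)
{
	uint64_t attrs = 1;	/* target the lesser-privileged VMPL1 */

	if (make_vmsa)
		attrs |= RMPADJUST_VMSA_PAGE_BIT;

	return attrs;
}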
+ */ + u64 attrs = 1; + + if (make_vmsa) + attrs |= RMPADJUST_VMSA_PAGE_BIT; + + ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); + } + + return ret; } #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) @@ -962,11 +1086,11 @@ static void *snp_alloc_vmsa_page(int cpu) return page_address(p + 1); } -static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) +static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) { int err; - err = snp_set_vmsa(vmsa, false); + err = snp_set_vmsa(vmsa, NULL, apic_id, false); if (err) pr_err("clear VMSA page failed (%u), leaking page\n", err); else @@ -977,6 +1101,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) { struct sev_es_save_area *cur_vmsa, *vmsa; struct ghcb_state state; + struct svsm_ca *caa; unsigned long flags; struct ghcb *ghcb; u8 sipi_vector; @@ -1023,6 +1148,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) if (!vmsa) return -ENOMEM; + /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */ + caa = per_cpu(svsm_caa, cpu); + /* CR4 should maintain the MCE value */ cr4 = native_read_cr4() & X86_CR4_MCE; @@ -1070,11 +1198,11 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) * VMPL level * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) */ - vmsa->vmpl = 0; + vmsa->vmpl = snp_vmpl; vmsa->sev_features = sev_status >> 2; /* Switch the page over to a VMSA page now that it is initialized */ - ret = snp_set_vmsa(vmsa, true); + ret = snp_set_vmsa(vmsa, caa, apic_id, true); if (ret) { pr_err("set VMSA page failed (%u)\n", ret); free_page((unsigned long)vmsa); @@ -1090,7 +1218,10 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) vc_ghcb_invalidate(ghcb); ghcb_set_rax(ghcb, vmsa->sev_features); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); - ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE); + ghcb_set_sw_exit_info_1(ghcb, + ((u64)apic_id << 32) | + ((u64)snp_vmpl << 16) | + SVM_VMGEXIT_AP_CREATE); ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); sev_es_wr_ghcb_msr(__pa(ghcb)); @@ -1108,13 +1239,13 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) /* Perform cleanup if there was an error */ if (ret) { - snp_cleanup_vmsa(vmsa); + snp_cleanup_vmsa(vmsa, apic_id); vmsa = NULL; } /* Free up any previous VMSA page */ if (cur_vmsa) - snp_cleanup_vmsa(cur_vmsa); + snp_cleanup_vmsa(cur_vmsa, apic_id); /* Record the current VMSA page */ per_cpu(sev_vmsa, cpu) = vmsa; @@ -1209,6 +1340,17 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) /* Is it a WRMSR? */ exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 
1 : 0; + if (regs->cx == MSR_SVSM_CAA) { + /* Writes to the SVSM CAA MSR are ignored */ + if (exit_info_1) + return ES_OK; + + regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); + regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa)); + + return ES_OK; + } + ghcb_set_rcx(ghcb, regs->cx); if (exit_info_1) { ghcb_set_rax(ghcb, regs->ax); @@ -1346,6 +1488,18 @@ static void __init alloc_runtime_data(int cpu) panic("Can't allocate SEV-ES runtime data"); per_cpu(runtime_data, cpu) = data; + + if (snp_vmpl) { + struct svsm_ca *caa; + + /* Allocate the SVSM CA page if an SVSM is present */ + caa = memblock_alloc(sizeof(*caa), PAGE_SIZE); + if (!caa) + panic("Can't allocate SVSM CA page\n"); + + per_cpu(svsm_caa, cpu) = caa; + per_cpu(svsm_caa_pa, cpu) = __pa(caa); + } } static void __init init_ghcb(int cpu) @@ -1395,6 +1549,32 @@ void __init sev_es_init_vc_handling(void) init_ghcb(cpu); } + /* If running under an SVSM, switch to the per-CPU CA */ + if (snp_vmpl) { + struct svsm_call call = {}; + unsigned long flags; + int ret; + + local_irq_save(flags); + + /* + * SVSM_CORE_REMAP_CA call: + * RAX = 0 (Protocol=0, CallID=0) + * RCX = New CA GPA + */ + call.caa = svsm_get_caa(); + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); + call.rcx = this_cpu_read(svsm_caa_pa); + ret = svsm_perform_call_protocol(&call); + if (ret) + panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", + ret, call.rax_out); + + sev_cfg.use_cas = true; + + local_irq_restore(flags); + } + sev_es_setup_play_dead(); /* Secondary CPUs use the runtime #VC handler */ @@ -1819,33 +1999,6 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, return result; } -static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) -{ - long error_code = ctxt->fi.error_code; - int trapnr = ctxt->fi.vector; - - ctxt->regs->orig_ax = ctxt->fi.error_code; - - switch (trapnr) { - case X86_TRAP_GP: - exc_general_protection(ctxt->regs, error_code); - break; - case X86_TRAP_UD: - exc_invalid_op(ctxt->regs); - break; - case X86_TRAP_PF: - write_cr2(ctxt->fi.cr2); - exc_page_fault(ctxt->regs, error_code); - break; - case X86_TRAP_AC: - exc_alignment_check(ctxt->regs, error_code); - break; - default: - pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); - BUG(); - } -} - static __always_inline bool is_vc2_stack(unsigned long sp) { return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); @@ -2095,6 +2248,47 @@ found_cc_info: return cc_info; } +static __head void svsm_setup(struct cc_blob_sev_info *cc_info) +{ + struct svsm_call call = {}; + int ret; + u64 pa; + + /* + * Record the SVSM Calling Area address (CAA) if the guest is not + * running at VMPL0. The CA will be used to communicate with the + * SVSM to perform SVSM services. + */ + if (!svsm_setup_ca(cc_info)) + return; + + /* + * It is very early in the boot and the kernel is running identity + * mapped but without having adjusted the pagetables to where the + * kernel was loaded (physbase), so get the CA address using + * RIP-relative addressing. + */ + pa = (u64)&RIP_REL_REF(boot_svsm_ca_page); + + /* + * Switch over to the boot SVSM CA while the current CA is still + * addressable. There is no GHCB at this point so use the MSR protocol.
+ * + * SVSM_CORE_REMAP_CA call: + * RAX = 0 (Protocol=0, CallID=0) + * RCX = New CA GPA + */ + call.caa = svsm_get_caa(); + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); + call.rcx = pa; + ret = svsm_perform_call_protocol(&call); + if (ret) + panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", ret, call.rax_out); + + RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; + RIP_REL_REF(boot_svsm_caa_pa) = pa; +} + bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2108,6 +2302,8 @@ bool __head snp_init(struct boot_params *bp) setup_cpuid_table(cc_info); + svsm_setup(cc_info); + /* * The CC blob will be used later to access the secrets page. Cache * it here like the boot kernel does. @@ -2156,23 +2352,27 @@ static void dump_cpuid_table(void) * expected, but that initialization happens too early in boot to print any * sort of indicator, and there's not really any other good place to do it, * so do it here. + * + * If running as an SNP guest, report the current VM privilege level (VMPL). */ -static int __init report_cpuid_table(void) +static int __init report_snp_info(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); - if (!cpuid_table->count) - return 0; + if (cpuid_table->count) { + pr_info("Using SNP CPUID table, %d entries present.\n", + cpuid_table->count); - pr_info("Using SNP CPUID table, %d entries present.\n", - cpuid_table->count); + if (sev_cfg.debug) + dump_cpuid_table(); + } - if (sev_cfg.debug) - dump_cpuid_table(); + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + pr_info("SNP running at VMPL%u.\n", snp_vmpl); return 0; } -arch_initcall(report_cpuid_table); +arch_initcall(report_snp_info); static int __init init_sev_config(char *str) { @@ -2191,6 +2391,56 @@ static int __init init_sev_config(char *str) } __setup("sev=", init_sev_config); +static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input) +{ + /* If (new) lengths have been returned, propagate them up */ + if (call->rcx_out != call->rcx) + input->manifest_buf.len = call->rcx_out; + + if (call->rdx_out != call->rdx) + input->certificates_buf.len = call->rdx_out; + + if (call->r8_out != call->r8) + input->report_buf.len = call->r8_out; +} + +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, + struct svsm_attest_call *input) +{ + struct svsm_attest_call *ac; + unsigned long flags; + u64 attest_call_pa; + int ret; + + if (!snp_vmpl) + return -EINVAL; + + local_irq_save(flags); + + call->caa = svsm_get_caa(); + + ac = (struct svsm_attest_call *)call->caa->svsm_buffer; + attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + + *ac = *input; + + /* + * Set input registers for the request and set RDX and R8 to known + * values in order to detect length values being returned in them. 
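The -1 values loaded into RDX and R8 act as sentinels: a returned length can never equal all-ones, so any difference between an input and output register means the SVSM wrote a new buffer length back. A standalone sketch of that compare-and-propagate step, with illustrative field names:

#include <stdint.h>

struct attest_regs {
	uint64_t rcx, rdx, r8;			/* loaded before the call */
	uint64_t rcx_out, rdx_out, r8_out;	/* read back afterwards */
};

void propagate_lengths(const struct attest_regs *r, uint64_t *manifest_len,
		       uint64_t *certs_len, uint64_t *report_len)
{
	/* A register the SVSM rewrote carries an updated buffer length. */
	if (r->rcx_out != r->rcx)
		*manifest_len = r->rcx_out;
	if (r->rdx_out != r->rdx)
		*certs_len = r->rdx_out;
	if (r->r8_out != r->r8)
		*report_len = r->r8_out;
}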
+ */ + call->rax = call_id; + call->rcx = attest_call_pa; + call->rdx = -1; + call->r8 = -1; + ret = svsm_perform_call_protocol(call); + update_attest_input(call, input); + + local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); + int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { struct ghcb_state state; @@ -2299,3 +2549,58 @@ void sev_show_status(void) } pr_cont("\n"); } + +void __init snp_update_svsm_ca(void) +{ + if (!snp_vmpl) + return; + + /* Update the CAA to a proper kernel address */ + boot_svsm_caa = &boot_svsm_ca_page; +} + +#ifdef CONFIG_SYSFS +static ssize_t vmpl_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", snp_vmpl); +} + +static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl); + +static struct attribute *vmpl_attrs[] = { + &vmpl_attr.attr, + NULL +}; + +static struct attribute_group sev_attr_group = { + .attrs = vmpl_attrs, +}; + +static int __init sev_sysfs_init(void) +{ + struct kobject *sev_kobj; + struct device *dev_root; + int ret; + + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return -ENODEV; + + dev_root = bus_get_dev_root(&cpu_subsys); + if (!dev_root) + return -ENODEV; + + sev_kobj = kobject_create_and_add("sev", &dev_root->kobj); + put_device(dev_root); + + if (!sev_kobj) + return -ENOMEM; + + ret = sysfs_create_group(sev_kobj, &sev_attr_group); + if (ret) + kobject_put(sev_kobj); + + return ret; +} +arch_initcall(sev_sysfs_init); +#endif // CONFIG_SYSFS diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/coco/sev/shared.c index b4f8fa0f722c..71de53194089 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/coco/sev/shared.c @@ -21,8 +21,30 @@ #define WARN(condition, format...) (!!(condition)) #define sev_printk(fmt, ...) #define sev_printk_rtl(fmt, ...) +#undef vc_forward_exception +#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n") #endif +/* + * SVSM related information: + * When running under an SVSM, the VMPL that Linux is executing at must be + * non-zero. The VMPL is therefore used to indicate the presence of an SVSM. + * + * During boot, the page tables are set up as identity mapped and later + * changed to use kernel virtual addresses. Maintain separate virtual and + * physical addresses for the CAA to allow SVSM functions to be used during + * early boot, both with identity mapped virtual addresses and proper kernel + * virtual addresses. 
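Because the calling area must stay reachable both while the kernel is identity mapped and after the switch to kernel virtual addresses, the code tracks a virtual/physical pair plus a phase flag. A compressed model of that selection logic, using assumed stand-in names:

#include <stdbool.h>
#include <stdint.h>

struct svsm_ca;				/* opaque calling area */

static struct svsm_ca *boot_caa;	/* current virtual mapping */
static uint64_t boot_caa_pa;		/* physical address, phase-independent */
static bool percpu_cas_ready;		/* mirrors sev_cfg.use_cas */

struct svsm_ca *get_caa(struct svsm_ca *percpu_caa)
{
	/* Early boot: only the boot CA exists; later the per-CPU CA wins. */
	return percpu_cas_ready ? percpu_caa : boot_caa;
}

uint64_t get_caa_pa(uint64_t percpu_caa_pa)
{
	return percpu_cas_ready ? percpu_caa_pa : boot_caa_pa;
}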
+ */ +u8 snp_vmpl __ro_after_init; +EXPORT_SYMBOL_GPL(snp_vmpl); +static struct svsm_ca *boot_svsm_caa __ro_after_init; +static u64 boot_svsm_caa_pa __ro_after_init; + +static struct svsm_ca *svsm_get_caa(void); +static u64 svsm_get_caa_pa(void); +static int svsm_perform_call_protocol(struct svsm_call *call); + /* I/O parameters for CPUID-related helpers */ struct cpuid_leaf { u32 fn; @@ -229,6 +251,126 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt return ES_VMM_ERROR; } +static inline int svsm_process_result_codes(struct svsm_call *call) +{ + switch (call->rax_out) { + case SVSM_SUCCESS: + return 0; + case SVSM_ERR_INCOMPLETE: + case SVSM_ERR_BUSY: + return -EAGAIN; + default: + return -EINVAL; + } +} + +/* + * Issue a VMGEXIT to call the SVSM: + * - Load the SVSM register state (RAX, RCX, RDX, R8 and R9) + * - Set the CA call pending field to 1 + * - Issue VMGEXIT + * - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9) + * - Perform atomic exchange of the CA call pending field + * + * - See the "Secure VM Service Module for SEV-SNP Guests" specification for + * details on the calling convention. + * - The calling convention loosely follows the Microsoft X64 calling + * convention by putting arguments in RCX, RDX, R8 and R9. + * - RAX specifies the SVSM protocol/callid as input and the return code + * as output. + */ +static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending) +{ + register unsigned long rax asm("rax") = call->rax; + register unsigned long rcx asm("rcx") = call->rcx; + register unsigned long rdx asm("rdx") = call->rdx; + register unsigned long r8 asm("r8") = call->r8; + register unsigned long r9 asm("r9") = call->r9; + + call->caa->call_pending = 1; + + asm volatile("rep; vmmcall\n\t" + : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9) + : : "memory"); + + *pending = xchg(&call->caa->call_pending, *pending); + + call->rax_out = rax; + call->rcx_out = rcx; + call->rdx_out = rdx; + call->r8_out = r8; + call->r9_out = r9; +} + +static int svsm_perform_msr_protocol(struct svsm_call *call) +{ + u8 pending = 0; + u64 val, resp; + + /* + * When using the MSR protocol, be sure to save and restore + * the current MSR value. + */ + val = sev_es_rd_ghcb_msr(); + + sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0)); + + svsm_issue_call(call, &pending); + + resp = sev_es_rd_ghcb_msr(); + + sev_es_wr_ghcb_msr(val); + + if (pending) + return -EINVAL; + + if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP) + return -EINVAL; + + if (GHCB_MSR_VMPL_RESP_VAL(resp)) + return -EINVAL; + + return svsm_process_result_codes(call); +} + +static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call) +{ + struct es_em_ctxt ctxt; + u8 pending = 0; + + vc_ghcb_invalidate(ghcb); + + /* + * Fill in protocol and format specifiers. This can be called very early + * in the boot, so use rip-relative references as needed. 
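The call_pending field described above is the completion handshake: the guest sets it before the VMGEXIT and the SVSM clears it once the request has been consumed, so atomically exchanging it with zero both resets the field and reveals whether the call completed. In standalone form (types simplified to C11 atomics):

#include <stdatomic.h>
#include <stdint.h>

/* Returns 0 when the SVSM consumed the call, -1 when it did not. */
int check_call_completed(_Atomic uint8_t *call_pending)
{
	/* Swap in 0: an old value of 1 means the request was never
	 * processed by the SVSM. */
	uint8_t pending = atomic_exchange(call_pending, 0);

	return pending ? -1 : 0;
}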
+ */ + ghcb->protocol_version = RIP_REL_REF(ghcb_version); + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + + svsm_issue_call(call, &pending); + + if (pending) + return -EINVAL; + + switch (verify_exception_info(ghcb, &ctxt)) { + case ES_OK: + break; + case ES_EXCEPTION: + vc_forward_exception(&ctxt); + fallthrough; + default: + return -EINVAL; + } + + return svsm_process_result_codes(call); +} + static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, struct es_em_ctxt *ctxt, u64 exit_code, u64 exit_info_1, @@ -1079,38 +1221,268 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) } } -static void pvalidate_pages(struct snp_psc_desc *desc) +static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size, + int ret, u64 svsm_ret) +{ + WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n", + pfn, action, page_size, ret, svsm_ret); + + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); +} + +static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret) +{ + unsigned int page_size; + bool action; + u64 pfn; + + pfn = pc->entry[pc->cur_index].pfn; + action = pc->entry[pc->cur_index].action; + page_size = pc->entry[pc->cur_index].page_size; + + __pval_terminate(pfn, action, page_size, ret, svsm_ret); +} + +static void svsm_pval_4k_page(unsigned long paddr, bool validate) +{ + struct svsm_pvalidate_call *pc; + struct svsm_call call = {}; + unsigned long flags; + u64 pc_pa; + int ret; + + /* + * This can be called very early in the boot, use native functions in + * order to avoid paravirt issues. + */ + flags = native_local_irq_save(); + + call.caa = svsm_get_caa(); + + pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer; + pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + + pc->num_entries = 1; + pc->cur_index = 0; + pc->entry[0].page_size = RMP_PG_SIZE_4K; + pc->entry[0].action = validate; + pc->entry[0].ignore_cf = 0; + pc->entry[0].pfn = paddr >> PAGE_SHIFT; + + /* Protocol 0, Call ID 1 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE); + call.rcx = pc_pa; + + ret = svsm_perform_call_protocol(&call); + if (ret) + svsm_pval_terminate(pc, ret, call.rax_out); + + native_local_irq_restore(flags); +} + +static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, bool validate) +{ + int ret; + + /* + * This can be called very early during boot, so use rIP-relative + * references as needed. + */ + if (RIP_REL_REF(snp_vmpl)) { + svsm_pval_4k_page(paddr, validate); + } else { + ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); + if (ret) + __pval_terminate(PHYS_PFN(paddr), validate, RMP_PG_SIZE_4K, ret, 0); + } +} + +static void pval_pages(struct snp_psc_desc *desc) { struct psc_entry *e; unsigned long vaddr; unsigned int size; unsigned int i; bool validate; + u64 pfn; int rc; for (i = 0; i <= desc->hdr.end_entry; i++) { e = &desc->entries[i]; - vaddr = (unsigned long)pfn_to_kaddr(e->gfn); + pfn = e->gfn; + vaddr = (unsigned long)pfn_to_kaddr(pfn); size = e->pagesize ? 
RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; validate = e->operation == SNP_PAGE_STATE_PRIVATE; rc = pvalidate(vaddr, size, validate); + if (!rc) + continue; + if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { unsigned long vaddr_end = vaddr + PMD_SIZE; - for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) { + for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) { rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); if (rc) - break; + __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0); } + } else { + __pval_terminate(pfn, validate, size, rc, 0); } + } +} + +static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, + struct svsm_pvalidate_call *pc) +{ + struct svsm_pvalidate_entry *pe; + + /* Nothing in the CA yet */ + pc->num_entries = 0; + pc->cur_index = 0; + + pe = &pc->entry[0]; + + while (pfn < pfn_end) { + pe->page_size = RMP_PG_SIZE_4K; + pe->action = action; + pe->ignore_cf = 0; + pe->pfn = pfn; + + pe++; + pfn++; + + pc->num_entries++; + if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) + break; + } + + return pfn; +} + +static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry, + struct svsm_pvalidate_call *pc) +{ + struct svsm_pvalidate_entry *pe; + struct psc_entry *e; + + /* Nothing in the CA yet */ + pc->num_entries = 0; + pc->cur_index = 0; + + pe = &pc->entry[0]; + e = &desc->entries[desc_entry]; + + while (desc_entry <= desc->hdr.end_entry) { + pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; + pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; + pe->ignore_cf = 0; + pe->pfn = e->gfn; + + pe++; + e++; + + desc_entry++; + pc->num_entries++; + if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) + break; + } + + return desc_entry; +} + +static void svsm_pval_pages(struct snp_psc_desc *desc) +{ + struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY]; + unsigned int i, pv_4k_count = 0; + struct svsm_pvalidate_call *pc; + struct svsm_call call = {}; + unsigned long flags; + bool action; + u64 pc_pa; + int ret; + + /* + * This can be called very early in the boot, use native functions in + * order to avoid paravirt issues. + */ + flags = native_local_irq_save(); + + /* + * The SVSM calling area (CA) can support processing 510 entries at a + * time. Loop through the Page State Change descriptor until the CA is + * full or the last entry in the descriptor is reached, at which time + * the SVSM is invoked. This repeats until all entries in the descriptor + * are processed. + */ + call.caa = svsm_get_caa(); + + pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer; + pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + + /* Protocol 0, Call ID 1 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE); + call.rcx = pc_pa; + + for (i = 0; i <= desc->hdr.end_entry;) { + i = svsm_build_ca_from_psc_desc(desc, i, pc); + + do { + ret = svsm_perform_call_protocol(&call); + if (!ret) + continue; + + /* + * Check if the entry failed because of an RMP mismatch (a + * PVALIDATE at 2M was requested, but the page is mapped in + * the RMP as 4K). 
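The PVALIDATE_FAIL_SIZEMISMATCH fallback above re-walks the 2M region in 4K steps. A standalone sketch of that loop; the instruction wrapper is a stub here, and in the kernel a failure terminates the guest rather than returning:

#include <stdbool.h>

#define SK_PAGE_SIZE 4096UL
#define SK_PMD_SIZE  (512 * SK_PAGE_SIZE)

/* Stub standing in for the real PVALIDATE instruction wrapper. */
static int pvalidate_4k(unsigned long vaddr, bool validate)
{
	(void)vaddr;
	(void)validate;
	return 0;
}

int pvalidate_2m_as_4k(unsigned long vaddr, bool validate)
{
	unsigned long end = vaddr + SK_PMD_SIZE;

	for (; vaddr < end; vaddr += SK_PAGE_SIZE) {
		int rc = pvalidate_4k(vaddr, validate);

		if (rc)
			return rc;	/* kernel: terminate the guest */
	}

	return 0;
}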
+ */ - if (rc) { - WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc); - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH && + pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) { + /* Save this entry for post-processing at 4K */ + pv_4k[pv_4k_count++] = pc->entry[pc->cur_index]; + + /* Skip to the next one unless at the end of the list */ + pc->cur_index++; + if (pc->cur_index < pc->num_entries) + ret = -EAGAIN; + else + ret = 0; + } + } while (ret == -EAGAIN); + + if (ret) + svsm_pval_terminate(pc, ret, call.rax_out); + } + + /* Process any entries that failed to be validated at 2M and validate them at 4K */ + for (i = 0; i < pv_4k_count; i++) { + u64 pfn, pfn_end; + + action = pv_4k[i].action; + pfn = pv_4k[i].pfn; + pfn_end = pfn + 512; + + while (pfn < pfn_end) { + pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc); + + ret = svsm_perform_call_protocol(&call); + if (ret) + svsm_pval_terminate(pc, ret, call.rax_out); } } + + native_local_irq_restore(flags); +} + +static void pvalidate_pages(struct snp_psc_desc *desc) +{ + if (snp_vmpl) + svsm_pval_pages(desc); + else + pval_pages(desc); } static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) @@ -1269,3 +1641,77 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, return ES_UNSUPPORTED; } + +/* + * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM + * services needed when not running in VMPL0. + */ +static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info) +{ + struct snp_secrets_page *secrets_page; + struct snp_cpuid_table *cpuid_table; + unsigned int i; + u64 caa; + + BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE); + + /* + * Check if running at VMPL0. + * + * Use RMPADJUST (see the rmpadjust() function for a description of what + * the instruction does) to update the VMPL1 permissions of a page. If + * the guest is running at VMPL0, this will succeed and implies there is + * no SVSM. If the guest is running at any other VMPL, this will fail. + * Linux SNP guests only ever run at a single VMPL level so permission mask + * changes of a lesser-privileged VMPL are a don't-care. + * + * Use a rip-relative reference to obtain the proper address, since this + * routine is running identity mapped when called, both by the decompressor + * code and the early kernel code. + */ + if (!rmpadjust((unsigned long)&RIP_REL_REF(boot_ghcb_page), RMP_PG_SIZE_4K, 1)) + return false; + + /* + * Not running at VMPL0, ensure everything has been properly supplied + * for running under an SVSM. + */ + if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE); + + secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys; + if (!secrets_page->svsm_size) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM); + + if (!secrets_page->svsm_guest_vmpl) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0); + + RIP_REL_REF(snp_vmpl) = secrets_page->svsm_guest_vmpl; + + caa = secrets_page->svsm_caa; + + /* + * An open-coded PAGE_ALIGNED() in order to avoid including + * kernel-proper headers into the decompressor. + */ + if (caa & (PAGE_SIZE - 1)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA); + + /* + * The CA is identity mapped when this routine is called, both by the + * decompressor code and the early kernel code. 
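Note the ordering this enforces: the REMAP_CA request is issued while the old CA is still the active one, and the recorded pointers are switched only after the SVSM acknowledges the new page. A sketch of that publish-after-success pattern, with the protocol call stubbed out:

#include <stdint.h>

struct svsm_ca;

/* Stub for the SVSM_CORE_REMAP_CA protocol call. */
static int svsm_remap_ca(uint64_t new_ca_pa)
{
	(void)new_ca_pa;
	return 0;
}

int switch_boot_ca(struct svsm_ca **caa, uint64_t *caa_pa,
		   struct svsm_ca *new_ca, uint64_t new_ca_pa)
{
	int ret = svsm_remap_ca(new_ca_pa);	/* issued via the old CA */

	if (ret)
		return ret;		/* keep the old CA on failure */

	*caa = new_ca;			/* publish only after success */
	*caa_pa = new_ca_pa;

	return 0;
}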
+ */ + RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)caa; + RIP_REL_REF(boot_svsm_caa_pa) = caa; + + /* Advertise the SVSM presence via CPUID. */ + cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table(); + for (i = 0; i < cpuid_table->count; i++) { + struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; + + if (fn->eax_in == 0x8000001f) + fn->eax |= BIT(28); + } + + return true; +} diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index c1cb90369915..078e2bac2553 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -7,6 +7,7 @@ #include <linux/cpufeature.h> #include <linux/export.h> #include <linux/io.h> +#include <linux/kexec.h> #include <asm/coco.h> #include <asm/tdx.h> #include <asm/vmx.h> @@ -14,6 +15,7 @@ #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/pgtable.h> +#include <asm/set_memory.h> /* MMIO direction */ #define EPT_READ 0 @@ -38,6 +40,8 @@ #define TDREPORT_SUBTYPE_0 0 +static atomic_long_t nr_shared; + /* Called from __tdx_hypercall() for unrecoverable failure */ noinstr void __noreturn __tdx_hypercall_failed(void) { @@ -798,28 +802,124 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) return true; } -static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, - bool enc) +static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, + bool enc) { /* * Only handle shared->private conversion here. * See the comment in tdx_early_init(). */ - if (enc) - return tdx_enc_status_changed(vaddr, numpages, enc); - return true; + if (enc && !tdx_enc_status_changed(vaddr, numpages, enc)) + return -EIO; + + return 0; } -static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, +static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, bool enc) { /* * Only handle private->shared conversion here. * See the comment in tdx_early_init(). */ - if (!enc) - return tdx_enc_status_changed(vaddr, numpages, enc); - return true; + if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc)) + return -EIO; + + if (enc) + atomic_long_sub(numpages, &nr_shared); + else + atomic_long_add(numpages, &nr_shared); + + return 0; +} + +/* Stop new private<->shared conversions */ +static void tdx_kexec_begin(void) +{ + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* + * Crash kernel reaches here with interrupts disabled: can't wait for + * conversions to finish. + * + * If race happened, just report and proceed. + */ + if (!set_memory_enc_stop_conversion()) + pr_warn("Failed to stop shared<->private conversions\n"); +} + +/* Walk direct mapping and convert all shared memory back to private */ +static void tdx_kexec_finish(void) +{ + unsigned long addr, end; + long found = 0, shared; + + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + lockdep_assert_irqs_disabled(); + + addr = PAGE_OFFSET; + end = PAGE_OFFSET + get_max_mapped(); + + while (addr < end) { + unsigned long size; + unsigned int level; + pte_t *pte; + + pte = lookup_address(addr, &level); + size = page_level_size(level); + + if (pte && pte_decrypted(*pte)) { + int pages = size / PAGE_SIZE; + + /* + * Touching memory with shared bit set triggers implicit + * conversion to shared. + * + * Make sure nobody touches the shared range from + * now on. + */ + set_pte(pte, __pte(0)); + + /* + * Memory encryption state persists across kexec. + * If tdx_enc_status_changed() fails in the first + * kernel, it leaves memory in an unknown state. 
+ * + * If that memory remains shared, accessing it in the + * *next* kernel through a private mapping will result + * in an unrecoverable guest shutdown. + * + * The kdump kernel boot is not impacted as it uses + * a pre-reserved memory range that is always private. + * However, gathering crash information could lead to + * a crash if it accesses unconverted memory through + * a private mapping, which is possible when accessing + * that memory through /proc/vmcore, for example. + * + * In all cases, print error info in order to leave + * enough breadcrumbs for debugging. + */ + if (!tdx_enc_status_changed(addr, pages, true)) { + pr_err("Failed to unshare range %#lx-%#lx\n", + addr, addr + size); + } + + found += pages; + } + + addr += size; + } + + __flush_tlb_all(); + + shared = atomic_long_read(&nr_shared); + if (shared != found) { + pr_err("shared page accounting is off\n"); + pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found); + } } void __init tdx_early_init(void) @@ -881,6 +981,9 @@ void __init tdx_early_init(void) x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; + x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 11c9b8efdc4c..ed0a5f2dc129 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -89,10 +89,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld - IBRS_ENTER - UNTRAIN_RET - CLEAR_BRANCH_HISTORY - /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -116,6 +112,16 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) jnz .Lsysenter_fix_flags .Lsysenter_flags_fixed: + /* + * CPU bug mitigation mechanisms can call other functions. They + * should be invoked only after making sure TF is cleared, because + * single-step is ignored only for instructions inside the + * entry_SYSENTER_compat function.
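The fixup branch referenced above tests NT, AC and TF in a single mask before any of the mitigation helpers may run. That check, modeled in C with the standard EFLAGS bit positions:

#include <stdbool.h>
#include <stdint.h>

#define X86_EFLAGS_TF (1u << 8)		/* trap flag: single-step */
#define X86_EFLAGS_NT (1u << 14)	/* nested task */
#define X86_EFLAGS_AC (1u << 18)	/* alignment check */

bool sysenter_needs_flags_fixup(uint32_t eflags)
{
	/* Any of NT, AC or TF set routes the entry through the fixup
	 * path before the mitigation helpers may be invoked. */
	return (eflags & (X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF)) != 0;
}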
+ */ + IBRS_ENTER + UNTRAIN_RET + CLEAR_BRANCH_HISTORY + movq %rsp, %rdi call do_SYSENTER_32 jmp sysret32_from_system_call diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index c2235bae17ef..8cc9950d7104 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -14,9 +14,12 @@ #endif #define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *); - +#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __ia32_##sym(const struct pt_regs *); #include <asm/syscalls_32.h> -#undef __SYSCALL +#undef __SYSCALL + +#undef __SYSCALL_NORETURN +#define __SYSCALL_NORETURN __SYSCALL /* * The sys_call_table[] is no longer used for system calls, but @@ -28,11 +31,10 @@ const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_32.h> }; -#undef __SYSCALL +#undef __SYSCALL #endif #define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); - long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) { switch (nr) { diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 33b3f09e6f15..ba8354424860 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -8,8 +8,12 @@ #include <asm/syscall.h> #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *); +#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *); #include <asm/syscalls_64.h> -#undef __SYSCALL +#undef __SYSCALL + +#undef __SYSCALL_NORETURN +#define __SYSCALL_NORETURN __SYSCALL /* * The sys_call_table[] is no longer used for system calls, but @@ -20,10 +24,9 @@ const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_64.h> }; -#undef __SYSCALL +#undef __SYSCALL #define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); - long x64_sys_call(const struct pt_regs *regs, unsigned int nr) { switch (nr) { diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index 03de4a932131..fb77908f44f3 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -8,11 +8,14 @@ #include <asm/syscall.h> #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *); +#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *); #include <asm/syscalls_x32.h> -#undef __SYSCALL +#undef __SYSCALL -#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); +#undef __SYSCALL_NORETURN +#define __SYSCALL_NORETURN __SYSCALL +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); long x32_sys_call(const struct pt_regs *regs, unsigned int nr) { switch (nr) { diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 7fd1f57ad3d3..534c74b14fab 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 32-bit system call numbers and entry vectors # # The format is: -# <number> <abi> <name> <entry point> <compat entry point> +# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]] # # The __ia32_sys and __ia32_compat_sys stubs are created on-the-fly for # sys_*() system calls and compat_sys_*() compat system calls if @@ -12,7 +13,7 @@ # The abi is always "i386" for this file. 
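The new optional "noreturn" column is consumed by the __SYSCALL_NORETURN macro shown earlier: the table is expanded once to emit declarations (where the noreturn variant adds the attribute) and once to emit dispatch cases (where it collapses to the plain form). A toy, compilable version of that two-pass expansion, with hypothetical syscall names:

#include <stdlib.h>

struct pt_regs { long di; };

#define TOY_SYSCALLS(X, X_NORETURN)	\
	X(0, sys_hello)			\
	X_NORETURN(1, sys_bye)

/* Pass 1: declarations; the noreturn variant adds the attribute. */
#define __SC(nr, sym)    long toy_##sym(const struct pt_regs *);
#define __SC_NR(nr, sym) _Noreturn long toy_##sym(const struct pt_regs *);
TOY_SYSCALLS(__SC, __SC_NR)
#undef __SC
#undef __SC_NR

/* Pass 2: dispatch; the noreturn form collapses to the plain one. */
#define __SC(nr, sym)    case nr: return toy_##sym(regs);
#define __SC_NR __SC
long toy_sys_call(const struct pt_regs *regs, unsigned int nr)
{
	switch (nr) {
	TOY_SYSCALLS(__SC, __SC_NR)
	default:
		return -1;
	}
}

long toy_sys_hello(const struct pt_regs *r) { return r->di; }
_Noreturn long toy_sys_bye(const struct pt_regs *r) { exit((int)r->di); }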
# 0 i386 restart_syscall sys_restart_syscall -1 i386 exit sys_exit +1 i386 exit sys_exit - noreturn 2 i386 fork sys_fork 3 i386 read sys_read 4 i386 write sys_write @@ -263,7 +264,7 @@ 249 i386 io_cancel sys_io_cancel 250 i386 fadvise64 sys_ia32_fadvise64 # 251 is available for reuse (was briefly sys_set_zone_reclaim) -252 i386 exit_group sys_exit_group +252 i386 exit_group sys_exit_group - noreturn 253 i386 lookup_dcookie 254 i386 epoll_create sys_epoll_create 255 i386 epoll_ctl sys_epoll_ctl @@ -420,7 +421,7 @@ 412 i386 utimensat_time64 sys_utimensat 413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 i386 io_pgetevents_time64 sys_io_pgetevents +416 i386 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 i386 mq_timedsend_time64 sys_mq_timedsend 419 i386 mq_timedreceive_time64 sys_mq_timedreceive diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5b..097e5a18db52 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# <number> <abi> <name> <entry point> +# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 38c1b1f1deaa..0e835dc134a5 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4698,8 +4698,8 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); static inline bool intel_pmu_broken_perf_cap(void) { /* The Perf Metric (Bit 15) is always cleared */ - if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) || - (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L)) + if (boot_cpu_data.x86_vfm == INTEL_METEORLAKE || + boot_cpu_data.x86_vfm == INTEL_METEORLAKE_L) return true; return false; @@ -5187,35 +5187,35 @@ static __init void intel_clovertown_quirk(void) } static const struct x86_cpu_desc isolation_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), - 
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e), + INTEL_CPU_DESC(INTEL_HASWELL, 3, 0x0000001f), + INTEL_CPU_DESC(INTEL_HASWELL_L, 1, 0x0000001e), + INTEL_CPU_DESC(INTEL_HASWELL_G, 1, 0x00000015), + INTEL_CPU_DESC(INTEL_HASWELL_X, 2, 0x00000037), + INTEL_CPU_DESC(INTEL_HASWELL_X, 4, 0x0000000a), + INTEL_CPU_DESC(INTEL_BROADWELL, 4, 0x00000023), + INTEL_CPU_DESC(INTEL_BROADWELL_G, 1, 0x00000014), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 2, 0x00000010), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 3, 0x07000009), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 4, 0x0f000009), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 5, 0x0e000002), + INTEL_CPU_DESC(INTEL_BROADWELL_X, 1, 0x0b000014), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 3, 0x00000021), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 4, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 5, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 6, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 7, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 11, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_L, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_SKYLAKE, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_KABYLAKE, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 13, 0x0000004e), {} }; @@ -5232,9 +5232,9 @@ static __init void intel_pebs_isolation_quirk(void) } static const struct x86_cpu_desc pebs_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028), - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618), - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c), + INTEL_CPU_DESC(INTEL_SANDYBRIDGE, 7, 0x00000028), + INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 6, 0x00000618), + INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 7, 0x0000070c), {} }; @@ -6238,19 +6238,19 @@ __init int intel_pmu_init(void) /* * Install the hw-cache-events table: */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_CORE_YONAH: + switch (boot_cpu_data.x86_vfm) { + case INTEL_CORE_YONAH: pr_cont("Core events, "); name = "core"; break; - case INTEL_FAM6_CORE2_MEROM: + case INTEL_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); fallthrough; - case INTEL_FAM6_CORE2_MEROM_L: - case INTEL_FAM6_CORE2_PENRYN: - case INTEL_FAM6_CORE2_DUNNINGTON: + case INTEL_CORE2_MEROM_L: + case INTEL_CORE2_PENRYN: + case INTEL_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6262,9 +6262,9 @@ 
__init int intel_pmu_init(void) name = "core2"; break; - case INTEL_FAM6_NEHALEM: - case INTEL_FAM6_NEHALEM_EP: - case INTEL_FAM6_NEHALEM_EX: + case INTEL_NEHALEM: + case INTEL_NEHALEM_EP: + case INTEL_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -6296,11 +6296,11 @@ __init int intel_pmu_init(void) name = "nehalem"; break; - case INTEL_FAM6_ATOM_BONNELL: - case INTEL_FAM6_ATOM_BONNELL_MID: - case INTEL_FAM6_ATOM_SALTWELL: - case INTEL_FAM6_ATOM_SALTWELL_MID: - case INTEL_FAM6_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_BONNELL: + case INTEL_ATOM_BONNELL_MID: + case INTEL_ATOM_SALTWELL: + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6313,11 +6313,11 @@ __init int intel_pmu_init(void) name = "bonnell"; break; - case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_D: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT: - case INTEL_FAM6_ATOM_AIRMONT_MID: + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_SILVERMONT_D: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT: + case INTEL_ATOM_AIRMONT_MID: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -6335,8 +6335,8 @@ __init int intel_pmu_init(void) name = "silvermont"; break; - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_D: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -6362,7 +6362,7 @@ __init int intel_pmu_init(void) name = "goldmont"; break; - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_ATOM_GOLDMONT_PLUS: memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, @@ -6391,9 +6391,9 @@ __init int intel_pmu_init(void) name = "goldmont_plus"; break; - case INTEL_FAM6_ATOM_TREMONT_D: - case INTEL_FAM6_ATOM_TREMONT: - case INTEL_FAM6_ATOM_TREMONT_L: + case INTEL_ATOM_TREMONT_D: + case INTEL_ATOM_TREMONT: + case INTEL_ATOM_TREMONT_L: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6420,7 +6420,7 @@ __init int intel_pmu_init(void) name = "Tremont"; break; - case INTEL_FAM6_ATOM_GRACEMONT: + case INTEL_ATOM_GRACEMONT: intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; @@ -6432,8 +6432,8 @@ __init int intel_pmu_init(void) name = "gracemont"; break; - case INTEL_FAM6_ATOM_CRESTMONT: - case INTEL_FAM6_ATOM_CRESTMONT_X: + case INTEL_ATOM_CRESTMONT: + case INTEL_ATOM_CRESTMONT_X: intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; intel_pmu_pebs_data_source_cmt(); @@ -6446,9 +6446,9 @@ __init int intel_pmu_init(void) name = "crestmont"; break; - case INTEL_FAM6_WESTMERE: - case INTEL_FAM6_WESTMERE_EP: - case INTEL_FAM6_WESTMERE_EX: + case INTEL_WESTMERE: + case INTEL_WESTMERE_EP: + case INTEL_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -6477,8 +6477,8 @@ __init int intel_pmu_init(void) name = "westmere"; break; - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_SANDYBRIDGE: + case INTEL_SANDYBRIDGE_X: 
x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -6491,7 +6491,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) + if (boot_cpu_data.x86_vfm == INTEL_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -6517,8 +6517,8 @@ __init int intel_pmu_init(void) name = "sandybridge"; break; - case INTEL_FAM6_IVYBRIDGE: - case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_IVYBRIDGE: + case INTEL_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6534,7 +6534,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) + if (boot_cpu_data.x86_vfm == INTEL_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -6556,10 +6556,10 @@ __init int intel_pmu_init(void) break; - case INTEL_FAM6_HASWELL: - case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_HASWELL_L: - case INTEL_FAM6_HASWELL_G: + case INTEL_HASWELL: + case INTEL_HASWELL_X: + case INTEL_HASWELL_L: + case INTEL_HASWELL_G: x86_add_quirk(intel_ht_bug); x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; @@ -6589,10 +6589,10 @@ __init int intel_pmu_init(void) name = "haswell"; break; - case INTEL_FAM6_BROADWELL: - case INTEL_FAM6_BROADWELL_D: - case INTEL_FAM6_BROADWELL_G: - case INTEL_FAM6_BROADWELL_X: + case INTEL_BROADWELL: + case INTEL_BROADWELL_D: + case INTEL_BROADWELL_G: + case INTEL_BROADWELL_X: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6631,8 +6631,8 @@ __init int intel_pmu_init(void) name = "broadwell"; break; - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: + case INTEL_XEON_PHI_KNL: + case INTEL_XEON_PHI_KNM: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -6651,15 +6651,15 @@ __init int intel_pmu_init(void) name = "knights-landing"; break; - case INTEL_FAM6_SKYLAKE_X: + case INTEL_SKYLAKE_X: pmem = true; fallthrough; - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: - case INTEL_FAM6_COMETLAKE_L: - case INTEL_FAM6_COMETLAKE: + case INTEL_SKYLAKE_L: + case INTEL_SKYLAKE: + case INTEL_KABYLAKE_L: + case INTEL_KABYLAKE: + case INTEL_COMETLAKE_L: + case INTEL_COMETLAKE: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6708,16 +6708,16 @@ __init int intel_pmu_init(void) name = "skylake"; break; - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_D: + case INTEL_ICELAKE_X: + case INTEL_ICELAKE_D: x86_pmu.pebs_ept = 1; pmem = true; fallthrough; - case INTEL_FAM6_ICELAKE_L: - case INTEL_FAM6_ICELAKE: - case INTEL_FAM6_TIGERLAKE_L: - case INTEL_FAM6_TIGERLAKE: - case INTEL_FAM6_ROCKETLAKE: + case INTEL_ICELAKE_L: + case INTEL_ICELAKE: + case INTEL_TIGERLAKE_L: + case INTEL_TIGERLAKE: + case INTEL_ROCKETLAKE: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, 
skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -6752,13 +6752,13 @@ __init int intel_pmu_init(void) name = "icelake"; break; - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_EMERALDRAPIDS_X: + case INTEL_SAPPHIRERAPIDS_X: + case INTEL_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.extra_regs = intel_glc_extra_regs; fallthrough; - case INTEL_FAM6_GRANITERAPIDS_X: - case INTEL_FAM6_GRANITERAPIDS_D: + case INTEL_GRANITERAPIDS_X: + case INTEL_GRANITERAPIDS_D: intel_pmu_init_glc(NULL); if (!x86_pmu.extra_regs) x86_pmu.extra_regs = intel_rwc_extra_regs; @@ -6776,11 +6776,11 @@ __init int intel_pmu_init(void) name = "sapphire_rapids"; break; - case INTEL_FAM6_ALDERLAKE: - case INTEL_FAM6_ALDERLAKE_L: - case INTEL_FAM6_RAPTORLAKE: - case INTEL_FAM6_RAPTORLAKE_P: - case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_ALDERLAKE: + case INTEL_ALDERLAKE_L: + case INTEL_RAPTORLAKE: + case INTEL_RAPTORLAKE_P: + case INTEL_RAPTORLAKE_S: /* * Alder Lake has 2 types of CPU, core and atom. * @@ -6838,8 +6838,8 @@ __init int intel_pmu_init(void) name = "alderlake_hybrid"; break; - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: + case INTEL_METEORLAKE: + case INTEL_METEORLAKE_L: intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = mtl_latency_data_small; diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 0c5e7a7c43ac..b985ca79cf97 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -765,51 +765,51 @@ static struct rapl_model model_amd_hygon = { }; static const struct x86_cpu_id rapl_model_match[] __initconst = { - X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon), - X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb), - X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep), - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb), - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), - 
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), - X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_H, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &model_skl), + X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon), + X86_MATCH_VFM(INTEL_SANDYBRIDGE, &model_snb), + X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &model_snbep), + X86_MATCH_VFM(INTEL_IVYBRIDGE, &model_snb), + X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &model_snbep), + X86_MATCH_VFM(INTEL_HASWELL, &model_hsw), + X86_MATCH_VFM(INTEL_HASWELL_X, &model_hsx), + X86_MATCH_VFM(INTEL_HASWELL_L, &model_hsw), + X86_MATCH_VFM(INTEL_HASWELL_G, &model_hsw), + X86_MATCH_VFM(INTEL_BROADWELL, &model_hsw), + X86_MATCH_VFM(INTEL_BROADWELL_G, &model_hsw), + X86_MATCH_VFM(INTEL_BROADWELL_X, &model_hsx), + X86_MATCH_VFM(INTEL_BROADWELL_D, &model_hsx), + X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &model_knl), + X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &model_knl), + X86_MATCH_VFM(INTEL_SKYLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_SKYLAKE, &model_skl), + X86_MATCH_VFM(INTEL_SKYLAKE_X, &model_hsx), + X86_MATCH_VFM(INTEL_KABYLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_KABYLAKE, &model_skl), + X86_MATCH_VFM(INTEL_CANNONLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &model_hsw), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &model_hsw), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &model_hsw), + X86_MATCH_VFM(INTEL_ICELAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ICELAKE, &model_skl), + X86_MATCH_VFM(INTEL_ICELAKE_D, &model_hsx), + X86_MATCH_VFM(INTEL_ICELAKE_X, &model_hsx), + X86_MATCH_VFM(INTEL_COMETLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_COMETLAKE, &model_skl), + X86_MATCH_VFM(INTEL_TIGERLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_TIGERLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ALDERLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &model_skl), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &model_spr), + X86_MATCH_VFM(INTEL_RAPTORLAKE, &model_skl), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &model_skl), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &model_skl), + X86_MATCH_VFM(INTEL_METEORLAKE, &model_skl), + X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl), + X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 768d73de0d09..b4a851d27c7c 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -523,9 +523,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], * transition is complete, hv_vtom_set_host_visibility() marks the pages * as "present" again. */ -static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) +static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) { - return !set_memory_np(kbuffer, pagecount); + return set_memory_np(kbuffer, pagecount); } /* @@ -536,20 +536,19 @@ static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc * with host. 
This function works as wrap of hv_mark_gpa_visibility() * with memory base and size. */ -static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) +static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) { enum hv_mem_host_visibility visibility = enc ? VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; u64 *pfn_array; phys_addr_t paddr; + int i, pfn, err; void *vaddr; int ret = 0; - bool result = true; - int i, pfn; pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!pfn_array) { - result = false; + ret = -ENOMEM; goto err_set_memory_p; } @@ -568,10 +567,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { ret = hv_mark_gpa_visibility(pfn, pfn_array, visibility); - if (ret) { - result = false; + if (ret) goto err_free_pfn_array; - } pfn = 0; } } @@ -586,10 +583,11 @@ err_set_memory_p: * order to avoid leaving the memory range in a "broken" state. Setting * the PRESENT bits shouldn't fail, but return an error if it does. */ - if (set_memory_p(kbuffer, pagecount)) - result = false; + err = set_memory_p(kbuffer, pagecount); + if (err && !ret) + ret = err; - return result; + return ret; } static bool hv_vtom_tlb_flush_required(bool private) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 5af926c050f0..21bc53f5ed0c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -78,6 +78,13 @@ static inline bool acpi_skip_set_wakeup_address(void) #define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address +union acpi_subtable_headers; + +int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, + const unsigned long end); + +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa); + /* * Check if the CPU can handle C2 and deeper */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ba99ef75f56c..ca9ae606aab9 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -156,102 +156,50 @@ static inline int alternatives_text_reserved(void *start, void *end) #define ALT_CALL_INSTR "call BUG_func" -#define b_replacement(num) "664"#num -#define e_replacement(num) "665"#num +#define alt_slen "772b-771b" +#define alt_total_slen "773b-771b" +#define alt_rlen "775f-774f" -#define alt_end_marker "663" -#define alt_slen "662b-661b" -#define alt_total_slen alt_end_marker"b-661b" -#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" - -#define OLDINSTR(oldinstr, num) \ - "# ALT: oldnstr\n" \ - "661:\n\t" oldinstr "\n662:\n" \ +#define OLDINSTR(oldinstr) \ + "# ALT: oldinstr\n" \ + "771:\n\t" oldinstr "\n772:\n" \ "# ALT: padding\n" \ - ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ - "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \ - alt_end_marker ":\n" - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" - -/* - * Pad the second replacement alternative with additional NOPs if it is - * additionally longer than the first replacement alternative. 
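The ivm.c conversion above switches the vTOM callbacks from bool to the kernel's usual 0/-errno convention and adopts a first-error-wins unwind: the primary failure is preserved, and a failure in the mandatory cleanup is reported only when nothing failed before it. A minimal sketch of that pattern, with hypothetical do_transition()/restore_present() helpers standing in for hv_mark_gpa_visibility() and set_memory_p():

	static int example_set_visibility(void)
	{
		int ret, err;

		ret = do_transition();		/* hypothetical: returns 0 or -errno */

		err = restore_present();	/* hypothetical cleanup: must always run */
		if (err && !ret)
			ret = err;		/* report cleanup failure only if nothing failed earlier */

		return ret;
	}
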
- */ -#define OLDINSTR_2(oldinstr, num1, num2) \ - "# ALT: oldinstr2\n" \ - "661:\n\t" oldinstr "\n662:\n" \ - "# ALT: padding2\n" \ - ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ - "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ - alt_end_marker ":\n" - -#define OLDINSTR_3(oldinsn, n1, n2, n3) \ - "# ALT: oldinstr3\n" \ - "661:\n\t" oldinsn "\n662:\n" \ - "# ALT: padding3\n" \ - ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ - " - (" alt_slen ")) > 0) * " \ - "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ - " - (" alt_slen ")), 0x90\n" \ - alt_end_marker ":\n" - -#define ALTINSTR_ENTRY(ft_flags, num) \ - " .long 661b - .\n" /* label */ \ - " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen ")-(" alt_slen ")),0x90\n" \ + "773:\n" + +#define ALTINSTR_ENTRY(ft_flags) \ + ".pushsection .altinstructions,\"a\"\n" \ + " .long 771b - .\n" /* label */ \ + " .long 774f - .\n" /* new instruction */ \ " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n" /* replacement len */ + " .byte " alt_rlen "\n" /* replacement len */ \ + ".popsection\n" -#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \ - "# ALT: replacement " #num "\n" \ - b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n" +#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + "# ALT: replacement\n" \ + "774:\n\t" newinstr "\n775:\n" \ + ".popsection\n" /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, ft_flags) \ - OLDINSTR(oldinstr, 1) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(ft_flags, 1) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr, 1) \ - ".popsection\n" + OLDINSTR(oldinstr) \ + ALTINSTR_ENTRY(ft_flags) \ + ALTINSTR_REPLACEMENT(newinstr) #define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ - OLDINSTR_2(oldinstr, 1, 2) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(ft_flags1, 1) \ - ALTINSTR_ENTRY(ft_flags2, 2) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr1, 1) \ - ALTINSTR_REPLACEMENT(newinstr2, 2) \ - ".popsection\n" + ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2) /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. 
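The key simplification above: ALTERNATIVE_2() and ALTERNATIVE_3() are no longer open-coded but built by nesting ALTERNATIVE() itself. This works because the macros now emit gas numeric local labels (771..775), and every expansion produces a fresh label group; backward/forward references such as 771b and 774f bind to the nearest definition, so the recursion is well-defined. A sketch of the expansion order (not the literal preprocessor output):

	ALTERNATIVE_2(old, new1, f1, new2, f2)
		=> ALTERNATIVE(ALTERNATIVE(old, new1, f1), new2, f2)

The entire inner 771..775 block, padding included, becomes the "oldinstr" of the outer expansion, and the outer .skip pads against its total length. That is also why the gas-compatible alt_max_short() maximum trickery can be deleted outright.
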
*/ #define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ - ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ - newinstr_yes, ft_flags) - -#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \ - newinsn3, ft_flags3) \ - OLDINSTR_3(oldinsn, 1, 2, 3) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(ft_flags1, 1) \ - ALTINSTR_ENTRY(ft_flags2, 2) \ - ALTINSTR_ENTRY(ft_flags3, 3) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinsn1, 1) \ - ALTINSTR_REPLACEMENT(newinsn2, 2) \ - ALTINSTR_REPLACEMENT(newinsn3, 3) \ - ".popsection\n" + ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags) + +#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \ + newinstr3, ft_flags3) \ + ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \ + newinstr3, ft_flags3) /* * Alternative instructions for different CPU types or capabilities. @@ -266,14 +214,11 @@ static inline int alternatives_text_reserved(void *start, void *end) * without volatile and memory clobber. */ #define alternative(oldinstr, newinstr, ft_flags) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory") + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory") #define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory") -#define alternative_ternary(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ - asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) ::: "memory") - /* * Alternative inline assembly with input. * @@ -283,18 +228,28 @@ static inline int alternatives_text_reserved(void *start, void *end) * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, ft_flags, input...) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : output : "i" (0), ## input) -/* Like alternative_io, but for replacing a direct call with another one. */ -#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ - asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ - : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) +/* + * Like alternative_io, but for replacing a direct call with another one. + * + * Use the %c operand modifier which is the generic way to print a bare + * constant expression with all syntax-specific punctuation omitted. %P + * is the x86-specific variant which can handle constants too, for + * historical reasons, but it should be used primarily for PIC + * references: i.e., if used for a function, it would add the PLT + * suffix. + */ +#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ + asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) /* * Like alternative_call, but there are two features and respective functions. 
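A hedged usage sketch for the reworked alternative_call(): it patches a direct CALL from one function to another when the feature bit is set. slow_impl()/fast_impl() are hypothetical, and a real call site must additionally name any registers the callees may clobber:

	extern unsigned int slow_impl(unsigned int arg);	/* hypothetical */
	extern unsigned int fast_impl(unsigned int arg);	/* hypothetical */

	static __always_inline unsigned int dispatch(unsigned int arg)
	{
		unsigned int ret;

		/* Both callees take the argument in %rdi and return in %eax. */
		alternative_call(slow_impl, fast_impl, X86_FEATURE_ALWAYS,
				 "=a" (ret), "D" (arg));
		return ret;
	}

Note that ALT_OUTPUT_SP() inside the macro now supplies ASM_CALL_CONSTRAINT, so callers no longer add it by hand the way alternative_call_2() previously did.
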
@@ -302,12 +257,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, function1 is used. * Otherwise, old function is used. */ -#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ - output, input...) \ - asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ - "call %c[new2]", ft_flags2) \ - : output, ASM_CALL_CONSTRAINT \ - : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ +#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ + output, input...) \ + asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ + "call %c[new2]", ft_flags2) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input) /* @@ -322,6 +277,8 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr +#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__ + /* Macro for creating assembler functions avoiding any C magic. */ #define DEFINE_ASM_FUNC(func, instr, sec) \ asm (".pushsection " #sec ", \"ax\"\n" \ @@ -388,22 +345,23 @@ void nop_func(void); * @newinstr. ".skip" directive takes care of proper instruction padding * in case @newinstr is longer than @oldinstr. */ -.macro ALTERNATIVE oldinstr, newinstr, ft_flags -140: - \oldinstr -141: - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f - .popsection +#define __ALTERNATIVE(oldinst, newinst, flag) \ +740: \ + oldinst ; \ +741: \ + .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\ +742: \ + .pushsection .altinstructions,"a" ; \ + altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \ + .popsection ; \ + .pushsection .altinstr_replacement,"ax" ; \ +743: \ + newinst ; \ +744: \ + .popsection ; - .pushsection .altinstr_replacement,"ax" -143: - \newinstr -144: - .popsection +.macro ALTERNATIVE oldinstr, newinstr, ft_flags + __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags) .endm #define old_len 141b-140b @@ -412,65 +370,18 @@ void nop_func(void); #define new_len3 146f-145f /* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) -#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c)) - - -/* * Same as ALTERNATIVE macro above but for two alternatives. If CPU * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has * @feature2, it replaces @oldinstr with @feature2. 
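ALT_OUTPUT_SP() is a thin wrapper worth spelling out: ASM_CALL_CONSTRAINT (from <asm/asm.h>) is the "+r" (current_stack_pointer) output constraint, and the macro merely prepends it. A sketch of the expansion:

	: ALT_OUTPUT_SP("=a" (ret))
	/* expands to */
	: "+r" (current_stack_pointer), "=a" (ret)

Making the stack pointer an output ties the asm statement to the frame, which keeps the compiler from scheduling an inline-asm CALL before the containing function's frame setup.
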
*/ .macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2 -140: - \oldinstr -141: - .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_2(new_len1, new_len2) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f - altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - .popsection + __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1), + \newinstr2, \ft_flags2) .endm .macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3 -140: - \oldinstr -141: - .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \ - (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f - altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f - altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - \newinstr3 -146: - .popsection + __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2), + \newinstr3, \ft_flags3) .endm /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 5c37944c8a5e..6f3b6aef47ba 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -21,8 +21,8 @@ extern int amd_numa_init(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, unsigned long); -extern int amd_smn_read(u16 node, u32 address, u32 *value); -extern int amd_smn_write(u16 node, u32 address, u32 value); +int __must_check amd_smn_read(u16 node, u32 address, u32 *value); +int __must_check amd_smn_write(u16 node, u32 address, u32 value); struct amd_l3_cache { unsigned indices; diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ed2797f132ce..62cef2113ca7 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -93,10 +93,9 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ - : [ptr] "+m" (*(_ptr)), \ - "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), "c" (n.high), "S" (_ptr) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ o.full; \ @@ -122,12 +121,11 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ CC_SET(e) \ : CC_OUT(e) (ret), \ - [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), "c" (n.high), "S" (_ptr) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ if (unlikely(!ret)) \ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index b6325ee30871..3831f612e89c 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -280,10 +280,10 @@ struct x86_cpu_desc { u32 x86_microcode_rev; }; -#define INTEL_CPU_DESC(model, stepping, revision) { \ - .x86_family = 6, \ - .x86_vendor = 
X86_VENDOR_INTEL, \ - .x86_model = (model), \ +#define INTEL_CPU_DESC(vfm, stepping, revision) { \ + .x86_family = VFM_FAMILY(vfm), \ + .x86_vendor = VFM_VENDOR(vfm), \ + .x86_model = VFM_MODEL(vfm), \ .x86_stepping = (stepping), \ .x86_microcode_rev = (revision), \ } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3c7434329661..b51e88d01b21 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -18,170 +18,170 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define 
X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 
microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* 
"nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE 
( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 
6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -189,93 +189,93 @@ * * Reuse free bits when adding new feature flags! 
*/ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. 
*/ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ -#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. 
*/ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ -#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ -#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ -#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ -#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* 
Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* 
"clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -283,181 +283,182 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ -#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ -#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ -#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ -#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ -#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ -#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ -#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ -#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ -#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ -#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ -#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ -#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ -#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ -#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ -#define 
X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ -#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ -#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ -#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ -#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ -#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ -#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ -#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ -#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ -#define X86_FEATURE_AVX_VNNI 
(12*32+ 4) /* AVX VNNI instructions */ -#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ -#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ -#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ -#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ -#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ -#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ -#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */ -#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ -#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ -#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ -#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ -#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ -#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. 
Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ -#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ -#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ -#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* 
Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ -#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ -#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ 
-#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ -#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ -#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ -#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ -#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ -#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ -#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory 
encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */ -#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ -#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ -#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ -#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ -#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ -#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ -#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ -#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ -#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ -#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -465,59 +466,59 @@ * * Reuse free bits when adding new feature flags! 
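 *
 * Illustrative usage (a sketch, not from this patch): bits in this
 * word are either populated from scattered CPUID leaves during
 * feature detection or force-set by code that decides a software
 * capability applies, and are then queried like any other flag, e.g.:
 *
 *	setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
 *	...
 *	if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP))
 *		... apply the mitigation ...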
*/ -#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ -#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ -#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ -#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. 
*/ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ -#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ -#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ -#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ -#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ -#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ -#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ 
+#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 7e523bb3d2d3..fb2809b20b0a 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, #endif /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for x86 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 8 bits. - * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints (see cc_stack_align4/8 in + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints (see cc_stack_align4/8 in * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32) * low bits from any entropy chosen here. * - * Therefore, final stack offset entropy will be 5 (x86_64) or - * 6 (ia32) bits. + * Therefore, final stack offset entropy will be 7 (x86_64) or + * 8 (ia32) bits. 
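+ *
+ * Worked example of the arithmetic above: KSTACK_OFFSET_MAX() masks
+ * the chosen value down to 10 bits (0x3FF, offsets 0-1023); 8-byte
+ * stack alignment on x86_64 then discards the low 3 bits and 4-byte
+ * alignment on ia32 discards the low 2:
+ *
+ *	x86_64: 2^10 / 2^3 = 2^7 -> 128 distinct offsets = 7 bits
+ *	ia32:   2^10 / 2^2 = 2^8 -> 256 distinct offsets = 8 bits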
*/ - choose_random_kstack_offset(rdtsc() & 0xFF); + choose_random_kstack_offset(rdtsc()); } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index cc9ccf61b6bd..14d72727d7ee 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -6,6 +6,7 @@ struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void (*free_pgt_page)(void *, void *); /* free buf for page table */ void *context; /* context for alloc_pgt_page */ unsigned long page_flag; /* page flag for PMD or PUD entry */ unsigned long offset; /* ident mapping offset */ @@ -16,4 +17,6 @@ struct x86_mapping_info { int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, unsigned long pstart, unsigned long pend); +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd); + #endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h deleted file mode 100644 index 994638ef171b..000000000000 --- a/arch/x86/include/asm/intel_pconfig.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef _ASM_X86_INTEL_PCONFIG_H -#define _ASM_X86_INTEL_PCONFIG_H - -#include <asm/asm.h> -#include <asm/processor.h> - -enum pconfig_target { - INVALID_TARGET = 0, - MKTME_TARGET = 1, - PCONFIG_TARGET_NR -}; - -int pconfig_target_supported(enum pconfig_target target); - -enum pconfig_leaf { - MKTME_KEY_PROGRAM = 0, - PCONFIG_LEAF_INVALID, -}; - -#define PCONFIG ".byte 0x0f, 0x01, 0xc5" - -/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */ - -/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */ -#define MKTME_KEYID_SET_KEY_DIRECT 0 -#define MKTME_KEYID_SET_KEY_RANDOM 1 -#define MKTME_KEYID_CLEAR_KEY 2 -#define MKTME_KEYID_NO_ENCRYPT 3 - -/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */ -#define MKTME_AES_XTS_128 (1 << 8) - -/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */ -#define MKTME_PROG_SUCCESS 0 -#define MKTME_INVALID_PROG_CMD 1 -#define MKTME_ENTROPY_ERROR 2 -#define MKTME_INVALID_KEYID 3 -#define MKTME_INVALID_ENC_ALG 4 -#define MKTME_DEVICE_BUSY 5 - -/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). 
*/ -struct mktme_key_program { - u16 keyid; - u32 keyid_ctrl; - u8 __rsvd[58]; - u8 key_field_1[64]; - u8 key_field_2[64]; -} __packed __aligned(256); - -static inline int mktme_key_program(struct mktme_key_program *key_program) -{ - unsigned long rax = MKTME_KEY_PROGRAM; - - if (!pconfig_target_supported(MKTME_TARGET)) - return -ENXIO; - - asm volatile(PCONFIG - : "=a" (rax), "=b" (key_program) - : "0" (rax), "1" (key_program) - : "memory", "cc"); - - return rax; -} - -#endif /* _ASM_X86_INTEL_PCONFIG_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c5ae649d2df..cf7fc2b8e3ce 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -54,6 +54,26 @@ static __always_inline void native_halt(void) asm volatile("hlt": : :"memory"); } +static __always_inline int native_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static __always_inline unsigned long native_local_irq_save(void) +{ + unsigned long flags = native_save_fl(); + + native_irq_disable(); + + return flags; +} + +static __always_inline void native_local_irq_restore(unsigned long flags) +{ + if (!native_irqs_disabled_flags(flags)) + native_irq_enable(); +} + #endif #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dfd2e9699bd7..3ad29b128943 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -261,7 +261,8 @@ enum mcp_flags { MCP_DONTLOG = BIT(2), /* only clear, don't log */ MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ }; -bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); + +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e022e6eb766c..01342963011e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -660,6 +660,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -1164,6 +1166,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index cc6b8e087192..af4302d79b59 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -54,7 +54,7 @@ static inline void clear_page(void *page) clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), - "0" (page) + "D" (page) : "cc", "memory", "rax", "rcx"); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 65b8e5bb902c..e39311a89bf4 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } +static inline bool pte_decrypted(pte_t pte) +{ + return cc_mkdec(pte_val(pte)) == pte_val(pte); +} + #define pmd_dirty pmd_dirty static inline bool pmd_dirty(pmd_t pmd) { diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b78644962626..2f321137736c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -549,6 +549,7 @@ enum pg_level { 
PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, + PG_LEVEL_256T, PG_LEVEL_NUM }; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cb4f6c513c48..a75a07f4931f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -692,7 +692,17 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu) #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_highest_perf(void); -extern void amd_clear_divider(void); + +/* + * Issue a DIV 0/1 insn to clear any division data from previous DIV + * operations. + */ +static __always_inline void amd_clear_divider(void) +{ + asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) + :: "a" (0), "d" (0), "r" (1)); +} + extern void amd_check_microcode(void); #else static inline u32 amd_get_highest_perf(void) { return 0; } diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h new file mode 100644 index 000000000000..24e3a53ca255 --- /dev/null +++ b/arch/x86/include/asm/runtime-const.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("mov %1,%0\n1:\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - %c2 - .\n\t" \ + ".popsection" \ + :"=r" (__ret) \ + :"i" ((unsigned long)0x0123456789abcdefull), \ + "i" (sizeof(long))); \ + __ret; }) + +// The 'typeof' will create at _least_ a 32-bit type, but +// will happily also take a bigger type and the 'shrl' will +// clear the upper bits +#define runtime_const_shift_right_32(val, sym) ({ \ + typeof(0u+(val)) __ret = (val); \ + asm_inline("shrl $12,%k0\n1:\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - 1 - .\n\t" \ + ".popsection" \ + :"+r" (__ret)); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* + * The text patching is trivial - you can only do this at init time, + * when the text section hasn't been marked RO, and before the text + * has ever been executed. 
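+ *
+ * Usage sketch (hypothetical consumer, not part of this header): a
+ * pointer that becomes constant after boot is read through
+ * runtime_const_ptr() and patched exactly once during init. The
+ * symbol also needs a matching RUNTIME_CONST(ptr, example_table)
+ * entry in the vmlinux linker script so the fixup entries are
+ * collected:
+ *
+ *	static void *example_table __ro_after_init;
+ *
+ *	void *example_lookup(void)
+ *	{
+ *		return runtime_const_ptr(example_table);
+ *	}
+ *
+ *	void __init example_init(void *tbl)
+ *	{
+ *		example_table = tbl;
+ *		runtime_const_init(ptr, example_table);
+ *	}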
+ */ +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + *(unsigned long *)where = val; +} + +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + *(unsigned char *)where = val; +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 9aee31862b4a..4b2abce2e3e7 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); int set_memory_p(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); + +bool set_memory_enc_stop_conversion(void); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); + int set_memory_np_noalias(unsigned long addr, int numpages); int set_memory_nonglobal(unsigned long addr, int numpages); int set_memory_global(unsigned long addr, int numpages); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index e61e68d71cba..0667b2a88614 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -28,6 +28,8 @@ #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ #ifndef __ASSEMBLY__ +#include <linux/cache.h> + #include <asm/bootparam.h> #include <asm/x86_init.h> @@ -133,6 +135,12 @@ asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data #endif /* __i386__ */ #endif /* _SETUP */ +#ifdef CONFIG_CMDLINE_BOOL +extern bool builtin_cmdline_added __ro_after_init; +#else +#define builtin_cmdline_added 0 +#endif + #else /* __ASSEMBLY */ .macro __RESERVE_BRK name, size diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 5a8246dd532f..e90d403f2068 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -98,6 +98,19 @@ enum psc_op { /* GHCBData[63:32] */ \ (((u64)(val) & GENMASK_ULL(63, 32)) >> 32) +/* GHCB Run at VMPL Request/Response */ +#define GHCB_MSR_VMPL_REQ 0x016 +#define GHCB_MSR_VMPL_REQ_LEVEL(v) \ + /* GHCBData[39:32] */ \ + ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_VMPL_REQ) + +#define GHCB_MSR_VMPL_RESP 0x017 +#define GHCB_MSR_VMPL_RESP_VAL(v) \ + /* GHCBData[63:32] */ \ + (((u64)(v) & GENMASK_ULL(63, 32)) >> 32) + /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 @@ -109,6 +122,7 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) +#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5) /* * SNP Page State Change NAE event @@ -163,6 +177,10 @@ struct snp_psc_desc { #define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ #define GHCB_TERM_CPUID 4 /* CPUID-validation failure */ #define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */ +#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */ +#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */ +#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */ +#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/include/asm/sev.h
b/arch/x86/include/asm/sev.h index ca20cc4e5826..ac5886ce252e 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -152,10 +152,119 @@ struct snp_secrets_page { u8 vmpck2[VMPCK_KEY_LEN]; u8 vmpck3[VMPCK_KEY_LEN]; struct secrets_os_area os_area; - u8 rsvd3[3840]; + + u8 vmsa_tweak_bitmap[64]; + + /* SVSM fields */ + u64 svsm_base; + u64 svsm_size; + u64 svsm_caa; + u32 svsm_max_version; + u8 svsm_guest_vmpl; + u8 rsvd3[3]; + + /* Remainder of page */ + u8 rsvd4[3744]; } __packed; +/* + * The SVSM Calling Area (CA) related structures. + */ +struct svsm_ca { + u8 call_pending; + u8 mem_available; + u8 rsvd1[6]; + + u8 svsm_buffer[PAGE_SIZE - 8]; +}; + +#define SVSM_SUCCESS 0 +#define SVSM_ERR_INCOMPLETE 0x80000000 +#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001 +#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002 +#define SVSM_ERR_INVALID_ADDRESS 0x80000003 +#define SVSM_ERR_INVALID_FORMAT 0x80000004 +#define SVSM_ERR_INVALID_PARAMETER 0x80000005 +#define SVSM_ERR_INVALID_REQUEST 0x80000006 +#define SVSM_ERR_BUSY 0x80000007 +#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006 + +/* + * The SVSM PVALIDATE related structures + */ +struct svsm_pvalidate_entry { + u64 page_size : 2, + action : 1, + ignore_cf : 1, + rsvd : 8, + pfn : 52; +}; + +struct svsm_pvalidate_call { + u16 num_entries; + u16 cur_index; + + u8 rsvd1[4]; + + struct svsm_pvalidate_entry entry[]; +}; + +#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \ + offsetof(struct svsm_pvalidate_call, entry)) / \ + sizeof(struct svsm_pvalidate_entry)) + +/* + * The SVSM Attestation related structures + */ +struct svsm_loc_entry { + u64 pa; + u32 len; + u8 rsvd[4]; +}; + +struct svsm_attest_call { + struct svsm_loc_entry report_buf; + struct svsm_loc_entry nonce; + struct svsm_loc_entry manifest_buf; + struct svsm_loc_entry certificates_buf; + + /* For attesting a single service */ + u8 service_guid[16]; + u32 service_manifest_ver; + u8 rsvd[4]; +}; + +/* + * SVSM protocol structure + */ +struct svsm_call { + struct svsm_ca *caa; + u64 rax; + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 rax_out; + u64 rcx_out; + u64 rdx_out; + u64 r8_out; + u64 r9_out; +}; + +#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x)) +#define SVSM_CORE_REMAP_CA 0 +#define SVSM_CORE_PVALIDATE 1 +#define SVSM_CORE_CREATE_VCPU 2 +#define SVSM_CORE_DELETE_VCPU 3 + +#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x)) +#define SVSM_ATTEST_SERVICES 0 +#define SVSM_ATTEST_SINGLE_SERVICE 1 + #ifdef CONFIG_AMD_MEM_ENCRYPT + +extern u8 snp_vmpl; + extern void __sev_es_ist_enter(struct pt_regs *regs); extern void __sev_es_ist_exit(void); static __always_inline void sev_es_ist_enter(struct pt_regs *regs) @@ -181,6 +290,14 @@ static __always_inline void sev_es_nmi_complete(void) extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); extern void sev_enable(struct boot_params *bp); +/* + * RMPADJUST modifies the RMP permissions of a page of a lesser- + * privileged (numerically higher) VMPL. + * + * If the guest is running at a higher-privilege than the privilege + * level the instruction is targeting, the instruction will succeed, + * otherwise, it will fail. 
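+ *
+ * Usage sketch (simplified from how the kernel grants a lower VMPL
+ * access to a 4K page before using it as a VMSA; the values shown
+ * are illustrative):
+ *
+ *	u64 attrs = 1;				target VMPL1
+ *	attrs |= RMPADJUST_VMSA_PAGE_BIT;	mark the page as a VMSA
+ *	ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);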
+ */ static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { int rc; @@ -225,11 +342,16 @@ bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); void sev_show_status(void); -#else +void snp_update_svsm_ca(void); + +#else /* !CONFIG_AMD_MEM_ENCRYPT */ + +#define snp_vmpl 0 static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } @@ -253,12 +375,17 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } - +static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) +{ + return -ENOTTY; +} static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } static inline void sev_show_status(void) { } -#endif +static inline void snp_update_svsm_ca(void) { } + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a35936b512fe..ca073f40698f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -35,6 +35,7 @@ struct smp_ops { int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); + void (*stop_this_cpu)(void); void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 405efb3e4996..94408a784c8e 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -28,9 +28,6 @@ static inline cycles_t get_cycles(void) } #define get_cycles get_cycles -extern struct system_counterval_t convert_art_to_tsc(u64 art); -extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); - extern void tsc_early_init(void); extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 0ef36190abe6..b2d2df026f6e 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -328,9 +328,8 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, * due to unsigned comparison. * * Due to the MSB/Sign-bit being used as invalid marker (see - * arch_vdso_cycles_valid() above), the effective mask is S64_MAX, - * but that case is also unlikely and will also take the unlikely path - * here. + * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that + * case is also unlikely and will also take the unlikely path here. 
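+ *
+ * (Concrete note: a delta with bit 63 set - e.g. one computed from a
+ * cycle value that appears to have gone backwards - is always larger
+ * than max_cycles in an unsigned comparison, so it is caught by this
+ * slow path as well.)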
*/ if (unlikely(delta > vd->max_cycles)) { /* diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index be199a9b2676..93226281b450 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include <linux/hrtimer.h> #include <linux/timekeeper_internal.h> #include <vdso/datapage.h> #include <asm/vgtod.h> diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 7aa38b2ad8a9..a0ce291abcae 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -14,11 +14,6 @@ #include <uapi/linux/time.h> -#ifdef BUILD_VDSO32_64 -typedef u64 gtod_long_t; -#else -typedef unsigned long gtod_long_t; -#endif #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index ac9fc51e2b18..c9cf43d5ef23 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h @@ -7,51 +7,321 @@ #include <linux/stringify.h> /* - * The hypercall definitions differ in the low word of the %edx argument - * in the following way: the old port base interface uses the port - * number to distinguish between high- and low bandwidth versions. + * VMware hypercall ABI. + * + * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall) + * have up to 6 input and 6 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg2 - Hypercall command + * arg3 bits [15:0] - Port number, LB and direction flags + * + * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB + * hypercalls. They also have up to 6 input and 6 output on registers + * arguments, with different argument to register mapping: + * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3), + * %rsi (arg4), %rdi (arg5). + * + * - High bandwidth (HB) hypercalls are I/O port based only. They have + * up to 7 input and 7 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5), %ebp (arg6). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg1 - Hypercall command + * arg3 bits [15:0] - Port number, HB and direction flags + * + * For compatibility purposes, x86_64 systems use only lower 32 bits + * for input and output arguments. + * + * The hypercall definitions differ in the low word of the %edx (arg3) + * in the following way: the old I/O port based interface uses the port + * number to distinguish between high- and low bandwidth versions, and + * uses IN/OUT instructions to define transfer direction. * * The new vmcall interface instead uses a set of flags to select * bandwidth mode and transfer direction. The flags should be loaded - * into %dx by any user and are automatically replaced by the port - * number if the VMWARE_HYPERVISOR_PORT method is used. - * - * In short, new driver code should strictly use the new definition of - * %dx content. + * into arg3 by any user and are automatically replaced by the port + * number if the I/O port method is used. 
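+ *
+ * Usage sketch (hypothetical caller of the wrappers defined below):
+ * the magic is passed in arg0 and a matching hypervisor echoes it
+ * back in arg1, e.g.:
+ *
+ *	u32 ebx, ecx;
+ *
+ *	vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &ebx, &ecx);
+ *	if (ebx == VMWARE_HYPERVISOR_MAGIC)
+ *		... running on a VMware hypervisor ...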
*/ -/* Old port-based version */ -#define VMWARE_HYPERVISOR_PORT 0x5658 -#define VMWARE_HYPERVISOR_PORT_HB 0x5659 +#define VMWARE_HYPERVISOR_HB BIT(0) +#define VMWARE_HYPERVISOR_OUT BIT(1) -/* Current vmcall / vmmcall version */ -#define VMWARE_HYPERVISOR_HB BIT(0) -#define VMWARE_HYPERVISOR_OUT BIT(1) +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \ + VMWARE_HYPERVISOR_HB) -/* The low bandwidth call. The low word of edx is presumed clear. */ -#define VMWARE_HYPERCALL \ - ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \ - "inl (%%dx), %%eax", \ - "vmcall", X86_FEATURE_VMCALL, \ - "vmmcall", X86_FEATURE_VMW_VMMCALL) +#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U +#define VMWARE_CMD_GETVERSION 10 +#define VMWARE_CMD_GETHZ 45 +#define VMWARE_CMD_GETVCPU_INFO 68 +#define VMWARE_CMD_STEALCLOCK 91 /* - * The high bandwidth out call. The low word of edx is presumed to have the - * HB and OUT bits set. + * Hypercall command mask: + * bits [6:0] command, range [0, 127] + * bits [19:16] sub-command, range [0, 15] */ -#define VMWARE_HYPERCALL_HB_OUT \ - ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ - "rep outsb", \ - "vmcall", X86_FEATURE_VMCALL, \ - "vmmcall", X86_FEATURE_VMW_VMMCALL) +#define VMWARE_CMD_MASK 0xf007fU + +#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0) +#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1) + +extern unsigned long vmware_hypercall_slow(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); + +#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL +#define VMWARE_TDX_HCALL_FUNC 1 + +extern unsigned long vmware_tdx_hypercall(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); /* - * The high bandwidth in call. The low word of edx is presumed to have the - * HB bit set. + * The low bandwidth call. The low word of %edx is presumed to have OUT bit + * set. The high word of %edx may contain input data from the caller. 
*/ -#define VMWARE_HYPERCALL_HB_IN \ - ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ - "rep insb", \ - "vmcall", X86_FEATURE_VMCALL, \ +#define VMWARE_HYPERCALL \ + ALTERNATIVE_2("movw %[port], %%dx\n\t" \ + "inl (%%dx), %%eax", \ + "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) + +static inline +unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1, + unsigned long in3, u32 *out2, + u32 *out3, u32 *out4, u32 *out5) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2), "=d" (*out3), 
"=S" (*out4), + "=D" (*out5) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out1, + u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +#ifdef CONFIG_X86_64 +#define VMW_BP_CONSTRAINT "r" +#else +#define VMW_BP_CONSTRAINT "m" +#endif + +/* + * High bandwidth calls are not supported on encrypted memory guests. + * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use + * low bandwidth hypercall if memory encryption is set. + * This assumption simplifies HB hypercall implementation to just I/O port + * based approach without alternative patching. + */ +static inline +unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep outsb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep insb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} +#undef VMW_BP_CONSTRAINT +#undef VMWARE_HYPERCALL + #endif diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index 695f36664889..09b1d7e607c1 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -9,85 +9,85 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature bit + * is not displayed in /proc/cpuinfo at all. 
*/ /* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */ -#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */ -#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */ +#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* VM-Exit on vectored interrupts */ +#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* VM-Exit on NMIs */ #define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */ -#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */ -#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */ +#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* "preemption_timer" VMX Preemption Timer */ +#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* "posted_intr" Posted Interrupts */ /* EPT/VPID features, scattered to bits 16-23 */ -#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */ +#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* "invvpid" INVVPID is supported */ #define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */ -#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */ -#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */ -#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* 5-level EPT paging */ +#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* "ept_ad" EPT Accessed/Dirty bits */ +#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* "ept_1gb" 1GB EPT pages */ +#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* "ept_5level" 5-level EPT paging */ /* Aggregated APIC features 24-27 */ -#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */ -#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ +#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* "flexpriority" TPR shadow + virt APIC */ +#define VMX_FEATURE_APICV ( 0*32+ 25) /* "apicv" TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ /* VM-Functions, shifted to bits 28-31 */ -#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */ +#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* "eptp_switching" EPTP switching (in guest) */ /* Primary Processor-Based VM-Execution Controls, word 1 */ -#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* VM-Exit if INTRs are unblocked in guest */ #define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ -#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */ -#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */ -#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */ -#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */ -#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */ -#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */ -#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */ -#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */ -#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */ -#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */ +#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* VM-Exit on HLT */ +#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* VM-Exit on INVLPG */ +#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* VM-Exit on MWAIT */ +#define 
VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* VM-Exit on RDPMC */ +#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* VM-Exit on RDTSC */ +#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* VM-Exit on writes to CR3 */ +#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* VM-Exit on reads from CR3 */ +#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* Enable Tertiary VM-Execution Controls */ +#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* VM-Exit on writes to CR8 */ +#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* VM-Exit on reads from CR8 */ #define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ -#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */ -#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */ -#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/ -#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */ +#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* VM-Exit on accesses to debug registers */ +#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* VM-Exit on *all* IN{S} and OUT{S}*/ +#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* VM-Exit based on I/O port */ #define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */ -#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */ -#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */ -#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */ -#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */ +#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* VM-Exit based on MSR index */ +#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* VM-Exit on MONITOR (MWAIT's accomplice) */ +#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* VM-Exit on PAUSE (unconditionally) */ +#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* Enable Secondary VM-Execution Controls */ /* Secondary Processor-Based VM-Execution Controls, word 2 */ #define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */ -#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */ -#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */ -#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */ -#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */ -#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */ -#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */ -#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */ +#define VMX_FEATURE_EPT ( 2*32+ 1) /* "ept" Extended Page Tables, a.k.a. 
Two-Dimensional Paging */ +#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* VM-Exit on {S,L}*DT instructions */ +#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* Enable RDTSCP in guest */ +#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* Virtualize X2APIC for the guest */ +#define VMX_FEATURE_VPID ( 2*32+ 5) /* "vpid" Virtual Processor ID (TLB ASID modifier) */ +#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* VM-Exit on WBINVD */ +#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* "unrestricted_guest" Allow Big Real Mode and other "invalid" states */ #define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */ #define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */ #define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */ -#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/ -#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */ -#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */ -#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */ -#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */ -#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */ +#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* VM-Exit on RDRAND*/ +#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* Enable INVPCID in guest */ +#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* Enable VM-Functions (leaf dependent) */ +#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* "shadow_vmcs" VMREAD/VMWRITE in guest can access shadow VMCS */ +#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */ +#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */ #define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ -#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */ -#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */ -#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */ +#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */ #define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. 
user */ -#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */ -#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ -#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ -#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ -#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ -#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */ +#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* Processor Trace logs GPAs */ +#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* "tsc_scaling" Scale hardware TSC when read in guest */ +#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* "usr_wait_pause" Enable TPAUSE, UMONITOR, UMWAIT in guest */ +#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* VM-Exit on ENCLV (leaf dependent) */ +#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* VM-Exit when bus lock caused */ +#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* "notify_vm_exiting" VM-Exit when no event windows after notify window */ /* Tertiary Processor-Based VM-Execution Controls, word 3 */ -#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ +#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* "ipi_virt" Enable IPI virtualization */ #endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index e8d7d4941c4c..422a47746657 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -5,45 +5,12 @@ #include <linux/bitops.h> #include <linux/wordpart.h> -/* - * This is largely generic for little-endian machines, but the - * optimal byte mask counting is probably going to be something - * that is architecture-specific. If you have a reliably fast - * bit count instruction, that might be better than the multiply - * and shift, for example. - */ struct word_at_a_time { const unsigned long one_bits, high_bits; }; #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } -#ifdef CONFIG_64BIT - -/* - * Jan Achrenius on G+: microoptimized version of - * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" - * that works for the bytemasks without having to - * mask them first. 
- */ -static inline long count_masked_bytes(unsigned long mask) -{ - return mask*0x0001020304050608ul >> 56; -} - -#else /* 32-bit case */ - -/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ -static inline long count_masked_bytes(long mask) -{ - /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ - long a = (0x0ff0001+mask) >> 23; - /* Fix the 1 for 00 case */ - return a & mask; -} - -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -57,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, return bits; } +#ifdef CONFIG_64BIT + +/* Keep the initial has_zero() value for both bitmask and size calc */ +#define create_zero_mask(bits) (bits) + +static inline unsigned long zero_bytemask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +#define find_zero(bits) (__ffs(bits) >> 3) + +#else + +/* Create the final mask for both bytemask and size */ static inline unsigned long create_zero_mask(unsigned long bits) { bits = (bits - 1) & ~bits; @@ -66,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits) /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ static inline unsigned long find_zero(unsigned long mask) { - return count_masked_bytes(mask); + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; } +#endif + /* * Load an unaligned word from kernel space. * diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 6149eabe200f..213cf5379a5a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -149,12 +149,22 @@ struct x86_init_acpi { * @enc_status_change_finish Notify HV after the encryption status of a range is changed * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + * @enc_kexec_begin Begin the two-step process of converting shared memory back + * to private. It stops new conversions from being started and + * waits for in-flight conversions to finish, if possible. + * @enc_kexec_finish Finish the two-step process of converting shared memory to + * private. All memory is private when the function returns. + * It is called on only one CPU while the others are shut down + * and with interrupts disabled. 
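+ * + * A minimal sketch of the intended use on the kexec path (the call sites shown are an assumption, not part of this hunk): + * + * x86_platform.guest.enc_kexec_begin(); + * (stop the other CPUs, disable interrupts) + * x86_platform.guest.enc_kexec_finish();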
*/ struct x86_guest { - bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); - bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); bool (*enc_tlb_flush_required)(bool enc); bool (*enc_cache_flush_required)(void); + void (*enc_kexec_begin)(void); + void (*enc_kexec_finish)(void); }; /** diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 20a0dd51700a..a847180836e4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -17,7 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_head32.o = -pg -CFLAGS_REMOVE_sev.o = -pg CFLAGS_REMOVE_rethook.o = -pg endif @@ -26,19 +25,16 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n -KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. KCSAN_SANITIZE := n KMSAN_SANITIZE_head$(BITS).o := n KMSAN_SANITIZE_nmi.o := n -KMSAN_SANITIZE_sev.o := n # If instrumentation of the following files is enabled, boot hangs during # first second. 
KCOV_INSTRUMENT_head$(BITS).o := n -KCOV_INSTRUMENT_sev.o := n CFLAGS_irq.o := -I $(src)/../include/asm/trace @@ -142,8 +138,6 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o - obj-$(CONFIG_CFI_CLANG) += cfi.o obj-$(CONFIG_CALL_THUNKS) += callthunks.o diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index fc17b3f136fe..842a5f449404 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o +obj-$(CONFIG_ACPI_MADT_WAKEUP) += madt_wakeup.o madt_playdead.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4bf82dbd2a6b..9f4618dcd704 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -67,13 +67,6 @@ static bool has_lapic_cpus __initdata; static bool acpi_support_online_capable; #endif -#ifdef CONFIG_X86_64 -/* Physical address of the Multiprocessor Wakeup Structure mailbox */ -static u64 acpi_mp_wake_mailbox_paddr; -/* Virtual address of the Multiprocessor Wakeup Structure mailbox */ -static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; -#endif - #ifdef CONFIG_X86_IO_APIC /* * Locks related to IOAPIC hotplug @@ -341,60 +334,6 @@ acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long e return 0; } - -#ifdef CONFIG_X86_64 -static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip) -{ - /* - * Remap mailbox memory only for the first call to acpi_wakeup_cpu(). - * - * Wakeup of secondary CPUs is fully serialized in the core code. - * No need to protect acpi_mp_wake_mailbox from concurrent accesses. - */ - if (!acpi_mp_wake_mailbox) { - acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr, - sizeof(*acpi_mp_wake_mailbox), - MEMREMAP_WB); - } - - /* - * Mailbox memory is shared between the firmware and OS. Firmware will - * listen on mailbox command address, and once it receives the wakeup - * command, the CPU associated with the given apicid will be booted. - * - * The value of 'apic_id' and 'wakeup_vector' must be visible to the - * firmware before the wakeup command is visible. smp_store_release() - * ensures ordering and visibility. - */ - acpi_mp_wake_mailbox->apic_id = apicid; - acpi_mp_wake_mailbox->wakeup_vector = start_ip; - smp_store_release(&acpi_mp_wake_mailbox->command, - ACPI_MP_WAKE_COMMAND_WAKEUP); - - /* - * Wait for the CPU to wake up. - * - * The CPU being woken up is essentially in a spin loop waiting to be - * woken up. It should not take long for it wake up and acknowledge by - * zeroing out ->command. - * - * ACPI specification doesn't provide any guidance on how long kernel - * has to wait for a wake up acknowledgement. It also doesn't provide - * a way to cancel a wake up request if it takes too long. - * - * In TDX environment, the VMM has control over how long it takes to - * wake up secondary. It can postpone scheduling secondary vCPU - * indefinitely. Giving up on wake up request and reporting error opens - * possible attack vector for VMM: it can wake up a secondary CPU when - * kernel doesn't expect it. Wait until positive result of the wake up - * request. 
- */ - while (READ_ONCE(acpi_mp_wake_mailbox->command)) - cpu_relax(); - - return 0; -} -#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC @@ -1124,29 +1063,6 @@ static int __init acpi_parse_madt_lapic_entries(void) } return 0; } - -#ifdef CONFIG_X86_64 -static int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, - const unsigned long end) -{ - struct acpi_madt_multiproc_wakeup *mp_wake; - - if (!IS_ENABLED(CONFIG_SMP)) - return -ENODEV; - - mp_wake = (struct acpi_madt_multiproc_wakeup *)header; - if (BAD_MADT_ENTRY(mp_wake, end)) - return -EINVAL; - - acpi_table_print_madt_entry(&header->common); - - acpi_mp_wake_mailbox_paddr = mp_wake->base_address; - - apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); - - return 0; -} -#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC @@ -1343,7 +1259,7 @@ static void __init acpi_process_madt(void) smp_found_config = 1; } -#ifdef CONFIG_X86_64 +#ifdef CONFIG_ACPI_MADT_WAKEUP /* * Parse MADT MP Wake entry. */ diff --git a/arch/x86/kernel/acpi/madt_playdead.S b/arch/x86/kernel/acpi/madt_playdead.S new file mode 100644 index 000000000000..4e498d28cdc8 --- /dev/null +++ b/arch/x86/kernel/acpi/madt_playdead.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/nospec-branch.h> +#include <asm/page_types.h> +#include <asm/processor-flags.h> + + .text + .align PAGE_SIZE + +/* + * asm_acpi_mp_play_dead() - Hand over control of the CPU to the BIOS + * + * rdi: Address of the ACPI MADT MPWK ResetVector + * rsi: PGD of the identity mapping + */ +SYM_FUNC_START(asm_acpi_mp_play_dead) + /* Turn off global entries. Following CR3 write will flush them. */ + movq %cr4, %rdx + andq $~(X86_CR4_PGE), %rdx + movq %rdx, %cr4 + + /* Switch to identity mapping */ + movq %rsi, %cr3 + + /* Jump to reset vector */ + ANNOTATE_RETPOLINE_SAFE + jmp *%rdi +SYM_FUNC_END(asm_acpi_mp_play_dead) diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c new file mode 100644 index 000000000000..6cfe762be28b --- /dev/null +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/acpi.h> +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/kexec.h> +#include <linux/memblock.h> +#include <linux/pgtable.h> +#include <linux/sched/hotplug.h> +#include <asm/apic.h> +#include <asm/barrier.h> +#include <asm/init.h> +#include <asm/intel_pt.h> +#include <asm/nmi.h> +#include <asm/processor.h> +#include <asm/reboot.h> + +/* Physical address of the Multiprocessor Wakeup Structure mailbox */ +static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; + +/* Virtual address of the Multiprocessor Wakeup Structure mailbox */ +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init; + +static u64 acpi_mp_pgd __ro_after_init; +static u64 acpi_mp_reset_vector_paddr __ro_after_init; + +static void acpi_mp_stop_this_cpu(void) +{ + asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd); +} + +static void acpi_mp_play_dead(void) +{ + play_dead_common(); + asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd); +} + +static void acpi_mp_cpu_die(unsigned int cpu) +{ + u32 apicid = per_cpu(x86_cpu_to_apicid, cpu); + unsigned long timeout; + + /* + * Use TEST mailbox command to prove that BIOS got control over + * the CPU before declaring it dead. 
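+ * + * (ACPI_MP_WAKE_COMMAND_TEST is assumed to be acknowledge-only: unlike ACPI_MP_WAKE_COMMAND_WAKEUP it does not boot anything, so it can be issued to a parked CPU purely as a handshake.)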
+ * + * BIOS has to clear the 'command' field of the mailbox. + */ + acpi_mp_wake_mailbox->apic_id = apicid; + smp_store_release(&acpi_mp_wake_mailbox->command, + ACPI_MP_WAKE_COMMAND_TEST); + + /* Don't wait longer than a second. */ + timeout = USEC_PER_SEC; + while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout) + udelay(1); + + if (!timeout) + pr_err("Failed to hand over CPU %d to BIOS\n", cpu); +} + +/* The argument is required to match the type of x86_mapping_info::alloc_pgt_page */ +static void __init *alloc_pgt_page(void *dummy) +{ + return memblock_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static void __init free_pgt_page(void *pgt, void *dummy) +{ + return memblock_free(pgt, PAGE_SIZE); +} + +/* + * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at + * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches + * to the identity mapping and the function has to be present at the same spot in + * the virtual address space before and after switching page tables. + */ +static int __init init_transition_pgtable(pgd_t *pgd) +{ + pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; + unsigned long vaddr, paddr; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + vaddr = (unsigned long)asm_acpi_mp_play_dead; + pgd += pgd_index(vaddr); + if (!pgd_present(*pgd)) { + p4d = (p4d_t *)alloc_pgt_page(NULL); + if (!p4d) + return -ENOMEM; + set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); + } + p4d = p4d_offset(pgd, vaddr); + if (!p4d_present(*p4d)) { + pud = (pud_t *)alloc_pgt_page(NULL); + if (!pud) + return -ENOMEM; + set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); + } + pud = pud_offset(p4d, vaddr); + if (!pud_present(*pud)) { + pmd = (pmd_t *)alloc_pgt_page(NULL); + if (!pmd) + return -ENOMEM; + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + pmd = pmd_offset(pud, vaddr); + if (!pmd_present(*pmd)) { + pte = (pte_t *)alloc_pgt_page(NULL); + if (!pte) + return -ENOMEM; + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + pte = pte_offset_kernel(pmd, vaddr); + + paddr = __pa(vaddr); + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); + + return 0; +} + +static int __init acpi_mp_setup_reset(u64 reset_vector) +{ + struct x86_mapping_info info = { + .alloc_pgt_page = alloc_pgt_page, + .free_pgt_page = free_pgt_page, + .page_flag = __PAGE_KERNEL_LARGE_EXEC, + .kernpg_flag = _KERNPG_TABLE_NOENC, + }; + pgd_t *pgd; + + pgd = alloc_pgt_page(NULL); + if (!pgd) + return -ENOMEM; + + for (int i = 0; i < nr_pfn_mapped; i++) { + unsigned long mstart, mend; + + mstart = pfn_mapped[i].start << PAGE_SHIFT; + mend = pfn_mapped[i].end << PAGE_SHIFT; + if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + } + + if (kernel_ident_mapping_init(&info, pgd, + PAGE_ALIGN_DOWN(reset_vector), + PAGE_ALIGN(reset_vector + 1))) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + + if (init_transition_pgtable(pgd)) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + + smp_ops.play_dead = acpi_mp_play_dead; + smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu; + smp_ops.cpu_die = acpi_mp_cpu_die; + + acpi_mp_reset_vector_paddr = reset_vector; + acpi_mp_pgd = __pa(pgd); + + return 0; +} + +static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip) +{ + if (!acpi_mp_wake_mailbox_paddr) { + pr_warn_once("No MADT mailbox: cannot bring up secondary CPUs. Booting with kexec?\n"); + return -EOPNOTSUPP; + } + + /* + * Remap mailbox memory only for the first call to acpi_wakeup_cpu(). 
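+ * (The mapping is assumed to stay in place for the lifetime of the kernel; no unmap path is shown in this file.)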
+ * + * Wakeup of secondary CPUs is fully serialized in the core code. + * No need to protect acpi_mp_wake_mailbox from concurrent accesses. + */ + if (!acpi_mp_wake_mailbox) { + acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr, + sizeof(*acpi_mp_wake_mailbox), + MEMREMAP_WB); + } + + /* + * Mailbox memory is shared between the firmware and OS. Firmware will + * listen on the mailbox command address, and once it receives the wakeup + * command, the CPU associated with the given apicid will be booted. + * + * The value of 'apic_id' and 'wakeup_vector' must be visible to the + * firmware before the wakeup command is visible. smp_store_release() + * ensures ordering and visibility. + */ + acpi_mp_wake_mailbox->apic_id = apicid; + acpi_mp_wake_mailbox->wakeup_vector = start_ip; + smp_store_release(&acpi_mp_wake_mailbox->command, + ACPI_MP_WAKE_COMMAND_WAKEUP); + + /* + * Wait for the CPU to wake up. + * + * The CPU being woken up is essentially in a spin loop waiting to be + * woken up. It should not take long for it to wake up and acknowledge by + * zeroing out ->command. + * + * The ACPI specification doesn't provide any guidance on how long the + * kernel has to wait for a wake up acknowledgment. It also doesn't provide + * a way to cancel a wake up request if it takes too long. + * + * In a TDX environment, the VMM has control over how long it takes to + * wake up a secondary CPU. It can postpone scheduling the secondary vCPU + * indefinitely. Giving up on the wake up request and reporting an error opens + * a possible attack vector for the VMM: it can wake up a secondary CPU when + * the kernel doesn't expect it. Wait until a positive result of the wake up + * request. + */ + while (READ_ONCE(acpi_mp_wake_mailbox->command)) + cpu_relax(); + + return 0; +} + +static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake) +{ + cpu_hotplug_disable_offlining(); + + /* + * ACPI MADT doesn't allow offlining a CPU after it has been onlined. This + * limits kexec: the second kernel won't be able to use more than one CPU. + * + * To prevent a kexec kernel from onlining secondary CPUs, invalidate the + * mailbox address in the ACPI MADT wakeup structure, which prevents a + * kexec kernel from using it. + * + * This is safe as the booting kernel has the mailbox address cached + * already and acpi_wakeup_cpu() uses the cached value to bring up the + * secondary CPUs. + * + * Note: This is a Linux specific convention and not covered by the + * ACPI specification. + */ + mp_wake->mailbox_address = 0; +} + +int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_multiproc_wakeup *mp_wake; + + mp_wake = (struct acpi_madt_multiproc_wakeup *)header; + + /* + * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake + * entry. 'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger + * than the actual size of the MP wakeup entry in the ACPI table because the + * 'reset_vector' is only available in the V1 MP wakeup structure. 
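+ * + * As a sketch, assuming the ACPICA layout of struct acpi_madt_multiproc_wakeup: + * + * V0 entry: header, version, reserved, mailbox_address + * V1 entry: all of the above, plus reset_vector + * + * hence the explicit length checks below against both ACPI_MADT_MP_WAKEUP_SIZE_V0 and ACPI_MADT_MP_WAKEUP_SIZE_V1.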
+ */ + if (!mp_wake) + return -EINVAL; + if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0) + return -EINVAL; + if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0) + return -EINVAL; + + acpi_table_print_madt_entry(&header->common); + + acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address; + + if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 && + mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) { + if (acpi_mp_setup_reset(mp_wake->reset_vector)) { + pr_warn("Failed to setup MADT reset vector\n"); + acpi_mp_disable_offlining(mp_wake); + } + } else { + /* + * CPU offlining requires version 1 of the ACPI MADT wakeup + * structure. + */ + acpi_mp_disable_offlining(mp_wake); + } + + apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); + + return 0; +} diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 89de61243272..333b16181357 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -432,6 +432,11 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) return 5; } +static inline u8 * instr_va(struct alt_instr *i) +{ + return (u8 *)&i->instr_offset + i->instr_offset; +} + /* * Replace instructions with better alternatives for this CPU type. This runs * before SMP is initialized to avoid SMP problems with self modifying code. @@ -447,7 +452,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, { u8 insn_buff[MAX_PATCH_LEN]; u8 *instr, *replacement; - struct alt_instr *a; + struct alt_instr *a, *b; DPRINTK(ALT, "alt table %px, -> %px", start, end); @@ -473,7 +478,18 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, for (a = start; a < end; a++) { int insn_buff_sz = 0; - instr = (u8 *)&a->instr_offset + a->instr_offset; + /* + * In case of nested ALTERNATIVE()s the outer alternative might + * add more padding. To ensure consistent patching find the max + * padding for all alt_instr entries for this site (nested + * alternatives result in consecutive entries). + */ + for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) { + u8 len = max(a->instrlen, b->instrlen); + a->instrlen = b->instrlen = len; + } + + instr = instr_va(a); replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->instrlen > sizeof(insn_buff)); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); @@ -1641,7 +1657,7 @@ static noinline void __init alt_reloc_selftest(void) */ asm_inline volatile ( ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS) - : /* output */ + : ASM_CALL_CONSTRAINT : [mem] "m" (__alt_reloc_selftest_addr) : _ASM_ARG1 ); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 027a8c7a2c9e..059e5c16af05 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -180,6 +180,43 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev, return dev; } +/* + * SMN accesses may fail in ways that are difficult to detect here in the called + * functions amd_smn_read() and amd_smn_write(). Therefore, callers must do + * their own checking based on what behavior they expect. + * + * For SMN reads, the returned value may be zero if the register is Read-as-Zero. + * Or it may be a "PCI Error Response", e.g. all 0xFFs. The "PCI Error Response" + * can be checked here, and a proper error code can be returned. + * + * But the Read-as-Zero response cannot be verified here. 
A value of 0 may be + * correct in some cases, so callers must check that this is correct for the + * register/fields they need. + * + * For SMN writes, success can be determined through a "write and read back" + * check. However, this is not robust when done here. + * + * Possible issues: + * + * 1) Bits that are "Write-1-to-Clear". In this case, the read value should + * *not* match the write value. + * + * 2) Bits that are "Read-as-Zero"/"Writes-Ignored". This information cannot be + * known here. + * + * 3) Bits that are "Reserved / Set to 1". Ditto above. + * + * Callers of amd_smn_write() should do the "write and read back" check + * themselves, if needed. + * + * For #1, they can see if their target bits got cleared. + * + * For #2 and #3, they can check if their target bits got set as intended. + * + * This matches what is done for RDMSR/WRMSR. As long as there's no #GP, the + * operation is considered a success, and the caller does their own + * checking. + */ static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write) { struct pci_dev *root; @@ -202,9 +239,6 @@ static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write) err = (write ? pci_write_config_dword(root, 0x64, *value) : pci_read_config_dword(root, 0x64, value)); - if (err) - pr_warn("Error %s SMN address 0x%x.\n", - (write ? "writing to" : "reading from"), address); out_unlock: mutex_unlock(&smn_mutex); @@ -213,7 +247,7 @@ out: return err; } -int amd_smn_read(u16 node, u32 address, u32 *value) +int __must_check amd_smn_read(u16 node, u32 address, u32 *value) { int err = __amd_smn_rw(node, address, value, false); @@ -226,7 +260,7 @@ int amd_smn_read(u16 node, u32 address, u32 *value) } EXPORT_SYMBOL_GPL(amd_smn_read); -int amd_smn_write(u16 node, u32 address, u32 value) +int __must_check amd_smn_write(u16 node, u32 address, u32 value) { return __amd_smn_rw(node, address, &value, true); } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a02bba0ed6b9..5857a0f5d514 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o ifdef CONFIG_CPU_SUP_INTEL -obj-y += intel.o intel_pconfig.o tsx.o +obj-y += intel.o tsx.o obj-$(CONFIG_PM) += intel_epb.o endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 44df3f11e731..be5889bded49 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1220,14 +1220,3 @@ void amd_check_microcode(void) on_each_cpu(zenbleed_check_cpu, NULL, 1); } - -/* - * Issue a DIV 0/1 insn to clear any division data from previous DIV - * operations. 
- */ -void noinstr amd_clear_divider(void) -{ - asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) - :: "a" (0), "d" (0), "r" (1)); -} -EXPORT_SYMBOL_GPL(amd_clear_divider); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b6f927f6c567..45675da354f3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1625,6 +1625,7 @@ static bool __init spec_ctrl_bhi_dis(void) enum bhi_mitigations { BHI_MITIGATION_OFF, BHI_MITIGATION_ON, + BHI_MITIGATION_VMEXIT_ONLY, }; static enum bhi_mitigations bhi_mitigation __ro_after_init = @@ -1639,6 +1640,8 @@ static int __init spectre_bhi_parse_cmdline(char *str) bhi_mitigation = BHI_MITIGATION_OFF; else if (!strcmp(str, "on")) bhi_mitigation = BHI_MITIGATION_ON; + else if (!strcmp(str, "vmexit")) + bhi_mitigation = BHI_MITIGATION_VMEXIT_ONLY; else pr_err("Ignoring unknown spectre_bhi option (%s)", str); @@ -1659,19 +1662,22 @@ static void __init bhi_select_mitigation(void) return; } + /* Mitigate in hardware if supported */ if (spec_ctrl_bhi_dis()) return; if (!IS_ENABLED(CONFIG_X86_64)) return; - /* Mitigate KVM by default */ - setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); - pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) { + pr_info("Spectre BHI mitigation: SW BHB clearing on VM exit only\n"); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + return; + } - /* Mitigate syscalls when the mitigation is forced =on */ + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall and VM exit\n"); setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); - pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); } static void __init spectre_v2_select_mitigation(void) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fdf3489d92a4..08b95a35b5cb 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -72,19 +72,19 @@ static bool cpu_model_supports_sld __ro_after_init; */ static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) { - switch (c->x86_model) { - case INTEL_FAM6_CORE_YONAH: - case INTEL_FAM6_CORE2_MEROM: - case INTEL_FAM6_CORE2_MEROM_L: - case INTEL_FAM6_CORE2_PENRYN: - case INTEL_FAM6_CORE2_DUNNINGTON: - case INTEL_FAM6_NEHALEM: - case INTEL_FAM6_NEHALEM_G: - case INTEL_FAM6_NEHALEM_EP: - case INTEL_FAM6_NEHALEM_EX: - case INTEL_FAM6_WESTMERE: - case INTEL_FAM6_WESTMERE_EP: - case INTEL_FAM6_SANDYBRIDGE: + switch (c->x86_vfm) { + case INTEL_CORE_YONAH: + case INTEL_CORE2_MEROM: + case INTEL_CORE2_MEROM_L: + case INTEL_CORE2_PENRYN: + case INTEL_CORE2_DUNNINGTON: + case INTEL_NEHALEM: + case INTEL_NEHALEM_G: + case INTEL_NEHALEM_EP: + case INTEL_NEHALEM_EX: + case INTEL_WESTMERE: + case INTEL_WESTMERE_EP: + case INTEL_SANDYBRIDGE: setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); } } @@ -106,9 +106,9 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) */ if (c->x86 != 6) return; - switch (c->x86_model) { - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: + switch (c->x86_vfm) { + case INTEL_XEON_PHI_KNL: + case INTEL_XEON_PHI_KNM: break; default: return; @@ -134,32 +134,32 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) * - Release note from 20180108 microcode release */ struct sku_microcode { - u8 model; + u32 vfm; u8 stepping; u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 }, - { 
INTEL_FAM6_KABYLAKE, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE, 0x09, 0x80 }, - { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 }, - { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, - { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_BROADWELL, 0x04, 0x28 }, - { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b }, - { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 }, - { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 }, - { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, - { INTEL_FAM6_HASWELL_L, 0x01, 0x21 }, - { INTEL_FAM6_HASWELL_G, 0x01, 0x18 }, - { INTEL_FAM6_HASWELL, 0x03, 0x23 }, - { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, - { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, - { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + { INTEL_KABYLAKE, 0x0B, 0x80 }, + { INTEL_KABYLAKE, 0x0A, 0x80 }, + { INTEL_KABYLAKE, 0x09, 0x80 }, + { INTEL_KABYLAKE_L, 0x0A, 0x80 }, + { INTEL_KABYLAKE_L, 0x09, 0x80 }, + { INTEL_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_BROADWELL, 0x04, 0x28 }, + { INTEL_BROADWELL_G, 0x01, 0x1b }, + { INTEL_BROADWELL_D, 0x02, 0x14 }, + { INTEL_BROADWELL_D, 0x03, 0x07000011 }, + { INTEL_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_HASWELL_L, 0x01, 0x21 }, + { INTEL_HASWELL_G, 0x01, 0x18 }, + { INTEL_HASWELL, 0x03, 0x23 }, + { INTEL_HASWELL_X, 0x02, 0x3b }, + { INTEL_HASWELL_X, 0x04, 0x10 }, + { INTEL_IVYBRIDGE_X, 0x04, 0x42a }, /* Observed in the wild */ - { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, - { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, + { INTEL_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_SANDYBRIDGE_X, 0x07, 0x712 }, }; static bool bad_spectre_microcode(struct cpuinfo_x86 *c) @@ -173,11 +173,8 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return false; - if (c->x86 != 6) - return false; - for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { - if (c->x86_model == spectre_bad_microcodes[i].model && + if (c->x86_vfm == spectre_bad_microcodes[i].vfm && c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } @@ -190,83 +187,35 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) #define TME_ACTIVATE_LOCKED(x) (x & 0x1) #define TME_ACTIVATE_ENABLED(x) (x & 0x2) -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - #define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - static void detect_tme_early(struct cpuinfo_x86 *c) { - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; + u64 tme_activate; + int keyid_bits; rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. 
*/ - } - } else { - tme_activate_cpu0 = tme_activate; - } - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; clear_cpu_cap(c, X86_FEATURE_TME); return; } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: + pr_info_once("x86/tme: enabled by BIOS\n"); keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } + if (!keyid_bits) + return; /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + * KeyID bits are set by BIOS and can be present regardless + * of whether the kernel is using them. They effectively lower + * the number of physical address bits. + * + * Update cpuinfo_x86::x86_phys_bits accordingly. */ c->x86_phys_bits -= keyid_bits; + pr_info_once("x86/mktme: BIOS enabled: x86_phys_bits reduced by %d\n", + keyid_bits); } void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) @@ -320,7 +269,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && + if (c->x86_vfm == INTEL_ATOM_BONNELL && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -352,17 +301,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ - if (c->x86 == 6) { - switch (c->x86_model) { - case INTEL_FAM6_ATOM_SALTWELL_MID: - case INTEL_FAM6_ATOM_SALTWELL_TABLET: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT_NP: - set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); - break; - default: - break; - } + switch (c->x86_vfm) { + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT_NP: + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); + break; } /* @@ -401,7 +346,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE * to be modified. 
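+ * + * (INTEL_QUARK_X1000 in the replacement below is the vendor/family/model encoding of the same family 5, model 9 part that the old open-coded check matched.)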
*/ - if (c->x86 == 5 && c->x86_model == 9) { + if (c->x86_vfm == INTEL_QUARK_X1000) { pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } @@ -633,12 +578,13 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) && - (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) + if (boot_cpu_has(X86_FEATURE_CLFLUSH) && + (c->x86_vfm == INTEL_CORE2_DUNNINGTON || + c->x86_vfm == INTEL_NEHALEM_EX || + c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); - if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) && - ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT))) + if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) set_cpu_bug(c, X86_BUG_MONITOR); #ifdef CONFIG_X86_64 @@ -1254,9 +1200,9 @@ void handle_bus_lock(struct pt_regs *regs) * feature even though they do not enumerate IA32_CORE_CAPABILITIES. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_L, 0), + X86_MATCH_VFM(INTEL_ICELAKE_D, 0), {} }; diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c deleted file mode 100644 index 5be2b1790282..000000000000 --- a/arch/x86/kernel/cpu/intel_pconfig.c +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Intel PCONFIG instruction support. - * - * Copyright (C) 2017 Intel Corporation - * - * Author: - * Kirill A. Shutemov <kirill.shutemov@linux.intel.com> - */ -#include <linux/bug.h> -#include <linux/limits.h> - -#include <asm/cpufeature.h> -#include <asm/intel_pconfig.h> - -#define PCONFIG_CPUID 0x1b - -#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1) - -/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */ -enum { - PCONFIG_CPUID_SUBLEAF_INVALID = 0, - PCONFIG_CPUID_SUBLEAF_TARGETID = 1, -}; - -/* Bitmask of supported targets */ -static u64 targets_supported __read_mostly; - -int pconfig_target_supported(enum pconfig_target target) -{ - /* - * We would need to re-think the implementation once we get > 64 - * PCONFIG targets. Spec allows up to 2^32 targets. - */ - BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64); - - if (WARN_ON_ONCE(target >= 64)) - return 0; - return targets_supported & (1ULL << target); -} - -static int __init intel_pconfig_init(void) -{ - int subleaf; - - if (!boot_cpu_has(X86_FEATURE_PCONFIG)) - return 0; - - /* - * Scan subleafs of PCONFIG CPUID leaf. - * - * Subleafs of the same type need not to be consecutive. - * - * Stop on the first invalid subleaf type. All subleafs after the first - * invalid are invalid too. 
- */ - for (subleaf = 0; subleaf < INT_MAX; subleaf++) { - struct cpuid_regs regs; - - cpuid_count(PCONFIG_CPUID, subleaf, - ®s.eax, ®s.ebx, ®s.ecx, ®s.edx); - - switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) { - case PCONFIG_CPUID_SUBLEAF_INVALID: - /* Stop on the first invalid subleaf */ - goto out; - case PCONFIG_CPUID_SUBLEAF_TARGETID: - /* Mark supported PCONFIG targets */ - if (regs.ebx < 64) - targets_supported |= (1ULL << regs.ebx); - if (regs.ecx < 64) - targets_supported |= (1ULL << regs.ecx); - if (regs.edx < 64) - targets_supported |= (1ULL << regs.edx); - break; - default: - /* Unknown CPUID.PCONFIG subleaf: ignore */ - break; - } - } -out: - return 0; -} -arch_initcall(intel_pconfig_init); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ad0623b659ed..b85ec7a4ec9e 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -677,10 +677,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); * is already totally * confused. In this case it's likely it will * not fully execute the machine check handler either. */ -bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); - bool error_seen = false; struct mce m; int i; @@ -754,8 +753,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) continue; log_it: - error_seen = true; - if (flags & MCP_DONTLOG) goto clear_it; @@ -787,8 +784,6 @@ clear_it: */ sync_core(); - - return error_seen; } EXPORT_SYMBOL_GPL(machine_check_poll); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 94953d749475..49ed3428785d 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -487,12 +487,16 @@ static void prepare_msrs(void *info) wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr); } - wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc); wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd); + + if (m.misc) + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc); } else { wrmsrl(MSR_IA32_MCx_STATUS(b), m.status); wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr); - wrmsrl(MSR_IA32_MCx_MISC(b), m.misc); + + if (m.misc) + wrmsrl(MSR_IA32_MCx_MISC(b), m.misc); } } @@ -795,4 +799,5 @@ static void __exit inject_exit(void) module_init(inject_init); module_exit(inject_exit); +MODULE_DESCRIPTION("Machine check injection support"); MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 1db560ed2ca3..68f537347466 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -30,8 +30,7 @@ dump_array() # If the /* comment */ starts with a quote string, grab that. VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')" - [ -z "$VALUE" ] && VALUE="\"$NAME\"" - [ "$VALUE" = '""' ] && continue + [ ! 
"$VALUE" ] && continue # Name is uppercase, VALUE is all lowercase VALUE="$(echo "$VALUE" | tr A-Z a-z)" diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index a113d9aba553..1930fce9dfe9 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -19,7 +19,6 @@ #include <linux/cpu.h> #include <linux/slab.h> #include <linux/err.h> -#include <linux/cacheinfo.h> #include <linux/cpuhotplug.h> #include <asm/cpu_device_id.h> @@ -60,7 +59,8 @@ static void mba_wrmsr_intel(struct msr_param *m); static void cat_wrmsr(struct msr_param *m); static void mba_wrmsr_amd(struct msr_param *m); -#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains) +#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) +#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains) struct rdt_hw_resource rdt_resources_all[] = { [RDT_RESOURCE_L3] = @@ -68,8 +68,10 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L3, .name = "L3", - .cache_level = 3, - .domains = domain_init(RDT_RESOURCE_L3), + .ctrl_scope = RESCTRL_L3_CACHE, + .mon_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), + .mon_domains = mon_domain_init(RDT_RESOURCE_L3), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, @@ -82,8 +84,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L2, .name = "L2", - .cache_level = 2, - .domains = domain_init(RDT_RESOURCE_L2), + .ctrl_scope = RESCTRL_L2_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, @@ -96,8 +98,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_MBA, .name = "MB", - .cache_level = 3, - .domains = domain_init(RDT_RESOURCE_MBA), + .ctrl_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), .parse_ctrlval = parse_bw, .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, @@ -108,8 +110,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_SMBA, .name = "SMBA", - .cache_level = 3, - .domains = domain_init(RDT_RESOURCE_SMBA), + .ctrl_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), .parse_ctrlval = parse_bw, .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, @@ -306,8 +308,8 @@ static void rdt_get_cdp_l2_config(void) static void mba_wrmsr_amd(struct msr_param *m) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; for (i = m->low; i < m->high; i++) @@ -330,8 +332,8 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) static void mba_wrmsr_intel(struct msr_param *m) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; /* Write the delay values for mba. 
*/ @@ -341,23 +343,38 @@ static void mba_wrmsr_intel(struct msr_param *m) static void cat_wrmsr(struct msr_param *m) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; for (i = m->low; i < m->high; i++) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); } -struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) +struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) { - struct rdt_domain *d; + struct rdt_ctrl_domain *d; lockdep_assert_cpus_held(); - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->cpu_mask)) + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + +struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) +{ + struct rdt_mon_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->mon_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) return d; } @@ -379,24 +396,21 @@ void rdt_ctrl_update(void *arg) } /* - * rdt_find_domain - Find a domain in a resource that matches input resource id + * rdt_find_domain - Search for a domain id in a resource domain list. * - * Search resource r's domain list to find the resource id. If the resource - * id is found in a domain, return the domain. Otherwise, if requested by - * caller, return the first domain whose id is bigger than the input id. - * The domain list is sorted by id in ascending order. + * Search the domain list to find the domain id. If the domain id is + * found, return the domain. NULL otherwise. If the domain id is not + * found (and NULL returned) then the first domain with id bigger than + * the input id can be returned to the caller via @pos. */ -struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, - struct list_head **pos) +struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, + struct list_head **pos) { - struct rdt_domain *d; + struct rdt_domain_hdr *d; struct list_head *l; - if (id < 0) - return ERR_PTR(-ENODEV); - - list_for_each(l, &r->domains) { - d = list_entry(l, struct rdt_domain, list); + list_for_each(l, h) { + d = list_entry(l, struct rdt_domain_hdr, list); /* When id is found, return its domain. 
*/ if (id == d->id) return d; @@ -425,18 +439,23 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) *dc = r->default_ctrl; } -static void domain_free(struct rdt_hw_domain *hw_dom) +static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) +{ + kfree(hw_dom->ctrl_val); + kfree(hw_dom); +} + +static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) { kfree(hw_dom->arch_mbm_total); kfree(hw_dom->arch_mbm_local); - kfree(hw_dom->ctrl_val); kfree(hw_dom); } -static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) +static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct msr_param m; u32 *dc; @@ -461,7 +480,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) * @num_rmid: The size of the MBM counter array * @hw_dom: The domain that owns the allocated arrays */ -static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) +static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { size_t tsize; @@ -484,37 +503,45 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) return 0; } -/* - * domain_add_cpu - Add a cpu to a resource's domain list. - * - * If an existing domain in the resource r's domain list matches the cpu's - * resource id, add the cpu in the domain. - * - * Otherwise, a new domain is allocated and inserted into the right position - * in the domain list sorted by id in ascending order. - * - * The order in the domain list is visible to users when we print entries - * in the schemata file and schemata input is validated to have the same order - * as this list. 
- */ -static void domain_add_cpu(int cpu, struct rdt_resource *r) +static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) { - int id = get_cpu_cacheinfo_id(cpu, r->cache_level); + switch (scope) { + case RESCTRL_L2_CACHE: + case RESCTRL_L3_CACHE: + return get_cpu_cacheinfo_id(cpu, scope); + case RESCTRL_L3_NODE: + return cpu_to_node(cpu); + default: + break; + } + + return -EINVAL; +} + +static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->ctrl_scope); + struct rdt_hw_ctrl_domain *hw_dom; struct list_head *add_pos = NULL; - struct rdt_hw_domain *hw_dom; - struct rdt_domain *d; + struct rdt_domain_hdr *hdr; + struct rdt_ctrl_domain *d; int err; lockdep_assert_held(&domain_list_lock); - d = rdt_find_domain(r, id, &add_pos); - if (IS_ERR(d)) { - pr_warn("Couldn't find cache id for CPU %d\n", cpu); + if (id < 0) { + pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->ctrl_scope, r->name); return; } - if (d) { - cpumask_set_cpu(cpu, &d->cpu_mask); + hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos); + if (hdr) { + if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + return; + d = container_of(hdr, struct rdt_ctrl_domain, hdr); + + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); if (r->cache.arch_has_per_cpu_cfg) rdt_domain_reconfigure_cdp(r); return; @@ -525,64 +552,187 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; d = &hw_dom->d_resctrl; - d->id = id; - cpumask_set_cpu(cpu, &d->cpu_mask); + d->hdr.id = id; + d->hdr.type = RESCTRL_CTRL_DOMAIN; + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); rdt_domain_reconfigure_cdp(r); - if (r->alloc_capable && domain_setup_ctrlval(r, d)) { - domain_free(hw_dom); + if (domain_setup_ctrlval(r, d)) { + ctrl_domain_free(hw_dom); return; } - if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { - domain_free(hw_dom); + list_add_tail_rcu(&d->hdr.list, add_pos); + + err = resctrl_online_ctrl_domain(r, d); + if (err) { + list_del_rcu(&d->hdr.list); + synchronize_rcu(); + ctrl_domain_free(hw_dom); + } +} + +static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->mon_scope); + struct list_head *add_pos = NULL; + struct rdt_hw_mon_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_mon_domain *d; + int err; + + lockdep_assert_held(&domain_list_lock); + + if (id < 0) { + pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->mon_scope, r->name); return; } - list_add_tail_rcu(&d->list, add_pos); + hdr = rdt_find_domain(&r->mon_domains, id, &add_pos); + if (hdr) { + if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) + return; + d = container_of(hdr, struct rdt_mon_domain, hdr); + + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + return; + } - err = resctrl_online_domain(r, d); + hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); + if (!hw_dom) + return; + + d = &hw_dom->d_resctrl; + d->hdr.id = id; + d->hdr.type = RESCTRL_MON_DOMAIN; + d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!d->ci) { + pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); + mon_domain_free(hw_dom); + return; + } + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + + arch_mon_domain_online(r, d); + + if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { + mon_domain_free(hw_dom); + return; + } + + list_add_tail_rcu(&d->hdr.list, add_pos); + + err = resctrl_online_mon_domain(r, d); if (err) { - list_del_rcu(&d->list); + 
list_del_rcu(&d->hdr.list); synchronize_rcu(); - domain_free(hw_dom); + mon_domain_free(hw_dom); } } -static void domain_remove_cpu(int cpu, struct rdt_resource *r) +static void domain_add_cpu(int cpu, struct rdt_resource *r) +{ + if (r->alloc_capable) + domain_add_cpu_ctrl(cpu, r); + if (r->mon_capable) + domain_add_cpu_mon(cpu, r); +} + +static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) { - int id = get_cpu_cacheinfo_id(cpu, r->cache_level); - struct rdt_hw_domain *hw_dom; - struct rdt_domain *d; + int id = get_domain_id_from_scope(cpu, r->ctrl_scope); + struct rdt_hw_ctrl_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_ctrl_domain *d; lockdep_assert_held(&domain_list_lock); - d = rdt_find_domain(r, id, NULL); - if (IS_ERR_OR_NULL(d)) { - pr_warn("Couldn't find cache id for CPU %d\n", cpu); + if (id < 0) { + pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->ctrl_scope, r->name); + return; + } + + hdr = rdt_find_domain(&r->ctrl_domains, id, NULL); + if (!hdr) { + pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n", + id, cpu, r->name); return; } - hw_dom = resctrl_to_arch_dom(d); - cpumask_clear_cpu(cpu, &d->cpu_mask); - if (cpumask_empty(&d->cpu_mask)) { - resctrl_offline_domain(r, d); - list_del_rcu(&d->list); + if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + return; + + d = container_of(hdr, struct rdt_ctrl_domain, hdr); + hw_dom = resctrl_to_arch_ctrl_dom(d); + + cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); + if (cpumask_empty(&d->hdr.cpu_mask)) { + resctrl_offline_ctrl_domain(r, d); + list_del_rcu(&d->hdr.list); synchronize_rcu(); /* - * rdt_domain "d" is going to be freed below, so clear + * rdt_ctrl_domain "d" is going to be freed below, so clear * its pointer from pseudo_lock_region struct. 
*/ if (d->plr) d->plr->d = NULL; - domain_free(hw_dom); + ctrl_domain_free(hw_dom); return; } } +static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->mon_scope); + struct rdt_hw_mon_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_mon_domain *d; + + lockdep_assert_held(&domain_list_lock); + + if (id < 0) { + pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->mon_scope, r->name); + return; + } + + hdr = rdt_find_domain(&r->mon_domains, id, NULL); + if (!hdr) { + pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n", + id, cpu, r->name); + return; + } + + if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) + return; + + d = container_of(hdr, struct rdt_mon_domain, hdr); + hw_dom = resctrl_to_arch_mon_dom(d); + + cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); + if (cpumask_empty(&d->hdr.cpu_mask)) { + resctrl_offline_mon_domain(r, d); + list_del_rcu(&d->hdr.list); + synchronize_rcu(); + mon_domain_free(hw_dom); + + return; + } +} + +static void domain_remove_cpu(int cpu, struct rdt_resource *r) +{ + if (r->alloc_capable) + domain_remove_cpu_ctrl(cpu, r); + if (r->mon_capable) + domain_remove_cpu_mon(cpu, r); +} + static void clear_closid_rmid(int cpu) { struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b7291f60399c..50fa1fe9a073 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -60,7 +60,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) } int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; @@ -69,7 +69,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); return -EINVAL; } @@ -139,7 +139,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * resource type. 
*/ int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; @@ -148,7 +148,7 @@ int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); return -EINVAL; } @@ -208,8 +208,8 @@ static int parse_line(char *line, struct resctrl_schema *s, struct resctrl_staged_config *cfg; struct rdt_resource *r = s->res; struct rdt_parse_data data; + struct rdt_ctrl_domain *d; char *dom = NULL, *id; - struct rdt_domain *d; unsigned long dom_id; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -231,8 +231,8 @@ next: return -EINVAL; } dom = strim(dom); - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + if (d->hdr.id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; if (r->parse_ctrlval(&data, s, d)) @@ -272,15 +272,15 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type) } } -int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); u32 idx = get_config_index(closid, t); struct msr_param msr_param; - if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) return -EINVAL; hw_dom->ctrl_val[idx] = cfg_val; @@ -297,17 +297,17 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; - struct rdt_hw_domain *hw_dom; + struct rdt_hw_ctrl_domain *hw_dom; struct msr_param msr_param; + struct rdt_ctrl_domain *d; enum resctrl_conf_type t; - struct rdt_domain *d; u32 idx; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - list_for_each_entry(d, &r->domains, list) { - hw_dom = resctrl_to_arch_dom(d); + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + hw_dom = resctrl_to_arch_ctrl_dom(d); msr_param.res = NULL; for (t = 0; t < CDP_NUM_TYPES; t++) { cfg = &hw_dom->d_resctrl.staged_config[t]; @@ -330,7 +330,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) } } if (msr_param.res) - smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1); + smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); } return 0; @@ -430,10 +430,10 @@ out: return ret ?: nbytes; } -u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); u32 idx = get_config_index(closid, type); return hw_dom->ctrl_val[idx]; @@ -442,7 +442,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) { struct rdt_resource *r = schema->res; - struct rdt_domain *dom; + struct 
rdt_ctrl_domain *dom;
bool sep = false;
u32 ctrl_val;
@@ -450,7 +450,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
lockdep_assert_cpus_held();
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_puts(s, ";");
@@ -460,7 +460,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
ctrl_val = resctrl_arch_get_config(r, dom, closid,
schema->conf_type);
- seq_printf(s, r->format_str, dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->hdr.id, max_data_width,
ctrl_val);
sep = true;
}
@@ -489,7 +489,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
} else {
seq_printf(s, "%s:%d=%x\n",
rdtgrp->plr->s->res->name,
- rdtgrp->plr->d->id,
+ rdtgrp->plr->d->hdr.id,
rdtgrp->plr->cbm);
}
} else {
@@ -514,8 +514,8 @@ static int smp_mon_event_count(void *arg)
}
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_domain *d, struct rdtgroup *rdtgrp,
- int evtid, int first)
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ cpumask_t *cpumask, int evtid, int first)
{
int cpu;
@@ -529,7 +529,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
rr->evtid = evtid;
rr->r = r;
rr->d = d;
- rr->val = 0;
rr->first = first;
rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
if (IS_ERR(rr->arch_mon_ctx)) {
@@ -537,7 +536,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
return;
}
- cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
+ cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
/*
* cpumask_any_housekeeping() prefers housekeeping CPUs, but
* are all the CPUs nohz_full? If yes, pick a CPU to IPI.
* MPAM's resctrl_arch_rmid_read() is unable to read the
* counters on some platforms if it's called in IRQ context.
*/
if (tick_nohz_full_cpu(cpu))
- smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+ smp_call_function_any(cpumask, mon_event_count, rr, 1);
else
smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
@@ -556,12 +555,13 @@
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
+ struct rdt_domain_hdr *hdr;
+ struct rmid_read rr = {0};
+ struct rdt_mon_domain *d;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
union mon_data_bits md;
- struct rdt_domain *d;
- struct rmid_read rr;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
@@ -574,15 +574,40 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
resid = md.u.rid;
domid = md.u.domid;
evtid = md.u.evtid;
r = &rdt_resources_all[resid].r_resctrl;
- d = rdt_find_domain(r, domid, NULL);
- if (IS_ERR_OR_NULL(d)) {
+
+ if (md.u.sum) {
+ /*
+ * This file requires summing across all domains that share
+ * the L3 cache id that was provided in the "domid" field of the
+ * mon_data_bits union. Search all domains in the resource for
+ * one that matches this cache id.
+ */
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->ci->id == domid) {
+ rr.ci = d->ci;
+ mon_event_read(&rr, r, NULL, rdtgrp,
+ &d->ci->shared_cpu_map, evtid, false);
+ goto checkresult;
+ }
+ }
ret = -ENOENT;
goto out;
+ } else {
+ /*
+ * This file provides data from a single domain. Search
+ * the resource to find the domain with "domid".
+ */
+ hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
+ if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
+ ret = -ENOENT;
+ goto out;
+ }
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
}
- mon_event_read(&rr, r, d, rdtgrp, evtid, false);
+checkresult:
if (rr.err == -EIO)
seq_puts(m, "Error\n");
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index f1d926832ec8..955999aecfca 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -127,29 +127,54 @@ struct mon_evt {
};
/**
- * union mon_data_bits - Monitoring details for each event file
+ * union mon_data_bits - Monitoring details for each event file.
* @priv: Used to store monitoring event data in @u
- * as kernfs private data
- * @rid: Resource id associated with the event file
- * @evtid: Event id associated with the event file
- * @domid: The domain to which the event file belongs
- * @u: Name of the bit fields struct
+ * as kernfs private data.
+ * @u.rid: Resource id associated with the event file.
+ * @u.evtid: Event id associated with the event file.
+ * @u.sum: Set when event must be summed across multiple
+ * domains.
+ * @u.domid: When @u.sum is zero this is the domain to which
+ * the event file belongs. When @u.sum is one this
+ * is the id of the L3 cache that all domains to be
+ * summed share.
+ * @u: Name of the bit fields struct.
*/
union mon_data_bits {
void *priv;
struct {
unsigned int rid : 10;
- enum resctrl_event_id evtid : 8;
+ enum resctrl_event_id evtid : 7;
+ unsigned int sum : 1;
unsigned int domid : 14;
} u;
};
+/**
+ * struct rmid_read - Data passed across smp_call*() to read event count.
+ * @rgrp: Resource group for which the counter is being read. If it is a parent
+ * resource group then its event count is summed with the count from all
+ * its child resource groups.
+ * @r: Resource describing the properties of the event being read.
+ * @d: Domain that the counter should be read from. If NULL then sum all
+ * domains in @r sharing L3 @ci.id.
+ * @evtid: Which monitor event to read.
+ * @first: Initialize MBM counter when true.
+ * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @err: Error encountered when reading counter.
+ * @val: Returned value of event counter. If @rgrp is a parent resource group,
+ * @val includes the sum of event counts from its child resource groups.
+ * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id
+ * (summed across child resource groups if @rgrp is a parent resource group).
+ * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
+ */ struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; - struct rdt_domain *d; + struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; + struct cacheinfo *ci; int err; u64 val; void *arch_mon_ctx; @@ -232,7 +257,7 @@ struct mongroup { */ struct pseudo_lock_region { struct resctrl_schema *s; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; u32 cbm; wait_queue_head_t lock_thread_wq; int thread_done; @@ -355,25 +380,41 @@ struct arch_mbm_state { }; /** - * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share - * a resource + * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share + * a resource for a control function * @d_resctrl: Properties exposed to the resctrl file system * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) + * + * Members of this structure are accessed via helpers that provide abstraction. + */ +struct rdt_hw_ctrl_domain { + struct rdt_ctrl_domain d_resctrl; + u32 *ctrl_val; +}; + +/** + * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share + * a resource for a monitor function + * @d_resctrl: Properties exposed to the resctrl file system * @arch_mbm_total: arch private state for MBM total bandwidth * @arch_mbm_local: arch private state for MBM local bandwidth * * Members of this structure are accessed via helpers that provide abstraction. */ -struct rdt_hw_domain { - struct rdt_domain d_resctrl; - u32 *ctrl_val; +struct rdt_hw_mon_domain { + struct rdt_mon_domain d_resctrl; struct arch_mbm_state *arch_mbm_total; struct arch_mbm_state *arch_mbm_local; }; -static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) +static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r) { - return container_of(r, struct rdt_hw_domain, d_resctrl); + return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl); +} + +static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r) +{ + return container_of(r, struct rdt_hw_mon_domain, d_resctrl); } /** @@ -385,7 +426,7 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) */ struct msr_param { struct rdt_resource *res; - struct rdt_domain *dom; + struct rdt_ctrl_domain *dom; u32 low; u32 high; }; @@ -458,9 +499,9 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r } int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d); + struct rdt_ctrl_domain *d); int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d); + struct rdt_ctrl_domain *d); extern struct mutex rdtgroup_mutex; @@ -493,6 +534,8 @@ static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); + /* * To return the common struct rdt_resource, which is contained in struct * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. 
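Editor's sketch, not part of the patch: the hunks above split struct rdt_hw_domain into control and monitor variants, and rdt_find_domain() (re-declared below) now takes a plain list head and returns the embedded, type-tagged struct rdt_domain_hdr. Callers are expected to check the tag and recover the outer structure with container_of(), as domain_add_cpu_ctrl() and domain_remove_cpu_mon() do earlier in this patch. A minimal illustration of that pattern, with a hypothetical helper name:

static struct rdt_ctrl_domain *ctrl_domain_from_id(struct rdt_resource *r, int id)
{
	/* rdt_find_domain() walks the list and returns the matching header. */
	struct rdt_domain_hdr *hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);

	/* The type tag guards against mixing control and monitor domains. */
	if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
		return NULL;

	return container_of(hdr, struct rdt_ctrl_domain, hdr);
}

The same pattern with RESCTRL_MON_DOMAIN and struct rdt_mon_domain covers the monitor domain lists.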
@@ -558,27 +601,28 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn); int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, umode_t mask); -struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, - struct list_head **pos); +struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, + struct list_head **pos); ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, bool exclusive); -unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, +unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d, unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); -struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); +struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r); +struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r); int closids_supported(void); void closid_free(int closid); int alloc_rmid(u32 closid); @@ -589,19 +633,19 @@ bool __init rdt_cpu_has(int flag); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, - int evtid, int first); -void mbm_setup_overflow_handler(struct rdt_domain *dom, + struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + cpumask_t *cpumask, int evtid, int first); +void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); void mbm_handle_overflow(struct work_struct *work); void __init intel_rdt_mbm_apply_quirk(void); bool is_mba_sc(struct rdt_resource *r); -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); void cqm_handle_limbo(struct work_struct *work); -bool has_busy_rmid(struct rdt_domain *d); -void __check_limbo(struct rdt_domain *d, bool force_free); +bool has_busy_rmid(struct rdt_mon_domain *d); +void __check_limbo(struct rdt_mon_domain *d, bool force_free); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); void __init thread_throttle_mode_init(void); void __init mbm_config_rftype_init(const char *config); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 366f496ca3ce..851b561850e0 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -15,6 +15,8 @@ * Software 
Developer Manual June 2016, volume 3, section 17.17. */ +#define pr_fmt(fmt) "resctrl: " fmt + #include <linux/cpu.h> #include <linux/module.h> #include <linux/sizes.h> @@ -97,6 +99,8 @@ unsigned int resctrl_rmid_realloc_limit; #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) +static int snc_nodes_per_l3_cache = 1; + /* * The correction factor table is documented in Documentation/arch/x86/resctrl.rst. * If rmid > rmid threshold, MBM total and local values should be multiplied @@ -185,7 +189,43 @@ static inline struct rmid_entry *__rmid_entry(u32 idx) return entry; } -static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +/* + * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by + * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is + * needed. The physical RMID is the same as the logical RMID. + * + * On a platform with SNC mode enabled, Linux enables RMID sharing mode + * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel + * Resource Director Technology Architecture Specification" for a full + * description of RMID sharing mode). + * + * In RMID sharing mode there are fewer "logical RMID" values available + * to accumulate data ("physical RMIDs" are divided evenly between SNC + * nodes that share an L3 cache). Linux creates an rdt_mon_domain for + * each SNC node. + * + * The value loaded into IA32_PQR_ASSOC is the "logical RMID". + * + * Data is collected independently on each SNC node and can be retrieved + * using the "physical RMID" value computed by this function and loaded + * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node. + * + * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3 + * cache. So a "physical RMID" may be read from any CPU that shares + * the L3 cache with the desired SNC node, not just from a CPU in + * the specific SNC node. + */ +static int logical_rmid_to_physical_rmid(int cpu, int lrmid) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + + if (snc_nodes_per_l3_cache == 1) + return lrmid; + + return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid; +} + +static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) { u64 msr_val; @@ -197,7 +237,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) * are error bits. 
*/ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid); rdmsrl(MSR_IA32_QM_CTR, msr_val); if (msr_val & RMID_VAL_ERROR) @@ -209,7 +249,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) return 0; } -static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) { @@ -228,19 +268,22 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, return NULL; } -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; + u32 prmid; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { memset(am, 0, sizeof(*am)); + prmid = logical_rmid_to_physical_rmid(cpu, rmid); /* Record any initial, non-zero count value. */ - __rmid_read(rmid, eventid, &am->prev_msr); + __rmid_read_phys(prmid, eventid, &am->prev_msr); } } @@ -248,9 +291,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, * Assumes that hardware counters are also reset and thus that there is * no need to record initial non-zero counts. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); if (is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, @@ -269,22 +312,22 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >> shift; } -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; u64 msr_val, chunks; + u32 prmid; int ret; resctrl_arch_rmid_read_context_check(); - if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) - return -EINVAL; - - ret = __rmid_read(rmid, eventid, &msr_val); + prmid = logical_rmid_to_physical_rmid(cpu, rmid); + ret = __rmid_read_phys(prmid, eventid, &msr_val); if (ret) return ret; @@ -320,7 +363,7 @@ static void limbo_release_entry(struct rmid_entry *entry) * decrement the count. If the busy count gets to zero on an RMID, we * free the RMID */ -void __check_limbo(struct rdt_domain *d, bool force_free) +void __check_limbo(struct rdt_mon_domain *d, bool force_free) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -364,7 +407,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free) * CLOSID and RMID because there may be dependencies between them * on some architectures. 
*/
- trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->id, val);
+ trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
}
if (force_free || !rmid_dirty) {
@@ -378,7 +421,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}
-bool has_busy_rmid(struct rdt_domain *d)
+bool has_busy_rmid(struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
@@ -479,7 +522,7 @@ int alloc_rmid(u32 closid)
static void add_rmid_to_limbo(struct rmid_entry *entry)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
u32 idx;
lockdep_assert_held(&rdtgroup_mutex);
@@ -490,7 +533,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
entry->busy = 0;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
/*
* For the first limbo RMID in the domain,
* set up the limbo worker.
@@ -532,7 +575,7 @@ void free_rmid(u32 closid, u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
-static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
+static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
u32 rmid, enum resctrl_event_id evtid)
{
u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
@@ -549,7 +592,10 @@ static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
+ int cpu = smp_processor_id();
+ struct rdt_mon_domain *d;
struct mbm_state *m;
+ int err, ret;
u64 tval = 0;
if (rr->first) {
@@ -560,14 +606,47 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
return 0;
}
- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
- &tval, rr->arch_mon_ctx);
- if (rr->err)
- return rr->err;
+ if (rr->d) {
+ /* Reading a single domain, must be on a CPU in that domain. */
+ if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
+ return -EINVAL;
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (rr->err)
+ return rr->err;
- rr->val += tval;
+ rr->val += tval;
- return 0;
+ return 0;
+ }
+
+ /* Summing domains that share a cache, must be on a CPU for that cache. */
+ if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
+ return -EINVAL;
+
+ /*
+ * Legacy files must report the sum of an event across all
+ * domains that share the same L3 cache instance.
+ * Report success if a read from any domain succeeds, -EINVAL
+ * (translated to "Unavailable" for user space) if reading from
+ * all domains fails for any reason.
+ */
+ ret = -EINVAL;
+ list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
+ if (d->ci->id != rr->ci->id)
+ continue;
+ err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (!err) {
+ rr->val += tval;
+ ret = 0;
+ }
+ }
+
+ if (ret)
+ rr->err = ret;
+
+ return ret;
}
/*
@@ -668,12 +747,12 @@ void mon_event_count(void *info)
* throttle MSRs already have low percentage values. To avoid
* unnecessarily restricting such rdtgroups, we also increase the bandwidth.
*/ -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; + struct rdt_ctrl_domain *dom_mba; struct rdt_resource *r_mba; - struct rdt_domain *dom_mba; u32 cur_bw, user_bw, idx; struct list_head *head; struct rdtgroup *entry; @@ -688,7 +767,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) idx = resctrl_arch_rmid_idx_encode(closid, rmid); pmbm_data = &dom_mbm->mbm_local[idx]; - dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba); + dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba); if (!dom_mba) { pr_warn_once("Failure to get domain for MBA update\n"); return; @@ -734,12 +813,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); } -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, +static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid) { - struct rmid_read rr; + struct rmid_read rr = {0}; - rr.first = false; rr.r = r; rr.d = d; @@ -792,17 +870,17 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, void cqm_handle_limbo(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); - struct rdt_domain *d; + struct rdt_mon_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - d = container_of(work, struct rdt_domain, cqm_limbo.work); + d = container_of(work, struct rdt_mon_domain, cqm_limbo.work); __check_limbo(d, false); if (has_busy_rmid(d)) { - d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, + d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU); schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo, delay); @@ -820,13 +898,13 @@ void cqm_handle_limbo(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. */ -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); int cpu; - cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); + cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu); dom->cqm_work_cpu = cpu; if (cpu < nr_cpu_ids) @@ -837,9 +915,9 @@ void mbm_handle_overflow(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); struct rdtgroup *prgrp, *crgrp; + struct rdt_mon_domain *d; struct list_head *head; struct rdt_resource *r; - struct rdt_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); @@ -852,7 +930,7 @@ void mbm_handle_overflow(struct work_struct *work) goto out_unlock; r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - d = container_of(work, struct rdt_domain, mbm_over.work); + d = container_of(work, struct rdt_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { mbm_update(r, d, prgrp->closid, prgrp->mon.rmid); @@ -869,7 +947,7 @@ void mbm_handle_overflow(struct work_struct *work) * Re-check for housekeeping CPUs. This allows the overflow handler to * move off a nohz_full CPU quickly. 
*/
- d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
RESCTRL_PICK_ANY_CPU);
schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
@@ -886,7 +964,7 @@ out_unlock:
* @exclude_cpu: Which CPU the handler should not run on,
* RESCTRL_PICK_ANY_CPU to pick any CPU.
*/
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
@@ -898,7 +976,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
*/
if (!resctrl_mounted || !resctrl_arch_mon_capable())
return;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
dom->mbm_work_cpu = cpu;
if (cpu < nr_cpu_ids)
@@ -1015,6 +1093,88 @@ static void l3_mon_evt_init(struct rdt_resource *r)
list_add_tail(&mbm_local_event.list, &r->evt_list);
}
+/*
+ * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1
+ * which indicates that RMIDs are configured in legacy mode.
+ * This mode is incompatible with Linux resctrl semantics
+ * as RMIDs are partitioned between SNC nodes, which requires
+ * a user to know which RMID is allocated to a task.
+ * Clearing bit 0 reconfigures the RMID counters for use
+ * in RMID sharing mode. This mode is better for Linux.
+ * The RMID space is divided between all SNC nodes with the
+ * RMIDs renumbered to start from zero in each node when
+ * counting operations from tasks. Code to read the counters
+ * must adjust RMID counter numbers based on SNC node. See
+ * logical_rmid_to_physical_rmid() for code that does this.
+ */
+void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ if (snc_nodes_per_l3_cache > 1)
+ msr_clear_bit(MSR_RMID_SNC_CONFIG, 0);
+}
+
+/* CPU models that support MSR_RMID_SNC_CONFIG */
+static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
+ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
+ {}
+};
+
+/*
+ * There isn't a simple hardware bit that indicates whether a CPU is running
+ * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
+ * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
+ * the same NUMA node as CPU0.
+ * It is not possible to accurately determine SNC state if the system is
+ * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
+ * to L3 caches. It will be OK if the system is booted with hyperthreading
+ * disabled (since this doesn't affect the ratio).
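+ * As a worked example (figures assumed for illustration, not from this
+ * patch): if the 112 CPUs of a package all share one L3 cache and SNC
+ * splits the package into two NUMA nodes of 56 CPUs each, then
+ * cpus_per_l3 = 112 and cpus_per_node = 56, so the ratio computed
+ * below is 112 / 56 = 2 SNC nodes per L3 cache.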
+ */ +static __init int snc_get_config(void) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); + const cpumask_t *node0_cpumask; + int cpus_per_node, cpus_per_l3; + int ret; + + if (!x86_match_cpu(snc_cpu_ids) || !ci) + return 1; + + cpus_read_lock(); + if (num_online_cpus() != num_present_cpus()) + pr_warn("Some CPUs offline, SNC detection may be incorrect\n"); + cpus_read_unlock(); + + node0_cpumask = cpumask_of_node(cpu_to_node(0)); + + cpus_per_node = cpumask_weight(node0_cpumask); + cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map); + + if (!cpus_per_node || !cpus_per_l3) + return 1; + + ret = cpus_per_l3 / cpus_per_node; + + /* sanity check: Only valid results are 1, 2, 3, 4 */ + switch (ret) { + case 1: + break; + case 2 ... 4: + pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret); + rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE; + break; + default: + pr_warn("Ignore improbable SNC node count %d\n", ret); + ret = 1; + break; + } + + return ret; +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; @@ -1022,9 +1182,11 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) unsigned int threshold; int ret; + snc_nodes_per_l3_cache = snc_get_config(); + resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; - hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; - r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; + hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache; + r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index aacf236dfe3b..e69489d48625 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -11,7 +11,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/debugfs.h> @@ -221,7 +220,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) int cpu; int ret; - for_each_cpu(cpu, &plr->d->cpu_mask) { + for_each_cpu(cpu, &plr->d->hdr.cpu_mask) { pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL); if (!pm_req) { rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n"); @@ -292,12 +291,15 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) */ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) { - struct cpu_cacheinfo *ci; + enum resctrl_scope scope = plr->s->res->ctrl_scope; + struct cacheinfo *ci; int ret; - int i; + + if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE)) + return -ENODEV; /* Pick the first cpu we find that is associated with the cache. 
*/ - plr->cpu = cpumask_first(&plr->d->cpu_mask); + plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask); if (!cpu_online(plr->cpu)) { rdt_last_cmd_printf("CPU %u associated with cache not online\n", @@ -306,15 +308,11 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) goto out_region; } - ci = get_cpu_cacheinfo(plr->cpu); - - plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); - - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == plr->s->res->cache_level) { - plr->line_size = ci->info_list[i].coherency_line_size; - return 0; - } + ci = get_cpu_cacheinfo_level(plr->cpu, scope); + if (ci) { + plr->line_size = ci->coherency_line_size; + plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); + return 0; } ret = -1; @@ -810,7 +808,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm) { unsigned int cbm_len; unsigned long cbm_b; @@ -837,11 +835,11 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm * if it is not possible to test due to memory allocation issue, * false otherwise. */ -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) { + struct rdt_ctrl_domain *d_i; cpumask_var_t cpu_with_psl; struct rdt_resource *r; - struct rdt_domain *d_i; bool ret = false; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -855,10 +853,10 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * associated with them. */ for_each_alloc_capable_rdt_resource(r) { - list_for_each_entry(d_i, &r->domains, list) { + list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) { if (d_i->plr) cpumask_or(cpu_with_psl, cpu_with_psl, - &d_i->cpu_mask); + &d_i->hdr.cpu_mask); } } @@ -866,7 +864,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * Next test if new pseudo-locked region would intersect with * existing region. */ - if (cpumask_intersects(&d->cpu_mask, cpu_with_psl)) + if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl)) ret = true; free_cpumask_var(cpu_with_psl); @@ -1198,7 +1196,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) } plr->thread_done = 0; - cpu = cpumask_first(&plr->d->cpu_mask); + cpu = cpumask_first(&plr->d->hdr.cpu_mask); if (!cpu_online(cpu)) { ret = -ENODEV; goto out; @@ -1528,7 +1526,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * may be scheduled elsewhere and invalidate entries in the * pseudo-locked region. */ - if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { + if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 02f213f1c51c..d7163b764c62 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -12,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/debugfs.h> #include <linux/fs.h> @@ -92,13 +91,13 @@ void rdt_last_cmd_printf(const char *fmt, ...) 
void rdt_staged_configs_clear(void) { + struct rdt_ctrl_domain *dom; struct rdt_resource *r; - struct rdt_domain *dom; lockdep_assert_held(&rdtgroup_mutex); for_each_alloc_capable_rdt_resource(r) { - list_for_each_entry(dom, &r->domains, list) + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) memset(dom->staged_config, 0, sizeof(dom->staged_config)); } } @@ -317,7 +316,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, rdt_last_cmd_puts("Cache domain offline\n"); ret = -ENODEV; } else { - mask = &rdtgrp->plr->d->cpu_mask; + mask = &rdtgrp->plr->d->hdr.cpu_mask; seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", cpumask_pr_args(mask)); @@ -1012,7 +1011,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, unsigned long sw_shareable = 0, hw_shareable = 0; unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_resource *r = s->res; - struct rdt_domain *dom; + struct rdt_ctrl_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; bool sep = false; @@ -1021,12 +1020,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, cpus_read_lock(); mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { if (sep) seq_putc(seq, ';'); sw_shareable = 0; exclusive = 0; - seq_printf(seq, "%d=", dom->id); + seq_printf(seq, "%d=", dom->hdr.id); for (i = 0; i < closids_supported(); i++) { if (!closid_allocated(i)) continue; @@ -1243,7 +1242,7 @@ static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, * * Return: false if CBM does not overlap, true if it does. */ -static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, enum resctrl_conf_type type, bool exclusive) { @@ -1298,7 +1297,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d * * Return: true if CBM overlap detected, false if there is no overlap */ -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, bool exclusive) { enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); @@ -1329,10 +1328,10 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; + struct rdt_ctrl_domain *d; struct resctrl_schema *s; struct rdt_resource *r; bool has_cache = false; - struct rdt_domain *d; u32 ctrl; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -1343,7 +1342,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) continue; has_cache = true; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { ctrl = resctrl_arch_get_config(r, d, closid, s->conf_type); if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { @@ -1448,20 +1447,19 @@ out: * bitmap functions work correctly. 
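* For example (cache size assumed for illustration): with a 24 MB cache
* and a 12-bit CBM each bit represents 2 MB, so a @cbm with four bits
* set translates to 24 MB / 12 * 4 = 8 MB.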
*/ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, unsigned long cbm) + struct rdt_ctrl_domain *d, unsigned long cbm) { - struct cpu_cacheinfo *ci; unsigned int size = 0; - int num_b, i; + struct cacheinfo *ci; + int num_b; + + if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE)) + return size; num_b = bitmap_weight(&cbm, r->cache.cbm_len); - ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == r->cache_level) { - size = ci->info_list[i].size / r->cache.cbm_len * num_b; - break; - } - } + ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope); + if (ci) + size = ci->size / r->cache.cbm_len * num_b; return size; } @@ -1477,9 +1475,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, { struct resctrl_schema *schema; enum resctrl_conf_type type; + struct rdt_ctrl_domain *d; struct rdtgroup *rdtgrp; struct rdt_resource *r; - struct rdt_domain *d; unsigned int size; int ret = 0; u32 closid; @@ -1503,7 +1501,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, rdtgrp->plr->d, rdtgrp->plr->cbm); - seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); + seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size); } goto out; } @@ -1515,7 +1513,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, type = schema->conf_type; sep = false; seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (sep) seq_putc(s, ';'); if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { @@ -1533,7 +1531,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, else size = rdtgroup_cbm_to_size(r, d, ctrl); } - seq_printf(s, "%d=%u", d->id, size); + seq_printf(s, "%d=%u", d->hdr.id, size); sep = true; } seq_putc(s, '\n'); @@ -1591,21 +1589,21 @@ static void mon_event_config_read(void *info) mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; } -static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info) +static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info) { - smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1); + smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1); } static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { struct mon_config_info mon_info = {0}; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; bool sep = false; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->mon_domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -1613,7 +1611,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid mon_info.evtid = evtid; mondata_config_read(dom, &mon_info); - seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config); + seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config); sep = true; } seq_puts(s, "\n"); @@ -1658,7 +1656,7 @@ static void mon_event_config_write(void *info) } static void mbm_config_write_domain(struct rdt_resource *r, - struct rdt_domain *d, u32 evtid, u32 val) + struct rdt_mon_domain *d, u32 evtid, u32 val) { struct mon_config_info mon_info = {0}; @@ -1679,7 +1677,7 @@ static void mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. 
Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. */ - smp_call_function_any(&d->cpu_mask, mon_event_config_write, + smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write, &mon_info, 1); /* @@ -1699,7 +1697,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); char *dom_str = NULL, *id_str; unsigned long dom_id, val; - struct rdt_domain *d; + struct rdt_mon_domain *d; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); @@ -1729,8 +1727,8 @@ next: return -EINVAL; } - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (d->hdr.id == dom_id) { mbm_config_write_domain(r, d, evtid, val); goto next; } @@ -2258,9 +2256,9 @@ static inline bool is_mba_linear(void) static int set_cache_qos_cfg(int level, bool enable) { void (*update)(void *arg); + struct rdt_ctrl_domain *d; struct rdt_resource *r_l; cpumask_var_t cpu_mask; - struct rdt_domain *d; int cpu; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -2277,14 +2275,14 @@ static int set_cache_qos_cfg(int level, bool enable) return -ENOMEM; r_l = &rdt_resources_all[level].r_resctrl; - list_for_each_entry(d, &r_l->domains, list) { + list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) { if (r_l->cache.arch_has_per_cpu_cfg) /* Pick all the CPUs in the domain instance */ - for_each_cpu(cpu, &d->cpu_mask) + for_each_cpu(cpu, &d->hdr.cpu_mask) cpumask_set_cpu(cpu, cpu_mask); else /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); } /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ @@ -2310,10 +2308,10 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) l3_qos_cfg_update(&hw_res->cdp_enabled); } -static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) +static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d) { u32 num_closid = resctrl_arch_get_num_closid(r); - int cpu = cpumask_any(&d->cpu_mask); + int cpu = cpumask_any(&d->hdr.cpu_mask); int i; d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), @@ -2328,7 +2326,7 @@ static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) } static void mba_sc_domain_destroy(struct rdt_resource *r, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { kfree(d->mbps_val); d->mbps_val = NULL; @@ -2336,14 +2334,18 @@ static void mba_sc_domain_destroy(struct rdt_resource *r, /* * MBA software controller is supported only if - * MBM is supported and MBA is in linear scale. + * MBM is supported and MBA is in linear scale, + * and the MBM monitor scope is the same as MBA + * control scope. 
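+ * (For example, when Sub-NUMA Cluster mode is detected, snc_get_config()
+ * switches the L3 monitor scope to RESCTRL_L3_NODE while MBA control
+ * stays at L3 cache scope, so the scopes differ and mba_MBps is
+ * rejected.)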
*/ static bool supports_mba_mbps(void) { + struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; return (is_mbm_local_enabled() && - r->alloc_capable && is_mba_linear()); + r->alloc_capable && is_mba_linear() && + r->ctrl_scope == rmbm->mon_scope); } /* @@ -2354,7 +2356,7 @@ static int set_mba_sc(bool mba_sc) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; u32 num_closid = resctrl_arch_get_num_closid(r); - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int i; if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) @@ -2362,7 +2364,7 @@ static int set_mba_sc(bool mba_sc) r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { for (i = 0; i < num_closid; i++) d->mbps_val[i] = MBA_MAX_MBPS; } @@ -2626,7 +2628,7 @@ static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); unsigned long flags = RFTYPE_CTRL_BASE; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; struct rdt_resource *r; int ret; @@ -2701,7 +2703,7 @@ static int rdt_get_tree(struct fs_context *fc) if (is_mbm_enabled()) { r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - list_for_each_entry(dom, &r->domains, list) + list_for_each_entry(dom, &r->mon_domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } @@ -2751,6 +2753,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct rdt_fs_context *ctx = rdt_fc2context(fc); struct fs_parse_result result; + const char *msg; int opt; opt = fs_parse(fc, rdt_fs_parameters, param, &result); @@ -2765,8 +2768,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->enable_cdpl2 = true; return 0; case Opt_mba_mbps: + msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope"; if (!supports_mba_mbps()) - return -EINVAL; + return invalfc(fc, msg); ctx->enable_mba_mbps = true; return 0; case Opt_debug: @@ -2811,9 +2815,9 @@ static int rdt_init_fs_context(struct fs_context *fc) static int reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom; + struct rdt_hw_ctrl_domain *hw_dom; struct msr_param msr_param; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int i; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -2825,16 +2829,16 @@ static int reset_all_ctrls(struct rdt_resource *r) /* * Disable resource control for this resource by setting all - * CBMs in all domains to the maximum mask value. Pick one CPU + * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU * from each domain to update the MSRs below. 
*/ - list_for_each_entry(d, &r->domains, list) { - hw_dom = resctrl_to_arch_dom(d); + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + hw_dom = resctrl_to_arch_ctrl_dom(d); for (i = 0; i < hw_res->num_closid; i++) hw_dom->ctrl_val[i] = r->default_ctrl; msr_param.dom = d; - smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1); + smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1); } return 0; @@ -3002,62 +3006,126 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name, return ret; } +static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname) +{ + struct kernfs_node *kn; + + kn = kernfs_find_and_get(pkn, name); + if (!kn) + return; + kernfs_put(kn); + + if (kn->dir.subdirs <= 1) + kernfs_remove(kn); + else + kernfs_remove_by_name(kn, subname); +} + /* * Remove all subdirectories of mon_data of ctrl_mon groups - * and monitor groups with given domain id. + * and monitor groups for the given domain. + * Remove files and directories containing "sum" of domain data + * when last domain being summed is removed. */ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - unsigned int dom_id) + struct rdt_mon_domain *d) { struct rdtgroup *prgrp, *crgrp; + char subname[32]; + bool snc_mode; char name[32]; + snc_mode = r->mon_scope == RESCTRL_L3_NODE; + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + if (snc_mode) + sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - sprintf(name, "mon_%s_%02d", r->name, dom_id); - kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); + mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname); list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) - kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); + mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname); } } -static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, - struct rdt_domain *d, - struct rdt_resource *r, struct rdtgroup *prgrp) +static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, + struct rdt_resource *r, struct rdtgroup *prgrp, + bool do_sum) { + struct rmid_read rr = {0}; union mon_data_bits priv; - struct kernfs_node *kn; struct mon_evt *mevt; - struct rmid_read rr; - char name[32]; int ret; - sprintf(name, "mon_%s_%02d", r->name, d->id); - /* create the directory */ - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - - if (WARN_ON(list_empty(&r->evt_list))) { - ret = -EPERM; - goto out_destroy; - } + if (WARN_ON(list_empty(&r->evt_list))) + return -EPERM; priv.u.rid = r->rid; - priv.u.domid = d->id; + priv.u.domid = do_sum ? d->ci->id : d->hdr.id; + priv.u.sum = do_sum; list_for_each_entry(mevt, &r->evt_list, list) { priv.u.evtid = mevt->evtid; ret = mon_addfile(kn, mevt->name, priv.priv); if (ret) + return ret; + + if (!do_sum && is_mbm_event(mevt->evtid)) + mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); + } + + return 0; +} + +static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, + struct rdt_mon_domain *d, + struct rdt_resource *r, struct rdtgroup *prgrp) +{ + struct kernfs_node *kn, *ckn; + char name[32]; + bool snc_mode; + int ret = 0; + + lockdep_assert_held(&rdtgroup_mutex); + + snc_mode = r->mon_scope == RESCTRL_L3_NODE; + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? 
d->ci->id : d->hdr.id); + kn = kernfs_find_and_get(parent_kn, name); + if (kn) { + /* + * rdtgroup_mutex will prevent this directory from being + * removed. No need to keep this hold. + */ + kernfs_put(kn); + } else { + kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; + ret = mon_add_all_files(kn, d, r, prgrp, snc_mode); + if (ret) + goto out_destroy; + } + + if (snc_mode) { + sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id); + ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp); + if (IS_ERR(ckn)) { + ret = -EINVAL; + goto out_destroy; + } + + ret = rdtgroup_kn_set_ugid(ckn); + if (ret) goto out_destroy; - if (is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); + ret = mon_add_all_files(ckn, d, r, prgrp, false); + if (ret) + goto out_destroy; } + kernfs_activate(kn); return 0; @@ -3071,7 +3139,7 @@ out_destroy: * and "monitor" groups with given domain id. */ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) + struct rdt_mon_domain *d) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; @@ -3093,13 +3161,13 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, struct rdt_resource *r, struct rdtgroup *prgrp) { - struct rdt_domain *dom; + struct rdt_mon_domain *dom; int ret; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->mon_domains, hdr.list) { ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); if (ret) return ret; @@ -3198,7 +3266,7 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) * Set the RDT domain up to start off with all usable allocations. That is, * all shareable and unused bits. All-zero CBM is invalid. 
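In the mkdir_mondata_subdir() hunk above, Sub-NUMA Cluster (SNC) mode names the shared parent directory after the L3 cache instance (d->ci->id) and each per-node child after the domain (d->hdr.id). A small runnable sketch of just that naming scheme; the IDs passed in main() are made up:

	#include <stdbool.h>
	#include <stdio.h>

	/* Modeled on the sprintf() calls in mkdir_mondata_subdir(). */
	static void print_mon_dirs(const char *rname, int ci_id, int dom_id,
				   bool snc)
	{
		char name[32], subname[32];

		snprintf(name, sizeof(name), "mon_%s_%02d", rname,
			 snc ? ci_id : dom_id);
		if (snc) {
			snprintf(subname, sizeof(subname), "mon_sub_%s_%02d",
				 rname, dom_id);
			printf("%s/%s\n", name, subname);
		} else {
			printf("%s\n", name);
		}
	}

	int main(void)
	{
		print_mon_dirs("L3", 0, 0, false);  /* mon_L3_00            */
		print_mon_dirs("L3", 0, 1, true);   /* mon_L3_00/mon_sub_L3_01 */
		return 0;
	}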
*/ -static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, +static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s, u32 closid) { enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); @@ -3258,7 +3326,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, */ tmp_cbm = cfg->new_ctrl; if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); + rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id); return -ENOSPC; } cfg->have_new_ctrl = true; @@ -3278,10 +3346,10 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, */ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int ret; - list_for_each_entry(d, &s->res->domains, list) { + list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) { ret = __init_one_rdt_domain(d, s, closid); if (ret < 0) return ret; @@ -3294,9 +3362,9 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (is_mba_sc(r)) { d->mbps_val[closid] = MBA_MAX_MBPS; continue; @@ -3920,29 +3988,33 @@ static void __init rdtgroup_setup_default(void) mutex_unlock(&rdtgroup_mutex); } -static void domain_destroy_mon_state(struct rdt_domain *d) +static void domain_destroy_mon_state(struct rdt_mon_domain *d) { bitmap_free(d->rmid_busy_llc); kfree(d->mbm_total); kfree(d->mbm_local); } -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) { mutex_lock(&rdtgroup_mutex); if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) mba_sc_domain_destroy(r, d); - if (!r->mon_capable) - goto out_unlock; + mutex_unlock(&rdtgroup_mutex); +} + +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + mutex_lock(&rdtgroup_mutex); /* * If resctrl is mounted, remove all the * per domain monitor data directories. 
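The offline path above is split the same way as the data structures: resctrl_offline_ctrl_domain() and resctrl_offline_mon_domain() each take and drop rdtgroup_mutex themselves, which is what lets the old goto out_unlock tail disappear. A pthread sketch of that shape, purely illustrative:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t grp_mutex = PTHREAD_MUTEX_INITIALIZER;

	/* Before: one offline handler did both jobs under a single lock
	 * section with an early-exit goto. After: two self-locking hooks. */
	static void offline_ctrl_domain(void)
	{
		pthread_mutex_lock(&grp_mutex);
		puts("tear down control state");
		pthread_mutex_unlock(&grp_mutex);
	}

	static void offline_mon_domain(void)
	{
		pthread_mutex_lock(&grp_mutex);
		puts("tear down monitor state");
		pthread_mutex_unlock(&grp_mutex);
	}

	int main(void)
	{
		offline_ctrl_domain();
		offline_mon_domain();
		return 0;
	}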
*/ if (resctrl_mounted && resctrl_arch_mon_capable()) - rmdir_mondata_subdir_allrdtgrp(r, d->id); + rmdir_mondata_subdir_allrdtgrp(r, d); if (is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); @@ -3961,11 +4033,10 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) domain_destroy_mon_state(d); -out_unlock: mutex_unlock(&rdtgroup_mutex); } -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; @@ -3996,7 +4067,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) return 0; } -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) { int err = 0; @@ -4005,11 +4076,18 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { /* RDT_RESOURCE_MBA is never mon_capable */ err = mba_sc_domain_allocate(r, d); - goto out_unlock; } - if (!r->mon_capable) - goto out_unlock; + mutex_unlock(&rdtgroup_mutex); + + return err; +} + +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + int err; + + mutex_lock(&rdtgroup_mutex); err = domain_setup_mon_state(r, d); if (err) @@ -4060,8 +4138,8 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) void resctrl_offline_cpu(unsigned int cpu) { struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; - struct rdt_domain *d; mutex_lock(&rdtgroup_mutex); list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { @@ -4074,7 +4152,7 @@ void resctrl_offline_cpu(unsigned int cpu) if (!l3->mon_capable) goto out_unlock; - d = get_domain_from_cpu(cpu, l3); + d = get_mon_domain_from_cpu(cpu, l3); if (d) { if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 11f83d07925e..00189cdeb775 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -41,80 +41,97 @@ #define CPUID_VMWARE_INFO_LEAF 0x40000000 #define CPUID_VMWARE_FEATURES_LEAF 0x40000010 -#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0) -#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1) -#define VMWARE_HYPERVISOR_MAGIC 0x564D5868 - -#define VMWARE_CMD_GETVERSION 10 -#define VMWARE_CMD_GETHZ 45 -#define VMWARE_CMD_GETVCPU_INFO 68 -#define VMWARE_CMD_LEGACY_X2APIC 3 -#define VMWARE_CMD_VCPU_RESERVED 31 -#define VMWARE_CMD_STEALCLOCK 91 +#define GETVCPU_INFO_LEGACY_X2APIC BIT(3) +#define GETVCPU_INFO_VCPU_RESERVED BIT(31) #define STEALCLOCK_NOT_AVAILABLE (-1) #define STEALCLOCK_DISABLED 0 #define STEALCLOCK_ENABLED 1 -#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx), %%eax" : \ - "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ - "a"(VMWARE_HYPERVISOR_MAGIC), \ - "c"(VMWARE_CMD_##cmd), \ - "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) : \ - "memory") - -#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx) \ - __asm__("vmcall" : \ - "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ - "a"(VMWARE_HYPERVISOR_MAGIC), \ - "c"(VMWARE_CMD_##cmd), \ - "d"(0), "b"(UINT_MAX) : \ - "memory") - -#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx) \ - __asm__("vmmcall" : \ - "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ - "a"(VMWARE_HYPERVISOR_MAGIC), \ - "c"(VMWARE_CMD_##cmd), \ - 
"d"(0), "b"(UINT_MAX) : \ - "memory") - -#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do { \ - switch (vmware_hypercall_mode) { \ - case CPUID_VMWARE_FEATURES_ECX_VMCALL: \ - VMWARE_VMCALL(cmd, eax, ebx, ecx, edx); \ - break; \ - case CPUID_VMWARE_FEATURES_ECX_VMMCALL: \ - VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx); \ - break; \ - default: \ - VMWARE_PORT(cmd, eax, ebx, ecx, edx); \ - break; \ - } \ - } while (0) - struct vmware_steal_time { union { - uint64_t clock; /* stolen time counter in units of vtsc */ + u64 clock; /* stolen time counter in units of vtsc */ struct { /* only for little-endian */ - uint32_t clock_low; - uint32_t clock_high; + u32 clock_low; + u32 clock_high; }; }; - uint64_t reserved[7]; + u64 reserved[7]; }; static unsigned long vmware_tsc_khz __ro_after_init; static u8 vmware_hypercall_mode __ro_after_init; +unsigned long vmware_hypercall_slow(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5) +{ + unsigned long out0, rbx, rcx, rdx, rsi, rdi; + + switch (vmware_hypercall_mode) { + case CPUID_VMWARE_FEATURES_ECX_VMCALL: + asm_inline volatile ("vmcall" + : "=a" (out0), "=b" (rbx), "=c" (rcx), + "=d" (rdx), "=S" (rsi), "=D" (rdi) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + break; + case CPUID_VMWARE_FEATURES_ECX_VMMCALL: + asm_inline volatile ("vmmcall" + : "=a" (out0), "=b" (rbx), "=c" (rcx), + "=d" (rdx), "=S" (rsi), "=D" (rdi) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + break; + default: + asm_inline volatile ("movw %[port], %%dx; inl (%%dx), %%eax" + : "=a" (out0), "=b" (rbx), "=c" (rcx), + "=d" (rdx), "=S" (rsi), "=D" (rdi) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + break; + } + + if (out1) + *out1 = rbx; + if (out2) + *out2 = rcx; + if (out3) + *out3 = rdx; + if (out4) + *out4 = rsi; + if (out5) + *out5 = rdi; + + return out0; +} + static inline int __vmware_platform(void) { - uint32_t eax, ebx, ecx, edx; - VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx); - return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; + u32 eax, ebx, ecx; + + eax = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &ebx, &ecx); + return eax != UINT_MAX && ebx == VMWARE_HYPERVISOR_MAGIC; } static unsigned long vmware_get_tsc_khz(void) @@ -166,21 +183,12 @@ static void __init vmware_cyc2ns_setup(void) pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset); } -static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2) +static int vmware_cmd_stealclock(u32 addr_hi, u32 addr_lo) { - uint32_t result, info; - - asm volatile (VMWARE_HYPERCALL : - "=a"(result), - "=c"(info) : - "a"(VMWARE_HYPERVISOR_MAGIC), - "b"(0), - "c"(VMWARE_CMD_STEALCLOCK), - "d"(0), - "S"(arg1), - "D"(arg2) : - "memory"); - return result; + u32 info; + + return vmware_hypercall5(VMWARE_CMD_STEALCLOCK, 0, 0, addr_hi, addr_lo, + &info); } static bool stealclock_enable(phys_addr_t pa) @@ -215,15 +223,15 @@ static bool vmware_is_stealclock_available(void) * Return: * The steal clock reading in ns. 
*/ -static uint64_t vmware_steal_clock(int cpu) +static u64 vmware_steal_clock(int cpu) { struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu); - uint64_t clock; + u64 clock; if (IS_ENABLED(CONFIG_64BIT)) clock = READ_ONCE(steal->clock); else { - uint32_t initial_high, low, high; + u32 initial_high, low, high; do { initial_high = READ_ONCE(steal->clock_high); @@ -235,7 +243,7 @@ static uint64_t vmware_steal_clock(int cpu) high = READ_ONCE(steal->clock_high); } while (initial_high != high); - clock = ((uint64_t)high << 32) | low; + clock = ((u64)high << 32) | low; } return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul, @@ -389,13 +397,13 @@ static void __init vmware_set_capabilities(void) static void __init vmware_platform_setup(void) { - uint32_t eax, ebx, ecx, edx; - uint64_t lpj, tsc_khz; + u32 eax, ebx, ecx; + u64 lpj, tsc_khz; - VMWARE_CMD(GETHZ, eax, ebx, ecx, edx); + eax = vmware_hypercall3(VMWARE_CMD_GETHZ, UINT_MAX, &ebx, &ecx); if (ebx != UINT_MAX) { - lpj = tsc_khz = eax | (((uint64_t)ebx) << 32); + lpj = tsc_khz = eax | (((u64)ebx) << 32); do_div(tsc_khz, 1000); WARN_ON(tsc_khz >> 32); pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", @@ -446,7 +454,7 @@ static u8 __init vmware_select_hypercall(void) * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode * intentionally defaults to 0. */ -static uint32_t __init vmware_platform(void) +static u32 __init vmware_platform(void) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { unsigned int eax; @@ -474,12 +482,65 @@ static uint32_t __init vmware_platform(void) /* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */ static bool __init vmware_legacy_x2apic_available(void) { - uint32_t eax, ebx, ecx, edx; - VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx); - return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) && - (eax & BIT(VMWARE_CMD_LEGACY_X2APIC)); + u32 eax; + + eax = vmware_hypercall1(VMWARE_CMD_GETVCPU_INFO, 0); + return !(eax & GETVCPU_INFO_VCPU_RESERVED) && + (eax & GETVCPU_INFO_LEGACY_X2APIC); } +#ifdef CONFIG_INTEL_TDX_GUEST +/* + * TDCALL[TDG.VP.VMCALL] uses %rax (arg0) and %rcx (arg2). Therefore, + * we remap those registers to %r12 and %r13, respectively. 
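The 32-bit branch of vmware_steal_clock() above cannot read the 64-bit counter atomically, so it re-reads the high word until it is stable around the low-word read. A compile-and-run sketch of that torn-read guard; it uses volatile where the kernel uses READ_ONCE():

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t read_u64_split(volatile uint32_t *lo,
				       volatile uint32_t *hi)
	{
		uint32_t initial_high, low, high;

		do {
			initial_high = *hi;
			low  = *lo;
			high = *hi;
			/* If the high word changed, the low word may belong
			 * to a different epoch: retry until it is stable. */
		} while (initial_high != high);

		return ((uint64_t)high << 32) | low;
	}

	int main(void)
	{
		volatile uint32_t lo = 0x89abcdefu, hi = 0x01234567u;

		printf("%llx\n", (unsigned long long)read_u64_split(&lo, &hi));
		return 0;
	}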
+ */ +unsigned long vmware_tdx_hypercall(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5) +{ + struct tdx_module_args args = {}; + + if (!hypervisor_is_type(X86_HYPER_VMWARE)) { + pr_warn_once("Incorrect usage\n"); + return ULONG_MAX; + } + + if (cmd & ~VMWARE_CMD_MASK) { + pr_warn_once("Out of range command %lx\n", cmd); + return ULONG_MAX; + } + + args.rbx = in1; + args.rdx = in3; + args.rsi = in4; + args.rdi = in5; + args.r10 = VMWARE_TDX_VENDOR_LEAF; + args.r11 = VMWARE_TDX_HCALL_FUNC; + args.r12 = VMWARE_HYPERVISOR_MAGIC; + args.r13 = cmd; + /* CPL */ + args.r15 = 0; + + __tdx_hypercall(&args); + + if (out1) + *out1 = args.rbx; + if (out2) + *out2 = args.r13; + if (out3) + *out3 = args.rdx; + if (out4) + *out4 = args.rsi; + if (out5) + *out5 = args.rdi; + + return args.r12; +} +EXPORT_SYMBOL_GPL(vmware_tdx_hypercall); +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index f06501445cd9..340af8155658 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -128,6 +128,18 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_HPET_TIMER hpet_disable(); #endif + + /* + * Non-crash kexec calls enc_kexec_begin() while scheduling is still + * active. This allows the callback to wait until all in-flight + * shared<->private conversions are complete. In a crash scenario, + * enc_kexec_begin() gets called after all but one CPU have been shut + * down and interrupts have been disabled. This allows the callback to + * detect a race with the conversion and report it. + */ + x86_platform.guest.enc_kexec_begin(); + x86_platform.guest.enc_kexec_finish(); + crash_save_cpu(regs, safe_smp_processor_id()); } diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 8e3c53b4d070..64280879c68c 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -83,7 +83,7 @@ static int x86_of_pci_irq_enable(struct pci_dev *dev) ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (ret) - return ret; + return pcibios_err_to_errno(ret); if (!pin) return 0; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 68b09f718f10..4893d30ce438 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -828,7 +828,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) /* * Find the highest page frame number we have available */ -static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type type) +static unsigned long __init e820__end_ram_pfn(unsigned long limit_pfn) { int i; unsigned long last_pfn = 0; @@ -839,7 +839,8 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type unsigned long start_pfn; unsigned long end_pfn; - if (entry->type != type) + if (entry->type != E820_TYPE_RAM && + entry->type != E820_TYPE_ACPI) continue; start_pfn = entry->addr >> PAGE_SHIFT; @@ -865,12 +866,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type unsigned long __init e820__end_of_ram_pfn(void) { - return e820_end_pfn(MAX_ARCH_PFN, E820_TYPE_RAM); + return e820__end_ram_pfn(MAX_ARCH_PFN); } unsigned long __init e820__end_of_low_ram_pfn(void) { - return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_TYPE_RAM); + return e820__end_ram_pfn(1UL << (32 - PAGE_SHIFT)); } static void __init early_panic(char *msg) diff 
--git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 05df04f39628..2ee0b9c53dcc 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -106,21 +106,17 @@ static inline u64 xfeatures_mask_independent(void) * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT * supports modified optimization which is not supported by XSAVE. * - * We use XSAVE as a fallback. - * - * The 661 label is defined in the ALTERNATIVE* macros as the address of the - * original instruction which gets replaced. We need to use it here as the - * address of the instruction where we might get an exception at. + * Use XSAVE as a fallback. */ #define XSTATE_XSAVE(st, lmask, hmask, err) \ - asm volatile(ALTERNATIVE_3(XSAVE, \ + asm volatile("1: " ALTERNATIVE_3(XSAVE, \ XSAVEOPT, X86_FEATURE_XSAVEOPT, \ XSAVEC, X86_FEATURE_XSAVEC, \ XSAVES, X86_FEATURE_XSAVES) \ "\n" \ "xor %[err], %[err]\n" \ "3:\n" \ - _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \ + _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG, %[err]) \ : [err] "=r" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -130,11 +126,11 @@ static inline u64 xfeatures_mask_independent(void) * XSAVE area format. */ #define XSTATE_XRESTORE(st, lmask, hmask) \ - asm volatile(ALTERNATIVE(XRSTOR, \ + asm volatile("1: " ALTERNATIVE(XRSTOR, \ XRSTORS, X86_FEATURE_XSAVES) \ "\n" \ "3:\n" \ - _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \ + _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FPU_RESTORE) \ : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b8441147eb5e..f63f8fd00a91 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -835,6 +835,13 @@ void __noreturn stop_this_cpu(void *dummy) */ cpumask_clear_cpu(cpu, &cpus_stop_mask); +#ifdef CONFIG_SMP + if (smp_ops.stop_this_cpu) { + smp_ops.stop_this_cpu(); + unreachable(); + } +#endif + for (;;) { /* * Use native_halt() so that memory contents don't change diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f3130f762784..0e0a4cf6b5eb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,7 @@ #include <linux/delay.h> #include <linux/objtool.h> #include <linux/pgtable.h> +#include <linux/kexec.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> @@ -716,6 +717,14 @@ static void native_machine_emergency_restart(void) void native_machine_shutdown(void) { + /* + * Call enc_kexec_begin() while all CPUs are still active and + * interrupts are enabled. This will allow all in-flight memory + * conversions to finish cleanly. 
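The e820 change a few hunks up folds the type check into e820__end_ram_pfn(), so E820_TYPE_ACPI ranges now raise the highest usable PFN just like RAM. A standalone sketch of that scan over an invented table (the table contents are made up for the example):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	enum e820_type { E820_RAM = 1, E820_RESERVED = 2, E820_ACPI = 3 };

	struct e820_entry {
		uint64_t addr, size;
		enum e820_type type;
	};

	/* Modeled on e820__end_ram_pfn(): RAM and ACPI both count now. */
	static unsigned long end_ram_pfn(const struct e820_entry *tbl, int n,
					 unsigned long limit_pfn)
	{
		unsigned long last_pfn = 0;
		int i;

		for (i = 0; i < n; i++) {
			unsigned long start = tbl[i].addr >> PAGE_SHIFT;
			unsigned long end =
				(tbl[i].addr + tbl[i].size) >> PAGE_SHIFT;

			if (tbl[i].type != E820_RAM &&
			    tbl[i].type != E820_ACPI)
				continue;
			if (start >= limit_pfn)
				continue;
			if (end > limit_pfn)
				end = limit_pfn;
			if (end > last_pfn)
				last_pfn = end;
		}
		return last_pfn;
	}

	int main(void)
	{
		struct e820_entry tbl[] = {
			{ 0x00000000, 0x0009f000, E820_RAM  },
			{ 0x00100000, 0x0ff00000, E820_RAM  },
			{ 0x10000000, 0x00010000, E820_ACPI },
		};

		printf("last pfn: %lx\n", end_ram_pfn(tbl, 3, ~0UL));
		return 0;
	}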
+ */ + if (kexec_in_progress) + x86_platform.guest.enc_kexec_begin(); + /* Stop the cpus and apics */ #ifdef CONFIG_X86_IO_APIC /* @@ -752,6 +761,9 @@ void native_machine_shutdown(void) #ifdef CONFIG_X86_64 x86_platform.iommu_shutdown(); #endif + + if (kexec_in_progress) + x86_platform.guest.enc_kexec_finish(); } static void __machine_emergency_restart(int emergency) @@ -868,6 +880,12 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) cpu_emergency_disable_virtualization(); atomic_dec(&waiting_for_crash_ipi); + + if (smp_ops.stop_this_cpu) { + smp_ops.stop_this_cpu(); + unreachable(); + } + /* Assume hlt works */ halt(); for (;;) diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 56cab1bb25f5..042c9a0334e9 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -5,6 +5,8 @@ */ #include <linux/linkage.h> +#include <linux/stringify.h> +#include <asm/alternative.h> #include <asm/page_types.h> #include <asm/kexec.h> #include <asm/processor-flags.h> @@ -145,16 +147,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) * Set cr4 to a known state: * - physical address extension enabled * - 5-level paging, if it was enabled before + * - Machine check exception on TDX guest, if it was enabled before. + * Clearing MCE might not be allowed in TDX guests, depending on setup. + * + * Use R13 that contains the original CR4 value, read in relocate_kernel(). + * PAE is always set in the original CR4. */ - movl $X86_CR4_PAE, %eax - testq $X86_CR4_LA57, %r13 - jz 1f - orl $X86_CR4_LA57, %eax -1: - movq %rax, %cr4 - - jmp 1f -1: + andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d + ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST + movq %r13, %cr4 /* Flush the TLB (needed?) */ movq %r9, %cr3 @@ -165,9 +166,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) * used by kexec. Flush the caches before copying the kernel. 
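In identity_mapped above, the known-good CR4 is now derived by masking the entry value in %r13 rather than rebuilding it bit by bit, and the ALTERNATIVE forces MCE on for TDX guests, where clearing it may not be allowed. The same computation in plain C; bit positions are the architectural CR4 bits:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define X86_CR4_PAE	(1UL << 5)
	#define X86_CR4_MCE	(1UL << 6)
	#define X86_CR4_LA57	(1UL << 12)

	/* Keep only PAE and LA57 from the CR4 value read at entry (PAE is
	 * always set there); OR in MCE when running as a TDX guest. */
	static uint64_t kexec_cr4(uint64_t entry_cr4, bool tdx_guest)
	{
		uint64_t cr4 = entry_cr4 & (X86_CR4_PAE | X86_CR4_LA57);

		if (tdx_guest)
			cr4 |= X86_CR4_MCE;
		return cr4;
	}

	int main(void)
	{
		printf("cr4=%#lx\n",
		       (unsigned long)kexec_cr4(X86_CR4_PAE | X86_CR4_LA57,
						true));
		return 0;
	}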
*/ testq %r12, %r12 - jz 1f + jz .Lsme_off wbinvd -1: +.Lsme_off: movq %rcx, %r11 call swap_pages @@ -187,7 +188,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) */ testq %r11, %r11 - jnz 1f + jnz .Lrelocate xorl %eax, %eax xorl %ebx, %ebx xorl %ecx, %ecx @@ -208,7 +209,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) ret int3 -1: +.Lrelocate: popq %rdx leaq PAGE_SIZE(%r10), %rsp ANNOTATE_RETPOLINE_SAFE diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 05c5aa951da7..728927e4ba51 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -165,6 +165,7 @@ unsigned long saved_video_mode; static char __initdata command_line[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +bool builtin_cmdline_added __ro_after_init; #endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) @@ -765,6 +766,7 @@ void __init setup_arch(char **cmdline_p) strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); } #endif + builtin_cmdline_added = true; #endif strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e42faa792c07..52e1f3f0b361 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -27,25 +27,7 @@ unsigned long profile_pc(struct pt_regs *regs) { - unsigned long pc = instruction_pointer(regs); - - if (!user_mode(regs) && in_lock_functions(pc)) { -#ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); -#else - unsigned long *sp = (unsigned long *)regs->sp; - /* - * Return address is either directly at stack pointer - * or above a saved flags. Eflags has bits 22-31 zero, - * kernel addresses don't. - */ - if (sp[0] >> 22) - return sp[0]; - if (sp[1] >> 22) - return sp[1]; -#endif - } - return pc; + return instruction_pointer(regs); } EXPORT_SYMBOL(profile_pc); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 06b170759e5b..d4462fb26299 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -50,9 +50,9 @@ int tsc_clocksource_reliable; static int __read_mostly tsc_force_recalibrate; -static u32 art_to_tsc_numerator; -static u32 art_to_tsc_denominator; -static u64 art_to_tsc_offset; +static struct clocksource_base art_base_clk = { + .id = CSID_X86_ART, +}; static bool have_art; struct cyc2ns { @@ -1074,7 +1074,7 @@ core_initcall(cpufreq_register_tsc_scaling); */ static void __init detect_art(void) { - unsigned int unused[2]; + unsigned int unused; if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) return; @@ -1089,13 +1089,14 @@ static void __init detect_art(void) tsc_async_resets) return; - cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, - &art_to_tsc_numerator, unused, unused+1); + cpuid(ART_CPUID_LEAF, &art_base_clk.denominator, + &art_base_clk.numerator, &art_base_clk.freq_khz, &unused); - if (art_to_tsc_denominator < ART_MIN_DENOMINATOR) + art_base_clk.freq_khz /= KHZ; + if (art_base_clk.denominator < ART_MIN_DENOMINATOR) return; - rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset); + rdmsrl(MSR_IA32_TSC_ADJUST, art_base_clk.offset); /* Make this sticky over multiple CPU init calls */ setup_force_cpu_cap(X86_FEATURE_ART); @@ -1296,67 +1297,6 @@ int unsynchronized_tsc(void) return 0; } -/* - * Convert ART to TSC given numerator/denominator found in detect_art() - */ -struct system_counterval_t convert_art_to_tsc(u64 art) -{ - u64 tmp, res, rem; - - rem = do_div(art, art_to_tsc_denominator); - - res = art * art_to_tsc_numerator; - tmp = rem * art_to_tsc_numerator; 
- - do_div(tmp, art_to_tsc_denominator); - res += tmp + art_to_tsc_offset; - - return (struct system_counterval_t) { - .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC, - .cycles = res, - }; -} -EXPORT_SYMBOL(convert_art_to_tsc); - -/** - * convert_art_ns_to_tsc() - Convert ART in nanoseconds to TSC. - * @art_ns: ART (Always Running Timer) in unit of nanoseconds - * - * PTM requires all timestamps to be in units of nanoseconds. When user - * software requests a cross-timestamp, this function converts system timestamp - * to TSC. - * - * This is valid when CPU feature flag X86_FEATURE_TSC_KNOWN_FREQ is set - * indicating the tsc_khz is derived from CPUID[15H]. Drivers should check - * that this flag is set before conversion to TSC is attempted. - * - * Return: - * struct system_counterval_t - system counter value with the ID of the - * corresponding clocksource: - * cycles: System counter value - * cs_id: The clocksource ID for validating comparability - */ - -struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns) -{ - u64 tmp, res, rem; - - rem = do_div(art_ns, USEC_PER_SEC); - - res = art_ns * tsc_khz; - tmp = rem * tsc_khz; - - do_div(tmp, USEC_PER_SEC); - res += tmp; - - return (struct system_counterval_t) { - .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC, - .cycles = res, - }; -} -EXPORT_SYMBOL(convert_art_ns_to_tsc); - - static void tsc_refine_calibration_work(struct work_struct *work); static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); /** @@ -1458,8 +1398,10 @@ out: if (tsc_unstable) goto unreg; - if (boot_cpu_has(X86_FEATURE_ART)) + if (boot_cpu_has(X86_FEATURE_ART)) { have_art = true; + clocksource_tsc.base = &art_base_clk; + } clocksource_register_khz(&clocksource_tsc, tsc_khz); unreg: clocksource_unregister(&clocksource_tsc_early); @@ -1484,8 +1426,10 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { - if (boot_cpu_has(X86_FEATURE_ART)) + if (boot_cpu_has(X86_FEATURE_ART)) { have_art = true; + clocksource_tsc.base = &art_base_clk; + } clocksource_register_khz(&clocksource_tsc, tsc_khz); clocksource_unregister(&clocksource_tsc_early); @@ -1509,10 +1453,12 @@ static bool __init determine_cpu_tsc_frequencies(bool early) if (early) { cpu_khz = x86_platform.calibrate_cpu(); - if (tsc_early_khz) + if (tsc_early_khz) { tsc_khz = tsc_early_khz; - else + } else { tsc_khz = x86_platform.calibrate_tsc(); + clocksource_tsc.freq_khz = tsc_khz; + } } else { /* We should not be here with non-native cpu calibration */ WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3509afc6a672..6e73403e874f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -357,6 +357,9 @@ SECTIONS PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + . 
= ALIGN(PAGE_SIZE); /* freed after init ends here */ diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d5dc5a92635a..82b128d3f309 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -134,10 +134,12 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; -static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } -static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; } +static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return 0; } +static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } +static void enc_kexec_begin_noop(void) {} +static void enc_kexec_finish_noop(void) {} static bool is_private_mmio_noop(u64 addr) {return false; } struct x86_platform_ops x86_platform __ro_after_init = { @@ -161,6 +163,8 @@ struct x86_platform_ops x86_platform __ro_after_init = { .enc_status_change_finish = enc_status_change_finish_noop, .enc_tlb_flush_required = enc_tlb_flush_required_noop, .enc_cache_flush_required = enc_cache_flush_required_noop, + .enc_kexec_begin = enc_kexec_begin_noop, + .enc_kexec_finish = enc_kexec_finish_noop, }, }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5d4c86133453..c8cc578646d0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1069,7 +1069,7 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; asm("push %[flags]; popf; " CALL_NOSPEC - : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); + : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags)); return rc; } diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 80570eb3c89b..384da1fdd5c6 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -6,8 +6,10 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/ctype.h> + #include <asm/setup.h> #include <asm/cmdline.h> +#include <asm/bug.h> static inline int myisspace(u8 c) { @@ -205,12 +207,18 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, int cmdline_find_option_bool(const char *cmdline, const char *option) { + if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) + WARN_ON_ONCE(!builtin_cmdline_added); + return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); } int cmdline_find_option(const char *cmdline, const char *option, char *buffer, int bufsize) { + if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) + WARN_ON_ONCE(!builtin_cmdline_added); + return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); } diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index a1cb3a4e6742..a314622aa093 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -44,21 +44,23 @@ or %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax -.if \size != 8 jae .Lbad_get_user -.else - jae .Lbad_get_user_8 -.endif sbb %edx, %edx /* array_index_mask_nospec() */ and %edx, %eax .endif .endm +.macro UACCESS op src dst +1: \op \src,\dst + _ASM_EXTABLE_UA(1b, __get_user_handle_exception) +.endm + + .text SYM_FUNC_START(__get_user_1) check_range size=1 ASM_STAC -1: movzbl (%_ASM_AX),%edx + UACCESS movzbl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC RET @@ -68,7 +70,7 @@ EXPORT_SYMBOL(__get_user_1) 
SYM_FUNC_START(__get_user_2) check_range size=2 ASM_STAC -2: movzwl (%_ASM_AX),%edx + UACCESS movzwl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC RET @@ -78,7 +80,7 @@ EXPORT_SYMBOL(__get_user_2) SYM_FUNC_START(__get_user_4) check_range size=4 ASM_STAC -3: movl (%_ASM_AX),%edx + UACCESS movl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC RET @@ -89,10 +91,11 @@ SYM_FUNC_START(__get_user_8) check_range size=8 ASM_STAC #ifdef CONFIG_X86_64 -4: movq (%_ASM_AX),%rdx + UACCESS movq (%_ASM_AX),%rdx #else -4: movl (%_ASM_AX),%edx -5: movl 4(%_ASM_AX),%ecx + xor %ecx,%ecx + UACCESS movl (%_ASM_AX),%edx + UACCESS movl 4(%_ASM_AX),%ecx #endif xor %eax,%eax ASM_CLAC @@ -104,7 +107,7 @@ EXPORT_SYMBOL(__get_user_8) SYM_FUNC_START(__get_user_nocheck_1) ASM_STAC ASM_BARRIER_NOSPEC -6: movzbl (%_ASM_AX),%edx + UACCESS movzbl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC RET @@ -114,7 +117,7 @@ EXPORT_SYMBOL(__get_user_nocheck_1) SYM_FUNC_START(__get_user_nocheck_2) ASM_STAC ASM_BARRIER_NOSPEC -7: movzwl (%_ASM_AX),%edx + UACCESS movzwl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC RET @@ -124,7 +127,7 @@ EXPORT_SYMBOL(__get_user_nocheck_2) SYM_FUNC_START(__get_user_nocheck_4) ASM_STAC ASM_BARRIER_NOSPEC -8: movl (%_ASM_AX),%edx + UACCESS movl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC RET @@ -135,10 +138,11 @@ SYM_FUNC_START(__get_user_nocheck_8) ASM_STAC ASM_BARRIER_NOSPEC #ifdef CONFIG_X86_64 -9: movq (%_ASM_AX),%rdx + UACCESS movq (%_ASM_AX),%rdx #else -9: movl (%_ASM_AX),%edx -10: movl 4(%_ASM_AX),%ecx + xor %ecx,%ecx + UACCESS movl (%_ASM_AX),%edx + UACCESS movl 4(%_ASM_AX),%ecx #endif xor %eax,%eax ASM_CLAC @@ -154,36 +158,3 @@ SYM_CODE_START_LOCAL(__get_user_handle_exception) mov $(-EFAULT),%_ASM_AX RET SYM_CODE_END(__get_user_handle_exception) - -#ifdef CONFIG_X86_32 -SYM_CODE_START_LOCAL(__get_user_8_handle_exception) - ASM_CLAC -.Lbad_get_user_8: - xor %edx,%edx - xor %ecx,%ecx - mov $(-EFAULT),%_ASM_AX - RET -SYM_CODE_END(__get_user_8_handle_exception) -#endif - -/* get_user */ - _ASM_EXTABLE_UA(1b, __get_user_handle_exception) - _ASM_EXTABLE_UA(2b, __get_user_handle_exception) - _ASM_EXTABLE_UA(3b, __get_user_handle_exception) -#ifdef CONFIG_X86_64 - _ASM_EXTABLE_UA(4b, __get_user_handle_exception) -#else - _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception) - _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception) -#endif - -/* __get_user */ - _ASM_EXTABLE_UA(6b, __get_user_handle_exception) - _ASM_EXTABLE_UA(7b, __get_user_handle_exception) - _ASM_EXTABLE_UA(8b, __get_user_handle_exception) -#ifdef CONFIG_X86_64 - _ASM_EXTABLE_UA(9b, __get_user_handle_exception) -#else - _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception) - _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception) -#endif diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index e0411a3774d4..5eecb45d05d5 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -25,6 +25,9 @@ static __always_inline void rep_movs(void *to, const void *from, size_t n) static void string_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) { + const void *orig_to = to; + const size_t orig_n = n; + if (unlikely(!n)) return; @@ -39,7 +42,7 @@ static void string_memcpy_fromio(void *to, const volatile void __iomem *from, si } rep_movs(to, (const void *)from, n); /* KMSAN must treat values read from devices as initialized. 
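The getuser.S rework above folds every access site into the UACCESS macro, which emits the exception-table entry right next to the instruction, and drops the dedicated 8-byte fault handler; the 32-bit 8-byte path instead pre-zeroes %ecx so a fault in either half still returns zeroed output. A C model of that fault contract (this stands in for the asm, it is not the kernel's code):

	#include <stdint.h>
	#include <stdio.h>

	struct gu8 { long err; uint32_t lo, hi; };

	static struct gu8 get_user_8(const uint32_t *src, int faults)
	{
		struct gu8 r = { .err = 0, .lo = 0, .hi = 0 };
					/* hi pre-zeroed: xor %ecx,%ecx */
		if (faults) {		/* __get_user_handle_exception:  */
			r.lo = 0;	/*   xor %edx,%edx               */
			r.err = -14;	/*   mov $(-EFAULT),%eax         */
			return r;	/* hi already 0 from the xor     */
		}
		r.lo = src[0];
		r.hi = src[1];
		return r;
	}

	int main(void)
	{
		uint32_t v[2] = { 1, 2 };
		struct gu8 ok = get_user_8(v, 0), bad = get_user_8(v, 1);

		printf("ok: %ld %u:%u  bad: %ld %u:%u\n",
		       ok.err, ok.hi, ok.lo, bad.err, bad.hi, bad.lo);
		return 0;
	}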
*/ - kmsan_unpoison_memory(to, n); + kmsan_unpoison_memory(orig_to, orig_n); } static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 968d7005f4a7..c45127265f2f 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -4,6 +4,79 @@ * included by both the compressed kernel and the regular kernel. */ +static void free_pte(struct x86_mapping_info *info, pmd_t *pmd) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + + info->free_pgt_page(pte, info->context); +} + +static void free_pmd(struct x86_mapping_info *info, pud_t *pud) +{ + pmd_t *pmd = pmd_offset(pud, 0); + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_present(pmd[i])) + continue; + + if (pmd_leaf(pmd[i])) + continue; + + free_pte(info, &pmd[i]); + } + + info->free_pgt_page(pmd, info->context); +} + +static void free_pud(struct x86_mapping_info *info, p4d_t *p4d) +{ + pud_t *pud = pud_offset(p4d, 0); + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) { + if (!pud_present(pud[i])) + continue; + + if (pud_leaf(pud[i])) + continue; + + free_pmd(info, &pud[i]); + } + + info->free_pgt_page(pud, info->context); +} + +static void free_p4d(struct x86_mapping_info *info, pgd_t *pgd) +{ + p4d_t *p4d = p4d_offset(pgd, 0); + int i; + + for (i = 0; i < PTRS_PER_P4D; i++) { + if (!p4d_present(p4d[i])) + continue; + + free_pud(info, &p4d[i]); + } + + if (pgtable_l5_enabled()) + info->free_pgt_page(p4d, info->context); +} + +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd) +{ + int i; + + for (i = 0; i < PTRS_PER_PGD; i++) { + if (!pgd_present(pgd[i])) + continue; + + free_p4d(info, &pgd[i]); + } + + info->free_pgt_page(pgd, info->context); +} + static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page, unsigned long addr, unsigned long end) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7e177856ee4f..28002cc7a37d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -469,7 +469,9 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, !e820__mapped_any(paddr & PAGE_MASK, paddr_next, E820_TYPE_RAM) && !e820__mapped_any(paddr & PAGE_MASK, paddr_next, - E820_TYPE_RESERVED_KERN)) + E820_TYPE_RESERVED_KERN) && + !e820__mapped_any(paddr & PAGE_MASK, paddr_next, + E820_TYPE_ACPI)) set_pte_init(pte, __pte(0), init); continue; } @@ -524,7 +526,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, !e820__mapped_any(paddr & PMD_MASK, paddr_next, E820_TYPE_RAM) && !e820__mapped_any(paddr & PMD_MASK, paddr_next, - E820_TYPE_RESERVED_KERN)) + E820_TYPE_RESERVED_KERN) && + !e820__mapped_any(paddr & PMD_MASK, paddr_next, + E820_TYPE_ACPI)) set_pmd_init(pmd, __pmd(0), init); continue; } @@ -611,7 +615,9 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, !e820__mapped_any(paddr & PUD_MASK, paddr_next, E820_TYPE_RAM) && !e820__mapped_any(paddr & PUD_MASK, paddr_next, - E820_TYPE_RESERVED_KERN)) + E820_TYPE_RESERVED_KERN) && + !e820__mapped_any(paddr & PUD_MASK, paddr_next, + E820_TYPE_ACPI)) set_pud_init(pud, __pud(0), init); continue; } @@ -698,7 +704,9 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, !e820__mapped_any(paddr & P4D_MASK, paddr_next, E820_TYPE_RAM) && !e820__mapped_any(paddr & P4D_MASK, paddr_next, - E820_TYPE_RESERVED_KERN)) + E820_TYPE_RESERVED_KERN) && + !e820__mapped_any(paddr & P4D_MASK, paddr_next, + E820_TYPE_ACPI)) set_p4d_init(p4d, __p4d(0), 
init); continue; } diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 422602f6039b..86a476a426c2 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -2,7 +2,7 @@ /* * AMD Memory Encryption Support * - * Copyright (C) 2016 Advanced Micro Devices, Inc. + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> */ @@ -283,7 +283,7 @@ static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) #endif } -static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) { /* * To maintain the security guarantees of SEV-SNP guests, make sure @@ -292,11 +292,11 @@ static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc) snp_set_memory_shared(vaddr, npages); - return true; + return 0; } /* Return true unconditionally: return value doesn't matter for the SEV side */ -static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) +static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) { /* * After memory is mapped encrypted in the page table, validate it @@ -308,7 +308,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc); - return true; + return 0; } static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) @@ -510,6 +510,12 @@ void __init sme_early_init(void) */ x86_init.resources.dmi_setup = snp_dmi_setup; } + + /* + * Switch the SVSM CA mapping (if active) from identity mapped to + * kernel mapped. + */ + snp_update_svsm_ca(); } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 19fdfbb171ed..443a97e515c0 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -662,8 +662,9 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star /* * Lookup the page table entry for a virtual address in a specific pgd. - * Return a pointer to the entry, the level of the mapping, and the effective - * NX and RW bits of all page table levels. + * Return a pointer to the entry (or NULL if the entry does not exist), + * the level of the entry, and the effective NX and RW bits of all + * page table levels. 
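The mem_encrypt_amd.c hunk above is part of a tree-wide switch of the enc_status_change callbacks from bool to int, so a failure can carry a real errno back to the caller instead of collapsing into false (the matching noop defaults in x86_init.c now return 0). A sketch of the new contract; the notify helper is invented:

	#include <errno.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Invented stand-in for a hypervisor notification that can fail. */
	static int notify_hypervisor(bool ok)
	{
		return ok ? 0 : -EIO;
	}

	/* New contract: 0 on success, negative errno on failure. */
	static int enc_status_change_finish(unsigned long vaddr, int npages,
					    bool enc)
	{
		(void)vaddr;
		(void)enc;
		return notify_hypervisor(npages > 0);
	}

	int main(void)
	{
		int ret = enc_status_change_finish(0, 0, true);

		if (ret)	/* caller pattern: if (ret) goto vmm_fail; */
			printf("conversion failed: %d\n", ret);
		return 0;
	}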
*/ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address, unsigned int *level, bool *nx, bool *rw) @@ -672,13 +673,14 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address, pud_t *pud; pmd_t *pmd; - *level = PG_LEVEL_NONE; + *level = PG_LEVEL_256T; *nx = false; *rw = true; if (pgd_none(*pgd)) return NULL; + *level = PG_LEVEL_512G; *nx |= pgd_flags(*pgd) & _PAGE_NX; *rw &= pgd_flags(*pgd) & _PAGE_RW; @@ -686,10 +688,10 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address, if (p4d_none(*p4d)) return NULL; - *level = PG_LEVEL_512G; if (p4d_leaf(*p4d) || !p4d_present(*p4d)) return (pte_t *)p4d; + *level = PG_LEVEL_1G; *nx |= p4d_flags(*p4d) & _PAGE_NX; *rw &= p4d_flags(*p4d) & _PAGE_RW; @@ -697,10 +699,10 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address, if (pud_none(*pud)) return NULL; - *level = PG_LEVEL_1G; if (pud_leaf(*pud) || !pud_present(*pud)) return (pte_t *)pud; + *level = PG_LEVEL_2M; *nx |= pud_flags(*pud) & _PAGE_NX; *rw &= pud_flags(*pud) & _PAGE_RW; @@ -708,15 +710,13 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address, if (pmd_none(*pmd)) return NULL; - *level = PG_LEVEL_2M; if (pmd_leaf(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; + *level = PG_LEVEL_4K; *nx |= pmd_flags(*pmd) & _PAGE_NX; *rw &= pmd_flags(*pmd) & _PAGE_RW; - *level = PG_LEVEL_4K; - return pte_offset_kernel(pmd, address); } @@ -736,9 +736,8 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, * Lookup the page table entry for a virtual address. Return a pointer * to the entry and the level of the mapping. * - * Note: We return pud and pmd either when the entry is marked large - * or when the present bit is not set. Otherwise we would return a - * pointer to a nonexisting mapping. + * Note: the function returns p4d, pud or pmd either when the entry is marked + * large or when the present bit is not set. Otherwise it returns NULL. */ pte_t *lookup_address(unsigned long address, unsigned int *level) { @@ -2196,7 +2195,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); /* Notify hypervisor that we are about to set/clr encryption attribute. */ - if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc)) + ret = x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); + if (ret) goto vmm_fail; ret = __change_page_attr_set_clr(&cpa, 1); @@ -2214,24 +2214,61 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) return ret; /* Notify hypervisor that we have successfully set/clr encryption attribute. */ - if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc)) + ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc); + if (ret) goto vmm_fail; return 0; vmm_fail: - WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s.\n", - (void *)addr, numpages, enc ? "private" : "shared"); + WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s: %d\n", + (void *)addr, numpages, enc ? "private" : "shared", ret); + + return ret; +} + +/* + * The lock serializes conversions between private and shared memory. + * + * It is taken for read on conversion. A write lock guarantees that no + * concurrent conversions are in progress. + */ +static DECLARE_RWSEM(mem_enc_lock); + +/* + * Stop new private<->shared conversions. + * + * Taking the exclusive mem_enc_lock waits for in-flight conversions to complete. 
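The mem_enc_lock added below has an unusual shape: conversions take it shared, and set_memory_enc_stop_conversion() takes it exclusive and deliberately never releases it, so every later conversion attempt fails fast. A pthread sketch of that interlock (build with -lpthread); -EBUSY plays the role of the kernel's return value:

	#include <errno.h>
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_rwlock_t mem_enc_lock = PTHREAD_RWLOCK_INITIALIZER;

	/* Writer side: trylock models the crash path, where sleeping is
	 * not allowed and failure signals a race with a conversion. */
	static bool stop_conversions(bool crashing)
	{
		if (crashing)
			return pthread_rwlock_trywrlock(&mem_enc_lock) == 0;
		pthread_rwlock_wrlock(&mem_enc_lock);
		return true;
	}

	/* Reader side: each private<->shared conversion holds it shared. */
	static int convert(void)
	{
		if (pthread_rwlock_tryrdlock(&mem_enc_lock) != 0)
			return -EBUSY;
		/* ... __set_memory_enc_pgtable() would run here ... */
		pthread_rwlock_unlock(&mem_enc_lock);
		return 0;
	}

	int main(void)
	{
		printf("before stop: %d\n", convert());	/* 0      */
		stop_conversions(false);
		printf("after stop: %d\n", convert());	/* -EBUSY */
		return 0;
	}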
+ * The lock is not released to prevent new conversions from being started. + */ +bool set_memory_enc_stop_conversion(void) +{ + /* + * In a crash scenario, sleep is not allowed. Try to take the lock. + * Failure indicates that there is a race with the conversion. + */ + if (oops_in_progress) + return down_write_trylock(&mem_enc_lock); + + down_write(&mem_enc_lock); - return -EIO; + return true; } static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) { - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return __set_memory_enc_pgtable(addr, numpages, enc); + int ret = 0; - return 0; + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { + if (!down_read_trylock(&mem_enc_lock)) + return -EBUSY; + + ret = __set_memory_enc_pgtable(addr, numpages, enc); + + up_read(&mem_enc_lock); + } + + return ret; } int set_memory_encrypted(unsigned long addr, int numpages) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 8edd62206604..b433b1753016 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -216,7 +216,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, } static const struct x86_cpu_id intel_mid_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, NULL), + X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, NULL), {} }; @@ -233,9 +233,9 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) return 0; ret = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); - if (ret < 0) { + if (ret) { dev_warn(&dev->dev, "Failed to read interrupt line: %d\n", ret); - return ret; + return pcibios_err_to_errno(ret); } id = x86_match_cpu(intel_mid_cpu_ids); @@ -243,7 +243,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) model = id->model; switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT_MID: + case VFM_MODEL(INTEL_ATOM_SILVERMONT_MID): polarity_low = false; /* Special treatment for IRQ0 */ diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 652cd53e77f6..0f2fe524f60d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -38,10 +38,10 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev) u8 gsi; rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); - if (rc < 0) { + if (rc) { dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", rc); - return rc; + return pcibios_err_to_errno(rc); } /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/ pirq = gsi; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 7be71c2cdc83..f83bbe0acd4a 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -22,6 +22,7 @@ #include <asm/mpspec_def.h> #include <asm/hw_irq.h> #include <asm/apic.h> +#include <asm/cpu_device_id.h> #include <asm/io_apic.h> #include <asm/intel-mid.h> #include <asm/io.h> @@ -55,9 +56,8 @@ static void __init intel_mid_time_init(void) static void intel_mid_arch_setup(void) { - switch (boot_cpu_data.x86_model) { - case 0x3C: - case 0x4A: + switch (boot_cpu_data.x86_vfm) { + case INTEL_ATOM_SILVERMONT_MID: x86_platform.legacy.rtc = 1; break; default: diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index fdd49d70b437..c81cea208c2c 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -62,7 +62,7 @@ static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr) fail_read: dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); - return result; + return 
pcibios_err_to_errno(result); } static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr) @@ -91,7 +91,7 @@ static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr) fail_write: dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); - return result; + return pcibios_err_to_errno(result); } int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr) diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 89df5d89d664..51655133eee3 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -9,6 +9,10 @@ #include <linux/cache.h> #include <asm/syscall.h> +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); + /* * Below you can see, in terms of #define's, the differences between the x86-64 * and the UML syscall table. @@ -22,15 +26,13 @@ #define sys_vm86 sys_ni_syscall #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) +#define __SYSCALL_NORETURN __SYSCALL #define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include <asm/syscalls_32.h> +#undef __SYSCALL -#undef __SYSCALL #define __SYSCALL(nr, sym) sym, - -extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); - const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { #include <asm/syscalls_32.h> }; diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index b0b4cfd2308c..943d414f2109 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -9,6 +9,10 @@ #include <linux/cache.h> #include <asm/syscall.h> +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); + /* * Below you can see, in terms of #define's, the differences between the x86-64 * and the UML syscall table. @@ -18,14 +22,13 @@ #define sys_iopl sys_ni_syscall #define sys_ioperm sys_ni_syscall +#define __SYSCALL_NORETURN __SYSCALL + #define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include <asm/syscalls_64.h> +#undef __SYSCALL -#undef __SYSCALL #define __SYSCALL(nr, sym) sym, - -extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); - const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { #include <asm/syscalls_64.h> }; diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 0ae10535c699..0ce17766c0e5 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -120,7 +120,7 @@ static __init void snp_enable(void *arg) bool snp_probe_rmptable_info(void) { - u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end; + u64 rmp_sz, rmp_base, rmp_end; rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); rdmsrl(MSR_AMD64_RMP_END, rmp_end); @@ -137,28 +137,11 @@ bool snp_probe_rmptable_info(void) rmp_sz = rmp_end - rmp_base + 1; - /* - * Calculate the amount the memory that must be reserved by the BIOS to - * address the whole RAM, including the bookkeeping area. The RMP itself - * must also be covered. 
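Several hunks above (devicetree.c, intel_mid_pci.c, xen.c, iosf_mbi.c) make the same fix: PCI config accessors return positive PCIBIOS_* codes, so checks like if (ret < 0) never fired and the raw code leaked to callers expecting a negative errno. An abbreviated model of the translation; only two cases are shown and the rest of the kernel's table is elided:

	#include <errno.h>
	#include <stdio.h>

	#define PCIBIOS_SUCCESSFUL		0x00
	#define PCIBIOS_DEVICE_NOT_FOUND	0x86

	/* Abbreviated sketch of pcibios_err_to_errno(). */
	static int pcibios_err_to_errno(int err)
	{
		if (err <= PCIBIOS_SUCCESSFUL)
			return err;	/* already 0 or a negative errno */
		switch (err) {
		case PCIBIOS_DEVICE_NOT_FOUND:
			return -ENODEV;
		}
		return -ERANGE;
	}

	int main(void)
	{
		int ret = PCIBIOS_DEVICE_NOT_FOUND;	/* positive: 0x86 */

		if (ret < 0)	/* the old, never-true check */
			puts("old code would have handled the error here");

		printf("translated: %d\n", pcibios_err_to_errno(ret));
		return 0;
	}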
- */ - max_rmp_pfn = max_pfn; - if (PHYS_PFN(rmp_end) > max_pfn) - max_rmp_pfn = PHYS_PFN(rmp_end); - - calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ; - - if (calc_rmp_sz > rmp_sz) { - pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", - calc_rmp_sz, rmp_sz); - return false; - } - probed_rmp_base = rmp_base; probed_rmp_size = rmp_sz; pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", - probed_rmp_base, probed_rmp_base + probed_rmp_size - 1); + rmp_base, rmp_end); return true; } @@ -206,9 +189,8 @@ void __init snp_fixup_e820_tables(void) */ static int __init snp_rmptable_init(void) { + u64 max_rmp_pfn, calc_rmp_sz, rmptable_size, rmp_end, val; void *rmptable_start; - u64 rmptable_size; - u64 val; if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; @@ -219,10 +201,28 @@ static int __init snp_rmptable_init(void) if (!probed_rmp_size) goto nosnp; + rmp_end = probed_rmp_base + probed_rmp_size - 1; + + /* + * Calculate the amount of memory that must be reserved by the BIOS to + * address the whole RAM, including the bookkeeping area. The RMP itself + * must also be covered. + */ + max_rmp_pfn = max_pfn; + if (PFN_UP(rmp_end) > max_pfn) + max_rmp_pfn = PFN_UP(rmp_end); + + calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ; + if (calc_rmp_sz > probed_rmp_size) { + pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", + calc_rmp_sz, probed_rmp_size); + goto nosnp; + } + rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB); if (!rmptable_start) { pr_err("Failed to map RMP table\n"); - return 1; + goto nosnp; } /* diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 49a1c6890b55..4e2b2e2ac9f9 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -33,7 +33,7 @@ #include <asm/msr.h> #include <asm/cpufeature.h> #include <asm/tdx.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/processor.h> #include <asm/mce.h> #include "tdx.h" @@ -1426,9 +1426,9 @@ static void __init check_tdx_erratum(void) * private memory poisons that memory, and a subsequent read of * that memory triggers #MC.
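In the sev.c hunk above, the RMP coverage check moves from probe time into snp_rmptable_init() and rounds the end address up (PFN_UP) instead of down (PHYS_PFN). The arithmetic pulled out into standalone form; the bookkeeping size passed in main() is a placeholder, not the real RMPTABLE_CPU_BOOKKEEPING_SZ:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PFN_UP(x)	(((x) + (1ULL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)

	/* Each 4 KiB page of addressable memory needs a 16-byte RMP entry
	 * (hence << 4), plus a fixed bookkeeping area at the front. */
	static uint64_t calc_rmp_sz(uint64_t max_pfn, uint64_t rmp_end,
				    uint64_t bookkeeping)
	{
		uint64_t max_rmp_pfn = max_pfn;

		if (PFN_UP(rmp_end) > max_rmp_pfn)
			max_rmp_pfn = PFN_UP(rmp_end);

		return (max_rmp_pfn << 4) + bookkeeping;
	}

	int main(void)
	{
		/* 4 GiB of RAM -> 0x100000 pages -> 16 MiB of entries. */
		printf("0x%llx\n",
		       (unsigned long long)calc_rmp_sz(0x100000, 0, 0x4000));
		return 0;
	}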
*/ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_EMERALDRAPIDS_X: + switch (boot_cpu_data.x86_vfm) { + case INTEL_SAPPHIRERAPIDS_X: + case INTEL_EMERALDRAPIDS_X: setup_force_cpu_bug(X86_BUG_TDX_PW_MCE); } } diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index 08010dbf5e09..df275d554788 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -19,7 +19,7 @@ struct task_struct; -static inline struct task_struct *get_current(void) +static __always_inline struct task_struct *get_current(void) { return current_thread_info()->task; } diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 326db1c1d5d8..e0dffcc43b9e 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -91,7 +91,7 @@ struct thread_info { } /* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) +static __always_inline struct thread_info *current_thread_info(void) { struct thread_info *ti; __asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t" diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index defc67909a9c..d6d2b533a574 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -263,6 +263,9 @@ static const struct proc_ops simdisk_proc_ops = { static int __init simdisk_setup(struct simdisk *dev, int which, struct proc_dir_entry *procdir) { + struct queue_limits lim = { + .features = BLK_FEAT_ROTATIONAL, + }; char tmp[2] = { '0' + which, 0 }; int err; @@ -271,7 +274,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, spin_lock_init(&dev->lock); dev->users = 0; - dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE); + dev->gd = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(dev->gd)) { err = PTR_ERR(dev->gd); goto out; |