diff options
Diffstat (limited to 'arch/x86/kernel')
37 files changed, 411 insertions, 204 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2624de16cd7a..8dcbf6890714 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -935,6 +935,9 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) #define HPET_RESOURCE_NAME_SIZE 9 hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE, SMP_CACHE_BYTES); + if (!hpet_res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 0c26b1b44e51..4203d4f0c68d 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -90,7 +90,7 @@ ret_point: .data ALIGN ENTRY(saved_magic) .long 0 -ENTRY(saved_eip) .long 0 +saved_eip: .long 0 # saved registers saved_idt: .long 0,0 diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 50b8ed0317a3..510fa12aab73 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -125,12 +125,12 @@ ENTRY(do_suspend_lowlevel) ENDPROC(do_suspend_lowlevel) .data -ENTRY(saved_rbp) .quad 0 -ENTRY(saved_rsi) .quad 0 -ENTRY(saved_rdi) .quad 0 -ENTRY(saved_rbx) .quad 0 +saved_rbp: .quad 0 +saved_rsi: .quad 0 +saved_rdi: .quad 0 +saved_rbx: .quad 0 -ENTRY(saved_rip) .quad 0 -ENTRY(saved_rsp) .quad 0 +saved_rip: .quad 0 +saved_rsp: .quad 0 ENTRY(saved_magic) .quad 0 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ebeac487a20c..9a79c7808f9c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -11,6 +11,7 @@ #include <linux/stop_machine.h> #include <linux/slab.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> @@ -393,10 +394,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, continue; } - DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", + DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", a->cpuid >> 5, a->cpuid & 0x1f, - instr, a->instrlen, + instr, instr, a->instrlen, replacement, a->replacementlen, a->padlen); DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); @@ -764,8 +765,8 @@ int poke_int3_handler(struct pt_regs *regs) regs->ip = (unsigned long) bp_int3_handler; return 1; - } +NOKPROBE_SYMBOL(poke_int3_handler); /** * text_poke_bp() -- update instructions on live kernel on SMP diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2953bbf05c08..53aa234a6803 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -812,6 +812,7 @@ static int irq_polarity(int idx) return IOAPIC_POL_HIGH; case MP_IRQPOL_RESERVED: pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); + /* fall through */ case MP_IRQPOL_ACTIVE_LOW: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_POL_LOW; @@ -859,6 +860,7 @@ static int irq_trigger(int idx) return IOAPIC_EDGE; case MP_IRQTRIG_RESERVED: pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); + /* fall through */ case MP_IRQTRIG_LEVEL: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_LEVEL; @@ -2579,6 +2581,8 @@ static struct resource * __init ioapic_setup_resources(void) n *= nr_ioapics; mem = memblock_alloc(n, SMP_CACHE_BYTES); + if (!mem) + panic("%s: Failed to allocate %lu bytes\n", __func__, n); res = (void *)mem; mem += sizeof(struct resource) * nr_ioapics; @@ -2623,6 +2627,9 @@ fake_ioapic_page: #endif ioapic_phys = (unsigned long)memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!ioapic_phys) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index a555da094157..1e225528f0d7 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -27,6 +27,7 @@ #include <linux/crash_dump.h> #include <linux/reboot.h> #include <linux/memory.h> +#include <linux/numa.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -1390,7 +1391,7 @@ static void __init build_socket_tables(void) } /* Set socket -> node values: */ - lnid = -1; + lnid = NUMA_NO_NODE; for_each_present_cpu(cpu) { int nid = cpu_to_node(cpu); int apicid, sockid; @@ -1521,7 +1522,7 @@ static void __init uv_system_init_hub(void) new_hub->pnode = 0xffff; new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); - new_hub->memory_nid = -1; + new_hub->memory_nid = NUMA_NO_NODE; new_hub->nr_possible_cpus = 0; new_hub->nr_online_cpus = 0; } @@ -1538,7 +1539,7 @@ static void __init uv_system_init_hub(void) uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; - if (uv_cpu_hub_info(cpu)->memory_nid == -1) + if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE) uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); /* Init memoryless node: */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 69f6bbb41be0..01004bfb1a1b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -819,11 +819,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects - * all up to and including B1. - */ - if (c->x86_model <= 1 && c->x86_stepping <= 1) + + /* Fix erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01874d54f4fd..2da82eff0eb4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -798,15 +798,25 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); + task_clear_spec_ssb_noexec(task); task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); + task_clear_spec_ssb_noexec(task); task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); + task_clear_spec_ssb_noexec(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE_NOEXEC: + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_set_spec_ssb_disable(task); + task_set_spec_ssb_noexec(task); task_update_spec_tif(task); break; default: @@ -885,6 +895,8 @@ static int ssb_prctl_get(struct task_struct *task) case SPEC_STORE_BYPASS_PRCTL: if (task_spec_ssb_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_noexec(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC; if (task_spec_ssb_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index c4d1023fb0ab..395d46f78582 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -248,6 +248,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, switch (leaf) { case 1: l1 = &l1i; + /* fall through */ case 0: if (!l1->val) return; diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 3fed38812eea..6dd78d8235e4 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -48,3 +48,34 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) return NULL; } EXPORT_SYMBOL(x86_match_cpu); + +static const struct x86_cpu_desc * +x86_match_cpu_with_stepping(const struct x86_cpu_desc *match) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + const struct x86_cpu_desc *m; + + for (m = match; m->x86_family | m->x86_model; m++) { + if (c->x86_vendor != m->x86_vendor) + continue; + if (c->x86 != m->x86_family) + continue; + if (c->x86_model != m->x86_model) + continue; + if (c->x86_stepping != m->x86_stepping) + continue; + return m; + } + return NULL; +} + +bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table) +{ + const struct x86_cpu_desc *res = x86_match_cpu_with_stepping(table); + + if (!res || res->x86_microcode_rev > boot_cpu_data.microcode) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 89298c83de53..e64de5149e50 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = { [SMCA_FP] = { "floating_point", "Floating Point Unit" }, [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, + [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, [SMCA_UMC] = { "umc", "Unified Memory Controller" }, [SMCA_PB] = { "param_block", "Parameter Block" }, [SMCA_PSP] = { "psp", "Platform Security Processor" }, + [SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, [SMCA_SMU] = { "smu", "System Management Unit" }, + [SMCA_SMU_V2] = { "smu", "System Management Unit" }, + [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, + [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, + [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, }; static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = @@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 }, /* ZN Core (HWID=0xB0) MCA types */ - { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, + { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, /* HWID 0xB0 MCATYPE 0x4 is Reserved */ - { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF }, + { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF }, { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F }, { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF }, /* Data Fabric MCA types */ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF }, - { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF }, + { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F }, + { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF }, /* Unified Memory Controller MCA type */ - { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F }, + { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF }, /* Parameter Block MCA type */ { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 }, /* Platform Security Processor MCA type */ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 }, + { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF }, /* System Management Unit MCA type */ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, + { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF }, + + /* Microprocessor 5 Unit MCA type */ + { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF }, + + /* Northbridge IO Unit MCA type */ + { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F }, + + /* PCI Express Unit MCA type */ + { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F }, }; struct smca_bank smca_banks[MAX_NR_BANKS]; @@ -545,6 +563,40 @@ out: return offset; } +/* + * Turn off MC4_MISC thresholding banks on all family 0x15 models since + * they're not supported there. + */ +void disable_err_thresholding(struct cpuinfo_x86 *c) +{ + int i; + u64 hwcr; + bool need_toggle; + u32 msrs[] = { + 0x00000413, /* MC4_MISC0 */ + 0xc0000408, /* MC4_MISC1 */ + }; + + if (c->x86 != 0x15) + return; + + rdmsrl(MSR_K7_HWCR, hwcr); + + /* McStatusWrEn has to be set */ + need_toggle = !(hwcr & BIT(18)); + + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); + + /* Clear CntP bit safely */ + for (i = 0; i < ARRAY_SIZE(msrs); i++) + msr_clear_bit(msrs[i], 62); + + /* restore old settings */ + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr); +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; + disable_err_thresholding(c); + for (bank = 0; bank < mca_cfg.banks; ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index 1d9b3ce662a0..c038e5c00a59 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); #define CPER_CREATOR_MCE \ - UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ - 0x64, 0x90, 0xb8, 0x9d) + GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ + 0x64, 0x90, 0xb8, 0x9d) #define CPER_SECTION_TYPE_MCE \ - UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ - 0x04, 0x4a, 0x38, 0xfc) + GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ + 0x04, 0x4a, 0x38, 0xfc) /* * CPER specification (in UEFI specification 2.3 appendix N) requires @@ -135,7 +135,7 @@ retry: goto out; /* try to skip other type records in storage */ else if (rc != sizeof(rcd) || - uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) + !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE)) goto retry; memcpy(m, &rcd.mce, sizeof(*m)); rc = sizeof(*m); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6ce290c506d9..b7fb541a4873 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 0x15 && c->x86_model <= 0xf) mce_flags.overflow_recov = 1; - /* - * Turn off MC4_MISC thresholding banks on those models since - * they're not supported there. - */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { - int i; - u64 hwcr; - bool need_toggle; - u32 msrs[] = { - 0x00000413, /* MC4_MISC0 */ - 0xc0000408, /* MC4_MISC1 */ - }; - - rdmsrl(MSR_K7_HWCR, hwcr); - - /* McStatusWrEn has to be set */ - need_toggle = !(hwcr & BIT(18)); - - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); - - /* Clear CntP bit safely */ - for (i = 0; i < ARRAY_SIZE(msrs); i++) - msr_clear_bit(msrs[i], 62); - - /* restore old settings */ - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr); - } } if (c->x86_vendor == X86_VENDOR_INTEL) { diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dc3e26e905a3..65201e180fe0 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -165,6 +165,11 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), KERNEL ), + MCESEV( + PANIC, "Instruction fetch error in kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), + KERNEL + ), #endif MCESEV( PANIC, "Action required: unknown MCACOD", diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e81a2db42df7..3fa238a137d2 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; # endif + + /* + * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic, + * set x2apic destination mode to physcial mode when x2apic is available + * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs + * have 8-bit APIC id. + */ +# ifdef CONFIG_X86_X2APIC + if (x2apic_supported()) + x2apic_phys = 1; +# endif + #endif } diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 3668c5df90c6..5bd011737272 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -296,7 +296,7 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, unsigned long sizek) { unsigned long hole_basek, hole_sizek; - unsigned long second_basek, second_sizek; + unsigned long second_sizek; unsigned long range0_basek, range0_sizek; unsigned long range_basek, range_sizek; unsigned long chunk_sizek; @@ -304,7 +304,6 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, hole_basek = 0; hole_sizek = 0; - second_basek = 0; second_sizek = 0; chunk_sizek = state->chunk_sizek; gran_sizek = state->gran_sizek; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 822b7db634ee..e49b77283924 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/kernfs.h> +#include <linux/fs_context.h> #include <linux/jump_label.h> #define MSR_IA32_L3_QOS_CFG 0xc81 @@ -40,6 +41,21 @@ #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) + +struct rdt_fs_context { + struct kernfs_fs_context kfc; + bool enable_cdpl2; + bool enable_cdpl3; + bool enable_mba_mbps; +}; + +static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct rdt_fs_context, kfc); +} + DECLARE_STATIC_KEY_FALSE(rdt_enable_key); /** diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 14bed6af8377..604c0e3bcc83 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -34,13 +34,6 @@ #include "pseudo_lock_event.h" /* - * MSR_MISC_FEATURE_CONTROL register enables the modification of hardware - * prefetcher state. Details about this register can be found in the MSR - * tables for specific platforms found in Intel's SDM. - */ -#define MSR_MISC_FEATURE_CONTROL 0x000001a4 - -/* * The bits needed to disable hardware prefetching varies based on the * platform. During initialization we will discover which bits to use. */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 8388adf241b2..399601eda8e4 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -24,6 +24,7 @@ #include <linux/cpu.h> #include <linux/debugfs.h> #include <linux/fs.h> +#include <linux/fs_parser.h> #include <linux/sysfs.h> #include <linux/kernfs.h> #include <linux/seq_buf.h> @@ -32,6 +33,7 @@ #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/task_work.h> +#include <linux/user_namespace.h> #include <uapi/linux/magic.h> @@ -1858,46 +1860,6 @@ static void cdp_disable_all(void) cdpl2_disable(); } -static int parse_rdtgroupfs_options(char *data) -{ - char *token, *o = data; - int ret = 0; - - while ((token = strsep(&o, ",")) != NULL) { - if (!*token) { - ret = -EINVAL; - goto out; - } - - if (!strcmp(token, "cdp")) { - ret = cdpl3_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "cdpl2")) { - ret = cdpl2_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "mba_MBps")) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - ret = set_mba_sc(true); - else - ret = -EINVAL; - if (ret) - goto out; - } else { - ret = -EINVAL; - goto out; - } - } - - return 0; - -out: - pr_err("Invalid mount option \"%s\"\n", token); - - return ret; -} - /* * We don't allow rdtgroup directories to be created anywhere * except the root directory. Thus when looking for the rdtgroup @@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, struct kernfs_node **mon_data_kn); -static struct dentry *rdt_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +static int rdt_enable_ctx(struct rdt_fs_context *ctx) +{ + int ret = 0; + + if (ctx->enable_cdpl2) + ret = cdpl2_enable(); + + if (!ret && ctx->enable_cdpl3) + ret = cdpl3_enable(); + + if (!ret && ctx->enable_mba_mbps) + ret = set_mba_sc(true); + + return ret; +} + +static int rdt_get_tree(struct fs_context *fc) { + struct rdt_fs_context *ctx = rdt_fc2context(fc); struct rdt_domain *dom; struct rdt_resource *r; - struct dentry *dentry; int ret; cpus_read_lock(); @@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, * resctrl file system can only be mounted once. */ if (static_branch_unlikely(&rdt_enable_key)) { - dentry = ERR_PTR(-EBUSY); + ret = -EBUSY; goto out; } - ret = parse_rdtgroupfs_options(data); - if (ret) { - dentry = ERR_PTR(ret); + ret = rdt_enable_ctx(ctx); + if (ret < 0) goto out_cdp; - } closid_init(); ret = rdtgroup_create_info_dir(rdtgroup_default.kn); - if (ret) { - dentry = ERR_PTR(ret); - goto out_cdp; - } + if (ret < 0) + goto out_mba; if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, NULL, "mon_groups", &kn_mongrp); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_info; - } kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_mongrp; - } kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } ret = rdt_pseudo_lock_init(); - if (ret) { - dentry = ERR_PTR(ret); + if (ret) goto out_mondata; - } - dentry = kernfs_mount(fs_type, flags, rdt_root, - RDTGROUP_SUPER_MAGIC, NULL); - if (IS_ERR(dentry)) + ret = kernfs_get_tree(fc); + if (ret < 0) goto out_psl; if (rdt_alloc_capable) @@ -2059,14 +2024,95 @@ out_mongrp: kernfs_remove(kn_mongrp); out_info: kernfs_remove(kn_info); +out_mba: + if (ctx->enable_mba_mbps) + set_mba_sc(false); out_cdp: cdp_disable_all(); out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); + return ret; +} + +enum rdt_param { + Opt_cdp, + Opt_cdpl2, + Opt_mba_mpbs, + nr__rdt_params +}; + +static const struct fs_parameter_spec rdt_param_specs[] = { + fsparam_flag("cdp", Opt_cdp), + fsparam_flag("cdpl2", Opt_cdpl2), + fsparam_flag("mba_mpbs", Opt_mba_mpbs), + {} +}; + +static const struct fs_parameter_description rdt_fs_parameters = { + .name = "rdt", + .specs = rdt_param_specs, +}; + +static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, &rdt_fs_parameters, param, &result); + if (opt < 0) + return opt; - return dentry; + switch (opt) { + case Opt_cdp: + ctx->enable_cdpl3 = true; + return 0; + case Opt_cdpl2: + ctx->enable_cdpl2 = true; + return 0; + case Opt_mba_mpbs: + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return -EINVAL; + ctx->enable_mba_mbps = true; + return 0; + } + + return -EINVAL; +} + +static void rdt_fs_context_free(struct fs_context *fc) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + + kernfs_free_fs_context(fc); + kfree(ctx); +} + +static const struct fs_context_operations rdt_fs_context_ops = { + .free = rdt_fs_context_free, + .parse_param = rdt_parse_param, + .get_tree = rdt_get_tree, +}; + +static int rdt_init_fs_context(struct fs_context *fc) +{ + struct rdt_fs_context *ctx; + + ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->kfc.root = rdt_root; + ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; + fc->fs_private = &ctx->kfc; + fc->ops = &rdt_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(&init_user_ns); + fc->global = true; + return 0; } static int reset_all_ctrls(struct rdt_resource *r) @@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb) } static struct file_system_type rdt_fs_type = { - .name = "resctrl", - .mount = rdt_mount, - .kill_sb = rdt_kill_sb, + .name = "resctrl", + .init_fs_context = rdt_init_fs_context, + .parameters = &rdt_fs_parameters, + .kill_sb = rdt_kill_sb, }; static int mon_addfile(struct kernfs_node *parent_kn, const char *name, diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 50895c2f937d..2879e234e193 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -14,6 +14,7 @@ #include <linux/acpi.h> #include <linux/firmware-map.h> #include <linux/sort.h> +#include <linux/memory_hotplug.h> #include <asm/e820/api.h> #include <asm/setup.h> @@ -671,21 +672,18 @@ __init void e820__reallocate_tables(void) int size; size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries; - n = kmalloc(size, GFP_KERNEL); + n = kmemdup(e820_table, size, GFP_KERNEL); BUG_ON(!n); - memcpy(n, e820_table, size); e820_table = n; size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries; - n = kmalloc(size, GFP_KERNEL); + n = kmemdup(e820_table_kexec, size, GFP_KERNEL); BUG_ON(!n); - memcpy(n, e820_table_kexec, size); e820_table_kexec = n; size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries; - n = kmalloc(size, GFP_KERNEL); + n = kmemdup(e820_table_firmware, size, GFP_KERNEL); BUG_ON(!n); - memcpy(n, e820_table_firmware, size); e820_table_firmware = n; } @@ -778,7 +776,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) { u64 addr; - addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); + addr = memblock_phys_alloc(size, align); if (addr) { e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n"); @@ -881,6 +879,10 @@ static int __init parse_memopt(char *p) e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); +#ifdef CONFIG_MEMORY_HOTPLUG + max_mem_size = mem_size; +#endif + return 0; } early_param("mem", parse_memopt); @@ -1095,6 +1097,9 @@ void __init e820__reserve_resources(void) res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, SMP_CACHE_BYTES); + if (!res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*res) * e820_table->nr_entries); e820_res = res; for (i = 0; i < e820_table->nr_entries; i++) { diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 374a52fa5296..9b33904251a9 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -388,10 +388,6 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_EARLY_PRINTK_EFI - if (!strncmp(buf, "efi", 3)) - early_console_register(&early_efi_console, keep); -#endif #ifdef CONFIG_EARLY_PRINTK_USB_XDBC if (!strncmp(buf, "xdbc", 4)) early_xdbc_parse_parameter(buf + 4); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9cc108456d0b..d7432c2b1051 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -669,7 +669,7 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size) return false; } -static int init_xstate_size(void) +static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ unsigned int possible_xstate_size; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8257a59704ae..ef49517f6bb2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -49,7 +49,7 @@ int ftrace_arch_code_modify_post_process(void) union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; struct { - unsigned char e8; + unsigned char op; int offset; } __attribute__((packed)); }; @@ -59,20 +59,23 @@ static int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char * +ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; - calc.e8 = 0xe8; + calc.op = op; calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - /* - * No locking needed, this must be called via kstop_machine - * which in essence is like running on a uniprocessor machine. - */ return calc.code; } +static unsigned char * +ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + return ftrace_text_replace(0xe8, ip, addr); +} + static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -269,7 +272,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -static int is_ftrace_caller(unsigned long ip) +static nokprobe_inline int is_ftrace_caller(unsigned long ip) { if (ip == ftrace_update_func) return 1; @@ -299,6 +302,7 @@ int ftrace_int3_handler(struct pt_regs *regs) return 1; } +NOKPROBE_SYMBOL(ftrace_int3_handler); static int ftrace_write(unsigned long ip, const char *val, int size) { @@ -664,22 +668,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER) -static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) -{ - static union ftrace_code_union calc; - - /* Jmp not a call (ignore the .e8) */ - calc.e8 = 0xe9; - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - - /* - * ftrace external locks synchronize the access to the static variable. - */ - return calc.code; -} -#endif - /* Currently only x86_64 supports dynamic trampolines */ #ifdef CONFIG_X86_64 @@ -891,8 +879,8 @@ static void *addr_from_call(void *ptr) return NULL; /* Make sure this is a call */ - if (WARN_ON_ONCE(calc.e8 != 0xe8)) { - pr_warn("Expected e8, got %x\n", calc.e8); + if (WARN_ON_ONCE(calc.op != 0xe8)) { + pr_warn("Expected e8, got %x\n", calc.op); return NULL; } @@ -963,6 +951,11 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_graph_call(void); +static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) +{ + return ftrace_text_replace(0xe9, ip, addr); +} + static int ftrace_mod_jmp(unsigned long ip, void *func) { unsigned char *new; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 34a5c1715148..ff9bfd40429e 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -261,12 +261,8 @@ static int arch_build_bp_info(struct perf_event *bp, * allow kernel breakpoints at all. */ if (attr->bp_addr >= TASK_SIZE_MAX) { -#ifdef CONFIG_KPROBES if (within_kprobe_blacklist(attr->bp_addr)) return -EINVAL; -#else - return -EINVAL; -#endif } hw->type = X86_BREAKPOINT_EXECUTE; @@ -279,6 +275,7 @@ static int arch_build_bp_info(struct perf_event *bp, hw->len = X86_BREAKPOINT_LEN_X; return 0; } + /* fall through */ default: return -EINVAL; } diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 53917a3ebf94..22f60dd26460 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -218,6 +218,9 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, params->screen_info.ext_mem_k = 0; params->alt_mem_k = 0; + /* Always fill in RSDP: it is either 0 or a valid value */ + params->acpi_rsdp_addr = boot_params.acpi_rsdp_addr; + /* Default APM info */ memset(¶ms->apm_bios_info, 0, sizeof(params->apm_bios_info)); @@ -256,7 +259,6 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz, efi_setup_data_offset); #endif - /* Setup EDD info */ memcpy(params->eddbuf, boot_params.eddbuf, EDDMAXNR * sizeof(struct edd_info)); @@ -536,9 +538,17 @@ static int bzImage64_cleanup(void *loader_data) #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) { - return verify_pefile_signature(kernel, kernel_len, - VERIFY_USE_SECONDARY_KEYRING, - VERIFYING_KEXEC_PE_SIGNATURE); + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + } + return ret; } #endif diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5db08425063e..4ff6b4cdb941 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -467,6 +467,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) linux_regs->ip = addr; + /* fall through */ case 'D': case 'k': /* clear the trace bit */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4ba75afba527..a034cb808e7e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1028,6 +1028,13 @@ NOKPROBE_SYMBOL(kprobe_fault_handler); int __init arch_populate_kprobe_blacklist(void) { + int ret; + + ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); + if (ret) + return ret; + return kprobe_add_area_blacklist((unsigned long)__entry_text_start, (unsigned long)__entry_text_end); } diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 6adf6e6c2933..f14262952015 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -97,6 +97,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) } asm ( + ".pushsection .rodata\n" "optprobe_template_func:\n" ".global optprobe_template_entry\n" "optprobe_template_entry:\n" @@ -136,8 +137,7 @@ asm ( #endif ".global optprobe_template_end\n" "optprobe_template_end:\n" - ".type optprobe_template_func, @function\n" - ".size optprobe_template_func, .-optprobe_template_func\n"); + ".popsection\n"); void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 4c8acdfdc5a7..ceba408ea982 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -352,6 +352,8 @@ void machine_kexec(struct kimage *image) void arch_crash_save_vmcoreinfo(void) { + u64 sme_mask = sme_me_mask; + VMCOREINFO_NUMBER(phys_base); VMCOREINFO_SYMBOL(init_top_pgt); vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", @@ -364,6 +366,7 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); + VMCOREINFO_NUMBER(sme_mask); } /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 90ae0ca51083..58ac7be52c7a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -255,6 +255,18 @@ void arch_setup_new_exec(void) /* If cpuid was previously disabled for this task, re-enable it. */ if (test_thread_flag(TIF_NOCPUID)) enable_cpuid(); + + /* + * Don't inherit TIF_SSBD across exec boundary when + * PR_SPEC_DISABLE_NOEXEC is used. + */ + if (test_thread_flag(TIF_SSBD) && + task_spec_ssb_noexec(current)) { + clear_thread_flag(TIF_SSBD); + task_clear_spec_ssb_disable(current); + task_clear_spec_ssb_noexec(current); + speculation_ctrl_update(task_thread_info(current)->flags); + } } static inline void switch_to_bitmap(struct thread_struct *prev, diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e8796fcd7e5a..4bf46575568a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, void *ptr; if (!node_online(node) || !NODE_DATA(node)) { - ptr = memblock_alloc_from_nopanic(size, align, goal); + ptr = memblock_alloc_from(size, align, goal); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", cpu, size, __pa(ptr)); } else { - ptr = memblock_alloc_try_nid_nopanic(size, align, goal, - MEMBLOCK_ALLOC_ACCESSIBLE, - node); + ptr = memblock_alloc_try_nid(size, align, goal, + MEMBLOCK_ALLOC_ACCESSIBLE, + node); pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", cpu, size, node, __pa(ptr)); } return ptr; #else - return memblock_alloc_from_nopanic(size, align, goal); + return memblock_alloc_from(size, align, goal); #endif } @@ -171,7 +171,7 @@ void __init setup_per_cpu_areas(void) unsigned long delta; int rc; - pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%d\n", + pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%u\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ccd1f2a8e557..ce1a67b70168 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -56,6 +56,7 @@ #include <linux/stackprotector.h> #include <linux/gfp.h> #include <linux/cpuidle.h> +#include <linux/numa.h> #include <asm/acpi.h> #include <asm/desc.h> @@ -149,7 +150,7 @@ static inline void smpboot_restore_warm_reset_vector(void) */ static void smp_callin(void) { - int cpuid, phys_id; + int cpuid; /* * If waken up by an INIT in an 82489DX configuration @@ -160,11 +161,6 @@ static void smp_callin(void) cpuid = smp_processor_id(); /* - * (This works even if the APIC is not enabled.) - */ - phys_id = read_apic_id(); - - /* * the boot CPU has finished the init stage and is spinning * on callin_map until we finish. We are free to set up this * CPU, first the APIC. (this is probably redundant on most @@ -841,7 +837,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* reduce the number of lines printed when booting a large cpu count system */ static void announce_cpu(int cpu, int apicid) { - static int current_node = -1; + static int current_node = NUMA_NO_NODE; int node = early_cpu_to_node(cpu); static int width, node_width; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9b7c4ca8f0a7..d26f9e9c3d83 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -111,6 +111,7 @@ void ist_enter(struct pt_regs *regs) /* This code is a bit fragile. Test it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); } +NOKPROBE_SYMBOL(ist_enter); void ist_exit(struct pt_regs *regs) { @@ -880,12 +881,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { - unsigned long cr0; + unsigned long cr0 = read_cr0(); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_MATH_EMULATION - if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { + if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) { struct math_emu_info info = { }; cond_local_irq_enable(regs); @@ -897,7 +898,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif /* This should not happen. */ - cr0 = read_cr0(); if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) { /* Try to fix it up and carry on. */ write_cr0(cr0 & ~X86_CR0_TS); diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 3dc26f95d46e..9b9fd4826e7a 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -320,10 +320,14 @@ bool unwind_next_frame(struct unwind_state *state) } /* Get the next frame pointer: */ - if (state->regs) + if (state->next_bp) { + next_bp = state->next_bp; + state->next_bp = NULL; + } else if (state->regs) { next_bp = (unsigned long *)state->regs->bp; - else + } else { next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp); + } /* Move to the next frame if it's safe: */ if (!update_stack_state(state, next_bp)) @@ -398,6 +402,21 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, bp = get_frame_pointer(task, regs); + /* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer. + * That means that SP points into the middle of an incomplete frame: + * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we + * would have written a frame pointer if we hadn't crashed. + * Pretend that the frame is complete and that BP points to it, but save + * the real BP so that we can use it when looking for the next frame. + */ + if (regs && regs->ip == 0 && + (unsigned long *)kernel_stack_pointer(regs) >= first_frame) { + state->next_bp = bp; + bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; + } + /* Initialize stack info and make sure the frame data is accessible: */ get_stack_info(bp, state->task, &state->stack_info, &state->stack_mask); @@ -410,7 +429,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->bp < first_frame)) + (state->next_bp == NULL && state->bp < first_frame))) unwind_next_frame(state); } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 26038eacf74a..89be1be1790c 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -113,6 +113,20 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip) } #endif +/* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer, + * and we don't have unwind information for NULL. + * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function + * pointer into its parent and then continue normally from there. + */ +static struct orc_entry null_orc_entry = { + .sp_offset = sizeof(long), + .sp_reg = ORC_REG_SP, + .bp_reg = ORC_REG_UNDEFINED, + .type = ORC_TYPE_CALL +}; + static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -120,6 +134,9 @@ static struct orc_entry *orc_find(unsigned long ip) if (!orc_init) return NULL; + if (ip == 0) + return &null_orc_entry; + /* For non-init vmlinux addresses, use the fast lookup table: */ if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { unsigned int idx, start, stop; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 843feb94a950..ccf03416e434 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -745,6 +745,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * OPCODE1() of the "short" jmp which checks the same condition. */ opc1 = OPCODE2(insn) - 0x10; + /* fall through */ default: if (!is_cond_jmp_opcode(opc1)) return -ENOSYS; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0d618ee634ac..bad8c51fee6e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -31,7 +31,7 @@ #undef i386 /* in case the preprocessor is a 32bit one */ -OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) @@ -401,7 +401,7 @@ SECTIONS * Per-cpu symbols which need to be offset from __per_cpu_load * for the boot processor. */ -#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load +#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load INIT_PER_CPU(gdt_page); INIT_PER_CPU(irq_stack_union); |