diff options
Diffstat (limited to 'arch/x86')
31 files changed, 155 insertions, 118 deletions
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 11b13c4b43d5..f19856d95c60 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t) *t = result; return result; } -int time(time_t *t) +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c0b0e9e8aa66..800104c8a3ed 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -266,6 +266,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/include/asm/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 034caa1a084e..b24b1c8b3979 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -214,8 +214,6 @@ struct x86_emulate_ops { void (*halt)(struct x86_emulate_ctxt *ctxt); void (*wbinvd)(struct x86_emulate_ctxt *ctxt); int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); - void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ - void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 977de5fb968b..516798431328 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -536,7 +536,20 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * QEMU userspace and the guest each have their own FPU state. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. + * + * Note that while the PKRU state lives inside the fpu registers, + * it is switched out separately at VMENTER and VMEXIT time. The + * "guest_fpu" state here contains the guest FPU context, with the + * host PRKU bits. + */ + struct fpu user_fpu; struct fpu guest_fpu; + u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; @@ -1435,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b20f9d623f9c..8f09012b92e7 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -236,11 +236,23 @@ */ #define EARLY_IDT_HANDLER_SIZE 9 +/* + * xen_early_idt_handler_array is for Xen pv guests: for each entry in + * early_idt_handler_array it contains a prequel in the form of + * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to + * max 8 bytes. + */ +#define XEN_EARLY_IDT_HANDLER_SIZE 8 + #ifndef __ASSEMBLY__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; +#endif + /* * Load a segment. Fall back on loading the zero segment if something goes * wrong. This variant assumes that loading zero fully clears the segment. diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 509046cfa5ce..877b5c1a1b12 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -173,40 +173,43 @@ static inline void cr4_init_shadow(void) this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } +static inline void __cr4_set(unsigned long cr4) +{ + lockdep_assert_irqs_disabled(); + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Set in this cpu's CR4. */ static inline void cr4_set_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) { - cr4 |= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 | mask) != cr4) + __cr4_set(cr4 | mask); + local_irq_restore(flags); } /* Clear in this cpu's CR4. */ static inline void cr4_clear_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) { - cr4 &= ~mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 & ~mask) != cr4) + __cr4_set(cr4 & ~mask); + local_irq_restore(flags); } -static inline void cr4_toggle_bits(unsigned long mask) +static inline void cr4_toggle_bits_irqsoff(unsigned long mask) { unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - cr4 ^= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); + __cr4_set(cr4 ^ mask); } /* Read the CR4 shadow. */ diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index da1489cb64dc..1e901e421f2d 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generated-y += unistd_32.h generated-y += unistd_64.h generated-y += unistd_x32.h diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6a823a25eaff..750449152b04 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -542,8 +542,8 @@ error: } #ifdef CONFIG_GENERIC_IRQ_DEBUGFS -void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, - struct irq_data *irqd, int ind) +static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) { unsigned int cpu, vector, prev_cpu, prev_vector; struct apic_chip_data *apicd; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d58184b7cd44..bcb75dc97d44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x17: init_amd_zn(c); break; } - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) + /* + * Enable workaround for FXSAVE leak on CPUs + * without a XSaveErPtr feature + */ + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); cpu_detect_cache_sizes(c); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c6daec4bdba5..330b8462d426 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, #define F14H_MPB_MAX_SIZE 1824 #define F15H_MPB_MAX_SIZE 4096 #define F16H_MPB_MAX_SIZE 3458 +#define F17H_MPB_MAX_SIZE 3200 switch (family) { case 0x14: @@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, case 0x16: max_size = F16H_MPB_MAX_SIZE; break; + case 0x17: + max_size = F17H_MPB_MAX_SIZE; + break; default: max_size = F1XH_MPB_MAX_SIZE; break; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 97fb3e5737f5..bb988a24db92 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -299,7 +299,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, } if ((tifp ^ tifn) & _TIF_NOTSC) - cr4_toggle_bits(X86_CR4_TSD); + cr4_toggle_bits_irqsoff(X86_CR4_TSD); if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3d01df7d7cf6..05a97d5fe298 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -237,7 +237,7 @@ static void notrace start_secondary(void *unused) load_cr3(swapper_pg_dir); __flush_tlb_all(); #endif - + load_current_idt(); cpu_init(); x86_cpuinit.early_percpu_clock_init(); preempt_disable(); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e7d04d0c8008..abe74f779f9d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op) static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; @@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) #endif default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; @@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, #endif default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; @@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; @@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fninit"); - ctxt->ops->put_fpu(ctxt); return X86EMUL_CONTINUE; } @@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fnstcw %0": "+m"(fcw)); - ctxt->ops->put_fpu(ctxt); ctxt->dst.val = fcw; @@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fnstsw %0": "+m"(fsw)); - ctxt->ops->put_fpu(ctxt); ctxt->dst.val = fsw; @@ -4001,12 +3987,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->get_fpu(ctxt); - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); - ctxt->ops->put_fpu(ctxt); - if (rc != X86EMUL_CONTINUE) return rc; @@ -4049,8 +4031,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->get_fpu(ctxt); - if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) @@ -4066,8 +4046,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: - ctxt->ops->put_fpu(ctxt); - return rc; } @@ -5317,9 +5295,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; - ctxt->ops->get_fpu(ctxt); rc = asm_safe("fwait"); - ctxt->ops->put_fpu(ctxt); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4704aaf6d19e..8eba631c4dbd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6751,16 +6751,10 @@ static __init int hardware_setup(void) goto out; } - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eee8e7faf1af..faf843c9b916 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) srcu_read_unlock(&vcpu->kvm->srcu, idx); pagefault_enable(); kvm_x86_ops->vcpu_put(vcpu); - kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); } @@ -5252,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) emul_to_vcpu(ctxt)->arch.halt_request = 1; } -static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) -{ - preempt_disable(); - kvm_load_guest_fpu(emul_to_vcpu(ctxt)); -} - -static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) -{ - preempt_enable(); -} - static int emulator_intercept(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -5340,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = { .halt = emulator_halt, .wbinvd = emulator_wbinvd, .fix_hypercall = emulator_fix_hypercall, - .get_fpu = emulator_get_fpu, - .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, .set_nmi_mask = emulator_set_nmi_mask, @@ -6778,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + unsigned long apic_address; + + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (start <= apic_address && apic_address < end) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; @@ -6952,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); - kvm_load_guest_fpu(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt @@ -7297,12 +7296,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.complete_userspace_io)) { int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; vcpu->arch.complete_userspace_io = NULL; r = cui(vcpu); if (r <= 0) - goto out; + goto out_fpu; } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); @@ -7311,6 +7312,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) else r = vcpu_run(vcpu); +out_fpu: + kvm_put_guest_fpu(vcpu); out: post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -7704,32 +7707,25 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +/* Swap (qemu) user FPU context for the guest FPU context. */ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - if (vcpu->guest_fpu_loaded) - return; - - /* - * Restore all possible states in the guest, - * and assume host would use all available bits. - * Guest xcr0 would be loaded later. - */ - vcpu->guest_fpu_loaded = 1; - __kernel_fpu_begin(); + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ~XFEATURE_MASK_PKRU); + preempt_enable(); trace_kvm_fpu(1); } +/* When vcpu_run ends, restore user space FPU context. */ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - if (!vcpu->guest_fpu_loaded) - return; - - vcpu->guest_fpu_loaded = 0; + preempt_disable(); copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - __kernel_fpu_end(); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } @@ -7846,7 +7842,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) * To avoid have the INIT path from kvm_apic_has_events() that be * called with loaded FPU and does not let userspace fix the state. */ - kvm_put_guest_fpu(vcpu); + if (init_event) + kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, XFEATURE_MASK_BNDREGS); if (mpx_state_buffer) @@ -7855,6 +7852,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) XFEATURE_MASK_BNDCSR); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); + if (init_event) + kvm_load_guest_fpu(vcpu); } if (!init_event) { diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 30bc4812ceb8..9fe656c42aa5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,6 +1,7 @@ #include <linux/extable.h> #include <linux/uaccess.h> #include <linux/sched/debug.h> +#include <xen/xen.h> #include <asm/fpu/internal.h> #include <asm/traps.h> @@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) * Old CPUs leave the high bits of CS on the stack * undefined. I'm not sure which CPUs do this, but at least * the 486 DX works this way. + * Xen pv domains are not using the default __KERNEL_CS. */ - if (regs->cs != __KERNEL_CS) + if (!xen_pv_domain() && regs->cs != __KERNEL_CS) goto fail; /* diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/error.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/error.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/pte.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/pte.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index bb461cfd01ab..526536c81ddc 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void) * We should get host bridge information from ACPI unless the BIOS * doesn't support it. */ - if (acpi_os_get_root_pointer()) + if (!acpi_disabled && acpi_os_get_root_pointer()) return 0; #endif diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index c34bd8233f7c..5f64f30873e2 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) /* * UV NMI handler */ -int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) +static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) { struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi; int cpu = smp_processor_id(); @@ -1013,7 +1013,7 @@ void uv_nmi_init(void) } /* Setup HUB NMI info */ -void __init uv_nmi_setup_common(bool hubbed) +static void __init uv_nmi_setup_common(bool hubbed) { int size = sizeof(void *) * (1 << NODES_SHIFT); int cpu; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 84fcfde53f8f..5191de14f4df 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -226,8 +226,20 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) load_idt((const struct desc_ptr *)&ctxt->idt_limit); #endif +#ifdef CONFIG_X86_64 /* - * segment registers + * We need GSBASE restored before percpu access can work. + * percpu access can happen in exception handlers or in complicated + * helpers like load_gs_index(). + */ + wrmsrl(MSR_GS_BASE, ctxt->gs_base); +#endif + + fix_processor_context(); + + /* + * Restore segment registers. This happens after restoring the GDT + * and LDT, which happen in fix_processor_context(). */ #ifdef CONFIG_X86_32 loadsegment(es, ctxt->es); @@ -248,13 +260,14 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) load_gs_index(ctxt->gs); asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); + /* + * Restore FSBASE and user GSBASE after reloading the respective + * segment selectors. + */ wrmsrl(MSR_FS_BASE, ctxt->fs_base); - wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); #endif - fix_processor_context(); - do_fpu_end(); tsc_verify_tsc_adjust(true); x86_platform.restore_sched_clock_state(); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5b2b3f3f6531..f2414c6c5e7c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -622,7 +622,7 @@ static struct trap_array_entry trap_array[] = { { simd_coprocessor_error, xen_simd_coprocessor_error, false }, }; -static bool get_trap_addr(void **addr, unsigned int ist) +static bool __ref get_trap_addr(void **addr, unsigned int ist) { unsigned int nr; bool ist_okay = false; @@ -644,6 +644,14 @@ static bool get_trap_addr(void **addr, unsigned int ist) } } + if (nr == ARRAY_SIZE(trap_array) && + *addr >= (void *)early_idt_handler_array[0] && + *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) { + nr = (*addr - (void *)early_idt_handler_array[0]) / + EARLY_IDT_HANDLER_SIZE; + *addr = (void *)xen_early_idt_handler_array[nr]; + } + if (WARN_ON(ist != 0 && !ist_okay)) return false; @@ -1262,6 +1270,21 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_gdt(0); xen_init_irq_ops(); + + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + + /* + * Setup xen_vcpu early because idt_setup_early_handler needs it for + * local_irq_disable(), irqs_disabled(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); + + idt_setup_early_handler(); + xen_init_capabilities(); #ifdef CONFIG_X86_LOCAL_APIC @@ -1295,18 +1318,6 @@ asmlinkage __visible void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; - - /* - * Setup xen_vcpu early because start_kernel needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); local_irq_disable(); diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 8a10c9a9e2b5..417b339e5c8e 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,7 @@ #include <xen/interface/xen.h> +#include <linux/init.h> #include <linux/linkage.h> .macro xen_pv_trap name @@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat #endif xen_pv_trap hypervisor_callback + __INIT +ENTRY(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE + i = i + 1 + .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc + .endr +END(xen_early_idt_handler_array) + __FINIT + hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* * Xen64 iret frame: |