Diffstat (limited to 'arch/x86')
30 files changed, 210 insertions, 142 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..5ad92419be19 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2217,14 +2217,8 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
 	  If unsure, leave at the default value.
 
 config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs"
+	def_bool y
 	depends on SMP
-	---help---
-	  Say Y here to allow turning CPUs off and on. CPUs can be
-	  controlled through /sys/devices/system/cpu.
-	  ( Note: power management support will enable this option
-	    automatically on SMP systems. )
-	  Say N if you want to disable CPU hotplug.
 
 config BOOTPARAM_HOTPLUG_CPU0
 	bool "Set default setting of cpu0_hotpluggable"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d8b9d8ca4f8..a587805c6687 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -219,8 +219,12 @@ ifdef CONFIG_RETPOLINE
   # Additionally, avoid generating expensive indirect jumps which
   # are subject to retpolines for small number of switch cases.
   # clang turns off jump table generation by default when under
-  # retpoline builds, however, gcc does not for x86.
-  KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
+  # retpoline builds, however, gcc does not for x86. This has
+  # only been fixed starting from gcc stable version 8.4.0 and
+  # onwards, but not for older ones. See gcc bug #86952.
+  ifndef CONFIG_CC_IS_CLANG
+    KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables)
+  endif
 endif
 
 archscripts: scripts_basic
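A side note on the arch/x86/Makefile hunk above: with enough dense cases, gcc may lower a switch into an indirect jump through a jump table, and on gcc releases before 8.4.0 that indirect branch is not converted by the retpoline pass. The stand-alone sketch below (not part of the patch) can be built with gcc -O2 -S, with and without -fno-jump-tables, to compare the generated branches:

/*
 * With -O2, gcc may emit "jmp *table(,%rdi,8)" for this switch; with
 * -fno-jump-tables it falls back to compare-and-branch sequences,
 * which the retpoline thunks do cover.
 */
int classify(int c)
{
	switch (c) {
	case 0: return 10;
	case 1: return 11;
	case 2: return 12;
	case 3: return 13;
	case 4: return 14;
	case 5: return 15;
	case 6: return 16;
	case 7: return 17;
	default: return -1;
	}
}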
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index fd13655e0f9b..d2f184165934 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -120,8 +120,6 @@ static inline void console_init(void)
 
 void set_sev_encryption_mask(void);
 
-#endif
-
 /* acpi.c */
 #ifdef CONFIG_ACPI
 acpi_physical_address get_rsdp_addr(void);
@@ -135,3 +133,5 @@ int count_immovable_mem_regions(void);
 #else
 static inline int count_immovable_mem_regions(void) { return 0; }
 #endif
+
+#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 315a67b8896b..90154df8f125 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -13,8 +13,9 @@
  */
 
 #include <linux/types.h>
-#include <linux/kernel.h>
+#include <linux/compiler.h>
 #include <linux/errno.h>
+#include <linux/limits.h>
 #include <asm/asm.h>
 #include "ctype.h"
 #include "string.h"
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 6461a16b4559..e4ba467a9fc6 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -103,9 +103,13 @@ static int hv_cpu_init(unsigned int cpu)
 	u64 msr_vp_index;
 	struct hv_vp_assist_page **hvp =
 		&hv_vp_assist_page[smp_processor_id()];
 	void **input_arg;
+	struct page *pg;
 
 	input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
-	*input_arg = page_address(alloc_page(GFP_KERNEL));
+	pg = alloc_page(GFP_KERNEL);
+	if (unlikely(!pg))
+		return -ENOMEM;
+	*input_arg = page_address(pg);
 
 	hv_get_vp_index(msr_vp_index);
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index 3417110574c1..31c379c1da41 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _CPU_DEVICE_ID
-#define _CPU_DEVICE_ID 1
+#ifndef _ASM_X86_CPU_DEVICE_ID
+#define _ASM_X86_CPU_DEVICE_ID
 
 /*
  * Declare drivers belonging to specific x86 CPUs
@@ -9,8 +9,6 @@
 
 #include <linux/mod_devicetable.h>
 
-extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
-
 /*
  * Match specific microcode revisions.
  *
@@ -22,21 +20,22 @@ extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
  */
 
 struct x86_cpu_desc {
-	__u8	x86_family;
-	__u8	x86_vendor;
-	__u8	x86_model;
-	__u8	x86_stepping;
-	__u32	x86_microcode_rev;
+	u8	x86_family;
+	u8	x86_vendor;
+	u8	x86_model;
+	u8	x86_stepping;
+	u32	x86_microcode_rev;
 };
 
-#define INTEL_CPU_DESC(mod, step, rev) {			\
-	.x86_family = 6,					\
-	.x86_vendor = X86_VENDOR_INTEL,				\
-	.x86_model = mod,					\
-	.x86_stepping = step,					\
-	.x86_microcode_rev = rev,				\
+#define INTEL_CPU_DESC(model, stepping, revision) {		\
+	.x86_family		= 6,				\
+	.x86_vendor		= X86_VENDOR_INTEL,		\
+	.x86_model		= (model),			\
+	.x86_stepping		= (stepping),			\
+	.x86_microcode_rev	= (revision),			\
 }
 
+extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
 extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
 
-#endif
+#endif /* _ASM_X86_CPU_DEVICE_ID */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ce95b8cbd229..0e56ff7e4848 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -112,8 +112,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	 test_cpu_cap(c, bit))
 
 #define this_cpu_has(bit)						\
-	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
-	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
+	 x86_this_cpu_test_bit(bit,					\
+			       (unsigned long __percpu *)&cpu_info.x86_capability))
 
 /*
  * This macro is for detection of features which need kernel
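The cpu_device_id.h change above only reshuffles declarations and macro argument names; for reference, a hedged usage sketch of the API it exports (the table entry is a placeholder, not a real model/stepping/revision quirk):

#include <asm/cpu_device_id.h>

static const struct x86_cpu_desc quirk_ucodes[] = {
	INTEL_CPU_DESC(0x55, 4, 0x0000004e),	/* hypothetical entry */
	{},
};

static bool quirk_microcode_ok(void)
{
	/*
	 * True when the running CPU matches an entry and its microcode
	 * revision is at least the listed one.
	 */
	return x86_cpu_has_min_microcode_rev(quirk_ucodes);
}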
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a5db4475e72d..159b5988292f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -253,14 +253,14 @@ struct kvm_mmu_memory_cache {
  * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
  * by indirect shadow page can not be more than 15 bits.
  *
- * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
  * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
  */
 union kvm_mmu_page_role {
 	u32 word;
 	struct {
 		unsigned level:4;
-		unsigned cr4_pae:1;
+		unsigned gpte_is_8_bytes:1;
 		unsigned quadrant:2;
 		unsigned direct:1;
 		unsigned access:3;
@@ -350,6 +350,7 @@ struct kvm_mmu_page {
 };
 
 struct kvm_pio_request {
+	unsigned long linear_rip;
 	unsigned long count;
 	int in;
 	int port;
@@ -568,6 +569,7 @@ struct kvm_vcpu_arch {
 	bool tpr_access_reporting;
 	u64 ia32_xss;
 	u64 microcode_version;
+	u64 arch_capabilities;
 
 	/*
 	 * Paging state of the vcpu
@@ -1192,6 +1194,8 @@ struct kvm_x86_ops {
 	int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
 				   uint16_t *vmcs_version);
 	uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
+
+	bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1252,7 +1256,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				   gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
+unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h
index aaedd73ea2c6..df700a6cc869 100644
--- a/arch/x86/include/asm/processor-cyrix.h
+++ b/arch/x86/include/asm/processor-cyrix.h
@@ -3,19 +3,6 @@
  * NSC/Cyrix CPU indexed register access. Must be inlined instead of
  * macros to ensure correct access ordering
  * Access order is always 0x22 (=offset), 0x23 (=value)
- *
- * When using the old macros a line like
- *	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
- * gets expanded to:
- *	do {
- *		outb((CX86_CCR2), 0x22);
- *		outb((({
- *			outb((CX86_CCR2), 0x22);
- *			inb(0x23);
- *		}) | 0x88), 0x23);
- *	} while (0);
- *
- * which in fact violates the access order (= 0x22, 0x22, 0x23, 0x23).
  */
 
 static inline u8 getCx86(u8 reg)
@@ -29,11 +16,3 @@ static inline void setCx86(u8 reg, u8 data)
 	outb(reg, 0x22);
 	outb(data, 0x23);
 }
-
-#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); })
-
-#define setCx86_old(reg, data) do {	\
-	outb((reg), 0x22);		\
-	outb((data), 0x23);		\
-} while (0)
-
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 63b3393bd98e..c53682303c9c 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -77,7 +77,11 @@ static inline size_t real_mode_size_needed(void)
 	return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
 }
 
-void set_real_mode_mem(phys_addr_t mem, size_t size);
+static inline void set_real_mode_mem(phys_addr_t mem)
+{
+	real_mode_header = (struct real_mode_header *) __va(mem);
+}
+
 void reserve_real_mode(void);
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 58176b56354e..294ed4392a0e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "AGP: " fmt
 
 #include <linux/kernel.h>
+#include <linux/kcore.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
@@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
-#ifdef CONFIG_PROC_VMCORE
+#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
 /*
  * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
  * use the same range because it will remain configured in the northbridge.
@@ -66,20 +67,25 @@ int fix_aperture __initdata = 1;
  */
 static unsigned long aperture_pfn_start, aperture_page_count;
 
-static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+static int gart_mem_pfn_is_ram(unsigned long pfn)
 {
 	return likely((pfn < aperture_pfn_start) ||
 		      (pfn >= aperture_pfn_start + aperture_page_count));
 }
 
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void __init exclude_from_core(u64 aper_base, u32 aper_order)
 {
 	aperture_pfn_start = aper_base >> PAGE_SHIFT;
 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
-	WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+#ifdef CONFIG_PROC_VMCORE
+	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
+#ifdef CONFIG_PROC_KCORE
+	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
 }
 #else
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void exclude_from_core(u64 aper_base, u32 aper_order)
 {
 }
 #endif
@@ -474,7 +480,7 @@ out:
 		 * may have allocated the range over its e820 RAM
 		 * and fixed up the northbridge
 		 */
-		exclude_from_vmcore(last_aper_base, last_aper_order);
+		exclude_from_core(last_aper_base, last_aper_order);
 
 		return 1;
 	}
@@ -520,7 +526,7 @@ out:
 	 * overlap with the first kernel's memory. We can't access the
 	 * range through vmcore even though it should be part of the dump.
 	 */
-	exclude_from_vmcore(aper_alloc, aper_order);
+	exclude_from_core(aper_alloc, aper_order);
 
 	/* Fix up the north bridges */
 	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
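The callback wired up in the aperture_64.c change above is a plain range-exclusion predicate; reduced to its essentials (illustrative names, not kernel API), it is:

/*
 * A pfn counts as readable RAM only when it lies outside the excluded
 * aperture window [excl_start, excl_start + excl_count).
 */
static unsigned long excl_start = 0x100;
static unsigned long excl_count = 0x40;

static int pfn_is_ram(unsigned long pfn)
{
	return pfn < excl_start || pfn >= excl_start + excl_count;
}
/* pfn_is_ram(0x0ff) == 1, pfn_is_ram(0x100) == 0, pfn_is_ram(0x140) == 1 */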
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d12226f60168..1d9b8aaea06c 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -124,7 +124,7 @@ static void set_cx86_reorder(void)
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
 
 	/* Load/Store Serialize to mem access disable (=reorder it) */
-	setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
+	setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
 	/* set load/store serialize from 1GB to 4GB */
 	ccr3 |= 0xe0;
 	setCx86(CX86_CCR3, ccr3);
@@ -135,11 +135,11 @@ static void set_cx86_memwb(void)
 	pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
 	/* CCR2 bit 2: unlock NW bit */
-	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
 	/* set 'Not Write-through' */
 	write_cr0(read_cr0() | X86_CR0_NW);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
-	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
 }
 
 /*
@@ -153,14 +153,14 @@ static void geode_configure(void)
 	local_irq_save(flags);
 
 	/* Suspend on halt power saving and enable #SUSP pin */
-	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
 
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
 
 	/* FPU fast, DTE cache, Mem bypass */
-	setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
+	setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
 	setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
 
 	set_cx86_memwb();
@@ -296,7 +296,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 		/* GXm supports extended cpuid levels 'ala' AMD */
 		if (c->cpuid_level == 2) {
 			/* Enable cxMMX extensions (GX1 Datasheet 54) */
-			setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
+			setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
 
 			/*
 			 * GXm : 0x30 ... 0x5f GXm  datasheet 51
@@ -319,7 +319,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 		if (dir1 > 7) {
 			dir0_msn++;  /* M II */
 			/* Enable MMX extensions (App note 108) */
-			setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
+			setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
 		} else {
 			/* A 6x86MX - it has the bug. */
 			set_cpu_bug(c, X86_BUG_COMA);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 97f9ada9ceda..5260185cbf7b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -608,6 +608,8 @@ static int microcode_reload_late(void)
 	if (ret > 0)
 		microcode_check();
 
+	pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
+
 	return ret;
 }
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f33f11f69078..1573a0a6b525 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -501,11 +501,8 @@ out_unlock:
 void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
 {
 	unsigned long delay = msecs_to_jiffies(delay_ms);
-	struct rdt_resource *r;
 	int cpu;
 
-	r = &rdt_resources_all[RDT_RESOURCE_L3];
-
 	cpu = cpumask_any(&dom->cpu_mask);
 	dom->cqm_work_cpu = cpu;
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dfd3aca82c61..fb32925a2e62 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -905,6 +905,8 @@ int __init hpet_enable(void)
 		return 0;
 
 	hpet_set_mapping();
+	if (!hpet_virt_address)
+		return 0;
 
 	/*
 	 * Read the period and check for a sane value:
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff9bfd40429e..d73083021002 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -354,6 +354,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
 #endif
 	default:
 		WARN_ON_ONCE(1);
+		return -EINVAL;
 	}
 
 	/*
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3482460d984d..1bfe5c6e6cfe 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -598,8 +598,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 			mpf_base = base;
 			mpf_found = true;
 
-			pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
-				base, base + sizeof(*mpf) - 1, mpf);
+			pr_info("found SMP MP-table at [mem %#010lx-%#010lx]\n",
+				base, base + sizeof(*mpf) - 1);
 
 			memblock_reserve(base, sizeof(*mpf));
 			if (mpf->physptr)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 27c43525a05f..421899f6ad7b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -526,7 +526,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
 		new_config.enable = 0;
 	stimer->config.as_uint64 = new_config.as_uint64;
 
-	stimer_mark_pending(stimer, false);
+	if (stimer->config.enable)
+		stimer_mark_pending(stimer, false);
+
 	return 0;
 }
 
@@ -542,7 +544,10 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
 		stimer->config.enable = 0;
 	else if (stimer->config.auto_enable)
 		stimer->config.enable = 1;
-	stimer_mark_pending(stimer, false);
+
+	if (stimer->config.enable)
+		stimer_mark_pending(stimer, false);
+
 	return 0;
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7837ab001d80..eee455a8a612 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -182,7 +182,7 @@ struct kvm_shadow_walk_iterator {
 
 static const union kvm_mmu_page_role mmu_base_role_mask = {
 	.cr0_wp = 1,
-	.cr4_pae = 1,
+	.gpte_is_8_bytes = 1,
 	.nxe = 1,
 	.smep_andnot_wp = 1,
 	.smap_andnot_wp = 1,
@@ -2205,6 +2205,7 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
+
 #define for_each_valid_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
@@ -2215,12 +2216,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	for_each_valid_sp(_kvm, _sp, _gfn)				\
 		if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
 
+static inline bool is_ept_sp(struct kvm_mmu_page *sp)
+{
+	return sp->role.cr0_wp && sp->role.smap_andnot_wp;
+}
+
 /* @sp->gfn should be write-protected at the call site */
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			    struct list_head *invalid_list)
 {
-	if (sp->role.cr4_pae != !!is_pae(vcpu)
-	    || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
+	if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) ||
+	    vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return false;
 	}
@@ -2423,7 +2429,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	role.level = level;
 	role.direct = direct;
 	if (role.direct)
-		role.cr4_pae = 0;
+		role.gpte_is_8_bytes = true;
 	role.access = access;
 	if (!vcpu->arch.mmu->direct_map
 	    && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
@@ -4794,7 +4800,6 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 
 	role.base.access = ACC_ALL;
 	role.base.nxe = !!is_nx(vcpu);
-	role.base.cr4_pae = !!is_pae(vcpu);
 	role.base.cr0_wp = is_write_protection(vcpu);
 	role.base.smm = is_smm(vcpu);
 	role.base.guest_mode = is_guest_mode(vcpu);
@@ -4815,6 +4820,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 	role.base.ad_disabled = (shadow_accessed_mask == 0);
 	role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
 	role.base.direct = true;
+	role.base.gpte_is_8_bytes = true;
 
 	return role;
 }
@@ -4879,6 +4885,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 	role.base.smap_andnot_wp = role.ext.cr4_smap &&
 		!is_write_protection(vcpu);
 	role.base.direct = !is_paging(vcpu);
+	role.base.gpte_is_8_bytes = !!is_pae(vcpu);
 
 	if (!is_long_mode(vcpu))
 		role.base.level = PT32E_ROOT_LEVEL;
@@ -4918,18 +4925,26 @@ static union kvm_mmu_role
 kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
 				   bool execonly)
 {
-	union kvm_mmu_role role;
+	union kvm_mmu_role role = {0};
 
-	/* Base role is inherited from root_mmu */
-	role.base.word = vcpu->arch.root_mmu.mmu_role.base.word;
-	role.ext = kvm_calc_mmu_role_ext(vcpu);
+	/* SMM flag is inherited from root_mmu */
+	role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
 
 	role.base.level = PT64_ROOT_4LEVEL;
+	role.base.gpte_is_8_bytes = true;
 	role.base.direct = false;
 	role.base.ad_disabled = !accessed_dirty;
 	role.base.guest_mode = true;
 	role.base.access = ACC_ALL;
 
+	/*
+	 * WP=1 and NOT_WP=1 is an impossible combination, use WP and the
+	 * SMAP variation to denote shadow EPT entries.
+	 */
+	role.base.cr0_wp = true;
+	role.base.smap_andnot_wp = true;
+
+	role.ext = kvm_calc_mmu_role_ext(vcpu);
 	role.ext.execonly = execonly;
 
 	return role;
@@ -5179,7 +5194,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
 		 gpa, bytes, sp->role.word);
 
 	offset = offset_in_page(gpa);
-	pte_size = sp->role.cr4_pae ? 8 : 4;
+	pte_size = sp->role.gpte_is_8_bytes ? 8 : 4;
 
 	/*
 	 * Sometimes, the OS only writes the last one bytes to update status
@@ -5203,7 +5218,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
 	page_offset = offset_in_page(gpa);
 	level = sp->role.level;
 	*nspte = 1;
-	if (!sp->role.cr4_pae) {
+	if (!sp->role.gpte_is_8_bytes) {
 		page_offset <<= 1;	/* 32->64 */
 		/*
 		 * A 32-bit pde maps 4MB while the shadow pdes map
@@ -5393,10 +5408,12 @@ emulate:
 	 * This can happen if a guest gets a page-fault on data access but the HW
 	 * table walker is not able to read the instruction page (e.g instruction
 	 * page is not present in memory). In those cases we simply restart the
-	 * guest.
+	 * guest, with the exception of AMD Erratum 1096 which is unrecoverable.
 	 */
-	if (unlikely(insn && !insn_len))
-		return 1;
+	if (unlikely(insn && !insn_len)) {
+		if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu))
+			return 1;
+	}
 
 	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
@@ -5509,7 +5526,9 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
 			if (flush && lock_flush_tlb) {
-				kvm_flush_remote_tlbs(kvm);
+				kvm_flush_remote_tlbs_with_address(kvm,
+						start_gfn,
+						iterator.gfn - start_gfn + 1);
 				flush = false;
 			}
 			cond_resched_lock(&kvm->mmu_lock);
@@ -5517,7 +5536,8 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		}
 	}
 
 	if (flush && lock_flush_tlb) {
-		kvm_flush_remote_tlbs(kvm);
+		kvm_flush_remote_tlbs_with_address(kvm, start_gfn,
+						   end_gfn - start_gfn + 1);
 		flush = false;
 	}
 
@@ -6011,7 +6031,7 @@ out:
 /*
  * Calculate mmu pages needed for kvm.
  */
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
 {
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 9f6c855a0043..dd30dccd2ad5 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -29,10 +29,10 @@
 									\
 	role.word = __entry->role;					\
 									\
-	trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s"		\
+	trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s"	\
 			 " %snxe %sad root %u %s%c",			\
 			 __entry->gfn, role.level,			\
-			 role.cr4_pae ? " pae" : "",			\
+			 role.gpte_is_8_bytes ? 8 : 4,			\
 			 role.quadrant,					\
 			 role.direct ? " direct" : "",			\
 			 access_str[role.access],			\
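One detail of the mmu.c change worth spelling out: is_ept_sp() works because cr0_wp=1 together with smap_andnot_wp=1 cannot occur for an ordinary shadow page (the smap_andnot_wp variant is only set when write protection is off), so the pair is free to act as a shadow-EPT tag. A reduced sketch of the idea, keeping only the relevant role bits:

struct page_role {
	unsigned cr0_wp:1;
	unsigned smap_andnot_wp:1;
	unsigned gpte_is_8_bytes:1;
};

/*
 * Ordinary shadow pages never set both bits, so the combination
 * doubles as an "EPT shadow page" marker; for such pages the guest
 * PTE width check in __kvm_sync_page() is skipped.
 */
static inline int role_is_ept(const struct page_role *role)
{
	return role->cr0_wp && role->smap_andnot_wp;
}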
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b5b128a0a051..426039285fd1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7098,6 +7098,36 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 	return -ENODEV;
 }
 
+static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+{
+	bool is_user, smap;
+
+	is_user = svm_get_cpl(vcpu) == 3;
+	smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+
+	/*
+	 * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+	 *
+	 * In non SEV guest, hypervisor will be able to read the guest
+	 * memory to decode the instruction pointer when insn_len is zero
+	 * so we return true to indicate that decoding is possible.
+	 *
+	 * But in the SEV guest, the guest memory is encrypted with the
+	 * guest specific key and hypervisor will not be able to decode the
+	 * instruction pointer so we will not able to workaround it. Lets
+	 * print the error and request to kill the guest.
+	 */
+	if (is_user && smap) {
+		if (!sev_guest(vcpu->kvm))
+			return true;
+
+		pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+	}
+
+	return false;
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -7231,6 +7261,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
 	.nested_enable_evmcs = nested_enable_evmcs,
 	.nested_get_evmcs_version = nested_get_evmcs_version,
+
+	.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f24a2c225070..153e539c29c9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2585,6 +2585,11 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
 	    !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
 	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
 		return -EINVAL;
+
+	if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) ||
+	    is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))
+		return -EINVAL;
+
 	/*
 	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
 	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c73375e01ab8..ab432a930ae8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1683,12 +1683,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		msr_info->data = to_vmx(vcpu)->spec_ctrl;
 		break;
-	case MSR_IA32_ARCH_CAPABILITIES:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
-			return 1;
-		msr_info->data = to_vmx(vcpu)->arch_capabilities;
-		break;
 	case MSR_IA32_SYSENTER_CS:
 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
 		break;
@@ -1895,11 +1889,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
 					      MSR_TYPE_W);
 		break;
-	case MSR_IA32_ARCH_CAPABILITIES:
-		if (!msr_info->host_initiated)
-			return 1;
-		vmx->arch_capabilities = data;
-		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
 			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -4088,8 +4077,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
-	vmx->arch_capabilities = kvm_get_arch_capabilities();
-
 	vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
 
 	/* 22.2.1, 20.8.1 */
@@ -7409,6 +7396,11 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 static __init int hardware_setup(void)
 {
 	unsigned long host_bndcfgs;
@@ -7711,6 +7703,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.set_nested_state = NULL,
 	.get_vmcs12_pages = NULL,
 	.nested_enable_evmcs = NULL,
+	.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
 };
 
 static void vmx_cleanup_l1d_flush(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 1554cb45b393..a1e00d0a2482 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -190,7 +190,6 @@ struct vcpu_vmx {
 	u64		      msr_guest_kernel_gs_base;
 #endif
 
-	u64		      arch_capabilities;
 	u64		      spec_ctrl;
 
 	u32 vm_entry_controls_shadow;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 65e4559eef2f..099b851dabaf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1125,7 +1125,7 @@ static u32 msrs_to_save[] = {
 #endif
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
 	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
-	MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES,
+	MSR_IA32_SPEC_CTRL,
 	MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
 	MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
 	MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
@@ -1158,6 +1158,7 @@ static u32 emulated_msrs[] = {
 
 	MSR_IA32_TSC_ADJUST,
 	MSR_IA32_TSCDEADLINE,
+	MSR_IA32_ARCH_CAPABILITIES,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
 	MSR_IA32_MCG_CTL,
@@ -2443,6 +2444,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (msr_info->host_initiated)
 			vcpu->arch.microcode_version = data;
 		break;
+	case MSR_IA32_ARCH_CAPABILITIES:
+		if (!msr_info->host_initiated)
+			return 1;
+		vcpu->arch.arch_capabilities = data;
+		break;
 	case MSR_EFER:
 		return set_efer(vcpu, data);
 	case MSR_K7_HWCR:
@@ -2747,6 +2753,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_UCODE_REV:
 		msr_info->data = vcpu->arch.microcode_version;
 		break;
+	case MSR_IA32_ARCH_CAPABILITIES:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
+			return 1;
+		msr_info->data = vcpu->arch.arch_capabilities;
+		break;
 	case MSR_IA32_TSC:
 		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
 		break;
@@ -6523,14 +6535,27 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
 
+static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pio.count = 0;
+
+	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
+		return 1;
+
+	return kvm_skip_emulated_instruction(vcpu);
+}
+
 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
 			    unsigned short port)
 {
 	unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
 					    size, port, &val, 1);
-	/* do not return to emulator after return from userspace */
-	vcpu->arch.pio.count = 0;
+
+	if (!ret) {
+		vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
+		vcpu->arch.complete_userspace_io = complete_fast_pio_out;
+	}
 	return ret;
 }
 
@@ -6541,6 +6566,11 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
 	/* We should only ever be called with arch.pio.count equal to 1 */
 	BUG_ON(vcpu->arch.pio.count != 1);
 
+	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
+		vcpu->arch.pio.count = 0;
+		return 1;
+	}
+
 	/* For size less than 4 we merge, else we zero extend */
 	val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
 					: 0;
@@ -6553,7 +6583,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
 				 vcpu->arch.pio.port, &val, 1);
 	kvm_register_write(vcpu, VCPU_REGS_RAX, val);
 
-	return 1;
+	return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
@@ -6572,6 +6602,7 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
 		return ret;
 	}
 
+	vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
 	vcpu->arch.complete_userspace_io = complete_fast_pio_in;
 
 	return 0;
@@ -6579,16 +6610,13 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
 
 int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
 {
-	int ret = kvm_skip_emulated_instruction(vcpu);
+	int ret;
 
-	/*
-	 * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
-	 * KVM_EXIT_DEBUG here.
-	 */
 	if (in)
-		return kvm_fast_pio_in(vcpu, size, port) && ret;
+		ret = kvm_fast_pio_in(vcpu, size, port);
 	else
-		return kvm_fast_pio_out(vcpu, size, port) && ret;
+		ret = kvm_fast_pio_out(vcpu, size, port);
+	return ret && kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio);
 
@@ -8733,6 +8761,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
 	vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
 	kvm_vcpu_mtrr_init(vcpu);
 	vcpu_load(vcpu);
@@ -9429,13 +9458,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change)
 {
-	int nr_mmu_pages = 0;
-
 	if (!kvm->arch.n_requested_mmu_pages)
-		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
-
-	if (nr_mmu_pages)
-		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+		kvm_mmu_change_mmu_pages(kvm,
+				kvm_mmu_calculate_default_mmu_pages(kvm));
 
 	/*
 	 * Dirty logging tracks sptes in 4k granularity, meaning that large
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index 9baca3e054be..e7925d668b68 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -94,7 +94,7 @@ static unsigned do_csum(const unsigned char *buff, unsigned len)
 				: "m" (*(unsigned long *)buff),
 				"r" (zero),  "0" (result));
 			--count;
-				buff += 8;
+			buff += 8;
 		}
 		result = add32_with_carry(result>>32,
 						result&0xffffffff);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index db3165714521..dc726e07d8ba 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -230,7 +230,7 @@ bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
 /* Can we access it for direct reading/writing? Must be RAM: */
 int valid_phys_addr_range(phys_addr_t addr, size_t count)
 {
-	return addr + count <= __pa(high_memory);
+	return addr + count - 1 <= __pa(high_memory - 1);
 }
 
 /* Can we access it through mmap? Must be a valid physical address: */
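The mm/mmap.c hunk above switches the bound to an inclusive comparison: the last byte of the request (addr + count - 1) is checked against the last valid byte (__pa(high_memory - 1)), so the one-past-the-end address high_memory itself never has to be translated. As a stand-alone idiom (hypothetical helper, assuming count >= 1, not kernel code):

/*
 * Inclusive-bounds check: compare the last byte of the range against
 * the last valid byte, never forming the one-past-the-end address.
 */
static int range_valid(unsigned long addr, unsigned long count,
		       unsigned long last_valid_byte)
{
	return addr + count - 1 <= last_valid_byte;
}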
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 4fee5c3003ed..139b28a01ce4 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -77,7 +77,7 @@ static void __init pti_print_if_secure(const char *reason)
 		pr_info("%s\n", reason);
 }
 
-enum pti_mode {
+static enum pti_mode {
 	PTI_AUTO = 0,
 	PTI_FORCE_OFF,
 	PTI_FORCE_ON
@@ -602,7 +602,7 @@ static void pti_clone_kernel_text(void)
 	set_memory_global(start, (end_global - start) >> PAGE_SHIFT);
 }
 
-void pti_set_kernel_image_nonglobal(void)
+static void pti_set_kernel_image_nonglobal(void)
 {
 	/*
 	 * The identity map is created with PMDs, regardless of the
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 458a0e2bcc57..a25a9fd987a9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -449,7 +449,7 @@ void __init efi_free_boot_services(void)
 		 */
 		rm_size = real_mode_size_needed();
 		if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
-			set_real_mode_mem(start, rm_size);
+			set_real_mode_mem(start);
 			start += rm_size;
 			size -= rm_size;
 		}
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index d10105825d57..7dce39c8c034 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -15,15 +15,6 @@ u32 *trampoline_cr4_features;
 /* Hold the pgd entry used on booting additional CPUs */
 pgd_t trampoline_pgd_entry;
 
-void __init set_real_mode_mem(phys_addr_t mem, size_t size)
-{
-	void *base = __va(mem);
-
-	real_mode_header = (struct real_mode_header *) base;
-	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
-	       base, (unsigned long long)mem, size);
-}
-
 void __init reserve_real_mode(void)
 {
 	phys_addr_t mem;
@@ -42,7 +33,7 @@ void __init reserve_real_mode(void)
 	}
 
 	memblock_reserve(mem, size);
-	set_real_mode_mem(mem, size);
+	set_real_mode_mem(mem);
 }
 
 static void __init setup_real_mode(void)