summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--arch/powerpc/Kconfig6
-rw-r--r--arch/powerpc/Kconfig.debug6
-rw-r--r--arch/powerpc/boot/dts/acadia.dts2
-rw-r--r--arch/powerpc/configs/powernv_defconfig2
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/configs/skiroot_defconfig232
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-hash.h22
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h15
-rw-r--r--arch/powerpc/include/asm/cputable.h11
-rw-r--r--arch/powerpc/include/asm/eeh.h10
-rw-r--r--arch/powerpc/include/asm/emulated_ops.h4
-rw-r--r--arch/powerpc/include/asm/epapr_hcalls.h12
-rw-r--r--arch/powerpc/include/asm/exception-64s.h5
-rw-r--r--arch/powerpc/include/asm/hugetlb.h6
-rw-r--r--arch/powerpc/include/asm/hw_irq.h1
-rw-r--r--arch/powerpc/include/asm/kprobes.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h4
-rw-r--r--arch/powerpc/include/asm/mce.h4
-rw-r--r--arch/powerpc/include/asm/mmu_context.h50
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/opal-api.h3
-rw-r--r--arch/powerpc/include/asm/opal.h6
-rw-r--r--arch/powerpc/include/asm/paca.h11
-rw-r--r--arch/powerpc/include/asm/page_64.h6
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h1
-rw-r--r--arch/powerpc/include/asm/pgtable-be-types.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h4
-rw-r--r--arch/powerpc/include/asm/powernv.h4
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h27
-rw-r--r--arch/powerpc/include/asm/tlbflush.h2
-rw-r--r--arch/powerpc/include/asm/tm.h7
-rw-r--r--arch/powerpc/include/asm/topology.h8
-rw-r--r--arch/powerpc/include/asm/uaccess.h17
-rw-r--r--arch/powerpc/include/uapi/asm/cputable.h1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/cputable.c20
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c4
-rw-r--r--arch/powerpc/kernel/eeh.c46
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/powerpc/kernel/eeh_pe.c8
-rw-r--r--arch/powerpc/kernel/entry_64.S4
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S45
-rw-r--r--arch/powerpc/kernel/idle_book3s.S66
-rw-r--r--arch/powerpc/kernel/irq.c51
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c30
-rw-r--r--arch/powerpc/kernel/kprobes.c45
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c4
-rw-r--r--arch/powerpc/kernel/mce.c147
-rw-r--r--arch/powerpc/kernel/mce_power.c115
-rw-r--r--arch/powerpc/kernel/module_64.c3
-rw-r--r--arch/powerpc/kernel/paca.c12
-rw-r--r--arch/powerpc/kernel/pci_64.c4
-rw-r--r--arch/powerpc/kernel/process.c68
-rw-r--r--arch/powerpc/kernel/prom.c37
-rw-r--r--arch/powerpc/kernel/setup-common.c4
-rw-r--r--arch/powerpc/kernel/signal_32.c6
-rw-r--r--arch/powerpc/kernel/signal_64.c7
-rw-r--r--arch/powerpc/kernel/tau_6xx.c3
-rw-r--r--arch/powerpc/kernel/tm.S59
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S4
-rw-r--r--arch/powerpc/kernel/traps.c256
-rw-r--r--arch/powerpc/kernel/watchdog.c29
-rw-r--r--arch/powerpc/kvm/book3s_hv.c20
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S8
-rw-r--r--arch/powerpc/kvm/powerpc.c4
-rw-r--r--arch/powerpc/lib/sstep.c19
-rw-r--r--arch/powerpc/mm/Makefile6
-rw-r--r--arch/powerpc/mm/dump_hashpagetable.c2
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c10
-rw-r--r--arch/powerpc/mm/hash_utils_64.c1
-rw-r--r--arch/powerpc/mm/init_64.c21
-rw-r--r--arch/powerpc/mm/mmu_context.c9
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c25
-rw-r--r--arch/powerpc/mm/numa.c63
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/powerpc/mm/slb_low.S4
-rw-r--r--arch/powerpc/mm/tlb-radix.c84
-rw-r--r--arch/powerpc/net/bpf_jit64.h7
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c16
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c8
-rw-r--r--arch/powerpc/perf/hv-24x7.c2
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype19
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c4
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c42
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c180
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor.c17
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S5
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c29
-rw-r--r--arch/powerpc/platforms/powernv/pci.h1
-rw-r--r--arch/powerpc/platforms/powernv/setup.c26
-rw-r--r--arch/powerpc/platforms/powernv/smp.c59
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c19
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c8
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c2
-rw-r--r--arch/powerpc/platforms/pseries/vio.c2
-rw-r--r--arch/powerpc/sysdev/axonram.c2
-rw-r--r--arch/powerpc/sysdev/ipic.c4
-rw-r--r--arch/powerpc/xmon/xmon.c169
-rw-r--r--drivers/misc/cxl/api.c16
-rw-r--r--drivers/misc/cxl/context.c3
-rw-r--r--drivers/misc/cxl/cxl.h22
-rw-r--r--drivers/misc/cxl/debugfs.c29
-rw-r--r--drivers/misc/cxl/fault.c15
-rw-r--r--drivers/misc/cxl/file.c24
-rw-r--r--drivers/misc/cxl/native.c27
-rw-r--r--drivers/misc/cxl/pci.c88
-rw-r--r--drivers/mtd/devices/powernv_flash.c83
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c17
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c6
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c371
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h5
120 files changed, 2467 insertions, 744 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 05496622b4ef..ef03e6e16bdb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3185,6 +3185,10 @@
allowed (eg kernel_enable_fpu()/kernel_disable_fpu()).
There is some performance impact when enabling this.
+ ppc_tm= [PPC]
+ Format: {"off"}
+ Disable Hardware Transactional Memory
+
print-fatal-signals=
[KNL] debug: print fatal signals
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 809c468edab1..b8b75b423316 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -334,7 +334,7 @@ config PPC_OF_PLATFORM_PCI
default n
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
- depends on PPC32 || PPC_STD_MMU_64
+ depends on PPC32 || PPC_BOOK3S_64
def_bool y
config ARCH_SUPPORTS_UPROBES
@@ -721,7 +721,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
bool "64k page size"
- depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
+ depends on !PPC_FSL_BOOK3E && (44x || PPC_BOOK3S_64 || PPC_BOOK3E_64)
select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES
@@ -780,7 +780,7 @@ config FORCE_MAX_ZONEORDER
config PPC_SUBPAGE_PROT
bool "Support setting protections for 4k subpages"
- depends on PPC_STD_MMU_64 && PPC_64K_PAGES
+ depends on PPC_BOOK3S_64 && PPC_64K_PAGES
help
This option adds support for a system call to allow user programs
to set access permissions (read/write, readonly, or no access)
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index c86df246339e..b8a1eb42a38d 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -369,4 +369,10 @@ config PPC_HTDUMP
def_bool y
depends on PPC_PTDUMP && PPC_BOOK3S
+config PPC_FAST_ENDIAN_SWITCH
+ bool "Deprecated fast endian-switch syscall"
+ depends on DEBUG_KERNEL && PPC_BOOK3S_64
+ help
+ If you're unsure what this is, say N.
+
endmenu
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index 57291f61ffe7..86266159521e 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -183,7 +183,7 @@
usb@ef603000 {
compatible = "ohci-be";
reg = <0xef603000 0x80>;
- interrupts-parent = <&UIC0>;
+ interrupt-parent = <&UIC0>;
interrupts = <0xd 0x4 0xe 0x4>;
};
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index caee834760d2..4891bbed6258 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -192,6 +192,7 @@ CONFIG_IPMI_DEVICE_INTERFACE=y
CONFIG_IPMI_POWERNV=y
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=1024
+CONFIG_I2C_CHARDEV=y
CONFIG_DRM=y
CONFIG_DRM_AST=y
CONFIG_FIRMWARE_EDID=y
@@ -295,6 +296,7 @@ CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PPC_EMULATED_STATS=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 3d935969e5a2..bde2cd1005a2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -193,6 +193,7 @@ CONFIG_VIRTIO_CONSOLE=m
CONFIG_IBM_BSR=m
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=1024
+CONFIG_I2C_CHARDEV=y
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_OF=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
new file mode 100644
index 000000000000..6bd5e7261335
--- /dev/null
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -0,0 +1,232 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_JUMP_LABEL=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SIG_FORCE=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
+CONFIG_PPC_64K_PAGES=y
+CONFIG_SCHED_SMT=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=tty0 console=hvc0 powersave=off"
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+CONFIG_DNS_RESOLVER=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=m
+CONFIG_MTD_POWERNV_FLASH=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_EEPROM_AT24=y
+# CONFIG_CXL is not set
+CONFIG_BLK_DEV_SD=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IPR=m
+# CONFIG_SCSI_IPR_TRACE is not set
+# CONFIG_SCSI_IPR_DUMP is not set
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=y
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_E100=m
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_SFC=m
+# CONFIG_USB_NET_DRIVERS is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+CONFIG_HW_RANDOM=y
+CONFIG_TCG_TIS_I2C_NUVOTON=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_DRM_AST=m
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_GENERIC is not set
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=m
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_PCI=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_ISO9660_FS=m
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC16=y
+CONFIG_CRC_ITU_T=y
+CONFIG_LIBCRC32C=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHEDSTATS=y
+# CONFIG_FTRACE is not set
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_SECURITY=y
+CONFIG_IMA=y
+CONFIG_EVM=y
+# CONFIG_CRYPTO_ECHAINIV is not set
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_CMAC=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 2f6373144e2c..99c99bb04353 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -65,6 +65,28 @@ static inline void hash__flush_tlb_mm(struct mm_struct *mm)
{
}
+static inline void hash__local_flush_all_mm(struct mm_struct *mm)
+{
+ /*
+ * There's no Page Walk Cache for hash, so what is needed is
+ * the same as flush_tlb_mm(), which doesn't really make sense
+ * with hash. So the only thing we could do is flush the
+ * entire LPID! Punt for now, as it's not being used.
+ */
+ WARN_ON_ONCE(1);
+}
+
+static inline void hash__flush_all_mm(struct mm_struct *mm)
+{
+ /*
+ * There's no Page Walk Cache for hash, so what is needed is
+ * the same as flush_tlb_mm(), which doesn't really make sense
+ * with hash. So the only thing we could do is flush the
+ * entire LPID! Punt for now, as it's not being used.
+ */
+ WARN_ON_ONCE(1);
+}
+
static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 9b433a624bf3..af06c6fe8a9f 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -21,17 +21,20 @@ extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long sta
extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
+extern void radix__local_flush_all_mm(struct mm_struct *mm);
extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize);
extern void radix__tlb_flush(struct mmu_gather *tlb);
#ifdef CONFIG_SMP
extern void radix__flush_tlb_mm(struct mm_struct *mm);
+extern void radix__flush_all_mm(struct mm_struct *mm);
extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize);
#else
#define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm)
+#define radix__flush_all_mm(mm) radix__local_flush_all_mm(mm)
#define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr)
#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 72b925f97bab..70760d018bcd 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -57,6 +57,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
return hash__local_flush_tlb_page(vma, vmaddr);
}
+static inline void local_flush_all_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ return radix__local_flush_all_mm(mm);
+ return hash__local_flush_all_mm(mm);
+}
+
static inline void tlb_flush(struct mmu_gather *tlb)
{
if (radix_enabled())
@@ -79,9 +86,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
return radix__flush_tlb_page(vma, vmaddr);
return hash__flush_tlb_page(vma, vmaddr);
}
+
+static inline void flush_all_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ return radix__flush_all_mm(mm);
+ return hash__flush_all_mm(mm);
+}
#else
#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
+#define flush_all_mm(mm) local_flush_all_mm(mm)
#endif /* CONFIG_SMP */
/*
* flush the page walk cache for the address
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a9bf921f4efc..1ca26ca85ec6 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -206,7 +206,7 @@ enum {
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_ICSWX LONG_ASM_CONST(0x0020000000000000)
+/* Free LONG_ASM_CONST(0x0020000000000000) */
#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
@@ -215,6 +215,7 @@ enum {
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
+#define CPU_FTR_POWER9_DD20 LONG_ASM_CONST(0x8000000000000000)
#ifndef __ASSEMBLY__
@@ -451,7 +452,7 @@ enum {
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | \
CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
@@ -460,7 +461,7 @@ enum {
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
@@ -477,6 +478,7 @@ enum {
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
+#define CPU_FTRS_POWER9_DD20 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD20)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -495,7 +497,8 @@ enum {
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1)
+ CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
+ CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD20)
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9847ae3a12d1..5161c37dd039 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -93,7 +93,7 @@ struct eeh_pe {
struct pci_bus *bus; /* Top PCI bus for bus PE */
int check_count; /* Times of ignored error */
int freeze_count; /* Times of froze up */
- struct timeval tstamp; /* Time on first-time freeze */
+ time64_t tstamp; /* Time on first-time freeze */
int false_positives; /* Times of reported #ff's */
atomic_t pass_dev_cnt; /* Count of passed through devs */
struct eeh_pe *parent; /* Parent PE */
@@ -200,7 +200,6 @@ enum {
struct eeh_ops {
char *name;
int (*init)(void);
- int (*post_init)(void);
void* (*probe)(struct pci_dn *pdn, void *data);
int (*set_option)(struct eeh_pe *pe, int option);
int (*get_pe_addr)(struct eeh_pe *pe);
@@ -275,7 +274,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-int eeh_init(void);
+void eeh_probe_devices(void);
int __init eeh_ops_register(struct eeh_ops *ops);
int __exit eeh_ops_unregister(const char *name);
int eeh_check_failure(const volatile void __iomem *token);
@@ -321,10 +320,7 @@ static inline bool eeh_enabled(void)
return false;
}
-static inline int eeh_init(void)
-{
- return 0;
-}
+static inline void eeh_probe_devices(void) { }
static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
{
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f00e10e2a335..651e1354498e 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -55,6 +55,10 @@ extern struct ppc_emulated {
struct ppc_emulated_entry mfdscr;
struct ppc_emulated_entry mtdscr;
struct ppc_emulated_entry lq_stq;
+ struct ppc_emulated_entry lxvw4x;
+ struct ppc_emulated_entry lxvh8x;
+ struct ppc_emulated_entry lxvd2x;
+ struct ppc_emulated_entry lxvb16x;
#endif
} ppc_emulated;
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 334459ad145b..90863245df53 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -508,7 +508,7 @@ static unsigned long epapr_hypercall(unsigned long *in,
static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
unsigned long r;
@@ -520,7 +520,7 @@ static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
static inline long epapr_hypercall0(unsigned int nr)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
return epapr_hypercall(in, out, nr);
@@ -528,7 +528,7 @@ static inline long epapr_hypercall0(unsigned int nr)
static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -538,7 +538,7 @@ static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
unsigned long p2)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -549,7 +549,7 @@ static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
static inline long epapr_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -562,7 +562,7 @@ static inline long epapr_hypercall4(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3,
unsigned long p4)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a318973af05..b27205297e1d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -55,6 +55,11 @@
#endif
/*
+ * maximum recursive depth of MCE exceptions
+ */
+#define MAX_MCE_DEPTH 4
+
+/*
* EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
* EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
* in the save area so it's not necessary to overlap them. Could be used
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index b8a0fb442c64..795d825c2edd 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -40,12 +40,6 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
return radix__flush_hugetlb_page(vma, vmaddr);
}
-static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- if (radix_enabled())
- return radix__local_flush_hugetlb_page(vma, vmaddr);
-}
#else
static inline pte_t *hugepd_page(hugepd_t hpd)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index c1dd1929342d..d2ef36111518 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -31,6 +31,7 @@
#ifndef __ASSEMBLY__
+extern void replay_system_reset(void);
extern void __replay_interrupt(unsigned int vector);
extern void timer_interrupt(struct pt_regs *);
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 8814a7249ceb..9f3be5c8a4a3 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,8 +103,8 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
-extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int __is_active_jprobe(unsigned long addr);
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
#else
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 83596f32f50b..7cea76f11c26 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -104,10 +104,6 @@ struct kvmppc_host_state {
u8 napping;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /*
- * hwthread_req/hwthread_state pair is used to pull sibling threads
- * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
- */
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 190d69a7f701..3a1226e9b465 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -204,12 +204,10 @@ struct mce_error_info {
extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t nip,
- uint64_t addr);
+ uint64_t addr, uint64_t phys_addr);
extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode);
-extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
-
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 309592589e30..20eae6f76247 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -77,6 +77,52 @@ extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+ atomic_inc(&mm->context.active_cpus);
+}
+
+static inline void dec_mm_active_cpus(struct mm_struct *mm)
+{
+ atomic_dec(&mm->context.active_cpus);
+}
+
+static inline void mm_context_add_copro(struct mm_struct *mm)
+{
+ /*
+ * On hash, should only be called once over the lifetime of
+ * the context, as we can't decrement the active cpus count
+ * and flush properly for the time being.
+ */
+ inc_mm_active_cpus(mm);
+}
+
+static inline void mm_context_remove_copro(struct mm_struct *mm)
+{
+ /*
+ * Need to broadcast a global flush of the full mm before
+ * decrementing active_cpus count, as the next TLBI may be
+ * local and the nMMU and/or PSL need to be cleaned up.
+ * Should be rare enough so that it's acceptable.
+ *
+ * Skip on hash, as we don't know how to do the proper flush
+ * for the time being. Invalidations will remain global if
+ * used on hash.
+ */
+ if (radix_enabled()) {
+ flush_all_mm(mm);
+ dec_mm_active_cpus(mm);
+ }
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
+static inline void mm_context_add_copro(struct mm_struct *mm) { }
+static inline void mm_context_remove_copro(struct mm_struct *mm) { }
+#endif
+
+
extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
@@ -118,9 +164,13 @@ static inline void arch_dup_mmap(struct mm_struct *oldmm,
{
}
+#ifndef CONFIG_PPC_BOOK3S_64
static inline void arch_exit_mmap(struct mm_struct *mm)
{
}
+#else
+extern void arch_exit_mmap(struct mm_struct *mm);
+#endif
static inline void arch_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index f0ff384d4ca5..b1c42ac54d96 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -203,7 +203,7 @@ static inline unsigned long pte_update(struct mm_struct *mm,
if (!huge)
assert_pte_locked(mm, addr);
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (old & _PAGE_HASHPTE)
hpte_need_flush(mm, addr, ptep, old, huge);
#endif
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 450a60b81d2a..233c7504b1f2 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -188,6 +188,7 @@
#define OPAL_XIVE_DUMP 142
#define OPAL_XIVE_RESERVED3 143
#define OPAL_XIVE_RESERVED4 144
+#define OPAL_SIGNAL_SYSTEM_RESET 145
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
#define OPAL_NPU_MAP_LPAR 148
@@ -895,6 +896,8 @@ enum {
*/
OPAL_REINIT_CPUS_MMU_HASH = (1 << 2),
OPAL_REINIT_CPUS_MMU_RADIX = (1 << 3),
+
+ OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED = (1 << 4),
};
typedef struct oppanel_line {
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 726c23304a57..0c545f7fc77b 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -281,6 +281,8 @@ int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
int opal_sensor_group_clear(u32 group_hndl, int token);
+s64 opal_signal_system_reset(s32 cpu);
+
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
int depth, void *data);
@@ -304,11 +306,11 @@ extern void opal_notifier_enable(void);
extern void opal_notifier_disable(void);
extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
-extern int __opal_async_get_token(void);
extern int opal_async_get_token_interruptible(void);
-extern int __opal_async_release_token(int token);
extern int opal_async_release_token(int token);
extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg);
+extern int opal_async_wait_response_interruptible(uint64_t token,
+ struct opal_msg *msg);
extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
struct rtc_time;
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 04b60af027ae..c907ae23c956 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -91,14 +91,14 @@ struct paca_struct {
u8 cpu_start; /* At startup, processor spins until */
/* this becomes non-zero. */
u8 kexec_state; /* set when kexec down has irqs off */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
struct slb_shadow *slb_shadow_ptr;
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
u64 dscr_default; /* per-CPU default DSCR */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* Now, starting in cacheline 2, the exception save areas
*/
@@ -110,7 +110,7 @@ struct paca_struct {
u16 vmalloc_sllp;
u16 slb_cache_ptr;
u32 slb_cache[SLB_CACHE_ENTRIES];
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_BOOK3E
u64 exgen[8] __aligned(0x40);
@@ -192,7 +192,7 @@ struct paca_struct {
struct stop_sprs stop_sprs;
#endif
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* Non-maskable exceptions that are not performance critical */
u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */
u64 exmc[EX_SIZE]; /* used for machine checks */
@@ -210,6 +210,7 @@ struct paca_struct {
*/
u16 in_mce;
u8 hmi_event_available; /* HMI event is available */
+ u8 hmi_p9_special_emu; /* HMI P9 special emulation */
#endif
/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index c4d9654bd637..56234c6fcd61 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -117,21 +117,21 @@ extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
#endif /* __ASSEMBLY__ */
#else
#define slice_init()
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
#define slice_set_user_psize(mm, psize) \
do { \
(mm)->context.user_psize = (psize); \
(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
} while (0)
-#else /* CONFIG_PPC_STD_MMU_64 */
+#else /* !CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_64K_PAGES
#define get_slice_psize(mm, addr) MMU_PAGE_64K
#else /* CONFIG_PPC_64K_PAGES */
#define get_slice_psize(mm, addr) MMU_PAGE_4K
#endif /* !CONFIG_PPC_64K_PAGES */
#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
-#endif /* !CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#define slice_set_range_psize(mm, start, len, psize) \
slice_set_user_psize((mm), (psize))
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 0b8aa1fe2d5f..62ed83db04ae 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -218,6 +218,7 @@ struct pci_dn {
#endif
struct list_head child_list;
struct list_head list;
+ struct resource holes[PCI_SRIOV_NUM_BARS];
};
/* Get the pointer to a device_node's pci_dn */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index 67e7e3d990f4..03ab638593e1 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -76,7 +76,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 369a164b545c..5d2c38d26396 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -49,13 +49,13 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
#endif
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/cmpxchg.h>
static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index f62797702300..dc5f6a5d4575 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -22,6 +22,8 @@ extern void pnv_npu2_destroy_context(struct npu_context *context,
extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
unsigned long *flags, unsigned long *status,
int count);
+
+void pnv_tm_init(void);
#else
static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
@@ -36,6 +38,8 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
unsigned long *status, int count) {
return -ENODEV;
}
+
+static inline void pnv_tm_init(void) { }
#endif
#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 36f3e41c9fbe..ae94b3626b6c 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -774,9 +774,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
#ifdef CONFIG_PPC_BOOK3E
#define FIXUP_ENDIAN
#else
+/*
+ * This version may be used in in HV or non-HV context.
+ * MSR[EE] must be disabled.
+ */
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
- b $+44; /* Skip trampoline if endian is good */ \
+ b 191f; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
.long 0x00004039; /* li r10,0 */ \
@@ -786,7 +790,26 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
.long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
- .long 0x2400004c /* rfid */
+ .long 0x2400004c; /* rfid */ \
+191:
+
+/*
+ * This version that may only be used with MSR[HV]=1
+ * - Does not clear MSR[RI], so more robust.
+ * - Slightly smaller and faster.
+ */
+#define FIXUP_ENDIAN_HV \
+ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
+ b 191f; /* Skip trampoline if endian is good */ \
+ .long 0xa600607d; /* mfmsr r11 */ \
+ .long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
+ .long 0xa64b5a7d; /* mthsrr0 r10 */ \
+ .long 0xa64b7b7d; /* mthsrr1 r11 */ \
+ .long 0x2402004c; /* hrfid */ \
+191:
#endif /* !CONFIG_PPC_BOOK3E */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 13dbcd41885e..7d5a157c7832 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -77,7 +77,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
flush_tlb_mm(mm);
}
-#elif defined(CONFIG_PPC_STD_MMU_64)
+#elif defined(CONFIG_PPC_BOOK3S_64)
#include <asm/book3s/64/tlbflush.h>
#else
#error Unsupported MMU type
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index 82e06ca3a49b..79d4156554b4 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -11,12 +11,13 @@
extern void tm_enable(void);
extern void tm_reclaim(struct thread_struct *thread,
- unsigned long orig_msr, uint8_t cause);
+ uint8_t cause);
extern void tm_reclaim_current(uint8_t cause);
-extern void tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr);
+extern void tm_recheckpoint(struct thread_struct *thread);
extern void tm_abort(uint8_t cause);
extern void tm_save_sprs(struct thread_struct *thread);
extern void tm_restore_sprs(struct thread_struct *thread);
+extern bool tm_suspend_disabled;
+
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 2d84bca8d053..34da2c5fd1df 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -96,6 +96,14 @@ static inline int prrn_is_enabled(void)
}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
+#if defined(CONFIG_PPC_SPLPAR)
+extern int timed_topology_update(int nsecs);
+#else
+#define timed_topology_update(nsecs)
+#endif /* CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */
+
#include <asm-generic/topology.h>
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9c0e60ca1666..e34f15e727d9 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -173,6 +173,23 @@ do { \
extern long __get_user_bad(void);
+/*
+ * This does an atomic 128 byte aligned load from userspace.
+ * Upto caller to do enable_kernel_vmx() before calling!
+ */
+#define __get_user_atomic_128_aligned(kaddr, uaddr, err) \
+ __asm__ __volatile__( \
+ "1: lvx 0,0,%1 # get user\n" \
+ " stvx 0,0,%2 # put kernel\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ EX_TABLE(1b, 3b) \
+ : "=r" (err) \
+ : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
+
#define __get_user_asm(x, addr, err, op) \
__asm__ __volatile__( \
"1: "op" %1,0(%2) # get_user\n" \
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 4d877144f377..b3b64cba71ec 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -48,6 +48,7 @@
#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */
#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */
#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
+#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* TM w/out suspended state */
/*
* IMPORTANT!
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8cfb20e38cfe..200623e71474 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -208,7 +208,7 @@ int main(void)
OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACASLBCACHE, paca_struct, slb_cache);
OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
@@ -230,7 +230,7 @@ int main(void)
OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 760872916013..171820190de7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -547,6 +547,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
+ { /* Power9 DD2.0 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0200,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD20,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
{ /* Power9 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 7275fed271af..58e3b51de31c 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -634,7 +634,7 @@ static struct dt_cpu_feature_match __initdata
{"no-execute", feat_enable, 0},
{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
- {"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX},
+ {"coprocessor-icswx", feat_enable, 0},
{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
{"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
{"wait", feat_enable, 0},
@@ -735,6 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+ else if ((version & 0xffffefff) == 0x004e0200)
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD20;
}
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 116000b45531..cbca0a667682 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -972,6 +972,18 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
+void eeh_probe_devices(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct pci_dn *pdn;
+
+ /* Enable EEH for all adapters */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ pdn = hose->pci_data;
+ traverse_pci_dn(pdn, eeh_ops->probe, NULL);
+ }
+}
+
/**
* eeh_init - EEH initialization
*
@@ -987,22 +999,11 @@ static struct notifier_block eeh_reboot_nb = {
* Even if force-off is set, the EEH hardware is still enabled, so that
* newer systems can boot.
*/
-int eeh_init(void)
+static int eeh_init(void)
{
struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
- static int cnt = 0;
int ret = 0;
- /*
- * We have to delay the initialization on PowerNV after
- * the PCI hierarchy tree has been built because the PEs
- * are figured out based on PCI devices instead of device
- * tree nodes
- */
- if (machine_is(powernv) && cnt++ <= 0)
- return ret;
-
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
@@ -1028,22 +1029,7 @@ int eeh_init(void)
if (ret)
return ret;
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
-
- /*
- * Call platform post-initialization. Actually, It's good chance
- * to inform platform that EEH is ready to supply service if the
- * I/O cache stuff has been built up.
- */
- if (eeh_ops->post_init) {
- ret = eeh_ops->post_init();
- if (ret)
- return ret;
- }
+ eeh_probe_devices();
if (eeh_enabled())
pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
@@ -1757,10 +1743,6 @@ static int eeh_enable_dbgfs_set(void *data, u64 val)
else
eeh_add_flag(EEH_FORCE_DISABLED);
- /* Notify the backend */
- if (eeh_ops->post_init)
- eeh_ops->post_init();
-
return 0;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 8b840191df59..57d54163bf94 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -623,7 +623,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
struct eeh_rmv_data *rmv_data)
{
struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
- struct timeval tstamp;
+ time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 2e8d1b2b5af4..2d4956e97aa9 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -526,16 +526,16 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
void eeh_pe_update_time_stamp(struct eeh_pe *pe)
{
- struct timeval tstamp;
+ time64_t tstamp;
if (!pe) return;
if (pe->freeze_count <= 0) {
pe->freeze_count = 0;
- do_gettimeofday(&pe->tstamp);
+ pe->tstamp = ktime_get_seconds();
} else {
- do_gettimeofday(&tstamp);
- if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
+ tstamp = ktime_get_seconds();
+ if (tstamp - pe->tstamp > 3600) {
pe->tstamp = tstamp;
pe->freeze_count = 0;
}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 4a0fd4f40245..3320bcac7192 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -539,7 +539,7 @@ _GLOBAL(_switch)
std r6,PACACURRENT(r13) /* Set new 'current' */
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
b 2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -588,7 +588,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
slbmte r7,r0
isync
2:
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 48da0f5d2f7f..6879aed47377 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -113,6 +113,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
cmpwi cr3,r10,2 ; \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
+ KVMTEST_PR(n) ; \
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#else
#define IDLETEST NOTEST
@@ -129,6 +130,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
+TRAMP_KVM(PACA_EXNMI, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
@@ -232,7 +234,7 @@ BEGIN_FTR_SECTION
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
/* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,4
+ cmpwi r10,MAX_MCE_DEPTH
bgt 2f /* Check if we hit limit of 4 */
std r11,GPR1(r1) /* Save r1 on the stack. */
std r11,0(r1) /* make stack chain pointer */
@@ -605,7 +607,7 @@ EXC_COMMON_BEGIN(slb_miss_common)
cmpdi cr5,r11,MSR_RI
crset 4*cr0+eq
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_MMU_FTR_SECTION
bl slb_allocate
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
@@ -865,12 +867,6 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
#define LOAD_SYSCALL_HANDLER(reg) \
__LOAD_HANDLER(reg, system_call_common)
-#define SYSCALL_FASTENDIAN_TEST \
-BEGIN_FTR_SECTION \
- cmpdi r0,0x1ebe ; \
- beq- 1f ; \
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
-
/*
* After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
* and HMT_MEDIUM.
@@ -885,6 +881,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
rfid ; \
b . ; /* prevent speculative execution */
+#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
+#define SYSCALL_FASTENDIAN_TEST \
+BEGIN_FTR_SECTION \
+ cmpdi r0,0x1ebe ; \
+ beq- 1f ; \
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
+
#define SYSCALL_FASTENDIAN \
/* Fast LE/BE switch system call */ \
1: mfspr r12,SPRN_SRR1 ; \
@@ -893,6 +896,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
mr r13,r9 ; \
rfid ; /* return to userspace */ \
b . ; /* prevent speculative execution */
+#else
+#define SYSCALL_FASTENDIAN_TEST
+#define SYSCALL_FASTENDIAN
+#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
#if defined(CONFIG_RELOCATABLE)
/*
@@ -1010,6 +1017,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
+ cmpdi cr0,r3,0
+
/* Windup the stack. */
/* Move original HSRR0 and HSRR1 into the respective regs */
ld r9,_MSR(r1)
@@ -1026,10 +1035,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
REST_8GPRS(2, r1)
REST_GPR(10, r1)
ld r11,_CCR(r1)
+ REST_2GPRS(12, r1)
+ bne 1f
mtcr r11
REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
+ ld r1,GPR1(r1)
+ hrfid
+
+1: mtcr r11
+ REST_GPR(11, r1)
ld r1,GPR1(r1)
/*
@@ -1042,8 +1056,9 @@ hmi_exception_after_realmode:
EXCEPTION_PROLOG_0(PACA_EXGEN)
b tramp_real_hmi_exception
-EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
-
+EXC_COMMON_BEGIN(hmi_exception_common)
+EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
+ ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
@@ -1482,7 +1497,7 @@ USE_TEXT_SECTION()
*/
.balign IFETCH_ALIGN_BYTES
do_hash_page:
- #ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
lis r0,DSISR_BAD_FAULT_64S@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
@@ -1513,7 +1528,7 @@ do_hash_page:
/* Reload DSISR into r4 for the DABR check below */
ld r4,_DSISR(r1)
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
@@ -1542,7 +1557,7 @@ handle_dabr_fault:
12: b ret_from_except_lite
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1125c9be9e06..59657783d1d5 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -112,12 +112,14 @@ power9_save_additional_sprs:
std r4, STOP_HFSCR(r13)
mfspr r3, SPRN_MMCRA
- mfspr r4, SPRN_MMCR1
+ mfspr r4, SPRN_MMCR0
std r3, STOP_MMCRA(r13)
- std r4, STOP_MMCR1(r13)
+ std r4, _MMCR0(r1)
- mfspr r3, SPRN_MMCR2
- std r3, STOP_MMCR2(r13)
+ mfspr r3, SPRN_MMCR1
+ mfspr r4, SPRN_MMCR2
+ std r3, STOP_MMCR1(r13)
+ std r4, STOP_MMCR2(r13)
blr
power9_restore_additional_sprs:
@@ -135,11 +137,14 @@ power9_restore_additional_sprs:
ld r4, STOP_MMCRA(r13)
mtspr SPRN_HFSCR, r3
mtspr SPRN_MMCRA, r4
- /* We have already restored PACA_MMCR0 */
- ld r3, STOP_MMCR1(r13)
- ld r4, STOP_MMCR2(r13)
- mtspr SPRN_MMCR1, r3
- mtspr SPRN_MMCR2, r4
+
+ ld r3, _MMCR0(r1)
+ ld r4, STOP_MMCR1(r13)
+ mtspr SPRN_MMCR0, r3
+ mtspr SPRN_MMCR1, r4
+
+ ld r3, STOP_MMCR2(r13)
+ mtspr SPRN_MMCR2, r3
blr
/*
@@ -319,20 +324,13 @@ enter_winkle:
/*
* r3 - PSSCR value corresponding to the requested stop state.
*/
+power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-power_enter_stop_kvm_rm:
- /*
- * This is currently unused because POWER9 KVM does not have to
- * gather secondary threads into sibling mode, but the code is
- * here in case that function is required.
- *
- * Tell KVM we're entering idle.
- */
+ /* Tell KVM we're entering idle */
li r4,KVM_HWTHREAD_IN_IDLE
/* DO THIS IN REAL MODE! See comment above. */
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
-power_enter_stop:
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
@@ -357,6 +355,7 @@ power_enter_stop:
b pnv_wakeup_noloss
.Lhandle_esl_ec_set:
+BEGIN_FTR_SECTION
/*
* POWER9 DD2 can incorrectly set PMAO when waking up after a
* state-loss idle. Saving and restoring MMCR0 over idle is a
@@ -364,6 +363,7 @@ power_enter_stop:
*/
mfspr r4,SPRN_MMCR0
std r4,_MMCR0(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20)
/*
* Check if the requested state is a deep idle state.
@@ -496,18 +496,6 @@ pnv_powersave_wakeup_mce:
b pnv_powersave_wakeup
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-kvm_start_guest_check:
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beqlr
- b kvm_start_guest
-#endif
-
/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -532,9 +520,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-BEGIN_FTR_SECTION
- bl kvm_start_guest_check
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
+ b kvm_start_guest
+1:
#endif
/* Return SRR1 from power7_nap() */
@@ -555,15 +549,17 @@ pnv_restore_hyp_resource_arch300:
* then clear bit 60 in MMCRA to ensure the PMU starts running.
*/
blt cr3,1f
+BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
ld r1,PACAR1(r13)
+ ld r4,_MMCR0(r1)
+ mtspr SPRN_MMCR0,r4
+END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20)
mfspr r4,SPRN_MMCRA
ori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
xori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
- ld r4,_MMCR0(r1)
- mtspr SPRN_MMCR0,r4
1:
/*
* POWER ISA 3. Use PSSCR to determine if we
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4e65bf82f5e0..b7a84522e652 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -143,6 +143,13 @@ notrace unsigned int __check_irq_replay(void)
*/
unsigned char happened = local_paca->irq_happened;
+ /*
+ * We are responding to the next interrupt, so interrupt-off
+ * latencies should be reset here.
+ */
+ trace_hardirqs_on();
+ trace_hardirqs_off();
+
if (happened & PACA_IRQ_HARD_DIS) {
/* Clear bit 0 which we wouldn't clear otherwise */
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
@@ -270,6 +277,7 @@ notrace void arch_local_irq_restore(unsigned long en)
#endif /* CONFIG_TRACE_IRQFLAGS */
set_soft_enabled(0);
+ trace_hardirqs_off();
/*
* Check if anything needs to be re-emitted. We haven't
@@ -279,6 +287,7 @@ notrace void arch_local_irq_restore(unsigned long en)
replay = __check_irq_replay();
/* We can soft-enable now */
+ trace_hardirqs_on();
set_soft_enabled(1);
/*
@@ -394,11 +403,19 @@ bool prep_irq_for_idle_irqsoff(void)
/*
* Take the SRR1 wakeup reason, index into this table to find the
* appropriate irq_happened bit.
+ *
+ * Sytem reset exceptions taken in idle state also come through here,
+ * but they are NMI interrupts so do not need to wait for IRQs to be
+ * restored, and should be taken as early as practical. These are marked
+ * with 0xff in the table. The Power ISA specifies 0100b as the system
+ * reset interrupt reason.
*/
+#define IRQ_SYSTEM_RESET 0xff
+
static const u8 srr1_to_lazyirq[0x10] = {
0, 0, 0,
PACA_IRQ_DBELL,
- 0,
+ IRQ_SYSTEM_RESET,
PACA_IRQ_DBELL,
PACA_IRQ_DEC,
0,
@@ -407,15 +424,43 @@ static const u8 srr1_to_lazyirq[0x10] = {
PACA_IRQ_HMI,
0, 0, 0, 0, 0 };
+void replay_system_reset(void)
+{
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ regs.trap = 0x100;
+ get_paca()->in_nmi = 1;
+ system_reset_exception(&regs);
+ get_paca()->in_nmi = 0;
+}
+EXPORT_SYMBOL_GPL(replay_system_reset);
+
void irq_set_pending_from_srr1(unsigned long srr1)
{
unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
+ u8 reason = srr1_to_lazyirq[idx];
+
+ /*
+ * Take the system reset now, which is immediately after registers
+ * are restored from idle. It's an NMI, so interrupts need not be
+ * re-enabled before it is taken.
+ */
+ if (unlikely(reason == IRQ_SYSTEM_RESET)) {
+ replay_system_reset();
+ return;
+ }
/*
* The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
- * so this can be called unconditionally with srr1 wake reason.
+ * so this can be called unconditionally with the SRR1 wake
+ * reason as returned by the idle code, which uses 0 to mean no
+ * interrupt.
+ *
+ * If a future CPU was to designate this as an interrupt reason,
+ * then a new index for no interrupt must be assigned.
*/
- local_paca->irq_happened |= srr1_to_lazyirq[idx];
+ local_paca->irq_happened |= reason;
}
#endif /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 6c089d9757c9..4b1f34f685b1 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -25,6 +25,21 @@
#include <linux/preempt.h>
#include <linux/ftrace.h>
+/*
+ * This is called from ftrace code after invoking registered handlers to
+ * disambiguate regs->nip changes done by jprobes and livepatch. We check if
+ * there is an active jprobe at the provided address (mcount location).
+ */
+int __is_active_jprobe(unsigned long addr)
+{
+ if (!preemptible()) {
+ struct kprobe *p = raw_cpu_read(current_kprobe);
+ return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+ }
+
+ return 0;
+}
+
static nokprobe_inline
int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, unsigned long orig_nip)
@@ -65,6 +80,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
/* Disable irq for emulating a breakpoint and avoiding preempt */
local_irq_save(flags);
hard_irq_disable();
+ preempt_disable();
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
@@ -86,12 +102,18 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs))
__skip_singlestep(p, regs, kcb, orig_nip);
- /*
- * If pre_handler returns !0, it sets regs->nip and
- * resets current kprobe.
- */
+ else {
+ /*
+ * If pre_handler returns !0, it sets regs->nip and
+ * resets current kprobe. In this case, we still need
+ * to restore irq, but not preemption.
+ */
+ local_irq_restore(flags);
+ return;
+ }
}
end:
+ preempt_enable_no_resched();
local_irq_restore(flags);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bebc3007a793..a20ce12adab1 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,12 +43,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
-int is_current_kprobe_addr(unsigned long addr)
-{
- struct kprobe *p = kprobe_running();
- return (p && (unsigned long)p->addr == addr) ? 1 : 0;
-}
-
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@@ -239,7 +233,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
+static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
unsigned int insn = *p->ainsn.insn;
@@ -261,9 +255,20 @@ int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
*/
printk("Can't step on instruction %x\n", insn);
BUG();
- } else if (ret == 0)
- /* This instruction can't be boosted */
- p->ainsn.boostable = -1;
+ } else {
+ /*
+ * If we haven't previously emulated this instruction, then it
+ * can't be boosted. Note it down so we don't try to do so again.
+ *
+ * If, however, we had emulated this instruction in the past,
+ * then this is just an error with the current run (for
+ * instance, exceptions due to a load/store). We return 0 so
+ * that this is now single-stepped, but continue to try
+ * emulating it in subsequent probe hits.
+ */
+ if (unlikely(p->ainsn.boostable != 1))
+ p->ainsn.boostable = -1;
+ }
return ret;
}
@@ -639,24 +644,22 @@ NOKPROBE_SYMBOL(setjmp_pre_handler);
void __used jprobe_return(void)
{
- asm volatile("trap" ::: "memory");
+ asm volatile("jprobe_return_trap:\n"
+ "trap\n"
+ ::: "memory");
}
NOKPROBE_SYMBOL(jprobe_return);
-static void __used jprobe_return_end(void)
-{
-}
-NOKPROBE_SYMBOL(jprobe_return_end);
-
int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- /*
- * FIXME - we should ideally be validating that we got here 'cos
- * of the "trap" in jprobe_return() above, before restoring the
- * saved regs...
- */
+ if (regs->nip != ppc_kallsyms_lookup_name("jprobe_return_trap")) {
+ pr_debug("longjmp_break_handler NIP (0x%lx) does not match jprobe_return_trap (0x%lx)\n",
+ regs->nip, ppc_kallsyms_lookup_name("jprobe_return_trap"));
+ return 0;
+ }
+
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
/* It's OK to start function graph tracing again */
unpause_graph_tracing();
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 5c12e21d0d1a..49d34d7271e7 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -360,7 +360,7 @@ void default_machine_kexec(struct kimage *image)
/* NOTREACHED */
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/* Values we need to export to the second kernel via the device tree. */
static unsigned long htab_base;
static unsigned long htab_size;
@@ -402,4 +402,4 @@ static int __init export_htab_values(void)
return 0;
}
late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9b2ea7e71c06..742e4658c5dc 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -39,11 +39,21 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+/* Queue for delayed MCE UE events. */
+static DEFINE_PER_CPU(int, mce_ue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
+ mce_ue_event_queue);
+
static void machine_check_process_queued_event(struct irq_work *work);
+void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
+
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
{
@@ -82,7 +92,7 @@ static void mce_set_error_info(struct machine_check_event *mce,
*/
void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
- uint64_t nip, uint64_t addr)
+ uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
@@ -140,6 +150,11 @@ void save_mce_event(struct pt_regs *regs, long handled,
} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
+ if (phys_addr != ULONG_MAX) {
+ mce->u.ue_error.physical_address_provided = true;
+ mce->u.ue_error.physical_address = phys_addr;
+ machine_check_ue_event(mce);
+ }
}
return;
}
@@ -193,6 +208,26 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_ue_event(struct machine_check_event *evt)
+{
+ int index;
+
+ index = __this_cpu_inc_return(mce_ue_count) - 1;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(mce_ue_count);
+ return;
+ }
+ memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+
+ /* Queue work to process this event later. */
+ schedule_work(&mce_ue_event_work);
+}
+
/*
* Queue up the MCE event which then can be handled later.
*/
@@ -215,7 +250,39 @@ void machine_check_queue_event(void)
/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
}
-
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+static void machine_process_ue_event(struct work_struct *work)
+{
+ int index;
+ struct machine_check_event *evt;
+
+ while (__this_cpu_read(mce_ue_count) > 0) {
+ index = __this_cpu_read(mce_ue_count) - 1;
+ evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+ * This should probably queued elsewhere, but
+ * oh! well
+ */
+ if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.physical_address_provided) {
+ unsigned long pfn;
+
+ pfn = evt->u.ue_error.physical_address >>
+ PAGE_SHIFT;
+ memory_failure(pfn, SIGBUS, 0);
+ } else
+ pr_warn("Failed to identify bad address from "
+ "where the uncorrectable error (UE) "
+ "was generated\n");
+ }
+#endif
+ __this_cpu_dec(mce_ue_count);
+ }
+}
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
@@ -223,6 +290,7 @@ void machine_check_queue_event(void)
static void machine_check_process_queued_event(struct irq_work *work)
{
int index;
+ struct machine_check_event *evt;
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -232,8 +300,8 @@ static void machine_check_process_queued_event(struct irq_work *work)
*/
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
- machine_check_print_event_info(
- this_cpu_ptr(&mce_event_queue[index]), false);
+ evt = this_cpu_ptr(&mce_event_queue[index]);
+ machine_check_print_event_info(evt, false);
__this_cpu_dec(mce_queue_count);
}
}
@@ -340,7 +408,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
printk("%s Effective address: %016llx\n",
level, evt->u.ue_error.effective_address);
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physical address: %016llx\n",
+ printk("%s Physical address: %016llx\n",
level, evt->u.ue_error.physical_address);
break;
case MCE_ERROR_TYPE_SLB:
@@ -411,45 +479,6 @@ void machine_check_print_event_info(struct machine_check_event *evt,
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
-uint64_t get_mce_fault_addr(struct machine_check_event *evt)
-{
- switch (evt->error_type) {
- case MCE_ERROR_TYPE_UE:
- if (evt->u.ue_error.effective_address_provided)
- return evt->u.ue_error.effective_address;
- break;
- case MCE_ERROR_TYPE_SLB:
- if (evt->u.slb_error.effective_address_provided)
- return evt->u.slb_error.effective_address;
- break;
- case MCE_ERROR_TYPE_ERAT:
- if (evt->u.erat_error.effective_address_provided)
- return evt->u.erat_error.effective_address;
- break;
- case MCE_ERROR_TYPE_TLB:
- if (evt->u.tlb_error.effective_address_provided)
- return evt->u.tlb_error.effective_address;
- break;
- case MCE_ERROR_TYPE_USER:
- if (evt->u.user_error.effective_address_provided)
- return evt->u.user_error.effective_address;
- break;
- case MCE_ERROR_TYPE_RA:
- if (evt->u.ra_error.effective_address_provided)
- return evt->u.ra_error.effective_address;
- break;
- case MCE_ERROR_TYPE_LINK:
- if (evt->u.link_error.effective_address_provided)
- return evt->u.link_error.effective_address;
- break;
- default:
- case MCE_ERROR_TYPE_UNKNOWN:
- break;
- }
- return 0;
-}
-EXPORT_SYMBOL(get_mce_fault_addr);
-
/*
* This function is called in real mode. Strictly no printk's please.
*
@@ -470,6 +499,34 @@ long hmi_exception_realmode(struct pt_regs *regs)
{
__this_cpu_inc(irq_stat.hmi_exceptions);
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
+ if (pvr_version_is(PVR_POWER9)) {
+ unsigned long hmer = mfspr(SPRN_HMER);
+
+ /* Do we have the debug bit set */
+ if (hmer & PPC_BIT(17)) {
+ hmer &= ~PPC_BIT(17);
+ mtspr(SPRN_HMER, hmer);
+
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * user space
+ */
+ if (user_mode(regs))
+ local_paca->hmi_p9_special_emu = 1;
+
+ /*
+ * Don't bother going to OPAL if that's the
+ * only relevant bit.
+ */
+ if (!(hmer & mfspr(SPRN_HMEER)))
+ return local_paca->hmi_p9_special_emu;
+ }
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
wait_for_subcore_guest_exit();
if (ppc_md.hmi_exception_early)
@@ -477,5 +534,5 @@ long hmi_exception_realmode(struct pt_regs *regs)
wait_for_tb_resync();
- return 0;
+ return 1;
}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 72f153c6f3fa..644f7040b91c 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -27,6 +27,36 @@
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/pte-walk.h>
+#include <asm/sstep.h>
+#include <asm/exception-64s.h>
+
+/*
+ * Convert an address related to an mm to a PFN. NOTE: we are in real
+ * mode, we could potentially race with page table updates.
+ */
+static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+{
+ pte_t *ptep;
+ unsigned long flags;
+ struct mm_struct *mm;
+
+ if (user_mode(regs))
+ mm = current->mm;
+ else
+ mm = &init_mm;
+
+ local_irq_save(flags);
+ if (mm == current->mm)
+ ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
+ else
+ ptep = find_init_mm_pte(addr, NULL);
+ local_irq_restore(flags);
+ if (!ptep || pte_special(*ptep))
+ return ULONG_MAX;
+ return pte_pfn(*ptep);
+}
static void flush_tlb_206(unsigned int num_sets, unsigned int action)
{
@@ -128,7 +158,7 @@ void __flush_tlb_power9(unsigned int action)
{
unsigned int num_sets;
- if (radix_enabled())
+ if (early_radix_enabled())
num_sets = POWER9_TLB_SETS_RADIX;
else
num_sets = POWER9_TLB_SETS_HASH;
@@ -138,7 +168,7 @@ void __flush_tlb_power9(unsigned int action)
/* flush SLBs and reload */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static void flush_and_reload_slb(void)
{
struct slb_shadow *slb;
@@ -185,7 +215,7 @@ static void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -421,9 +451,45 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
{ 0, false, 0, 0, 0, 0 } };
+static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+ uint64_t *phys_addr)
+{
+ /*
+ * Carefully look at the NIP to determine
+ * the instruction to analyse. Reading the NIP
+ * in real-mode is tricky and can lead to recursive
+ * faults
+ */
+ int instr;
+ unsigned long pfn, instr_addr;
+ struct instruction_op op;
+ struct pt_regs tmp = *regs;
+
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
+ instr = *(unsigned int *)(instr_addr);
+ if (!analyse_instr(&op, &tmp, instr)) {
+ pfn = addr_to_pfn(regs, op.ea);
+ *addr = op.ea;
+ *phys_addr = (pfn << PAGE_SHIFT);
+ return 0;
+ }
+ /*
+ * analyse_instr() might fail if the instruction
+ * is not a load/store, although this is unexpected
+ * for load/store errors or if we got the NIP
+ * wrong
+ */
+ }
+ *addr = 0;
+ return -1;
+}
+
static int mce_handle_ierror(struct pt_regs *regs,
const struct mce_ierror_table table[],
- struct mce_error_info *mce_err, uint64_t *addr)
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
uint64_t srr1 = regs->msr;
int handled = 0;
@@ -475,8 +541,22 @@ static int mce_handle_ierror(struct pt_regs *regs,
}
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
- if (table[i].nip_valid)
+ if (table[i].nip_valid) {
*addr = regs->nip;
+ if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ unsigned long pfn;
+
+ if (get_paca()->in_mce < MAX_MCE_DEPTH) {
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ *phys_addr =
+ (pfn << PAGE_SHIFT);
+ handled = 1;
+ }
+ }
+ }
+ }
return handled;
}
@@ -489,7 +569,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
static int mce_handle_derror(struct pt_regs *regs,
const struct mce_derror_table table[],
- struct mce_error_info *mce_err, uint64_t *addr)
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
uint64_t dsisr = regs->dsisr;
int handled = 0;
@@ -555,7 +636,17 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
-
+ else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ /*
+ * We do a maximum of 4 nested MCE calls, see
+ * kernel/exception-64s.h
+ */
+ if (get_paca()->in_mce < MAX_MCE_DEPTH)
+ if (!mce_find_instr_ea_and_pfn(regs, addr,
+ phys_addr))
+ handled = 1;
+ }
found = 1;
}
@@ -592,19 +683,21 @@ static long mce_handle_error(struct pt_regs *regs,
const struct mce_ierror_table itable[])
{
struct mce_error_info mce_err = { 0 };
- uint64_t addr;
+ uint64_t addr, phys_addr;
uint64_t srr1 = regs->msr;
long handled;
if (SRR1_MC_LOADSTORE(srr1))
- handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
+ &phys_addr);
else
- handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
+ handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
+ &phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
- save_mce_event(regs, handled, &mce_err, regs->nip, addr);
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
return handled;
}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 0b0f89685b67..759104b99f9f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -429,7 +429,8 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
/* Find this stub, or if that fails, the next avail. entry */
stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
- BUG_ON(i >= num_stubs);
+ if (WARN_ON(i >= num_stubs))
+ return 0;
if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
return (unsigned long)&stubs[i];
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 2ff2b8a19f71..5d38d5ea9a24 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -90,7 +90,7 @@ static inline void free_lppacas(void) { }
#endif /* CONFIG_PPC_BOOK3S */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* 3 persistent SLBs are registered here. The buffer will be zero
@@ -135,11 +135,11 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
return s;
}
-#else /* CONFIG_PPC_STD_MMU_64 */
+#else /* !CONFIG_PPC_BOOK3S_64 */
static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/* The Paca is an array with one entry per processor. Each contains an
* lppaca, which contains the information shared between the
@@ -170,9 +170,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
#ifdef CONFIG_PPC_BOOK3E
/* For now -- if we have threads this will be adjusted later */
@@ -271,7 +271,7 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
#endif
-#else /* CONFIG_PPC_BOOK3S */
+#else /* !CONFIG_PPC_BOOK3S */
return;
#endif
}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 932b9741aa8f..15ce0306b092 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -90,14 +90,14 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
* to do an appropriate TLB flush here too
*/
if (bus->self) {
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
struct resource *res = bus->resource[0];
#endif
pr_debug("IO unmapping for PCI-PCI bridge %s\n",
pci_name(bus->self));
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
__flush_hash_table_range(&init_mm, res->start + _IO_BASE,
res->end + _IO_BASE + 1);
#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a0c74bbf3454..4083b737decb 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -77,6 +77,13 @@
extern unsigned long _get_SP(void);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Are we running in "Suspend disabled" mode? If so we have to block any
+ * sigreturn that would get us into suspended state, and we also warn in some
+ * other paths that we should never reach with suspend disabled.
+ */
+bool tm_suspend_disabled __ro_after_init = false;
+
static void check_if_tm_restore_required(struct task_struct *tsk)
{
/*
@@ -97,9 +104,23 @@ static inline bool msr_tm_active(unsigned long msr)
{
return MSR_TM_ACTIVE(msr);
}
+
+static bool tm_active_with_fp(struct task_struct *tsk)
+{
+ return msr_tm_active(tsk->thread.regs->msr) &&
+ (tsk->thread.ckpt_regs.msr & MSR_FP);
+}
+
+static bool tm_active_with_altivec(struct task_struct *tsk)
+{
+ return msr_tm_active(tsk->thread.regs->msr) &&
+ (tsk->thread.ckpt_regs.msr & MSR_VEC);
+}
#else
static inline bool msr_tm_active(unsigned long msr) { return false; }
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
+static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
+static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
bool strict_msr_control;
@@ -232,7 +253,7 @@ EXPORT_SYMBOL(enable_kernel_fp);
static int restore_fp(struct task_struct *tsk)
{
- if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
+ if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
return 1;
@@ -314,7 +335,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
static int restore_altivec(struct task_struct *tsk)
{
if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) {
+ (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -853,6 +874,10 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if (!MSR_TM_SUSPENDED(mfmsr()))
return;
+ giveup_all(container_of(thr, struct task_struct, thread));
+
+ tm_reclaim(thr, cause);
+
/*
* If we are in a transaction and FP is off then we can't have
* used FP inside that transaction. Hence the checkpointed
@@ -871,10 +896,6 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
memcpy(&thr->ckvr_state, &thr->vr_state,
sizeof(struct thread_vr_state));
-
- giveup_all(container_of(thr, struct task_struct, thread));
-
- tm_reclaim(thr, thr->ckpt_regs.msr, cause);
}
void tm_reclaim_current(uint8_t cause)
@@ -903,6 +924,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
if (!MSR_TM_ACTIVE(thr->regs->msr))
goto out_and_saveregs;
+ WARN_ON(tm_suspend_disabled);
+
TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
tsk->pid, thr->regs->nip,
@@ -923,11 +946,9 @@ out_and_saveregs:
tm_save_sprs(thr);
}
-extern void __tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr);
+extern void __tm_recheckpoint(struct thread_struct *thread);
-void tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr)
+void tm_recheckpoint(struct thread_struct *thread)
{
unsigned long flags;
@@ -946,15 +967,13 @@ void tm_recheckpoint(struct thread_struct *thread,
*/
tm_restore_sprs(thread);
- __tm_recheckpoint(thread, orig_msr);
+ __tm_recheckpoint(thread);
local_irq_restore(flags);
}
static inline void tm_recheckpoint_new_task(struct task_struct *new)
{
- unsigned long msr;
-
if (!cpu_has_feature(CPU_FTR_TM))
return;
@@ -973,13 +992,11 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
tm_restore_sprs(&new->thread);
return;
}
- msr = new->thread.ckpt_regs.msr;
/* Recheckpoint to restore original checkpointed register state. */
- TM_DEBUG("*** tm_recheckpoint of pid %d "
- "(new->msr 0x%lx, new->origmsr 0x%lx)\n",
- new->pid, new->thread.regs->msr, msr);
+ TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n",
+ new->pid, new->thread.regs->msr);
- tm_recheckpoint(&new->thread, msr);
+ tm_recheckpoint(&new->thread);
/*
* The checkpointed state has been restored but the live state has
@@ -1155,7 +1172,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
}
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1163,7 +1180,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
__flush_tlb_pending(batch);
batch->active = 0;
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
switch_booke_debug_regs(&new->thread.debug);
@@ -1209,7 +1226,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
last = _switch(old_thread, new_thread);
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
batch = this_cpu_ptr(&ppc64_tlb_batch);
@@ -1238,7 +1255,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
: : "r"(dummy_copy_buffer), "r"(0));
}
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
}
@@ -1467,7 +1484,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -1898,7 +1915,8 @@ unsigned long get_wchan(struct task_struct *p)
do {
sp = *(unsigned long *)sp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
+ p->state == TASK_RUNNING)
return 0;
if (count > 0) {
ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
@@ -2046,7 +2064,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
unsigned long base = mm->brk;
unsigned long ret;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* If we are using 1TB segments and we are allowed to randomise
* the heap, we can put it above 1TB so it is backed by a 1TB
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f83056297441..b15bae265c90 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -47,6 +47,7 @@
#include <asm/mmu.h>
#include <asm/paca.h>
#include <asm/pgtable.h>
+#include <asm/powernv.h>
#include <asm/iommu.h>
#include <asm/btext.h>
#include <asm/sections.h>
@@ -228,7 +229,7 @@ static void __init check_cpu_pa_features(unsigned long node)
ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
@@ -658,6 +659,38 @@ static void __init early_reserve_mem(void)
#endif
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static bool tm_disabled __initdata;
+
+static int __init parse_ppc_tm(char *str)
+{
+ bool res;
+
+ if (kstrtobool(str, &res))
+ return -EINVAL;
+
+ tm_disabled = !res;
+
+ return 0;
+}
+early_param("ppc_tm", parse_ppc_tm);
+
+static void __init tm_init(void)
+{
+ if (tm_disabled) {
+ pr_info("Disabling hardware transactional memory (HTM)\n");
+ cur_cpu_spec->cpu_user_features2 &=
+ ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+ return;
+ }
+
+ pnv_tm_init();
+}
+#else
+static void tm_init(void) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
void __init early_init_devtree(void *params)
{
phys_addr_t limit;
@@ -767,6 +800,8 @@ void __init early_init_devtree(void *params)
powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
#endif
+ tm_init();
+
DBG(" <- early_init_devtree()\n");
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2e3bc16d02b2..fa661ed616f5 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -773,7 +773,7 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
#endif
#ifdef CONFIG_PPC_STD_MMU_32
@@ -800,7 +800,7 @@ static __init void print_system_info(void)
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#endif
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
if (htab_address)
pr_info("htab_address = 0x%p\n", htab_address);
if (htab_hash_mask)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 92fb1c8dbbd8..16d16583cf11 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -519,6 +519,8 @@ static int save_tm_user_regs(struct pt_regs *regs,
{
unsigned long msr = regs->msr;
+ WARN_ON(tm_suspend_disabled);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
@@ -769,6 +771,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
int i;
#endif
+ if (tm_suspend_disabled)
+ return 1;
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal.
@@ -876,7 +880,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
/* Make sure the transaction is marked as failed */
current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&current->thread, msr);
+ tm_recheckpoint(&current->thread);
/* This loads the speculative FP/VEC state, if used */
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index c83c115858c1..528ea744911c 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -214,6 +214,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ WARN_ON(tm_suspend_disabled);
+
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
@@ -430,6 +432,9 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
BUG_ON(tsk != current);
+ if (tm_suspend_disabled)
+ return -EINVAL;
+
/* copy the GPRs */
err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs,
@@ -547,7 +552,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
/* Make sure the transaction is marked as failed */
tsk->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&tsk->thread, msr);
+ tm_recheckpoint(&tsk->thread);
msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index a753b72efbc0..726c2d683f93 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -229,8 +229,7 @@ int __init TAU_init(void)
/* first, set up the window shrinking timer */
- init_timer(&tau_timer);
- tau_timer.function = tau_timeout_smp;
+ setup_timer(&tau_timer, tau_timeout_smp, 0UL);
tau_timer.expires = jiffies + shrink_timer;
add_timer(&tau_timer);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index c4ba37822ba0..d89fb0e6f9ed 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -79,15 +79,12 @@ _GLOBAL(tm_abort)
blr
/* void tm_reclaim(struct thread_struct *thread,
- * unsigned long orig_msr,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
* transactions and updates thread->regs.tm_ckpt_* with the
* original checkpointed state. Note that thread->regs is
* unchanged.
- * - FP regs are written back to thread->transact_fpr before
- * reclaiming. These are the transactional (current) versions.
*
* Purpose is to both abort transactions of, and preserve the state of,
* a transactions at a context switch. We preserve/restore both sets of process
@@ -98,9 +95,9 @@ _GLOBAL(tm_abort)
* Call with IRQs off, stacks get all out of sync for some periods in here!
*/
_GLOBAL(tm_reclaim)
- mfcr r6
+ mfcr r5
mflr r0
- stw r6, 8(r1)
+ stw r5, 8(r1)
std r0, 16(r1)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
@@ -108,7 +105,6 @@ _GLOBAL(tm_reclaim)
/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
std r3, STK_PARAM(R3)(r1)
- std r4, STK_PARAM(R4)(r1)
SAVE_NVGPRS(r1)
/* We need to setup MSR for VSX register save instructions. */
@@ -138,8 +134,8 @@ _GLOBAL(tm_reclaim)
std r1, PACAR1(r13)
/* Clear MSR RI since we are about to change r1, EE is already off. */
- li r4, 0
- mtmsrd r4, 1
+ li r5, 0
+ mtmsrd r5, 1
/*
* BE CAREFUL HERE:
@@ -151,7 +147,7 @@ _GLOBAL(tm_reclaim)
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
- TRECLAIM(R5) /* Cause in r5 */
+ TRECLAIM(R4) /* Cause in r4 */
/* ******************** GPRs ******************** */
/* Stash the checkpointed r13 away in the scratch SPR and get the real
@@ -242,40 +238,30 @@ _GLOBAL(tm_reclaim)
/* ******************** FPR/VR/VSRs ************
- * After reclaiming, capture the checkpointed FPRs/VRs /if used/.
- *
- * (If VSX used, FP and VMX are implied. Or, we don't need to look
- * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.)
- *
- * We're passed the thread's MSR as the second parameter
+ * After reclaiming, capture the checkpointed FPRs/VRs.
*
* We enabled VEC/FP/VSX in the msr above, so we can execute these
* instructions!
*/
- ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */
mr r3, r12
- andis. r0, r4, MSR_VEC@h
- beq dont_backup_vec
+ /* Altivec (VEC/VMX/VR)*/
addi r7, r3, THREAD_CKVRSTATE
SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
mfvscr v0
li r6, VRSTATE_VSCR
stvx v0, r7, r6
-dont_backup_vec:
+
+ /* VRSAVE */
mfspr r0, SPRN_VRSAVE
std r0, THREAD_CKVRSAVE(r3)
- andi. r0, r4, MSR_FP
- beq dont_backup_fp
-
+ /* Floating Point (FP) */
addi r7, r3, THREAD_CKFPSTATE
SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
-
mffs fr0
stfd fr0,FPSTATE_FPSCR(r7)
-dont_backup_fp:
/* TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
@@ -343,22 +329,19 @@ _GLOBAL(__tm_recheckpoint)
*/
subi r7, r7, STACK_FRAME_OVERHEAD
+ /* We need to setup MSR for FP/VMX/VSX register save instructions. */
mfmsr r6
- /* R4 = original MSR to indicate whether thread used FP/Vector etc. */
-
- /* Enable FP/vec in MSR if necessary! */
- lis r5, MSR_VEC@h
+ mr r5, r6
ori r5, r5, MSR_FP
- and. r5, r4, r5
- beq restore_gprs /* if neither, skip both */
-
+#ifdef CONFIG_ALTIVEC
+ oris r5, r5, MSR_VEC@h
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
- oris r5, r5, MSR_VSX@h
+ oris r5,r5, MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */
- mtmsr r5
+ mtmsrd r5
#ifdef CONFIG_ALTIVEC
/*
@@ -367,28 +350,20 @@ _GLOBAL(__tm_recheckpoint)
* thread.fp_state[] version holds the 'live' (transactional)
* and will be loaded subsequently by any FPUnavailable trap.
*/
- andis. r0, r4, MSR_VEC@h
- beq dont_restore_vec
-
addi r8, r3, THREAD_CKVRSTATE
li r5, VRSTATE_VSCR
lvx v0, r8, r5
mtvscr v0
REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */
-dont_restore_vec:
ld r5, THREAD_CKVRSAVE(r3)
mtspr SPRN_VRSAVE, r5
#endif
- andi. r0, r4, MSR_FP
- beq dont_restore_fp
-
addi r8, r3, THREAD_CKFPSTATE
lfd fr0, FPSTATE_FPSCR(r8)
MTFSF_L(fr0)
REST_32FPRS_VSRS(0, R4, R8)
-dont_restore_fp:
mtmsr r6 /* FP/Vec off again! */
restore_gprs:
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index b4e2b7165f79..3f3e81852422 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -110,9 +110,9 @@ ftrace_call:
/* NIP has not been altered, skip over further checks */
beq 1f
- /* Check if there is an active kprobe on us */
+ /* Check if there is an active jprobe on us */
subi r3, r14, 4
- bl is_current_kprobe_addr
+ bl __is_active_jprobe
nop
/*
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 13c9dcdcba69..f3eb61be0d30 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,6 +37,7 @@
#include <linux/kdebug.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
+#include <linux/smp.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -699,6 +700,187 @@ void SMIException(struct pt_regs *regs)
die("System Management Interrupt", regs, SIGABRT);
}
+#ifdef CONFIG_VSX
+static void p9_hmi_special_emu(struct pt_regs *regs)
+{
+ unsigned int ra, rb, t, i, sel, instr, rc;
+ const void __user *addr;
+ u8 vbuf[16], *vdst;
+ unsigned long ea, msr, msr_mask;
+ bool swap;
+
+ if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+ return;
+
+ /*
+ * lxvb16x opcode: 0x7c0006d8
+ * lxvd2x opcode: 0x7c000698
+ * lxvh8x opcode: 0x7c000658
+ * lxvw4x opcode: 0x7c000618
+ */
+ if ((instr & 0xfc00073e) != 0x7c000618) {
+ pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr);
+ return;
+ }
+
+ /* Grab vector registers into the task struct */
+ msr = regs->msr; /* Grab msr before we flush the bits */
+ flush_vsx_to_thread(current);
+ enable_kernel_altivec();
+
+ /*
+ * Is userspace running with a different endian (this is rare but
+ * not impossible)
+ */
+ swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+
+ /* Decode the instruction */
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+ t = (instr >> 21) & 0x1f;
+ if (instr & 1)
+ vdst = (u8 *)&current->thread.vr_state.vr[t];
+ else
+ vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
+
+ /* Grab the vector address */
+ ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
+ if (is_32bit_task())
+ ea &= 0xfffffffful;
+ addr = (__force const void __user *)ea;
+
+ /* Check it */
+ if (!access_ok(VERIFY_READ, addr, 16)) {
+ pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ /* Read the vector */
+ rc = 0;
+ if ((unsigned long)addr & 0xfUL)
+ /* unaligned case */
+ rc = __copy_from_user_inatomic(vbuf, addr, 16);
+ else
+ __get_user_atomic_128_aligned(vbuf, addr, rc);
+ if (rc) {
+ pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid, regs->nip,
+ instr, (unsigned long) addr);
+
+ /* Grab instruction "selector" */
+ sel = (instr >> 6) & 3;
+
+ /*
+ * Check to make sure the facility is actually enabled. This
+ * could happen if we get a false positive hit.
+ *
+ * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
+ * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
+ */
+ msr_mask = MSR_VSX;
+ if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
+ msr_mask = MSR_VEC;
+ if (!(msr & msr_mask)) {
+ pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
+ " instr=%08x msr:%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, msr);
+ return;
+ }
+
+ /* Do logging here before we modify sel based on endian */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ PPC_WARN_EMULATED(lxvw4x, regs);
+ break;
+ case 1: /* lxvh8x */
+ PPC_WARN_EMULATED(lxvh8x, regs);
+ break;
+ case 2: /* lxvd2x */
+ PPC_WARN_EMULATED(lxvd2x, regs);
+ break;
+ case 3: /* lxvb16x */
+ PPC_WARN_EMULATED(lxvb16x, regs);
+ break;
+ }
+
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * An LE kernel stores the vector in the task struct as an LE
+ * byte array (effectively swapping both the components and
+ * the content of the components). Those instructions expect
+ * the components to remain in ascending address order, so we
+ * swap them back.
+ *
+ * If we are running a BE user space, the expectation is that
+ * of a simple memcpy, so forcing the emulation to look like
+ * a lxvb16x should do the trick.
+ */
+ if (swap)
+ sel = 3;
+
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
+ break;
+ case 3: /* lxvb16x */
+ for (i = 0; i < 16; i++)
+ vdst[i] = vbuf[15-i];
+ break;
+ }
+#else /* __LITTLE_ENDIAN__ */
+ /* On a big endian kernel, a BE userspace only needs a memcpy */
+ if (!swap)
+ sel = 3;
+
+ /* Otherwise, we need to swap the content of the components */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
+ break;
+ case 3: /* lxvb16x */
+ memcpy(vdst, vbuf, 16);
+ break;
+ }
+#endif /* !__LITTLE_ENDIAN__ */
+
+ /* Go to next instruction */
+ regs->nip += 4;
+}
+#endif /* CONFIG_VSX */
+
void handle_hmi_exception(struct pt_regs *regs)
{
struct pt_regs *old_regs;
@@ -706,6 +888,21 @@ void handle_hmi_exception(struct pt_regs *regs)
old_regs = set_irq_regs(regs);
irq_enter();
+#ifdef CONFIG_VSX
+ /* Real mode flagged P9 special emu is needed */
+ if (local_paca->hmi_p9_special_emu) {
+ local_paca->hmi_p9_special_emu = 0;
+
+ /*
+ * We don't want to take page faults while doing the
+ * emulation, we just replay the instruction if necessary.
+ */
+ pagefault_disable();
+ p9_hmi_special_emu(regs);
+ pagefault_enable();
+ }
+#endif /* CONFIG_VSX */
+
if (ppc_md.handle_hmi_exception)
ppc_md.handle_hmi_exception(regs);
@@ -1140,13 +1337,8 @@ void program_check_exception(struct pt_regs *regs)
* - A treclaim is attempted when non transactional.
* - A tend is illegally attempted.
* - writing a TM SPR when transactional.
- */
- if (!user_mode(regs) &&
- report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
- regs->nip += 4;
- goto bail;
- }
- /* If usermode caused this, it's done something illegal and
+ *
+ * If usermode caused this, it's done something illegal and
* gets a SIGILL slap on the wrist. We call it an illegal
* operand to distinguish from the instruction just being bad
* (e.g. executing a 'tend' on a CPU without TM!); it's an
@@ -1487,7 +1679,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
/* Reclaim didn't save out any FPRs to transact_fprs. */
/* Enable FP for the task: */
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ current->thread.load_fp = 1;
/* This loads and recheckpoints the FP registers from
* thread.fpr[]. They will remain in registers after the
@@ -1495,15 +1687,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
* If VMX is in use, the VRs now hold checkpointed values,
* so we don't want to load the VRs from the thread_struct.
*/
- tm_recheckpoint(&current->thread, MSR_FP);
-
- /* If VMX is in use, get the transactional values back */
- if (regs->msr & MSR_VEC) {
- msr_check_and_set(MSR_VEC);
- load_vr_state(&current->thread.vr_state);
- /* At this point all the VSX state is loaded, so enable it */
- regs->msr |= MSR_VSX;
- }
+ tm_recheckpoint(&current->thread);
}
void altivec_unavailable_tm(struct pt_regs *regs)
@@ -1516,21 +1700,13 @@ void altivec_unavailable_tm(struct pt_regs *regs)
"MSR=%lx\n",
regs->nip, regs->msr);
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC;
- tm_recheckpoint(&current->thread, MSR_VEC);
+ current->thread.load_vec = 1;
+ tm_recheckpoint(&current->thread);
current->thread.used_vr = 1;
-
- if (regs->msr & MSR_FP) {
- msr_check_and_set(MSR_FP);
- load_fp_state(&current->thread.fp_state);
- regs->msr |= MSR_VSX;
- }
}
void vsx_unavailable_tm(struct pt_regs *regs)
{
- unsigned long orig_msr = regs->msr;
-
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
*
@@ -1544,29 +1720,13 @@ void vsx_unavailable_tm(struct pt_regs *regs)
current->thread.used_vsr = 1;
- /* If FP and VMX are already loaded, we have all the state we need */
- if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) {
- regs->msr |= MSR_VSX;
- return;
- }
-
/* This reclaims FP and/or VR regs if they're already enabled */
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
- MSR_VSX;
-
- /* This loads & recheckpoints FP and VRs; but we have
- * to be sure not to overwrite previously-valid state.
- */
- tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
-
- msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC));
+ current->thread.load_vec = 1;
+ current->thread.load_fp = 1;
- if (orig_msr & MSR_FP)
- load_fp_state(&current->thread.fp_state);
- if (orig_msr & MSR_VEC)
- load_vr_state(&current->thread.vr_state);
+ tm_recheckpoint(&current->thread);
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -1924,6 +2084,10 @@ struct ppc_emulated ppc_emulated = {
WARN_EMULATED_SETUP(mfdscr),
WARN_EMULATED_SETUP(mtdscr),
WARN_EMULATED_SETUP(lq_stq),
+ WARN_EMULATED_SETUP(lxvw4x),
+ WARN_EMULATED_SETUP(lxvh8x),
+ WARN_EMULATED_SETUP(lxvd2x),
+ WARN_EMULATED_SETUP(lxvb16x),
#endif
};
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 2f6eadd9408d..1fb9379dc683 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -97,8 +97,7 @@ static void wd_lockup_ipi(struct pt_regs *regs)
else
dump_stack();
- if (hardlockup_panic)
- nmi_panic(regs, "Hard LOCKUP");
+ /* Do not panic from here because that can recurse into NMI IPI layer */
}
static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
@@ -134,15 +133,18 @@ static void watchdog_smp_panic(int cpu, u64 tb)
pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
cpu, cpumask_pr_args(&wd_smp_cpus_pending));
- /*
- * Try to trigger the stuck CPUs.
- */
- for_each_cpu(c, &wd_smp_cpus_pending) {
- if (c == cpu)
- continue;
- smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ if (!sysctl_hardlockup_all_cpu_backtrace) {
+ /*
+ * Try to trigger the stuck CPUs, unless we are going to
+ * get a backtrace on all of them anyway.
+ */
+ for_each_cpu(c, &wd_smp_cpus_pending) {
+ if (c == cpu)
+ continue;
+ smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ }
+ smp_flush_nmi_ipi(1000000);
}
- smp_flush_nmi_ipi(1000000);
/* Take the stuck CPUs out of the watch group */
set_cpumask_stuck(&wd_smp_cpus_pending, tb);
@@ -275,9 +277,12 @@ void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
+ u64 tb = get_tb();
- if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
- watchdog_timer_interrupt(cpu);
+ if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
+ per_cpu(wd_timer_tb, cpu) = tb;
+ wd_smp_clear_cpu_pending(cpu, tb);
+ }
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 73bf1ebfa78f..31a362669fea 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -47,6 +47,7 @@
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
+#include <asm/asm-prototypes.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
@@ -1089,9 +1090,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
- /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
+ /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
@@ -2117,15 +2119,6 @@ static int kvmppc_grab_hwthread(int cpu)
struct paca_struct *tpaca;
long timeout = 10000;
- /*
- * ISA v3.0 idle routines do not set hwthread_state or test
- * hwthread_req, so they can not grab idle threads.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- WARN(1, "KVM: can not control sibling threads\n");
- return -EBUSY;
- }
-
tpaca = &paca[cpu];
/* Ensure the thread won't go into the kernel if it wakes */
@@ -2160,12 +2153,10 @@ static void kvmppc_release_hwthread(int cpu)
struct paca_struct *tpaca;
tpaca = &paca[cpu];
+ tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
tpaca->kvm_hstate.kvm_vcore = NULL;
tpaca->kvm_hstate.kvm_split_mode = NULL;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- tpaca->kvm_hstate.hwthread_req = 0;
-
}
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
@@ -2615,6 +2606,9 @@ static void set_irq_happened(int trap)
case BOOK3S_INTERRUPT_HMI:
local_paca->irq_happened |= PACA_IRQ_HMI;
break;
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
+ replay_system_reset();
+ break;
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 17936f82d3c7..fb02209ff782 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -149,11 +149,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
subf r4, r4, r3
mtspr SPRN_DEC, r4
-BEGIN_FTR_SECTION
/* hwthread_req may have got set by cede or no vcpu, so clear it */
li r0, 0
stb r0, HSTATE_HWTHREAD_REQ(r13)
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/*
* For external interrupts we need to call the Linux
@@ -316,7 +314,6 @@ kvm_novcpu_exit:
* Relocation is off and most register values are lost.
* r13 points to the PACA.
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
- * This is not used by ISAv3.0B processors.
*/
.globl kvm_start_guest
kvm_start_guest:
@@ -435,9 +432,6 @@ kvm_secondary_got_guest:
* While waiting we also need to check if we get given a vcpu to run.
*/
kvm_no_guest:
-BEGIN_FTR_SECTION
- twi 31,0,0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r3, HSTATE_HWTHREAD_REQ(r13)
cmpwi r3, 0
bne 53f
@@ -2531,10 +2525,8 @@ kvm_do_nap:
clrrdi r0, r0, 1
mtspr SPRN_CTRLT, r0
-BEGIN_FTR_SECTION
li r0,1
stb r0,HSTATE_HWTHREAD_REQ(r13)
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3480faaf1ef8..a3746b98ec11 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -644,8 +644,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
case KVM_CAP_PPC_HTM:
- r = cpu_has_feature(CPU_FTR_TM_COMP) &&
- is_kvmppc_hv_enabled(kvm);
+ r = is_kvmppc_hv_enabled(kvm) &&
+ (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
break;
default:
r = 0;
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index f208f560aecd..8c3955e183d4 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -31,6 +31,8 @@ extern char system_call_common[];
#define XER_SO 0x80000000U
#define XER_OV 0x40000000U
#define XER_CA 0x20000000U
+#define XER_OV32 0x00080000U
+#define XER_CA32 0x00040000U
#ifdef CONFIG_PPC_FPU
/*
@@ -962,6 +964,16 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs,
op->ccval |= 0x20000000;
}
+static nokprobe_inline void set_ca32(struct instruction_op *op, bool val)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (val)
+ op->xerval |= XER_CA32;
+ else
+ op->xerval &= ~XER_CA32;
+ }
+}
+
static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
struct instruction_op *op, int rd,
unsigned long val1, unsigned long val2,
@@ -985,6 +997,9 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+
+ set_ca32(op, (unsigned int)val < (unsigned int)val1 ||
+ (carry_in && (unsigned int)val == (unsigned int)val1));
}
static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
@@ -1791,6 +1806,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
case 824: /* srawi */
@@ -1803,6 +1819,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
#ifdef __powerpc64__
@@ -1832,6 +1849,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
case 826: /* sradi with sh_5 = 0 */
@@ -1845,6 +1863,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval |= XER_CA;
else
op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index fb844d2f266e..915178423c3a 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -14,11 +14,11 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
-ifeq ($(CONFIG_PPC_STD_MMU_64),y)
+ifeq ($(CONFIG_PPC_BOOK3S_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
endif
@@ -31,7 +31,7 @@ obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-y += hugetlbpage.o
ifeq ($(CONFIG_HUGETLB_PAGE),y)
-obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o
obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
endif
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index 5c4c93dcff19..14cfb11b09d0 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
address_markers[6].start_address = PHB_IO_END;
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
address_markers[9].start_address = H_VMEMMAP_BASE;
#else
address_markers[9].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index c9282d27b203..c2e7dea59490 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -112,7 +112,7 @@ struct flag_info {
static const struct flag_info flag_array[] = {
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_PRIVILEGED,
.val = 0,
#else
@@ -147,7 +147,7 @@ static const struct flag_info flag_array[] = {
.set = "present",
.clear = " ",
}, {
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
.mask = H_PAGE_HASHPTE,
.val = H_PAGE_HASHPTE,
#else
@@ -157,7 +157,7 @@ static const struct flag_info flag_array[] = {
.set = "hpte",
.clear = " ",
}, {
-#ifndef CONFIG_PPC_STD_MMU_64
+#ifndef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_GUARDED,
.val = _PAGE_GUARDED,
.set = "guarded",
@@ -174,7 +174,7 @@ static const struct flag_info flag_array[] = {
.set = "accessed",
.clear = " ",
}, {
-#ifndef CONFIG_PPC_STD_MMU_64
+#ifndef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_WRITETHRU,
.val = _PAGE_WRITETHRU,
.set = "write through",
@@ -450,7 +450,7 @@ static void populate_markers(void)
address_markers[i++].start_address = PHB_IO_END;
address_markers[i++].start_address = IOREMAP_BASE;
address_markers[i++].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
address_markers[i++].start_address = H_VMEMMAP_BASE;
#else
address_markers[i++].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 67ec2e927253..655a5a9a183d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -21,6 +21,7 @@
#undef DEBUG
#undef DEBUG_LOW
+#define pr_fmt(fmt) "hash-mmu: " fmt
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched/mm.h>
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 588a521966ec..a07722531b32 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,11 +68,11 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#if H_PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
@@ -367,11 +367,20 @@ EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-#ifdef CONFIG_PPC_STD_MMU_64
-static bool disable_radix;
+#ifdef CONFIG_PPC_BOOK3S_64
+static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+
static int __init parse_disable_radix(char *p)
{
- disable_radix = true;
+ bool val;
+
+ if (strlen(p) == 0)
+ val = true;
+ else if (kstrtobool(p, &val))
+ return -EINVAL;
+
+ disable_radix = val;
+
return 0;
}
early_param("disable_radix", parse_disable_radix);
@@ -444,4 +453,4 @@ void __init mmu_early_init_devtree(void)
else
hash__early_init_devtree();
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0f613bc63c50..d60a62bf4fc7 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
struct mm_struct *mm) { }
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline void inc_mm_active_cpus(struct mm_struct *mm)
-{
- atomic_inc(&mm->context.active_cpus);
-}
-#else
-static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
-#endif
-
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 05e15386d4cb..6d724dab27c2 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -216,19 +216,34 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
+ if (radix_enabled())
+ WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+ else
+ subpage_prot_free(mm);
+ destroy_pagetable_page(mm);
+ __destroy_context(mm->context.id);
+ mm->context.id = MMU_NO_CONTEXT;
+}
+
+void arch_exit_mmap(struct mm_struct *mm)
+{
if (radix_enabled()) {
/*
* Radix doesn't have a valid bit in the process table
* entries. However we know that at least P9 implementation
* will avoid caching an entry with an invalid RTS field,
* and 0 is invalid. So this will do.
+ *
+ * This runs before the "fullmm" tlb flush in exit_mmap,
+ * which does a RIC=2 tlbie to clear the process table
+ * entry. See the "fullmm" comments in tlb-radix.c.
+ *
+ * No barrier required here after the store because
+ * this process will do the invalidate, which starts with
+ * ptesync.
*/
process_tb[mm->context.id].prtb0 = 0;
- } else
- subpage_prot_free(mm);
- destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
- mm->context.id = MMU_NO_CONTEXT;
+ }
}
#ifdef CONFIG_PPC_RADIX_MMU
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a51df9ef529d..eb604b3574fa 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1148,11 +1148,33 @@ struct topology_update_data {
int new_nid;
};
+#define TOPOLOGY_DEF_TIMER_SECS 60
+
static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled;
static int prrn_enabled;
static void reset_topology_timer(void);
+static int topology_timer_secs = 1;
+static int topology_inited;
+static int topology_update_needed;
+
+/*
+ * Change polling interval for associativity changes.
+ */
+int timed_topology_update(int nsecs)
+{
+ if (vphn_enabled) {
+ if (nsecs > 0)
+ topology_timer_secs = nsecs;
+ else
+ topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+
+ reset_topology_timer();
+ }
+
+ return 0;
+}
/*
* Store the current values of the associativity change counters in the
@@ -1246,6 +1268,11 @@ static long vphn_get_associativity(unsigned long cpu,
"hcall_vphn() experienced a hardware fault "
"preventing VPHN. Disabling polling...\n");
stop_topology_update();
+ break;
+ case H_SUCCESS:
+ dbg("VPHN hcall succeeded. Reset polling...\n");
+ timed_topology_update(0);
+ break;
}
return rc;
@@ -1323,8 +1350,11 @@ int numa_update_cpu_topology(bool cpus_locked)
struct device *dev;
int weight, new_nid, i = 0;
- if (!prrn_enabled && !vphn_enabled)
+ if (!prrn_enabled && !vphn_enabled) {
+ if (!topology_inited)
+ topology_update_needed = 1;
return 0;
+ }
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
@@ -1363,22 +1393,30 @@ int numa_update_cpu_topology(bool cpus_locked)
cpumask_andnot(&cpu_associativity_changes_mask,
&cpu_associativity_changes_mask,
cpu_sibling_mask(cpu));
+ dbg("Assoc chg gives same node %d for cpu%d\n",
+ new_nid, cpu);
cpu = cpu_last_thread_sibling(cpu);
continue;
}
for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
ud = &updates[i++];
+ ud->next = &updates[i];
ud->cpu = sibling;
ud->new_nid = new_nid;
ud->old_nid = numa_cpu_lookup_table[sibling];
cpumask_set_cpu(sibling, &updated_cpus);
- if (i < weight)
- ud->next = &updates[i];
}
cpu = cpu_last_thread_sibling(cpu);
}
+ /*
+ * Prevent processing of 'updates' from overflowing array
+ * where last entry filled in a 'next' pointer.
+ */
+ if (i)
+ updates[i-1].next = NULL;
+
pr_debug("Topology update for the following CPUs:\n");
if (cpumask_weight(&updated_cpus)) {
for (ud = &updates[0]; ud; ud = ud->next) {
@@ -1433,6 +1471,7 @@ int numa_update_cpu_topology(bool cpus_locked)
out:
kfree(updates);
+ topology_update_needed = 0;
return changed;
}
@@ -1468,7 +1507,7 @@ static struct timer_list topology_timer =
static void reset_topology_timer(void)
{
topology_timer.data = 0;
- topology_timer.expires = jiffies + 60 * HZ;
+ topology_timer.expires = jiffies + topology_timer_secs * HZ;
mod_timer(&topology_timer, topology_timer.expires);
}
@@ -1518,15 +1557,14 @@ int start_topology_update(void)
if (firmware_has_feature(FW_FEATURE_PRRN)) {
if (!prrn_enabled) {
prrn_enabled = 1;
- vphn_enabled = 0;
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_register(&dt_update_nb);
#endif
}
- } else if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ }
+ if (firmware_has_feature(FW_FEATURE_VPHN) &&
lppaca_shared_proc(get_lppaca())) {
if (!vphn_enabled) {
- prrn_enabled = 0;
vphn_enabled = 1;
setup_cpu_associativity_change_counters();
init_timer_deferrable(&topology_timer);
@@ -1549,7 +1587,8 @@ int stop_topology_update(void)
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_unregister(&dt_update_nb);
#endif
- } else if (vphn_enabled) {
+ }
+ if (vphn_enabled) {
vphn_enabled = 0;
rc = del_timer_sync(&topology_timer);
}
@@ -1612,9 +1651,17 @@ static int topology_update_init(void)
if (topology_updates_enabled)
start_topology_update();
+ if (vphn_enabled)
+ topology_schedule_update();
+
if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
return -ENOMEM;
+ topology_inited = 1;
+ if (topology_update_needed)
+ bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
+ nr_cpumask_bits);
+
return 0;
}
device_initcall(topology_update_init);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index ac0717a90ca6..2851282edcd4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,7 +57,7 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 906a86fe457b..ed60ad861dfa 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -309,10 +309,6 @@ slb_compare_rr_to_size:
srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
rldimi r10,r9,ESID_BITS_1T,0
ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
- /*
- * bits above VSID_BITS_1T need to be ignored from r10
- * also combine VSID and flags
- */
li r10,MMU_SEGSIZE_1T
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index d304028641a2..63e277b6e60c 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -144,7 +144,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
#ifndef CONFIG_SMP
-static void radix__local_flush_all_mm(struct mm_struct *mm)
+void radix__local_flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
@@ -154,6 +154,7 @@ static void radix__local_flush_all_mm(struct mm_struct *mm)
_tlbiel_pid(pid, RIC_FLUSH_ALL);
preempt_enable();
}
+EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */
void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
@@ -163,7 +164,7 @@ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmadd
unsigned long ap = mmu_get_ap(psize);
preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
preempt_enable();
@@ -173,11 +174,10 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
{
#ifdef CONFIG_HUGETLB_PAGE
/* need the return fix for nohash.c */
- if (vma && is_vm_hugetlb_page(vma))
- return __local_flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
- radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
@@ -186,36 +186,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
+ preempt_disable();
if (!mm_is_thread_local(mm))
_tlbie_pid(pid, RIC_FLUSH_TLB);
else
_tlbiel_pid(pid, RIC_FLUSH_TLB);
-no_context:
preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
-static void radix__flush_all_mm(struct mm_struct *mm)
+void radix__flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
+ preempt_disable();
if (!mm_is_thread_local(mm))
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
_tlbiel_pid(pid, RIC_FLUSH_ALL);
-no_context:
preempt_enable();
}
+EXPORT_SYMBOL(radix__flush_all_mm);
void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
@@ -229,26 +228,25 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
unsigned long pid;
unsigned long ap = mmu_get_ap(psize);
- preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto bail;
+ return;
+
+ preempt_disable();
if (!mm_is_thread_local(mm))
_tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
else
_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
-bail:
preempt_enable();
}
void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
- if (vma && is_vm_hugetlb_page(vma))
- return flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_page(vma, vmaddr);
#endif
- radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);
@@ -300,10 +298,14 @@ void radix__tlb_flush(struct mmu_gather *tlb)
psize = radix_get_mmu_psize(page_size);
/*
* if page size is not something we understand, do a full mm flush
+ *
+ * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
+ * that flushes the process table entry cache upon process teardown.
+ * See the comment for radix in arch_exit_mmap().
*/
if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
- else if (tlb->need_flush_all) {
+ else if (tlb->fullmm || tlb->need_flush_all) {
tlb->need_flush_all = 0;
radix__flush_all_mm(mm);
} else
@@ -322,55 +324,53 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
{
unsigned long pid;
unsigned long addr;
- int local = mm_is_thread_local(mm);
+ bool local;
unsigned long ap = mmu_get_ap(psize);
unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
-
- preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto err_out;
+ return;
+ preempt_disable();
+ local = mm_is_thread_local(mm);
if (end == TLB_FLUSH_ALL ||
(end - start) > tlb_single_page_flush_ceiling * page_size) {
if (local)
_tlbiel_pid(pid, RIC_FLUSH_TLB);
else
_tlbie_pid(pid, RIC_FLUSH_TLB);
- goto err_out;
- }
- for (addr = start; addr < end; addr += page_size) {
+ } else {
+ for (addr = start; addr < end; addr += page_size) {
- if (local)
- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
- else
- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ if (local)
+ _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ else
+ _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ }
}
-err_out:
preempt_enable();
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
- int local = mm_is_thread_local(mm);
unsigned long ap = mmu_get_ap(mmu_virtual_psize);
unsigned long pid, end;
+ bool local;
-
- pid = mm ? mm->context.id : 0;
- preempt_disable();
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
/* 4k page size, just blow the world */
if (PAGE_SIZE == 0x1000) {
radix__flush_all_mm(mm);
- preempt_enable();
return;
}
+ preempt_disable();
+ local = mm_is_thread_local(mm);
/* Otherwise first do the PWC */
if (local)
_tlbiel_pid(pid, RIC_FLUSH_PWC);
@@ -385,7 +385,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
else
_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}
-no_context:
+
preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 62fa7589db2b..8bdef7ed28a8 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -23,7 +23,7 @@
* [ nv gpr save area ] 8*8 |
* [ tail_call_cnt ] 8 |
* [ local_tmp_var ] 8 |
- * fp (r31) --> [ ebpf stack space ] 512 |
+ * fp (r31) --> [ ebpf stack space ] upto 512 |
* [ frame header ] 32/112 |
* sp (r1) ---> [ stack pointer ] --------------
*/
@@ -32,8 +32,8 @@
#define BPF_PPC_STACK_SAVE (8*8)
/* for bpf JIT code internal usage */
#define BPF_PPC_STACK_LOCALS 16
-/* Ensure this is quadword aligned */
-#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + MAX_BPF_STACK + \
+/* stack frame excluding BPF stack, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
#ifndef __ASSEMBLY__
@@ -103,6 +103,7 @@ struct codegen_context {
*/
unsigned int seen;
unsigned int idx;
+ unsigned int stack_size;
};
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index a66e64b0b251..46d74e81aff1 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -69,7 +69,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
if (bpf_has_stack_frame(ctx))
- return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK;
+ return STACK_FRAME_MIN_SIZE + ctx->stack_size;
else
return -(BPF_PPC_STACK_SAVE + 16);
}
@@ -82,8 +82,9 @@ static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
if (reg >= BPF_PPC_NVR_MIN && reg < 32)
- return (bpf_has_stack_frame(ctx) ? BPF_PPC_STACKFRAME : 0)
- - (8 * (32 - reg));
+ return (bpf_has_stack_frame(ctx) ?
+ (BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
+ - (8 * (32 - reg));
pr_err("BPF JIT is asking about unknown registers");
BUG();
@@ -134,7 +135,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
}
- PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
+ PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
}
/*
@@ -161,7 +162,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, BPF_REG_FP))
PPC_ADDI(b2p[BPF_REG_FP], 1,
- STACK_FRAME_MIN_SIZE + MAX_BPF_STACK);
+ STACK_FRAME_MIN_SIZE + ctx->stack_size);
}
static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -183,7 +184,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
/* Tear down our stack frame */
if (bpf_has_stack_frame(ctx)) {
- PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
+ PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
if (ctx->seen & SEEN_FUNC) {
PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
PPC_MTLR(0);
@@ -1013,6 +1014,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
memset(&cgctx, 0, sizeof(struct codegen_context));
+ /* Make sure that the stack is quadword aligned. */
+ cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
+
/* Scouting faux-generate pass 0 */
if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
/* We hit something illegal or unsupported. */
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index c82497a31c54..264b6ab11978 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -555,9 +555,7 @@ static void cell_virtual_cntr(unsigned long data)
static void start_virt_cntrs(void)
{
- init_timer(&timer_virt_cntr);
- timer_virt_cntr.function = cell_virtual_cntr;
- timer_virt_cntr.data = 0UL;
+ setup_timer(&timer_virt_cntr, cell_virtual_cntr, 0UL);
timer_virt_cntr.expires = jiffies + HZ / 10;
add_timer(&timer_virt_cntr);
}
@@ -679,9 +677,7 @@ static void spu_evnt_swap(unsigned long data)
static void start_spu_event_swap(void)
{
- init_timer(&timer_spu_event_swap);
- timer_spu_event_swap.function = spu_evnt_swap;
- timer_spu_event_swap.data = 0UL;
+ setup_timer(&timer_spu_event_swap, spu_evnt_swap, 0UL);
timer_spu_event_swap.expires = jiffies + HZ / 25;
add_timer(&timer_spu_event_swap);
}
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9c88b82f6229..72238eedc360 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -540,7 +540,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
if (s1 < s2)
return 1;
- if (s2 > s1)
+ if (s1 > s2)
return -1;
return memcmp(d1, d2, s1);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 13663efc1d31..596bd9091478 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -294,10 +294,6 @@ config PPC_STD_MMU_32
def_bool y
depends on PPC_STD_MMU && PPC32
-config PPC_STD_MMU_64
- def_bool y
- depends on PPC_STD_MMU && PPC64
-
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
@@ -308,6 +304,19 @@ config PPC_RADIX_MMU
is only implemented by IBM Power9 CPUs, if you don't have one of them
you can probably disable this.
+config PPC_RADIX_MMU_DEFAULT
+ bool "Default to using the Radix MMU when possible"
+ depends on PPC_RADIX_MMU
+ default y
+ help
+ When the hardware supports the Radix MMU, default to using it unless
+ "disable_radix[=yes]" is specified on the kernel command line.
+
+ If this option is disabled, the Hash MMU will be used by default,
+ unless "disable_radix=no" is specified on the kernel command line.
+
+ If you're unsure, say Y.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -323,7 +332,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
- default y if PPC_STD_MMU_64
+ default y if PPC_BOOK3S_64
default n
config PPC_HAVE_PMU_SUPPORT
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 70183eb3d5c8..39a1d4225e0f 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -513,9 +513,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
mutex_init(&host->mutex);
init_completion(&host->complete);
spin_lock_init(&host->lock);
- init_timer(&host->timeout_timer);
- host->timeout_timer.function = kw_i2c_timeout;
- host->timeout_timer.data = (unsigned long)host;
+ setup_timer(&host->timeout_timer, kw_i2c_timeout, (unsigned long)host);
psteps = of_get_property(np, "AAPL,address-step", NULL);
steps = psteps ? (*psteps) : 0x10;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 8864065eba22..4650fb294e7a 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -41,7 +41,6 @@
#include "powernv.h"
#include "pci.h"
-static bool pnv_eeh_nb_init = false;
static int eeh_event_irq = -EINVAL;
static int pnv_eeh_init(void)
@@ -197,31 +196,31 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
* been built. If the I/O cache staff has been built, EEH is
* ready to supply service.
*/
-static int pnv_eeh_post_init(void)
+int pnv_eeh_post_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
int ret = 0;
- /* Register OPAL event notifier */
- if (!pnv_eeh_nb_init) {
- eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
- if (eeh_event_irq < 0) {
- pr_err("%s: Can't register OPAL event interrupt (%d)\n",
- __func__, eeh_event_irq);
- return eeh_event_irq;
- }
+ /* Probe devices & build address cache */
+ eeh_probe_devices();
+ eeh_addr_cache_build();
- ret = request_irq(eeh_event_irq, pnv_eeh_event,
- IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
- if (ret < 0) {
- irq_dispose_mapping(eeh_event_irq);
- pr_err("%s: Can't request OPAL event interrupt (%d)\n",
- __func__, eeh_event_irq);
- return ret;
- }
+ /* Register OPAL event notifier */
+ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+ if (eeh_event_irq < 0) {
+ pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return eeh_event_irq;
+ }
- pnv_eeh_nb_init = true;
+ ret = request_irq(eeh_event_irq, pnv_eeh_event,
+ IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+ if (ret < 0) {
+ irq_dispose_mapping(eeh_event_irq);
+ pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return ret;
}
if (!eeh_enabled())
@@ -367,6 +366,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ /* Skip if we haven't probed yet */
+ if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE)
+ return NULL;
+
/* Initialize eeh device */
edev->class_code = pdn->class_code;
edev->mode &= 0xFFFFFF00;
@@ -1731,7 +1734,6 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
static struct eeh_ops pnv_eeh_ops = {
.name = "powernv",
.init = pnv_eeh_init,
- .post_init = pnv_eeh_post_init,
.probe = pnv_eeh_probe,
.set_option = pnv_eeh_set_option,
.get_pe_addr = pnv_eeh_get_pe_addr,
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index cf33769a7b72..18a355fa15e8 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -1,7 +1,7 @@
/*
* PowerNV OPAL asynchronous completion interfaces
*
- * Copyright 2013 IBM Corp.
+ * Copyright 2013-2017 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -23,40 +23,50 @@
#include <asm/machdep.h>
#include <asm/opal.h>
-#define N_ASYNC_COMPLETIONS 64
+enum opal_async_token_state {
+ ASYNC_TOKEN_UNALLOCATED = 0,
+ ASYNC_TOKEN_ALLOCATED,
+ ASYNC_TOKEN_DISPATCHED,
+ ASYNC_TOKEN_ABANDONED,
+ ASYNC_TOKEN_COMPLETED
+};
+
+struct opal_async_token {
+ enum opal_async_token_state state;
+ struct opal_msg response;
+};
-static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL};
-static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS);
static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
static DEFINE_SPINLOCK(opal_async_comp_lock);
static struct semaphore opal_async_sem;
-static struct opal_msg *opal_async_responses;
static unsigned int opal_max_async_tokens;
+static struct opal_async_token *opal_async_tokens;
-int __opal_async_get_token(void)
+static int __opal_async_get_token(void)
{
unsigned long flags;
- int token;
+ int i, token = -EBUSY;
spin_lock_irqsave(&opal_async_comp_lock, flags);
- token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
- if (token >= opal_max_async_tokens) {
- token = -EBUSY;
- goto out;
- }
- if (__test_and_set_bit(token, opal_async_token_map)) {
- token = -EBUSY;
- goto out;
+ for (i = 0; i < opal_max_async_tokens; i++) {
+ if (opal_async_tokens[i].state == ASYNC_TOKEN_UNALLOCATED) {
+ opal_async_tokens[i].state = ASYNC_TOKEN_ALLOCATED;
+ token = i;
+ break;
+ }
}
- __clear_bit(token, opal_async_complete_map);
-
-out:
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
return token;
}
+/*
+ * Note: If the returned token is used in an opal call and opal returns
+ * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or
+ * opal_async_wait_response_interruptible() at least once before calling another
+ * opal_async_* function
+ */
int opal_async_get_token_interruptible(void)
{
int token;
@@ -73,9 +83,10 @@ int opal_async_get_token_interruptible(void)
}
EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
-int __opal_async_release_token(int token)
+static int __opal_async_release_token(int token)
{
unsigned long flags;
+ int rc;
if (token < 0 || token >= opal_max_async_tokens) {
pr_err("%s: Passed token is out of range, token %d\n",
@@ -84,11 +95,26 @@ int __opal_async_release_token(int token)
}
spin_lock_irqsave(&opal_async_comp_lock, flags);
- __set_bit(token, opal_async_complete_map);
- __clear_bit(token, opal_async_token_map);
+ switch (opal_async_tokens[token].state) {
+ case ASYNC_TOKEN_COMPLETED:
+ case ASYNC_TOKEN_ALLOCATED:
+ opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED;
+ rc = 0;
+ break;
+ /*
+ * DISPATCHED and ABANDONED tokens must wait for OPAL to respond.
+ * Mark a DISPATCHED token as ABANDONED so that the response handling
+ * code knows no one cares and that it can free it then.
+ */
+ case ASYNC_TOKEN_DISPATCHED:
+ opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED;
+ /* Fall through */
+ default:
+ rc = 1;
+ }
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
- return 0;
+ return rc;
}
int opal_async_release_token(int token)
@@ -96,12 +122,10 @@ int opal_async_release_token(int token)
int ret;
ret = __opal_async_release_token(token);
- if (ret)
- return ret;
-
- up(&opal_async_sem);
+ if (!ret)
+ up(&opal_async_sem);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(opal_async_release_token);
@@ -117,22 +141,83 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
return -EINVAL;
}
- /* Wakeup the poller before we wait for events to speed things
+ /*
+ * There is no need to mark the token as dispatched, wait_event()
+ * will block until the token completes.
+ *
+ * Wakeup the poller before we wait for events to speed things
* up on platforms or simulators where the interrupts aren't
* functional.
*/
opal_wake_poller();
- wait_event(opal_async_wait, test_bit(token, opal_async_complete_map));
- memcpy(msg, &opal_async_responses[token], sizeof(*msg));
+ wait_event(opal_async_wait, opal_async_tokens[token].state
+ == ASYNC_TOKEN_COMPLETED);
+ memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
return 0;
}
EXPORT_SYMBOL_GPL(opal_async_wait_response);
+int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg)
+{
+ unsigned long flags;
+ int ret;
+
+ if (token >= opal_max_async_tokens) {
+ pr_err("%s: Invalid token passed\n", __func__);
+ return -EINVAL;
+ }
+
+ if (!msg) {
+ pr_err("%s: Invalid message pointer passed\n", __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * The first time this gets called we mark the token as DISPATCHED
+ * so that if wait_event_interruptible() returns not zero and the
+ * caller frees the token, we know not to actually free the token
+ * until the response comes.
+ *
+ * Only change if the token is ALLOCATED - it may have been
+ * completed even before the caller gets around to calling this
+ * the first time.
+ *
+ * There is also a dirty great comment at the token allocation
+ * function that if the opal call returns OPAL_ASYNC_COMPLETION to
+ * the caller then the caller *must* call this or the not
+ * interruptible version before doing anything else with the
+ * token.
+ */
+ if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) {
+ spin_lock_irqsave(&opal_async_comp_lock, flags);
+ if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED)
+ opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED;
+ spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+ }
+
+ /*
+ * Wakeup the poller before we wait for events to speed things
+ * up on platforms or simulators where the interrupts aren't
+ * functional.
+ */
+ opal_wake_poller();
+ ret = wait_event_interruptible(opal_async_wait,
+ opal_async_tokens[token].state ==
+ ASYNC_TOKEN_COMPLETED);
+ if (!ret)
+ memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible);
+
+/* Called from interrupt context */
static int opal_async_comp_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
{
struct opal_msg *comp_msg = msg;
+ enum opal_async_token_state state;
unsigned long flags;
uint64_t token;
@@ -140,11 +225,17 @@ static int opal_async_comp_event(struct notifier_block *nb,
return 0;
token = be64_to_cpu(comp_msg->params[0]);
- memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg));
spin_lock_irqsave(&opal_async_comp_lock, flags);
- __set_bit(token, opal_async_complete_map);
+ state = opal_async_tokens[token].state;
+ opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED;
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+ if (state == ASYNC_TOKEN_ABANDONED) {
+ /* Free the token, no one else will */
+ opal_async_release_token(token);
+ return 0;
+ }
+ memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg));
wake_up(&opal_async_wait);
return 0;
@@ -178,32 +269,23 @@ int __init opal_async_comp_init(void)
}
opal_max_async_tokens = be32_to_cpup(async);
- if (opal_max_async_tokens > N_ASYNC_COMPLETIONS)
- opal_max_async_tokens = N_ASYNC_COMPLETIONS;
+ opal_async_tokens = kcalloc(opal_max_async_tokens,
+ sizeof(*opal_async_tokens), GFP_KERNEL);
+ if (!opal_async_tokens) {
+ err = -ENOMEM;
+ goto out_opal_node;
+ }
err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
&opal_async_comp_nb);
if (err) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, err);
+ kfree(opal_async_tokens);
goto out_opal_node;
}
- opal_async_responses = kzalloc(
- sizeof(*opal_async_responses) * opal_max_async_tokens,
- GFP_KERNEL);
- if (!opal_async_responses) {
- pr_err("%s: Out of memory, failed to do asynchronous "
- "completion init\n", __func__);
- err = -ENOMEM;
- goto out_opal_node;
- }
-
- /* Initialize to 1 less than the maximum tokens available, as we may
- * require to pop one during emergency through synchronous call to
- * __opal_async_get_token()
- */
- sema_init(&opal_async_sem, opal_max_async_tokens - 1);
+ sema_init(&opal_async_sem, opal_max_async_tokens);
out_opal_node:
of_node_put(opal_node);
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index d78fed728cdf..c9e1a4ff295c 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -1,5 +1,5 @@
/*
- * OPAL hypervisor Maintenance interrupt handling support in PowreNV.
+ * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index ecdcba9d1220..9d1b8c0aaf93 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -174,8 +174,14 @@ void opal_event_shutdown(void)
/* First free interrupts, which will also mask them */
for (i = 0; i < opal_irq_count; i++) {
- if (opal_irqs[i])
+ if (!opal_irqs[i])
+ continue;
+
+ if (in_interrupt())
+ disable_irq_nosync(opal_irqs[i]);
+ else
free_irq(opal_irqs[i], NULL);
+
opal_irqs[i] = 0;
}
}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 4495f428b500..d9916ea62305 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -1,5 +1,5 @@
/*
- * OPAL asynchronus Memory error handling support in PowreNV.
+ * OPAL asynchronus Memory error handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index aa267f120033..0a7074bb91dc 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -19,13 +19,10 @@
*/
#include <linux/delay.h>
-#include <linux/mutex.h>
#include <linux/of_platform.h>
#include <asm/opal.h>
#include <asm/machdep.h>
-static DEFINE_MUTEX(opal_sensor_mutex);
-
/*
* This will return sensor information to driver based on the requested sensor
* handle. A handle is an opaque id for the powernv, read by the driver from the
@@ -38,13 +35,9 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
__be32 data;
token = opal_async_get_token_interruptible();
- if (token < 0) {
- pr_err("%s: Couldn't get the token, returning\n", __func__);
- ret = token;
- goto out;
- }
+ if (token < 0)
+ return token;
- mutex_lock(&opal_sensor_mutex);
ret = opal_sensor_read(sensor_hndl, token, &data);
switch (ret) {
case OPAL_ASYNC_COMPLETION:
@@ -52,7 +45,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
if (ret) {
pr_err("%s: Failed to wait for the async response, %d\n",
__func__, ret);
- goto out_token;
+ goto out;
}
ret = opal_error_code(opal_get_async_rc(msg));
@@ -73,10 +66,8 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
break;
}
-out_token:
- mutex_unlock(&opal_sensor_mutex);
- opal_async_release_token(token);
out:
+ opal_async_release_token(token);
return ret;
}
EXPORT_SYMBOL_GPL(opal_get_sensor_data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 8c1ede2d3f7e..6f4b00a2ac46 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -94,7 +94,7 @@ opal_return:
* bytes (always BE) since MSR:LE will end up fixed up as a side
* effect of the rfid.
*/
- FIXUP_ENDIAN
+ FIXUP_ENDIAN_HV
ld r2,PACATOC(r13);
lwz r4,8(r1);
ld r5,PPC_LR_STKOFF(r1);
@@ -120,7 +120,7 @@ opal_real_call:
hrfid
opal_return_realmode:
- FIXUP_ENDIAN
+ FIXUP_ENDIAN_HV
ld r2,PACATOC(r13);
lwz r11,8(r1);
ld r12,PPC_LR_STKOFF(r1)
@@ -307,6 +307,7 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 65c79ecf5a4d..041ddbd1fc57 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -998,6 +998,7 @@ int opal_error_code(int rc)
case OPAL_PARAMETER: return -EINVAL;
case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
+ case OPAL_BUSY:
case OPAL_BUSY_EVENT: return -EBUSY;
case OPAL_NO_MEM: return -ENOMEM;
case OPAL_PERMISSION: return -EPERM;
@@ -1037,3 +1038,4 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);
+EXPORT_SYMBOL_GPL(opal_error_code);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57f9e55f4352..749055553064 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1002,9 +1002,12 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
}
/*
- * After doing so, there would be a "hole" in the /proc/iomem when
- * offset is a positive value. It looks like the device return some
- * mmio back to the system, which actually no one could use it.
+ * Since M64 BAR shares segments among all possible 256 PEs,
+ * we have to shift the beginning of PF IOV BAR to make it start from
+ * the segment which belongs to the PE number assigned to the first VF.
+ * This creates a "hole" in the /proc/iomem which could be used for
+ * allocating other resources so we reserve this area below and
+ * release when IOV is released.
*/
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &dev->resource[i + PCI_IOV_RESOURCES];
@@ -1018,7 +1021,22 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
i, &res2, res, (offset > 0) ? "En" : "Dis",
num_vfs, offset);
+
+ if (offset < 0) {
+ devm_release_resource(&dev->dev, &pdn->holes[i]);
+ memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
+ }
+
pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+ if (offset > 0) {
+ pdn->holes[i].start = res2.start;
+ pdn->holes[i].end = res2.start + size * offset - 1;
+ pdn->holes[i].flags = IORESOURCE_BUS;
+ pdn->holes[i].name = "pnv_iov_reserved";
+ devm_request_resource(&dev->dev, res->parent,
+ &pdn->holes[i]);
+ }
}
return 0;
}
@@ -2779,7 +2797,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
return -EINVAL;
- if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
+ if (!is_power_of_2(window_size))
return -EINVAL;
/* Adjust direct table size from window_size and levels */
@@ -3293,8 +3311,7 @@ static void pnv_pci_ioda_fixup(void)
pnv_pci_ioda_create_dbgfs();
#ifdef CONFIG_EEH
- eeh_init();
- eeh_addr_cache_build();
+ pnv_eeh_post_init();
#endif
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index a95273c524f6..56d1f272d4ad 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -234,6 +234,7 @@ extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+extern int pnv_eeh_post_init(void);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index bbb73aa0eb8f..1edfbc1e40f4 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,6 +36,7 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/tm.h>
#include "powernv.h"
@@ -290,6 +291,7 @@ static void __init pnv_setup_machdep_opal(void)
ppc_md.restart = pnv_restart;
pm_power_off = pnv_power_off;
ppc_md.halt = pnv_halt;
+ /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
ppc_md.hmi_exception_early = opal_hmi_exception_early;
@@ -311,6 +313,28 @@ static int __init pnv_probe(void)
return 1;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void __init pnv_tm_init(void)
+{
+ if (!firmware_has_feature(FW_FEATURE_OPAL) ||
+ !pvr_version_is(PVR_POWER9) ||
+ early_cpu_has_feature(CPU_FTR_TM))
+ return;
+
+ if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
+ return;
+
+ pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
+ cur_cpu_spec->cpu_features |= CPU_FTR_TM;
+ /* Make sure "normal" HTM is off (it should be) */
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
+ /* Turn on no suspend mode, and HTM no SC */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
+ PPC_FEATURE2_HTM_NOSC;
+ tm_suspend_disabled = true;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
/*
* Returns the cpu frequency for 'cpu' in Hz. This is used by
* /proc/cpuinfo
@@ -319,7 +343,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
{
unsigned long ret_freq;
- ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+ ret_freq = cpufreq_get(cpu) * 1000ul;
/*
* If the backend cpufreq driver does not exist,
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index c17f81e433f7..ba030669eca1 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -49,6 +49,13 @@
static void pnv_smp_setup_cpu(int cpu)
{
+ /*
+ * P9 workaround for CI vector load (see traps.c),
+ * enable the corresponding HMI interrupt
+ */
+ if (pvr_version_is(PVR_POWER9))
+ mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
+
if (xive_enabled())
xive_smp_setup_cpu();
else if (cpu != boot_cpuid)
@@ -290,6 +297,54 @@ static void __init pnv_smp_probe(void)
}
}
+static int pnv_system_reset_exception(struct pt_regs *regs)
+{
+ if (smp_handle_nmi_ipi(regs))
+ return 1;
+ return 0;
+}
+
+static int pnv_cause_nmi_ipi(int cpu)
+{
+ int64_t rc;
+
+ if (cpu >= 0) {
+ rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu));
+ if (rc != OPAL_SUCCESS)
+ return 0;
+ return 1;
+
+ } else if (cpu == NMI_IPI_ALL_OTHERS) {
+ bool success = true;
+ int c;
+
+
+ /*
+ * We do not use broadcasts (yet), because it's not clear
+ * exactly what semantics Linux wants or the firmware should
+ * provide.
+ */
+ for_each_online_cpu(c) {
+ if (c == smp_processor_id())
+ continue;
+
+ rc = opal_signal_system_reset(
+ get_hard_smp_processor_id(c));
+ if (rc != OPAL_SUCCESS)
+ success = false;
+ }
+ if (success)
+ return 1;
+
+ /*
+ * Caller will fall back to doorbells, which may pick
+ * up the remainders.
+ */
+ }
+
+ return 0;
+}
+
static struct smp_ops_t pnv_smp_ops = {
.message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
.cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */
@@ -308,6 +363,10 @@ static struct smp_ops_t pnv_smp_ops = {
/* This is called very early during platform setup_arch */
void __init pnv_smp_init(void)
{
+ if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
+ ppc_md.system_reset_exception = pnv_system_reset_exception;
+ pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
+ }
smp_ops = &pnv_smp_ops;
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fadb95efbb9e..a7d14aa7bb7c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -363,6 +363,7 @@ static int dlpar_online_cpu(struct device_node *dn)
BUG_ON(get_cpu_current_state(cpu)
!= CPU_STATE_OFFLINE);
cpu_maps_update_done();
+ timed_topology_update(1);
rc = device_online(get_cpu_device(cpu));
if (rc)
goto out;
@@ -533,6 +534,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
set_preferred_offline_state(cpu,
CPU_STATE_OFFLINE);
cpu_maps_update_done();
+ timed_topology_update(1);
rc = device_offline(get_cpu_device(cpu));
if (rc)
goto out;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 7c181467d0ad..69921f72e2da 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -55,23 +55,23 @@
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
- struct iommu_table_group *table_group = NULL;
- struct iommu_table *tbl = NULL;
- struct iommu_table_group_link *tgl = NULL;
+ struct iommu_table_group *table_group;
+ struct iommu_table *tbl;
+ struct iommu_table_group_link *tgl;
table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
node);
if (!table_group)
- goto fail_exit;
+ return NULL;
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
if (!tbl)
- goto fail_exit;
+ goto free_group;
tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
node);
if (!tgl)
- goto fail_exit;
+ goto free_table;
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
kref_init(&tbl->it_kref);
@@ -82,11 +82,10 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
return table_group;
-fail_exit:
- kfree(tgl);
- kfree(table_group);
+free_table:
kfree(tbl);
-
+free_group:
+ kfree(table_group);
return NULL;
}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 495ba4e7336d..0ee4a469a4ae 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -93,7 +93,7 @@ void vpa_init(int cpu)
return;
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
@@ -106,7 +106,7 @@ void vpa_init(int cpu)
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
/*
* Register dispatch trace log, if one has been allocated.
@@ -129,7 +129,7 @@ void vpa_init(int cpu)
}
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
@@ -824,7 +824,7 @@ void arch_free_page(struct page *page, int order)
EXPORT_SYMBOL(arch_free_page);
#endif /* CONFIG_PPC_SMLPAR */
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_TRACEPOINTS
#ifdef HAVE_JUMP_LABEL
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 779fc2a1c8f7..b2706c483067 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -485,7 +485,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
seq_printf(m, "slb_size=%d\n", mmu_slb_size);
#endif
parse_em_data(m);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 12277bc9fd9e..d86938260a86 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1592,6 +1592,8 @@ ATTRIBUTE_GROUPS(vio_dev);
void vio_unregister_device(struct vio_dev *viodev)
{
device_unregister(&viodev->dev);
+ if (viodev->family == VDEVICE)
+ irq_dispose_mapping(viodev->irq);
}
EXPORT_SYMBOL(vio_unregister_device);
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index c60e84e4558d..1b307c80b401 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -184,7 +184,7 @@ static int axon_ram_probe(struct platform_device *device)
static int axon_ram_bank_id = -1;
struct axon_ram_bank *bank;
struct resource resource;
- int rc = 0;
+ int rc;
axon_ram_bank_id++;
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 16f1edd78c40..535cf1f6941c 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -846,12 +846,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
u32 ipic_get_mcp_status(void)
{
- return ipic_read(primary_ipic->regs, IPIC_SERMR);
+ return ipic_read(primary_ipic->regs, IPIC_SERSR);
}
void ipic_clear_mcp_status(u32 mask)
{
- ipic_write(primary_ipic->regs, IPIC_SERMR, mask);
+ ipic_write(primary_ipic->regs, IPIC_SERSR, mask);
}
/* Return an interrupt vector or 0 if no interrupt is pending. */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 33351c6704b1..1b2d8cb49abb 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -28,6 +28,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/ctype.h>
+#include <linux/highmem.h>
#include <asm/debugfs.h>
#include <asm/ptrace.h>
@@ -127,6 +128,7 @@ static void byterev(unsigned char *, int);
static void memex(void);
static int bsesc(void);
static void dump(void);
+static void show_pte(unsigned long);
static void prdump(unsigned long, long);
static int ppc_inst_dump(unsigned long, long, int);
static void dump_log_buf(void);
@@ -234,6 +236,7 @@ Commands:\n\
#endif
"\
dr dump stream of raw bytes\n\
+ dv dump virtual address translation \n\
dt dump the tracing buffers (uses printk)\n\
dtc dump the tracing buffers for current CPU (uses printk)\n\
"
@@ -278,6 +281,7 @@ Commands:\n\
#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E)
" u dump TLB\n"
#endif
+" U show uptime information\n"
" ? help\n"
" # n limit output to n lines per page (for dp, dpa, dl)\n"
" zr reboot\n\
@@ -530,14 +534,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
waiting:
secondary = 1;
+ spin_begin();
while (secondary && !xmon_gate) {
if (in_xmon == 0) {
- if (fromipi)
+ if (fromipi) {
+ spin_end();
goto leave;
+ }
secondary = test_and_set_bit(0, &in_xmon);
}
- barrier();
+ spin_cpu_relax();
+ touch_nmi_watchdog();
}
+ spin_end();
if (!secondary && !xmon_gate) {
/* we are the first cpu to come in */
@@ -568,21 +577,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
mb();
xmon_gate = 1;
barrier();
+ touch_nmi_watchdog();
}
cmdloop:
while (in_xmon) {
if (secondary) {
+ spin_begin();
if (cpu == xmon_owner) {
if (!test_and_set_bit(0, &xmon_taken)) {
secondary = 0;
+ spin_end();
continue;
}
/* missed it */
while (cpu == xmon_owner)
- barrier();
+ spin_cpu_relax();
}
- barrier();
+ spin_cpu_relax();
+ touch_nmi_watchdog();
} else {
cmd = cmds(regs);
if (cmd != 0) {
@@ -896,6 +909,26 @@ static void remove_cpu_bpts(void)
write_ciabr(0);
}
+/* Based on uptime_proc_show(). */
+static void
+show_uptime(void)
+{
+ struct timespec uptime;
+
+ if (setjmp(bus_error_jmp) == 0) {
+ catch_memory_errors = 1;
+ sync();
+
+ get_monotonic_boottime(&uptime);
+ printf("Uptime: %lu.%.2lu seconds\n", (unsigned long)uptime.tv_sec,
+ ((unsigned long)uptime.tv_nsec / (NSEC_PER_SEC/100)));
+
+ sync();
+ __delay(200); \
+ }
+ catch_memory_errors = 0;
+}
+
static void set_lpp_cmd(void)
{
unsigned long lpp;
@@ -1031,6 +1064,9 @@ cmds(struct pt_regs *excp)
dump_tlb_book3e();
break;
#endif
+ case 'U':
+ show_uptime();
+ break;
default:
printf("Unrecognized command: ");
do {
@@ -2279,7 +2315,7 @@ static void dump_tracing(void)
static void dump_one_paca(int cpu)
{
struct paca_struct *p;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
int i = 0;
#endif
@@ -2320,7 +2356,7 @@ static void dump_one_paca(int cpu)
DUMP(p, hw_cpu_id, "x");
DUMP(p, cpu_start, "x");
DUMP(p, kexec_state, "x");
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
for (i = 0; i < SLB_NUM_BOLTED; i++) {
u64 esid, vsid;
@@ -2351,6 +2387,7 @@ static void dump_one_paca(int cpu)
#endif
DUMP(p, __current, "p");
DUMP(p, kstack, "lx");
+ printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
DUMP(p, stab_rr, "lx");
DUMP(p, saved_r1, "lx");
DUMP(p, trap_save, "x");
@@ -2475,6 +2512,11 @@ static void dump_xives(void)
unsigned long num;
int c;
+ if (!xive_enabled()) {
+ printf("Xive disabled on this system\n");
+ return;
+ }
+
c = inchar();
if (c == 'a') {
dump_all_xives();
@@ -2574,6 +2616,9 @@ dump(void)
dump_log_buf();
} else if (c == 'o') {
dump_opal_msglog();
+ } else if (c == 'v') {
+ /* dump virtual to physical translation */
+ show_pte(adrs);
} else if (c == 'r') {
scanhex(&ndump);
if (ndump == 0)
@@ -2907,6 +2952,116 @@ static void show_task(struct task_struct *tsk)
tsk->comm);
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void format_pte(void *ptep, unsigned long pte)
+{
+ printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte);
+ printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK);
+
+ printf("Flags = %s%s%s%s%s\n",
+ (pte & _PAGE_ACCESSED) ? "Accessed " : "",
+ (pte & _PAGE_DIRTY) ? "Dirty " : "",
+ (pte & _PAGE_READ) ? "Read " : "",
+ (pte & _PAGE_WRITE) ? "Write " : "",
+ (pte & _PAGE_EXEC) ? "Exec " : "");
+}
+
+static void show_pte(unsigned long addr)
+{
+ unsigned long tskv = 0;
+ struct task_struct *tsk = NULL;
+ struct mm_struct *mm;
+ pgd_t *pgdp, *pgdir;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ if (!scanhex(&tskv))
+ mm = &init_mm;
+ else
+ tsk = (struct task_struct *)tskv;
+
+ if (tsk == NULL)
+ mm = &init_mm;
+ else
+ mm = tsk->active_mm;
+
+ if (setjmp(bus_error_jmp) != 0) {
+ catch_memory_errors = 0;
+ printf("*** Error dumping pte for task %p\n", tsk);
+ return;
+ }
+
+ catch_memory_errors = 1;
+ sync();
+
+ if (mm == &init_mm) {
+ pgdp = pgd_offset_k(addr);
+ pgdir = pgd_offset_k(0);
+ } else {
+ pgdp = pgd_offset(mm, addr);
+ pgdir = pgd_offset(mm, 0);
+ }
+
+ if (pgd_none(*pgdp)) {
+ printf("no linux page table for address\n");
+ return;
+ }
+
+ printf("pgd @ 0x%016lx\n", pgdir);
+
+ if (pgd_huge(*pgdp)) {
+ format_pte(pgdp, pgd_val(*pgdp));
+ return;
+ }
+ printf("pgdp @ 0x%016lx = 0x%016lx\n", pgdp, pgd_val(*pgdp));
+
+ pudp = pud_offset(pgdp, addr);
+
+ if (pud_none(*pudp)) {
+ printf("No valid PUD\n");
+ return;
+ }
+
+ if (pud_huge(*pudp)) {
+ format_pte(pudp, pud_val(*pudp));
+ return;
+ }
+
+ printf("pudp @ 0x%016lx = 0x%016lx\n", pudp, pud_val(*pudp));
+
+ pmdp = pmd_offset(pudp, addr);
+
+ if (pmd_none(*pmdp)) {
+ printf("No valid PMD\n");
+ return;
+ }
+
+ if (pmd_huge(*pmdp)) {
+ format_pte(pmdp, pmd_val(*pmdp));
+ return;
+ }
+ printf("pmdp @ 0x%016lx = 0x%016lx\n", pmdp, pmd_val(*pmdp));
+
+ ptep = pte_offset_map(pmdp, addr);
+ if (pte_none(*ptep)) {
+ printf("no valid PTE\n");
+ return;
+ }
+
+ format_pte(ptep, pte_val(*ptep));
+
+ sync();
+ __delay(200);
+ catch_memory_errors = 0;
+}
+#else
+static void show_pte(unsigned long addr)
+{
+ printf("show_pte not yet implemented\n");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
static void show_tasks(void)
{
unsigned long tskv;
@@ -3224,7 +3379,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
printf("%s", after);
}
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
void dump_segments(void)
{
int i;
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index a0c44d16bf30..7c11bad5cded 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
#include "cxl.h"
@@ -331,9 +332,12 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
/* ensure this mm_struct can't be freed */
cxl_context_mm_count_get(ctx);
- /* decrement the use count */
- if (ctx->mm)
+ if (ctx->mm) {
+ /* decrement the use count from above */
mmput(ctx->mm);
+ /* make TLBIs for this context global */
+ mm_context_add_copro(ctx->mm);
+ }
}
/*
@@ -342,13 +346,19 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
*/
cxl_ctx_get();
+ /* See the comment in afu_ioctl_start_work() */
+ smp_mb();
+
if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
put_pid(ctx->pid);
ctx->pid = NULL;
cxl_adapter_context_put(ctx->afu->adapter);
cxl_ctx_put();
- if (task)
+ if (task) {
cxl_context_mm_count_put(ctx);
+ if (ctx->mm)
+ mm_context_remove_copro(ctx->mm);
+ }
goto out;
}
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 8c32040b9c09..12a41b2753f0 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
#include <asm/cputable.h>
#include <asm/current.h>
#include <asm/copro.h>
@@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx)
/* Decrease the mm count on the context */
cxl_context_mm_count_put(ctx);
+ if (ctx->mm)
+ mm_context_remove_copro(ctx->mm);
ctx->mm = NULL;
return 0;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b1afeccbb97f..e46a4062904a 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -100,9 +100,12 @@ static const cxl_p1_reg_t CXL_XSL_FEC = {0x0158};
static const cxl_p1_reg_t CXL_XSL_DSNCTL = {0x0168};
/* PSL registers - CAIA 2 */
static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020};
+static const cxl_p1_reg_t CXL_XSL9_INV = {0x0110};
+static const cxl_p1_reg_t CXL_XSL9_DBG = {0x0130};
+static const cxl_p1_reg_t CXL_XSL9_DEF = {0x0140};
static const cxl_p1_reg_t CXL_XSL9_DSNCTL = {0x0168};
static const cxl_p1_reg_t CXL_PSL9_FIR1 = {0x0300};
-static const cxl_p1_reg_t CXL_PSL9_FIR2 = {0x0308};
+static const cxl_p1_reg_t CXL_PSL9_FIR_MASK = {0x0308};
static const cxl_p1_reg_t CXL_PSL9_Timebase = {0x0310};
static const cxl_p1_reg_t CXL_PSL9_DEBUG = {0x0320};
static const cxl_p1_reg_t CXL_PSL9_FIR_CNTL = {0x0348};
@@ -112,6 +115,7 @@ static const cxl_p1_reg_t CXL_PSL9_TRACECFG = {0x0368};
static const cxl_p1_reg_t CXL_PSL9_APCDEDALLOC = {0x0378};
static const cxl_p1_reg_t CXL_PSL9_APCDEDTYPE = {0x0380};
static const cxl_p1_reg_t CXL_PSL9_TNR_ADDR = {0x0388};
+static const cxl_p1_reg_t CXL_PSL9_CTCCFG = {0x0390};
static const cxl_p1_reg_t CXL_PSL9_GP_CT = {0x0398};
static const cxl_p1_reg_t CXL_XSL9_IERAT = {0x0588};
static const cxl_p1_reg_t CXL_XSL9_ILPP = {0x0590};
@@ -414,6 +418,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS)
#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS)
+#define CXL_PSL9_TRACEID_MAX 0xAU
+#define CXL_PSL9_TRACESTATE_FIN 0x3U
+
enum cxl_context_status {
CLOSED,
OPENED,
@@ -938,8 +945,6 @@ int cxl_debugfs_adapter_add(struct cxl *adapter);
void cxl_debugfs_adapter_remove(struct cxl *adapter);
int cxl_debugfs_afu_add(struct cxl_afu *afu);
void cxl_debugfs_afu_remove(struct cxl_afu *afu);
-void cxl_stop_trace_psl9(struct cxl *cxl);
-void cxl_stop_trace_psl8(struct cxl *cxl);
void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir);
void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir);
void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, struct dentry *dir);
@@ -975,14 +980,6 @@ static inline void cxl_debugfs_afu_remove(struct cxl_afu *afu)
{
}
-static inline void cxl_stop_trace_psl9(struct cxl *cxl)
-{
-}
-
-static inline void cxl_stop_trace_psl8(struct cxl *cxl)
-{
-}
-
static inline void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter,
struct dentry *dir)
{
@@ -1070,7 +1067,8 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx);
-void cxl_native_err_irq_dump_regs(struct cxl *adapter);
+void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter);
+void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter);
int cxl_pci_vphb_add(struct cxl_afu *afu);
void cxl_pci_vphb_remove(struct cxl_afu *afu);
void cxl_release_mapping(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c
index eae9d749f967..1643850d2302 100644
--- a/drivers/misc/cxl/debugfs.c
+++ b/drivers/misc/cxl/debugfs.c
@@ -15,28 +15,6 @@
static struct dentry *cxl_debugfs;
-void cxl_stop_trace_psl9(struct cxl *adapter)
-{
- /* Stop the trace */
- cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x4480000000000000ULL);
-}
-
-void cxl_stop_trace_psl8(struct cxl *adapter)
-{
- int slice;
-
- /* Stop the trace */
- cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL);
-
- /* Stop the slice traces */
- spin_lock(&adapter->afu_list_lock);
- for (slice = 0; slice < adapter->slices; slice++) {
- if (adapter->afu[slice])
- cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, 0x8000000000000000LL);
- }
- spin_unlock(&adapter->afu_list_lock);
-}
-
/* Helpers to export CXL mmaped IO registers via debugfs */
static int debugfs_io_u64_get(void *data, u64 *val)
{
@@ -62,9 +40,14 @@ static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode,
void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir)
{
debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR1));
- debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR2));
+ debugfs_create_io_x64("fir_mask", 0400, dir,
+ _cxl_p1_addr(adapter, CXL_PSL9_FIR_MASK));
debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR_CNTL));
debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_TRACECFG));
+ debugfs_create_io_x64("debug", 0600, dir,
+ _cxl_p1_addr(adapter, CXL_PSL9_DEBUG));
+ debugfs_create_io_x64("xsl-debug", 0600, dir,
+ _cxl_p1_addr(adapter, CXL_XSL9_DBG));
}
void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir)
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index f17f72ea0545..70dbb6de102c 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -220,22 +220,11 @@ static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
{
- u64 crs; /* Translation Checkout Response Status */
-
if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
return true;
- if (cxl_is_power9()) {
- crs = (dsisr & CXL_PSL9_DSISR_An_CO_MASK);
- if ((crs == CXL_PSL9_DSISR_An_PF_SLR) ||
- (crs == CXL_PSL9_DSISR_An_PF_RGC) ||
- (crs == CXL_PSL9_DSISR_An_PF_RGP) ||
- (crs == CXL_PSL9_DSISR_An_PF_HRH) ||
- (crs == CXL_PSL9_DSISR_An_PF_STEG) ||
- (crs == CXL_PSL9_DSISR_An_URTCH)) {
- return true;
- }
- }
+ if (cxl_is_power9())
+ return true;
return false;
}
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 4bfad9f6dc9f..76c0b0ca9388 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
#include <asm/cputable.h>
#include <asm/current.h>
#include <asm/copro.h>
@@ -220,9 +221,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
/* ensure this mm_struct can't be freed */
cxl_context_mm_count_get(ctx);
- /* decrement the use count */
- if (ctx->mm)
+ if (ctx->mm) {
+ /* decrement the use count from above */
mmput(ctx->mm);
+ /* make TLBIs for this context global */
+ mm_context_add_copro(ctx->mm);
+ }
/*
* Increment driver use count. Enables global TLBIs for hash
@@ -230,6 +234,20 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
*/
cxl_ctx_get();
+ /*
+ * A barrier is needed to make sure all TLBIs are global
+ * before we attach and the context starts being used by the
+ * adapter.
+ *
+ * Needed after mm_context_add_copro() for radix and
+ * cxl_ctx_get() for hash/p8.
+ *
+ * The barrier should really be mb(), since it involves a
+ * device. However, it's only useful when we have local
+ * vs. global TLBIs, i.e SMP=y. So keep smp_mb().
+ */
+ smp_mb();
+
trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
@@ -240,6 +258,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
ctx->pid = NULL;
cxl_ctx_put();
cxl_context_mm_count_put(ctx);
+ if (ctx->mm)
+ mm_context_remove_copro(ctx->mm);
goto out;
}
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 4a82c313cf71..02b6b45b4c20 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -897,6 +897,14 @@ int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes)
afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx);
+ ctx->elem->software_state = cpu_to_be32(CXL_PE_SOFTWARE_STATE_V);
+ /*
+ * Ideally we should do a wmb() here to make sure the changes to the
+ * PE are visible to the card before we call afu_enable.
+ * On ppc64 though all mmios are preceded by a 'sync' instruction hence
+ * we dont dont need one here.
+ */
+
result = cxl_ops->afu_reset(afu);
if (result)
return result;
@@ -1077,13 +1085,11 @@ static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info)
void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx)
{
- u64 fir1, fir2, serr;
+ u64 fir1, serr;
fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR1);
- fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR2);
dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
- dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
if (ctx->afu->adapter->native->sl_ops->register_serr_irq) {
serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
cxl_afu_decode_psl_serr(ctx->afu, serr);
@@ -1257,14 +1263,23 @@ static irqreturn_t native_slice_irq_err(int irq, void *data)
return IRQ_HANDLED;
}
-void cxl_native_err_irq_dump_regs(struct cxl *adapter)
+void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter)
+{
+ u64 fir1;
+
+ fir1 = cxl_p1_read(adapter, CXL_PSL9_FIR1);
+ dev_crit(&adapter->dev, "PSL_FIR: 0x%016llx\n", fir1);
+}
+
+void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter)
{
u64 fir1, fir2;
fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1);
fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2);
-
- dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2);
+ dev_crit(&adapter->dev,
+ "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n",
+ fir1, fir2);
}
static irqreturn_t native_irq_err(int irq, void *data)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 3ba04f371380..bb7fd3f4edab 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -401,7 +401,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
*capp_unit_id = get_capp_unit_id(np, *phb_index);
of_node_put(np);
if (!*capp_unit_id) {
- pr_err("cxl: invalid capp unit id\n");
+ pr_err("cxl: invalid capp unit id (phb_index: %d)\n",
+ *phb_index);
return -ENODEV;
}
@@ -475,37 +476,37 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
psl_fircntl |= 0x1ULL; /* ce_thresh */
cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl);
- /* vccredits=0x1 pcklat=0x4 */
- cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0000000000001810ULL);
-
- /*
- * For debugging with trace arrays.
- * Configure RX trace 0 segmented mode.
- * Configure CT trace 0 segmented mode.
- * Configure LA0 trace 0 segmented mode.
- * Configure LA1 trace 0 segmented mode.
+ /* Setup the PSL to transmit packets on the PCIe before the
+ * CAPP is enabled
*/
- cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000000ULL);
- cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000003ULL);
- cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000005ULL);
- cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000006ULL);
+ cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000002A10ULL);
/*
* A response to an ASB_Notify request is returned by the
* system as an MMIO write to the address defined in
- * the PSL_TNR_ADDR register
+ * the PSL_TNR_ADDR register.
+ * keep the Reset Value: 0x00020000E0000000
*/
- /* PSL_TNR_ADDR */
- /* NORST */
- cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL);
+ /* Enable XSL rty limit */
+ cxl_p1_write(adapter, CXL_XSL9_DEF, 0x51F8000000000005ULL);
+
+ /* Change XSL_INV dummy read threshold */
+ cxl_p1_write(adapter, CXL_XSL9_INV, 0x0000040007FFC200ULL);
+
+ if (phb_index == 3) {
+ /* disable machines 31-47 and 20-27 for DMA */
+ cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000FF3FFFF0000ULL);
+ }
- /* allocate the apc machines */
- cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000003FFFF0000ULL);
+ /* Snoop machines */
+ cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL);
- /* Disable vc dd1 fix */
- if (cxl_is_power9_dd1())
- cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0400000000000001ULL);
+ if (cxl_is_power9_dd1()) {
+ /* Disabling deadlock counter CAR */
+ cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL);
+ } else
+ cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x4000000000000000ULL);
return 0;
}
@@ -1746,6 +1747,44 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
pci_disable_device(pdev);
}
+static void cxl_stop_trace_psl9(struct cxl *adapter)
+{
+ int traceid;
+ u64 trace_state, trace_mask;
+ struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+ /* read each tracearray state and issue mmio to stop them is needed */
+ for (traceid = 0; traceid <= CXL_PSL9_TRACEID_MAX; ++traceid) {
+ trace_state = cxl_p1_read(adapter, CXL_PSL9_CTCCFG);
+ trace_mask = (0x3ULL << (62 - traceid * 2));
+ trace_state = (trace_state & trace_mask) >> (62 - traceid * 2);
+ dev_dbg(&dev->dev, "cxl: Traceid-%d trace_state=0x%0llX\n",
+ traceid, trace_state);
+
+ /* issue mmio if the trace array isn't in FIN state */
+ if (trace_state != CXL_PSL9_TRACESTATE_FIN)
+ cxl_p1_write(adapter, CXL_PSL9_TRACECFG,
+ 0x8400000000000000ULL | traceid);
+ }
+}
+
+static void cxl_stop_trace_psl8(struct cxl *adapter)
+{
+ int slice;
+
+ /* Stop the trace */
+ cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL);
+
+ /* Stop the slice traces */
+ spin_lock(&adapter->afu_list_lock);
+ for (slice = 0; slice < adapter->slices; slice++) {
+ if (adapter->afu[slice])
+ cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE,
+ 0x8000000000000000LL);
+ }
+ spin_unlock(&adapter->afu_list_lock);
+}
+
static const struct cxl_service_layer_ops psl9_ops = {
.adapter_regs_init = init_implementation_adapter_regs_psl9,
.invalidate_all = cxl_invalidate_all_psl9,
@@ -1762,6 +1801,7 @@ static const struct cxl_service_layer_ops psl9_ops = {
.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
+ .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9,
.debugfs_stop_trace = cxl_stop_trace_psl9,
.write_timebase_ctrl = write_timebase_ctrl_psl9,
.timebase_read = timebase_read_psl9,
@@ -1785,7 +1825,7 @@ static const struct cxl_service_layer_ops psl8_ops = {
.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl8,
.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl8,
.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl8,
- .err_irq_dump_registers = cxl_native_err_irq_dump_regs,
+ .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl8,
.debugfs_stop_trace = cxl_stop_trace_psl8,
.write_timebase_ctrl = write_timebase_ctrl_psl8,
.timebase_read = timebase_read_psl8,
diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c
index f5396f26ddb4..26f9feaa5d17 100644
--- a/drivers/mtd/devices/powernv_flash.c
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -47,6 +47,11 @@ enum flash_op {
FLASH_OP_ERASE,
};
+/*
+ * Don't return -ERESTARTSYS if we can't get a token, the MTD core
+ * might have split up the call from userspace and called into the
+ * driver more than once, we'll already have done some amount of work.
+ */
static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
loff_t offset, size_t len, size_t *retlen, u_char *buf)
{
@@ -63,7 +68,8 @@ static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
if (token < 0) {
if (token != -ERESTARTSYS)
dev_err(dev, "Failed to get an async token\n");
-
+ else
+ token = -EINTR;
return token;
}
@@ -78,32 +84,53 @@ static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
rc = opal_flash_erase(info->id, offset, len, token);
break;
default:
- BUG_ON(1);
- }
-
- if (rc != OPAL_ASYNC_COMPLETION) {
- dev_err(dev, "opal_flash_async_op(op=%d) failed (rc %d)\n",
- op, rc);
+ WARN_ON_ONCE(1);
opal_async_release_token(token);
return -EIO;
}
- rc = opal_async_wait_response(token, &msg);
- opal_async_release_token(token);
- if (rc) {
- dev_err(dev, "opal async wait failed (rc %d)\n", rc);
- return -EIO;
+ if (rc == OPAL_ASYNC_COMPLETION) {
+ rc = opal_async_wait_response_interruptible(token, &msg);
+ if (rc) {
+ /*
+ * If we return the mtd core will free the
+ * buffer we've just passed to OPAL but OPAL
+ * will continue to read or write from that
+ * memory.
+ * It may be tempting to ultimately return 0
+ * if we're doing a read or a write since we
+ * are going to end up waiting until OPAL is
+ * done. However, because the MTD core sends
+ * us the userspace request in chunks, we need
+ * it to know we've been interrupted.
+ */
+ rc = -EINTR;
+ if (opal_async_wait_response(token, &msg))
+ dev_err(dev, "opal_async_wait_response() failed\n");
+ goto out;
+ }
+ rc = opal_get_async_rc(msg);
}
- rc = opal_get_async_rc(msg);
- if (rc == OPAL_SUCCESS) {
- rc = 0;
- if (retlen)
- *retlen = len;
- } else {
- rc = -EIO;
- }
+ /*
+ * OPAL does mutual exclusion on the flash, it will return
+ * OPAL_BUSY.
+ * During firmware updates by the service processor OPAL may
+ * be (temporarily) prevented from accessing the flash, in
+ * this case OPAL will also return OPAL_BUSY.
+ * Both cases aren't errors exactly but the flash could have
+ * changed, userspace should be informed.
+ */
+ if (rc != OPAL_SUCCESS && rc != OPAL_BUSY)
+ dev_err(dev, "opal_flash_async_op(op=%d) failed (rc %d)\n",
+ op, rc);
+
+ if (rc == OPAL_SUCCESS && retlen)
+ *retlen = len;
+ rc = opal_error_code(rc);
+out:
+ opal_async_release_token(token);
return rc;
}
@@ -220,21 +247,20 @@ static int powernv_flash_probe(struct platform_device *pdev)
int ret;
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
- if (!data) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!data)
+ return -ENOMEM;
+
data->mtd.priv = data;
ret = of_property_read_u32(dev->of_node, "ibm,opal-id", &(data->id));
if (ret) {
dev_err(dev, "no device property 'ibm,opal-id'\n");
- goto out;
+ return ret;
}
ret = powernv_flash_set_driver_info(dev, &data->mtd);
if (ret)
- goto out;
+ return ret;
dev_set_drvdata(dev, data);
@@ -243,10 +269,7 @@ static int powernv_flash_probe(struct platform_device *pdev)
* with an ffs partition at the start, it should prove easier for users
* to deal with partitions or not as they see fit
*/
- ret = mtd_device_register(&data->mtd, NULL, 0);
-
-out:
- return ret;
+ return mtd_device_register(&data->mtd, NULL, 0);
}
/**
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index f4241339edd2..87f1f0252299 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -10,6 +10,7 @@
*/
#define _GNU_SOURCE
+#include <errno.h>
#include <sched.h>
#include <string.h>
#include <stdio.h>
@@ -75,6 +76,7 @@ static void touch(void)
static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
+ int rc;
pthread_t tid;
cpu_set_t cpuset;
pthread_attr_t attr;
@@ -82,14 +84,23 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
- pthread_attr_init(&attr);
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
- if (pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset)) {
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
perror("pthread_attr_setaffinity_np");
exit(1);
}
- if (pthread_create(&tid, &attr, fn, arg)) {
+ rc = pthread_create(&tid, &attr, fn, arg);
+ if (rc) {
+ errno = rc;
perror("pthread_create");
exit(1);
}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 17fb1b43c320..1899bd85121f 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -53,6 +53,8 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
}
while ((dp = readdir(sysfs))) {
+ int len;
+
if (!(dp->d_type & DT_DIR))
continue;
if (!strcmp(dp->d_name, "cpuidle"))
@@ -60,7 +62,9 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
if (!strstr(dp->d_name, "cpu"))
continue;
- sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name);
+ len = snprintf(file, LEN_MAX, "%s%s/dscr", CPU_PATH, dp->d_name);
+ if (len >= LEN_MAX)
+ continue;
if (access(file, F_OK))
continue;
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 2f1f7b013293..241a4a4ee0e4 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -12,3 +12,4 @@ tm-signal-context-chk-gpr
tm-signal-context-chk-vmx
tm-signal-context-chk-vsx
tm-vmx-unavail
+tm-unavailable
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 7bfcd454fb2a..24855c0eee53 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -2,7 +2,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
tm-signal-context-chk-vmx tm-signal-context-chk-vsx
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
- tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail \
+ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \
$(SIGNAL_CONTEXT_CHK_TESTS)
include ../../lib.mk
@@ -16,6 +16,7 @@ $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
+$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
new file mode 100644
index 000000000000..96c37f84ce54
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2017, Gustavo Romero, Breno Leitao, Cyril Bur, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Force FP, VEC and VSX unavailable exception during transaction in all
+ * possible scenarios regarding the MSR.FP and MSR.VEC state, e.g. when FP
+ * is enable and VEC is disable, when FP is disable and VEC is enable, and
+ * so on. Then we check if the restored state is correctly set for the
+ * FP and VEC registers to the previous state we set just before we entered
+ * in TM, i.e. we check if it corrupts somehow the recheckpointed FP and
+ * VEC/Altivec registers on abortion due to an unavailable exception in TM.
+ * N.B. In this test we do not test all the FP/Altivec/VSX registers for
+ * corruption, but only for registers vs0 and vs32, which are respectively
+ * representatives of FP and VEC/Altivec reg sets.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+
+#include "tm.h"
+
+#define DEBUG 0
+
+/* Unavailable exceptions to test in HTM */
+#define FP_UNA_EXCEPTION 0
+#define VEC_UNA_EXCEPTION 1
+#define VSX_UNA_EXCEPTION 2
+
+#define NUM_EXCEPTIONS 3
+
+struct Flags {
+ int touch_fp;
+ int touch_vec;
+ int result;
+ int exception;
+} flags;
+
+bool expecting_failure(void)
+{
+ if (flags.touch_fp && flags.exception == FP_UNA_EXCEPTION)
+ return false;
+
+ if (flags.touch_vec && flags.exception == VEC_UNA_EXCEPTION)
+ return false;
+
+ /*
+ * If both FP and VEC are touched it does not mean that touching VSX
+ * won't raise an exception. However since FP and VEC state are already
+ * correctly loaded, the transaction is not aborted (i.e.
+ * treclaimed/trecheckpointed) and MSR.VSX is just set as 1, so a TM
+ * failure is not expected also in this case.
+ */
+ if ((flags.touch_fp && flags.touch_vec) &&
+ flags.exception == VSX_UNA_EXCEPTION)
+ return false;
+
+ return true;
+}
+
+/* Check if failure occurred whilst in transaction. */
+bool is_failure(uint64_t condition_reg)
+{
+ /*
+ * When failure handling occurs, CR0 is set to 0b1010 (0xa). Otherwise
+ * transaction completes without failure and hence reaches out 'tend.'
+ * that sets CR0 to 0b0100 (0x4).
+ */
+ return ((condition_reg >> 28) & 0xa) == 0xa;
+}
+
+void *ping(void *input)
+{
+
+ /*
+ * Expected values for vs0 and vs32 after a TM failure. They must never
+ * change, otherwise they got corrupted.
+ */
+ uint64_t high_vs0 = 0x5555555555555555;
+ uint64_t low_vs0 = 0xffffffffffffffff;
+ uint64_t high_vs32 = 0x5555555555555555;
+ uint64_t low_vs32 = 0xffffffffffffffff;
+
+ /* Counter for busy wait */
+ uint64_t counter = 0x1ff000000;
+
+ /*
+ * Variable to keep a copy of CR register content taken just after we
+ * leave the transactional state.
+ */
+ uint64_t cr_ = 0;
+
+ /*
+ * Wait a bit so thread can get its name "ping". This is not important
+ * to reproduce the issue but it's nice to have for systemtap debugging.
+ */
+ if (DEBUG)
+ sleep(1);
+
+ printf("If MSR.FP=%d MSR.VEC=%d: ", flags.touch_fp, flags.touch_vec);
+
+ if (flags.exception != FP_UNA_EXCEPTION &&
+ flags.exception != VEC_UNA_EXCEPTION &&
+ flags.exception != VSX_UNA_EXCEPTION) {
+ printf("No valid exception specified to test.\n");
+ return NULL;
+ }
+
+ asm (
+ /* Prepare to merge low and high. */
+ " mtvsrd 33, %[high_vs0] ;"
+ " mtvsrd 34, %[low_vs0] ;"
+
+ /*
+ * Adjust VS0 expected value after an TM failure,
+ * i.e. vs0 = 0x5555555555555555555FFFFFFFFFFFFFFFF
+ */
+ " xxmrghd 0, 33, 34 ;"
+
+ /*
+ * Adjust VS32 expected value after an TM failure,
+ * i.e. vs32 = 0x5555555555555555555FFFFFFFFFFFFFFFF
+ */
+ " xxmrghd 32, 33, 34 ;"
+
+ /*
+ * Wait an amount of context switches so load_fp and load_vec
+ * overflow and MSR.FP, MSR.VEC, and MSR.VSX become zero (off).
+ */
+ " mtctr %[counter] ;"
+
+ /* Decrement CTR branch if CTR non zero. */
+ "1: bdnz 1b ;"
+
+ /*
+ * Check if we want to touch FP prior to the test in order
+ * to set MSR.FP = 1 before provoking an unavailable
+ * exception in TM.
+ */
+ " cmpldi %[touch_fp], 0 ;"
+ " beq no_fp ;"
+ " fadd 10, 10, 10 ;"
+ "no_fp: ;"
+
+ /*
+ * Check if we want to touch VEC prior to the test in order
+ * to set MSR.VEC = 1 before provoking an unavailable
+ * exception in TM.
+ */
+ " cmpldi %[touch_vec], 0 ;"
+ " beq no_vec ;"
+ " vaddcuw 10, 10, 10 ;"
+ "no_vec: ;"
+
+ /*
+ * Perhaps it would be a better idea to do the
+ * compares outside transactional context and simply
+ * duplicate code.
+ */
+ " tbegin. ;"
+ " beq trans_fail ;"
+
+ /* Do we do FP Unavailable? */
+ " cmpldi %[exception], %[ex_fp] ;"
+ " bne 1f ;"
+ " fadd 10, 10, 10 ;"
+ " b done ;"
+
+ /* Do we do VEC Unavailable? */
+ "1: cmpldi %[exception], %[ex_vec] ;"
+ " bne 2f ;"
+ " vaddcuw 10, 10, 10 ;"
+ " b done ;"
+
+ /*
+ * Not FP or VEC, therefore VSX. Ensure this
+ * instruction always generates a VSX Unavailable.
+ * ISA 3.0 is tricky here.
+ * (xxmrghd will on ISA 2.07 and ISA 3.0)
+ */
+ "2: xxmrghd 10, 10, 10 ;"
+
+ "done: tend. ;"
+
+ "trans_fail: ;"
+
+ /* Give values back to C. */
+ " mfvsrd %[high_vs0], 0 ;"
+ " xxsldwi 3, 0, 0, 2 ;"
+ " mfvsrd %[low_vs0], 3 ;"
+ " mfvsrd %[high_vs32], 32 ;"
+ " xxsldwi 3, 32, 32, 2 ;"
+ " mfvsrd %[low_vs32], 3 ;"
+
+ /* Give CR back to C so that it can check what happened. */
+ " mfcr %[cr_] ;"
+
+ : [high_vs0] "+r" (high_vs0),
+ [low_vs0] "+r" (low_vs0),
+ [high_vs32] "=r" (high_vs32),
+ [low_vs32] "=r" (low_vs32),
+ [cr_] "+r" (cr_)
+ : [touch_fp] "r" (flags.touch_fp),
+ [touch_vec] "r" (flags.touch_vec),
+ [exception] "r" (flags.exception),
+ [ex_fp] "i" (FP_UNA_EXCEPTION),
+ [ex_vec] "i" (VEC_UNA_EXCEPTION),
+ [ex_vsx] "i" (VSX_UNA_EXCEPTION),
+ [counter] "r" (counter)
+
+ : "cr0", "ctr", "v10", "vs0", "vs10", "vs3", "vs32", "vs33",
+ "vs34", "fr10"
+
+ );
+
+ /*
+ * Check if we were expecting a failure and it did not occur by checking
+ * CR0 state just after we leave the transaction. Either way we check if
+ * vs0 or vs32 got corrupted.
+ */
+ if (expecting_failure() && !is_failure(cr_)) {
+ printf("\n\tExpecting the transaction to fail, %s",
+ "but it didn't\n\t");
+ flags.result++;
+ }
+
+ /* Check if we were not expecting a failure and a it occurred. */
+ if (!expecting_failure() && is_failure(cr_)) {
+ printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
+ failure_code());
+ return (void *) -1;
+ }
+
+ /*
+ * Check if TM failed due to the cause we were expecting. 0xda is a
+ * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause.
+ */
+ if (is_failure(cr_) && !failure_is_unavailable()) {
+ printf("\n\tUnexpected failure cause 0x%02lx\n\t",
+ failure_code());
+ return (void *) -1;
+ }
+
+ /* 0x4 is a success and 0xa is a fail. See comment in is_failure(). */
+ if (DEBUG)
+ printf("CR0: 0x%1lx ", cr_ >> 28);
+
+ /* Check FP (vs0) for the expected value. */
+ if (high_vs0 != 0x5555555555555555 || low_vs0 != 0xFFFFFFFFFFFFFFFF) {
+ printf("FP corrupted!");
+ printf(" high = %#16" PRIx64 " low = %#16" PRIx64 " ",
+ high_vs0, low_vs0);
+ flags.result++;
+ } else
+ printf("FP ok ");
+
+ /* Check VEC (vs32) for the expected value. */
+ if (high_vs32 != 0x5555555555555555 || low_vs32 != 0xFFFFFFFFFFFFFFFF) {
+ printf("VEC corrupted!");
+ printf(" high = %#16" PRIx64 " low = %#16" PRIx64,
+ high_vs32, low_vs32);
+ flags.result++;
+ } else
+ printf("VEC ok");
+
+ putchar('\n');
+
+ return NULL;
+}
+
+/* Thread to force context switch */
+void *pong(void *not_used)
+{
+ /* Wait thread get its name "pong". */
+ if (DEBUG)
+ sleep(1);
+
+ /* Classed as an interactive-like thread. */
+ while (1)
+ sched_yield();
+}
+
+/* Function that creates a thread and launches the "ping" task. */
+void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
+{
+ int retries = 2;
+ void *ret_value;
+ pthread_t t0;
+
+ flags.touch_fp = fp;
+ flags.touch_vec = vec;
+
+ /*
+ * Without luck it's possible that the transaction is aborted not due to
+ * the unavailable exception caught in the middle as we expect but also,
+ * for instance, due to a context switch or due to a KVM reschedule (if
+ * it's running on a VM). Thus we try a few times before giving up,
+ * checking if the failure cause is the one we expect.
+ */
+ do {
+ /* Bind 'ping' to CPU 0, as specified in 'attr'. */
+ pthread_create(&t0, attr, ping, (void *) &flags);
+ pthread_setname_np(t0, "ping");
+ pthread_join(t0, &ret_value);
+ retries--;
+ } while (ret_value != NULL && retries);
+
+ if (!retries) {
+ flags.result = 1;
+ if (DEBUG)
+ printf("All transactions failed unexpectedly\n");
+
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int exception; /* FP = 0, VEC = 1, VSX = 2 */
+ pthread_t t1;
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ /* Init pthread attribute. */
+ pthread_attr_init(&attr);
+
+ /* Set CPU 0 mask into the pthread attribute. */
+ pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+
+ pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+ pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */
+
+ flags.result = 0;
+
+ for (exception = 0; exception < NUM_EXCEPTIONS; exception++) {
+ printf("Checking if FP/VEC registers are sane after");
+
+ if (exception == FP_UNA_EXCEPTION)
+ printf(" a FP unavailable exception...\n");
+
+ else if (exception == VEC_UNA_EXCEPTION)
+ printf(" a VEC unavailable exception...\n");
+
+ else
+ printf(" a VSX unavailable exception...\n");
+
+ flags.exception = exception;
+
+ test_fp_vec(0, 0, &attr);
+ test_fp_vec(1, 0, &attr);
+ test_fp_vec(0, 1, &attr);
+ test_fp_vec(1, 1, &attr);
+
+ }
+
+ if (flags.result > 0) {
+ printf("result: failed!\n");
+ exit(1);
+ } else {
+ printf("result: success\n");
+ exit(0);
+ }
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index 0ffff04433c5..df4204247d45 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -47,6 +47,11 @@ static inline bool failure_is_syscall(void)
return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
}
+static inline bool failure_is_unavailable(void)
+{
+ return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
+}
+
static inline bool failure_is_nesting(void)
{
return (__builtin_get_texasru() & 0x400000);