author    Catalin Marinas <catalin.marinas@arm.com>  2024-11-14 13:07:28 +0100
committer Catalin Marinas <catalin.marinas@arm.com>  2024-11-14 13:07:28 +0100
commit    437330d90c507be109a161667a77eaf61be0edac (patch)
tree      9fd353932523647566dc89927e15c5e3f74d0bc2
parent    Merge branches 'for-next/gcs', 'for-next/probes', 'for-next/asm-offsets', 'fo... (diff)
parent    arm64: mops: Document requirements for hypervisors (diff)
download  linux-437330d90c507be109a161667a77eaf61be0edac.tar.xz
          linux-437330d90c507be109a161667a77eaf61be0edac.zip
Merge branch 'for-next/mops' into for-next/core
* for-next/mops:
  : More FEAT_MOPS (memcpy instructions) uses - in-kernel routines
  arm64: mops: Document requirements for hypervisors
  arm64: lib: Use MOPS for copy_page() and clear_page()
  arm64: lib: Use MOPS for memcpy() routines
  arm64: mops: Document booting requirement for HCR_EL2.MCE2
  arm64: mops: Handle MOPS exceptions from EL1
  arm64: probes: Disable kprobes/uprobes on MOPS instructions

# Conflicts:
#	arch/arm64/kernel/entry-common.c
-rw-r--r--  Documentation/arch/arm64/booting.rst     |  3
-rw-r--r--  Documentation/arch/arm64/index.rst       |  1
-rw-r--r--  Documentation/arch/arm64/mops.rst        | 44
-rw-r--r--  arch/arm64/Kconfig                       |  3
-rw-r--r--  arch/arm64/include/asm/debug-monitors.h  |  1
-rw-r--r--  arch/arm64/include/asm/exception.h       |  1
-rw-r--r--  arch/arm64/include/asm/insn.h            |  1
-rw-r--r--  arch/arm64/kernel/debug-monitors.c       |  5
-rw-r--r--  arch/arm64/kernel/entry-common.c         | 12
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.c   |  7
-rw-r--r--  arch/arm64/kernel/traps.c                |  7
-rw-r--r--  arch/arm64/lib/clear_page.S              | 13
-rw-r--r--  arch/arm64/lib/copy_page.S               | 13
-rw-r--r--  arch/arm64/lib/memcpy.S                  | 19
-rw-r--r--  arch/arm64/lib/memset.S                  | 20

15 files changed, 146 insertions(+), 4 deletions(-)
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index 938781f59468..3278fb4bf219 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -388,6 +388,9 @@ Before jumping into the kernel, the following conditions must be met:
- HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
+ - HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
+ must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
+
For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
- If EL3 is present:
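For illustration only, a minimal sketch of how EL2 firmware or a hypervisor might satisfy both HCRX_EL2 requirements above before entering the kernel. The helper names are hypothetical; HCRX_EL2 is accessed via its encoded name (s3_4_c1_c2_2) so that assemblers without FEAT_HCX support accept it, and the bit positions follow the text above:

#include <stdint.h>

#define HCRX_EL2_MCE2	(UINT64_C(1) << 10)	/* take EL1 MOPS exceptions to EL2 */
#define HCRX_EL2_MSCEn	(UINT64_C(1) << 11)	/* enable MOPS at EL1 and EL0 */

static inline uint64_t read_hcrx_el2(void)
{
	uint64_t v;

	/* HCRX_EL2 by encoding, equivalent to "mrs %0, hcrx_el2" */
	asm volatile("mrs %0, s3_4_c1_c2_2" : "=r"(v));
	return v;
}

static void hcrx_init_for_linux_guest(void)
{
	uint64_t v = read_hcrx_el2() | HCRX_EL2_MSCEn | HCRX_EL2_MCE2;

	asm volatile("msr s3_4_c1_c2_2, %0\n\tisb" : : "r"(v));
}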
diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst
index e02247dcb87e..6a012c98bdcd 100644
--- a/Documentation/arch/arm64/index.rst
+++ b/Documentation/arch/arm64/index.rst
@@ -22,6 +22,7 @@ ARM64 Architecture
legacy_instructions
memory
memory-tagging-extension
+ mops
perf
pointer-authentication
ptdump
diff --git a/Documentation/arch/arm64/mops.rst b/Documentation/arch/arm64/mops.rst
new file mode 100644
index 000000000000..2ef5b147f8dc
--- /dev/null
+++ b/Documentation/arch/arm64/mops.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Memory copy/set instructions (MOPS)
+===================================
+
+A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
+instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
+
+A main or epilogue instruction can take a MOPS exception for various reasons,
+for example when a task is migrated to a CPU with a different MOPS
+implementation, or when the instruction's alignment and size requirements are
+not met. The software exception handler is then expected to reset the registers
+and restart execution from the prologue instruction. Normally this is handled
+by the kernel.
+
+For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
+the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
+
+.. _arm64_mops_hyp:
+
+Hypervisor requirements
+-----------------------
+
+A hypervisor running a Linux guest must handle all MOPS exceptions from the
+guest kernel, as Linux may not be able to handle the exception at all times.
+For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
+to another physical CPU with a different MOPS implementation.
+
+To do this, the hypervisor must:
+
+ - Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
+
+ - Have an exception handler that implements the algorithm from the Arm ARM
+ rules CNTMJ and MWFQH.
+
+ - Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
+ potential step of the current instruction.
+
+  Note: Clearing PSTATE.SS is needed so that a single-step exception is
+  taken on the next instruction (the prologue instruction). Otherwise the
+  prologue would be silently stepped over and the single-step exception
+  taken on the main instruction. Note that if the guest instruction is not
+  being stepped, clearing PSTATE.SS has no effect.
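To make the prologue/main/epilogue structure concrete, an illustrative user-space copy routine (not part of this patch), assuming a FEAT_MOPS CPU and an assembler that accepts the mops extension. Because all three operand registers are updated as the operation makes progress, an exception handler can reset them and restart the whole sequence from CPYP:

#include <stddef.h>

static void *mops_memcpy(void *dst, const void *src, size_t n)
{
	void *ret = dst;

	asm volatile(
		".arch_extension mops\n\t"
		"cpyp	[%0]!, [%1]!, %2!\n\t"	/* prologue: pick implementation option */
		"cpym	[%0]!, [%1]!, %2!\n\t"	/* main: bulk of the copy */
		"cpye	[%0]!, [%1]!, %2!"	/* epilogue: trailing bytes */
		: "+r"(dst), "+r"(src), "+r"(n)
		:
		: "cc", "memory");

	return ret;
}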
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5afd028116c9..cd900e640b10 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2160,6 +2160,9 @@ config ARM64_EPAN
if the cpu does not implement the feature.
endmenu # "ARMv8.7 architectural features"
+config AS_HAS_MOPS
+ def_bool $(as-instr,.arch_extension mops)
+
menu "ARMv8.9 architectural features"
config ARM64_POE
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 13d437bcbf58..8f6ba31b8658 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -105,6 +105,7 @@ void kernel_enable_single_step(struct pt_regs *regs);
void kernel_disable_single_step(void);
int kernel_active_single_step(void);
void kernel_rewind_single_step(struct pt_regs *regs);
+void kernel_fastforward_single_step(struct pt_regs *regs);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int reinstall_suspended_bps(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 674518464718..d48fc16584cd 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -75,6 +75,7 @@ void do_el0_svc_compat(struct pt_regs *regs);
void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
void do_el0_mops(struct pt_regs *regs, unsigned long esr);
+void do_el1_mops(struct pt_regs *regs, unsigned long esr);
void do_serror(struct pt_regs *regs, unsigned long esr);
void do_signal(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 89bc18989b90..e390c432f546 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -353,6 +353,7 @@ __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
__AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)
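The mask/value pair added for mops is all the classifier needs: __AARCH64_INSN_FUNCS() expands to a test of the form sketched below (simplified; the kernel macro also build-time checks that the value fits the mask). The mask leaves the size, option and register fields of the CPY*/SET* encodings as don't-care bits, so one predicate covers every prologue, main and epilogue variant:

#include <stdbool.h>
#include <stdint.h>

static inline bool aarch64_insn_is_mops(uint32_t insn)
{
	return (insn & 0x3B200C00) == 0x19000400;
}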
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 4713a4c65b1b..58f047de3e1c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -440,6 +440,11 @@ void kernel_rewind_single_step(struct pt_regs *regs)
set_regs_spsr_ss(regs);
}
+void kernel_fastforward_single_step(struct pt_regs *regs)
+{
+ clear_regs_spsr_ss(regs);
+}
+
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
{
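Fast-forwarding amounts to clearing the saved SPSR_EL1.SS bit (PSTATE.SS, bit 21): with MDSCR_EL1.SS enabled, SS=1 means "execute one instruction, then take the step exception", while SS=0 makes the step exception pend on the next instruction without executing it first. A simplified sketch of the effect, outside the kernel's pt_regs plumbing:

#include <stdint.h>

#define DBG_SPSR_SS	(UINT64_C(1) << 21)	/* PSTATE/SPSR_ELx.SS */

/* illustrative stand-in for clear_regs_spsr_ss() on regs->pstate */
static inline void clear_spsr_ss(uint64_t *pstate)
{
	*pstate &= ~DBG_SPSR_SS;
}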
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index fe74813009bd..b260ddc4d3e9 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -472,6 +472,15 @@ static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
exit_to_kernel_mode(regs);
}
+static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_mops(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -517,6 +526,9 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_GCS:
el1_gcs(regs, esr);
break;
+ case ESR_ELx_EC_MOPS:
+ el1_mops(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_CUR:
case ESR_ELx_EC_SOFTSTP_CUR:
case ESR_ELx_EC_WATCHPT_CUR:
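For context, el1h_64_sync_handler() dispatches on the exception class held in ESR_ELx[31:26]; a simplified sketch of the decode, with the Memory Copy/Set class (0x27) spelled out:

#include <stdint.h>

#define ESR_ELx_EC_SHIFT	26
#define ESR_ELx_EC_MASK		(UINT64_C(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr)		(((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC_MOPS		UINT64_C(0x27)	/* Memory Copy/Set exception */

static inline int esr_is_mops(uint64_t esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_MOPS;
}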
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index e05249f57075..6438bf62e753 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* Instructions which load PC relative literals are not going to work
* when executed from an XOL slot. Instructions doing an exclusive
* load/store are not going to complete successfully when single-step
- * exception handling happens in the middle of the sequence.
+ * exception handling happens in the middle of the sequence. Memory
+ * copy/set instructions require that all three instructions be placed
+ * consecutively in memory.
*/
if (aarch64_insn_uses_literal(insn) ||
- aarch64_insn_is_exclusive(insn))
+ aarch64_insn_is_exclusive(insn) ||
+ aarch64_insn_is_mops(insn))
return false;
return true;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index fdbcf047108c..ee318f6df647 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -541,6 +541,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr)
user_fastforward_single_step(current);
}
+void do_el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_mops_reset_regs(&regs->user_regs, esr);
+
+ kernel_fastforward_single_step(regs);
+}
+
#define __user_cache_maint(insn, address, res) \
if (address >= TASK_SIZE_MAX) { \
res = -EFAULT; \
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index ebde40e7fa2b..bd6f7d5eb6eb 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -15,6 +15,19 @@
* x0 - dest
*/
SYM_FUNC_START(__pi_clear_page)
+#ifdef CONFIG_AS_HAS_MOPS
+ .arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+ b .Lno_mops
+alternative_else_nop_endif
+
+ mov x1, #PAGE_SIZE
+ setpn [x0]!, x1!, xzr
+ setmn [x0]!, x1!, xzr
+ seten [x0]!, x1!, xzr
+ ret
+.Lno_mops:
+#endif
mrs x1, dczid_el0
tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */
and w1, w1, #0xf
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 6a56d7cf309d..e6374e7e5511 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,19 @@
* x1 - src
*/
SYM_FUNC_START(__pi_copy_page)
+#ifdef CONFIG_AS_HAS_MOPS
+ .arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+ b .Lno_mops
+alternative_else_nop_endif
+
+ mov x2, #PAGE_SIZE
+ cpypwn [x0]!, [x1]!, x2!
+ cpymwn [x0]!, [x1]!, x2!
+ cpyewn [x0]!, [x1]!, x2!
+ ret
+.Lno_mops:
+#endif
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
ldp x6, x7, [x1, #32]
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 4ab48d49c451..9b99106fb95f 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -57,7 +57,7 @@
The loop tail is handled by always copying 64 bytes from the end.
*/
-SYM_FUNC_START(__pi_memcpy)
+SYM_FUNC_START_LOCAL(__pi_memcpy_generic)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
@@ -238,7 +238,24 @@ L(copy64_from_start):
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstin]
ret
+SYM_FUNC_END(__pi_memcpy_generic)
+
+#ifdef CONFIG_AS_HAS_MOPS
+ .arch_extension mops
+SYM_FUNC_START(__pi_memcpy)
+alternative_if_not ARM64_HAS_MOPS
+ b __pi_memcpy_generic
+alternative_else_nop_endif
+
+ mov dst, dstin
+ cpyp [dst]!, [src]!, count!
+ cpym [dst]!, [src]!, count!
+ cpye [dst]!, [src]!, count!
+ ret
SYM_FUNC_END(__pi_memcpy)
+#else
+SYM_FUNC_ALIAS(__pi_memcpy, __pi_memcpy_generic)
+#endif
SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)
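Two details in the MOPS path above are easy to miss: the branch to __pi_memcpy_generic is patched to a NOP during boot on CPUs with ARM64_HAS_MOPS, so the feature check costs nothing per call, and "mov dst, dstin" preserves the return value because the CPY* instructions post-increment their destination register while memcpy() must still return the original pointer. A C-level sketch of the dispatch (hypothetical names, written as a run-time branch where the kernel patches code instead):

#include <stdbool.h>
#include <stddef.h>

extern bool cpu_has_mops;	/* decided once during early boot */

void *memcpy_generic(void *dst, const void *src, size_t n);
void *memcpy_mops(void *dst, const void *src, size_t n);

static void *memcpy_dispatch(void *dst, const void *src, size_t n)
{
	return cpu_has_mops ? memcpy_mops(dst, src, n)
			    : memcpy_generic(dst, src, n);
}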
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index a5aebe82ad73..97157da65ec6 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -26,6 +26,7 @@
*/
dstin .req x0
+val_x .req x1
val .req w1
count .req x2
tmp1 .req x3
@@ -42,7 +43,7 @@ dst .req x8
tmp3w .req w9
tmp3 .req x9
-SYM_FUNC_START(__pi_memset)
+SYM_FUNC_START_LOCAL(__pi_memset_generic)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
orr A_lw, A_lw, A_lw, lsl #8
@@ -201,7 +202,24 @@ SYM_FUNC_START(__pi_memset)
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
+SYM_FUNC_END(__pi_memset_generic)
+
+#ifdef CONFIG_AS_HAS_MOPS
+ .arch_extension mops
+SYM_FUNC_START(__pi_memset)
+alternative_if_not ARM64_HAS_MOPS
+ b __pi_memset_generic
+alternative_else_nop_endif
+
+ mov dst, dstin
+ setp [dst]!, count!, val_x
+ setm [dst]!, count!, val_x
+ sete [dst]!, count!, val_x
+ ret
SYM_FUNC_END(__pi_memset)
+#else
+SYM_FUNC_ALIAS(__pi_memset, __pi_memset_generic)
+#endif
SYM_FUNC_ALIAS(__memset, __pi_memset)
EXPORT_SYMBOL(__memset)