From 56e62a73702836017564eaacd5212e4d0fa1c01d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 21 Nov 2020 11:14:56 +0100 Subject: s390: convert to generic entry This patch converts s390 to use the generic entry infrastructure from kernel/entry/*. There are a few special things on s390: - PIF_PER_TRAP is moved to TIF_PER_TRAP as the generic code doesn't know about our PIF flags in exit_to_user_mode_loop(). - The old code had several ways to restart syscalls: a) PIF_SYSCALL_RESTART, which was only set during execve to force a restart after upgrading a process (usually qemu-kvm) to pgste page table extensions. b) PIF_SYSCALL, which is set by do_signal() to indicate that the current syscall should be restarted. This is changed so that do_signal() now also uses PIF_SYSCALL_RESTART. Continuing to use PIF_SYSCALL doesn't work with the generic code, and changing it to PIF_SYSCALL_RESTART makes PIF_SYSCALL and PIF_SYSCALL_RESTART more unique. - On s390 calling sys_sigreturn or sys_rt_sigreturn is implemented by executing a svc instruction on the process stack which causes a fault. While handling that fault the fault code sets PIF_SYSCALL to hand over processing to the syscall code on exit to usermode. The patch introduces PIF_SYSCALL_RET_SET, which is set if ptrace sets a return value for a syscall. The s390x ptrace ABI uses r2 both for the syscall number and return value, so ptrace cannot set the syscall number + return value at the same time. The flag makes handling that a bit easier. do_syscall() will just skip executing the syscall if PIF_SYSCALL_RET_SET is set. CONFIG_DEBUG_ASCE was removed in favour of the generic CONFIG_DEBUG_ENTRY. CR1/7/13 will be checked both on kernel entry and exit to contain the correct asces. 
Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/syscall.c | 172 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 arch/s390/kernel/syscall.c (limited to 'arch/s390/kernel/syscall.c') diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c new file mode 100644 index 000000000000..25c0fb19b0a5 --- /dev/null +++ b/arch/s390/kernel/syscall.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Thomas Spatzier (tspat@de.ibm.com) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/s390 + * platform. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "entry.h" + +/* + * Perform the mmap() system call. Linux for S/390 isn't able to handle more + * than 5 system call parameters, so this system call uses a memory block + * for parameter passing. + */ + +struct s390_mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) +{ + struct s390_mmap_arg_struct a; + int error = -EFAULT; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); +out: + return error; +} + +#ifdef CONFIG_SYSVIPC +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls. 
+ */ +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) +{ + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return ksys_ipc(call, first, second, third, ptr, third); +} +#endif /* CONFIG_SYSVIPC */ + +SYSCALL_DEFINE1(s390_personality, unsigned int, personality) +{ + unsigned int ret = current->personality; + + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; + + if (personality != 0xffffffff) + set_personality(personality); + + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; + + return ret; +} + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} + +void do_syscall(struct pt_regs *regs) +{ + unsigned long nr; + + nr = regs->int_code & 0xffff; + if (!nr) { + nr = regs->gprs[1] & 0xffff; + regs->int_code &= ~0xffffUL; + regs->int_code |= nr; + } + + regs->gprs[2] = nr; + + nr = syscall_enter_from_user_mode_work(regs, nr); + + /* + * In the s390 ptrace ABI, both the syscall number and the return value + * use gpr2. However, userspace puts the syscall number either in the + * svc instruction itself, or uses gpr1. To make at least skipping syscalls + * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here + * and if set, the syscall will be skipped. 
+ */ + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { + regs->gprs[2] = -ENOSYS; + if (likely(nr < NR_syscalls)) { + regs->gprs[2] = current->thread.sys_call_table[nr]( + regs->orig_gpr2, regs->gprs[3], + regs->gprs[4], regs->gprs[5], + regs->gprs[6], regs->gprs[7]); + } + } else { + clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); + } + syscall_exit_to_user_mode_work(regs); +} + +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) +{ + enter_from_user_mode(regs); + + memcpy(&regs->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long)); + memcpy(&regs->int_code, &S390_lowcore.svc_ilc, sizeof(regs->int_code)); + regs->psw = S390_lowcore.svc_old_psw; + + update_timer_sys(); + + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + + if (per_trap) + set_thread_flag(TIF_PER_TRAP); + + for (;;) { + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); + do_syscall(regs); + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART)) + break; + local_irq_enable(); + } + exit_to_user_mode(); +} -- cgit v1.2.3 From 3a790cc1c9ef1b7b613cf648e6fb756a842caa16 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 18 Jan 2021 09:35:38 +0100 Subject: s390: pass struct pt_regs instead of registers to syscalls Instead of fetching all registers from struct pt_regs and passing them to the syscall wrappers, let the system call wrappers only fetch the values really required. 
Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 4 +- arch/s390/include/asm/syscall_wrapper.h | 114 ++++++++++++++++++++------------ arch/s390/kernel/syscall.c | 8 +-- 3 files changed, 75 insertions(+), 51 deletions(-) (limited to 'arch/s390/kernel/syscall.c') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index fa67b66bf144..023a15dc25a3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -40,9 +40,7 @@ #include #include -typedef long (*sys_call_ptr_t)(unsigned long, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); +typedef long (*sys_call_ptr_t)(struct pt_regs *regs); static inline void set_cpu_flag(int flag) { diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 5364bfc866e0..ad2c996e7e93 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -7,6 +7,33 @@ #ifndef _ASM_S390_SYSCALL_WRAPPER_H #define _ASM_S390_SYSCALL_WRAPPER_H +#define __SC_TYPE(t, a) t + +#define SYSCALL_PT_ARG6(regs, m, t1, t2, t3, t4, t5, t6)\ + SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5), \ + m(t6, (regs->gprs[7])) + +#define SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4), \ + m(t5, (regs->gprs[6])) + +#define SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(regs, m, t1, t2, t3), \ + m(t4, (regs->gprs[5])) + +#define SYSCALL_PT_ARG3(regs, m, t1, t2, t3) \ + SYSCALL_PT_ARG2(regs, m, t1, t2), \ + m(t3, (regs->gprs[4])) + +#define SYSCALL_PT_ARG2(regs, m, t1, t2) \ + SYSCALL_PT_ARG1(regs, m, t1), \ + m(t2, (regs->gprs[3])) + +#define SYSCALL_PT_ARG1(regs, m, t1) \ + m(t1, (regs->orig_gpr2)) + +#define SYSCALL_PT_ARGS(x, ...) 
SYSCALL_PT_ARG##x(__VA_ARGS__) + #ifdef CONFIG_COMPAT #define __SC_COMPAT_TYPE(t, a) \ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a @@ -29,14 +56,15 @@ (t)__ReS; \ }) -#define __S390_SYS_STUBx(x, name, ...) \ - long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ - long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __s390x_sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__));\ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ +#define __S390_SYS_STUBx(x, name, ...) \ + long __s390_sys##name(struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ + long __s390_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \ + __SC_COMPAT_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ } /* @@ -65,23 +93,24 @@ SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) -#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ - __diag_push(); \ - __diag_ignore(GCC, 8, "-Wattribute-alias", \ - "Type aliasing is used to sanitize syscall arguments");\ - long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ - __attribute__((alias(__stringify(__se_compat_sys##name)))); \ - ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ - static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ - } \ - __diag_pop(); \ +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) 
\ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ + long __s390_compat_sys##name(struct pt_regs *regs); \ + long __s390_compat_sys##name(struct pt_regs *regs) \ + __attribute__((alias(__stringify(__se_compat_sys##name)))); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long __se_compat_sys##name(struct pt_regs *regs); \ + long __se_compat_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_compat_sys##name(SYSCALL_PT_ARGS(x, regs, __SC_DELOUSE, \ + __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) /* @@ -113,23 +142,24 @@ #endif /* CONFIG_COMPAT */ -#define __SYSCALL_DEFINEx(x, name, ...) \ - __diag_push(); \ - __diag_ignore(GCC, 8, "-Wattribute-alias", \ - "Type aliasing is used to sanitize syscall arguments");\ - long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ - __attribute__((alias(__stringify(__se_sys##name)))); \ - ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ - long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - __S390_SYS_STUBx(x, name, __VA_ARGS__) \ - long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ - } \ - __diag_pop(); \ +#define __SYSCALL_DEFINEx(x, name, ...) 
\ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ + long __s390x_sys##name(struct pt_regs *regs) \ + __attribute__((alias(__stringify(__se_sys##name)))); \ + ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long __se_sys##name(struct pt_regs *regs); \ + __S390_SYS_STUBx(x, name, __VA_ARGS__) \ + long __se_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \ + __SC_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 25c0fb19b0a5..bc8e650e377d 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -132,12 +132,8 @@ void do_syscall(struct pt_regs *regs) */ if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { regs->gprs[2] = -ENOSYS; - if (likely(nr < NR_syscalls)) { - regs->gprs[2] = current->thread.sys_call_table[nr]( - regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5], - regs->gprs[6], regs->gprs[7]); - } + if (likely(nr < NR_syscalls)) + regs->gprs[2] = current->thread.sys_call_table[nr](regs); } else { clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); } -- cgit v1.2.3