diff options
author | Damien Miller <djm@mindrot.org> | 2023-02-03 06:33:09 +0100 |
---|---|---|
committer | Damien Miller <djm@mindrot.org> | 2023-02-06 03:26:42 +0100 |
commit | 195313dfe10a23c82e9d56d5fdd2f59beee1bdcf (patch) | |
tree | 23f78567741a9fffda976e08499319ea522bb62b /sandbox-seccomp-filter.c | |
parent | crank versions in RPM specs (diff) | |
download | openssh-195313dfe10a23c82e9d56d5fdd2f59beee1bdcf.tar.xz openssh-195313dfe10a23c82e9d56d5fdd2f59beee1bdcf.zip |
harden Linux seccomp sandbox
Linux mmap(2) and madvise(2) syscalls support quite a number of funky
flags that we don't expect that sshd/libc will ever need. We can
exclude this kernel attack surface by filtering the mmap(2) flags
and the madvise(2) advice arguments.
Similarly, the sandboxed process in sshd is a single-threaded program
that does not use shared memory for synchronisation or communication.
Therefore, there should be no reason for the advanced priority
inheritance futex(2) operations to be necessary. These can also be
excluded.
Motivated by Jann Horn pointing out that there have been kernel bugs
in nearby Linux kernel code, e.g. CVE-2020-29368, CVE-2020-29374 and
CVE-2022-42703.
Feedback Jann Horn, ok dtucker@
Diffstat (limited to 'sandbox-seccomp-filter.c')
-rw-r--r-- | sandbox-seccomp-filter.c | 79 |
1 files changed, 74 insertions, 5 deletions
diff --git a/sandbox-seccomp-filter.c b/sandbox-seccomp-filter.c index 4ab49eb6e..78c266231 100644 --- a/sandbox-seccomp-filter.c +++ b/sandbox-seccomp-filter.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012 Will Drewry <wad@dataspill.org> + * Copyright (c) 2015,2017,2019,2020,2023 Damien Miller <djm@mindrot.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -48,6 +49,7 @@ #include <sys/mman.h> #include <sys/syscall.h> +#include <linux/futex.h> #include <linux/net.h> #include <linux/audit.h> #include <linux/filter.h> @@ -132,6 +134,67 @@ /* reload syscall number; all rules expect it in accumulator */ \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ offsetof(struct seccomp_data, nr)) +/* Deny unless syscall argument contains only values in mask */ +#define SC_DENY_UNLESS_ARG_MASK(_nr, _arg_nr, _arg_mask, _errno) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 8), \ + /* load, mask and test syscall argument, low word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \ + BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~((_arg_mask) & 0xFFFFFFFF)), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 3), \ + /* load, mask and test syscall argument, high word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \ + BPF_STMT(BPF_ALU+BPF_AND+BPF_K, \ + ~(((uint32_t)((uint64_t)(_arg_mask) >> 32)) & 0xFFFFFFFF)), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 1, 0), \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno)), \ + /* reload syscall number; all rules expect it in accumulator */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, nr)) +#define SC_DENY_UNLESS_MASK(_nr, _arg_nr, _arg_val, _errno) \ +/* Special handling for futex(2) that combines a bitmap and operation number */ +#if defined(__NR_futex) || defined(__NR_futex_time64) +#define SC_FUTEX_MASK (FUTEX_PRIVATE_FLAG|FUTEX_CLOCK_REALTIME) +#define SC_ALLOW_FUTEX_OP(_nr, _op) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 8), \ + /* load syscall argument, low word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[1]) + ARG_LO_OFFSET), \ + /* mask off allowed bitmap values, low word */ \ + BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~(SC_FUTEX_MASK & 0xFFFFFFFF)), \ + /* test operation number, low word */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ((_op) & 0xFFFFFFFF), 0, 4), \ + /* load syscall argument, high word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[1]) + ARG_HI_OFFSET), \ + /* mask off allowed bitmap values, high word */ \ + BPF_STMT(BPF_ALU+BPF_AND+BPF_K, \ + ~(((uint32_t)((uint64_t)SC_FUTEX_MASK >> 32)) & 0xFFFFFFFF)), \ + /* test operation number, high word */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ + (((uint32_t)((uint64_t)(_op) >> 32)) & 0xFFFFFFFF), 0, 1), \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ + /* reload syscall number; all rules expect it in accumulator */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)) + +/* Use this for both __NR_futex and __NR_futex_time64 */ +# define SC_FUTEX(_nr) \ + SC_ALLOW_FUTEX_OP(__NR_futex, FUTEX_WAIT), \ + SC_ALLOW_FUTEX_OP(__NR_futex, FUTEX_WAIT_BITSET), \ + SC_ALLOW_FUTEX_OP(__NR_futex, FUTEX_WAKE), \ + SC_ALLOW_FUTEX_OP(__NR_futex, FUTEX_WAKE_BITSET), \ + SC_ALLOW_FUTEX_OP(__NR_futex, FUTEX_REQUEUE), \ + SC_ALLOW_FUTEX_OP(__NR_futex, FUTEX_CMP_REQUEUE) +#endif /* __NR_futex || __NR_futex_time64 */ + +#if defined(__NR_mmap) || defined(__NR_mmap2) +/* Use this for both __NR_mmap and __NR_mmap2 variants */ +# define SC_MMAP(_nr) \ + SC_DENY_UNLESS_ARG_MASK(_nr, 3, \ + MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_FIXED_NOREPLACE, EINVAL), \ + SC_ALLOW_ARG_MASK(_nr, 2, PROT_READ|PROT_WRITE|PROT_NONE) +#endif /* __NR_mmap || __NR_mmap2 */ /* Syscall filtering set for preauth. */ static const struct sock_filter preauth_insns[] = { @@ -211,10 +274,10 @@ static const struct sock_filter preauth_insns[] = { SC_ALLOW(__NR_exit_group), #endif #ifdef __NR_futex - SC_ALLOW(__NR_futex), + SC_FUTEX(__NR_futex), #endif #ifdef __NR_futex_time64 - SC_ALLOW(__NR_futex_time64), + SC_FUTEX(__NR_futex_time64), #endif #ifdef __NR_geteuid SC_ALLOW(__NR_geteuid), @@ -244,13 +307,19 @@ static const struct sock_filter preauth_insns[] = { SC_ALLOW(__NR_getuid32), #endif #ifdef __NR_madvise - SC_ALLOW(__NR_madvise), + SC_ALLOW_ARG(__NR_madvise, 2, MADV_NORMAL), + SC_ALLOW_ARG(__NR_madvise, 2, MADV_FREE), + SC_ALLOW_ARG(__NR_madvise, 2, MADV_DONTNEED), + SC_ALLOW_ARG(__NR_madvise, 2, MADV_DONTFORK), + SC_ALLOW_ARG(__NR_madvise, 2, MADV_DONTDUMP), + SC_ALLOW_ARG(__NR_madvise, 2, MADV_WIPEONFORK), + SC_DENY(__NR_madvise, EINVAL), #endif #ifdef __NR_mmap - SC_ALLOW_ARG_MASK(__NR_mmap, 2, PROT_READ|PROT_WRITE|PROT_NONE), + SC_MMAP(__NR_mmap), #endif #ifdef __NR_mmap2 - SC_ALLOW_ARG_MASK(__NR_mmap2, 2, PROT_READ|PROT_WRITE|PROT_NONE), + SC_MMAP(__NR_mmap2), #endif #ifdef __NR_mprotect SC_ALLOW_ARG_MASK(__NR_mprotect, 2, PROT_READ|PROT_WRITE|PROT_NONE), |