summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorKumar Kartikeya Dwivedi <memxor@gmail.com>2024-12-13 23:19:28 +0100
committerAlexei Starovoitov <ast@kernel.org>2024-12-14 01:24:53 +0100
commit838a10bd2ebfe11a60dd67687533a7cfc220cc86 (patch)
tree3cc4d93c038f8836b2577d53964c369346e659a6 /kernel
parentbpf: Revert "bpf: Mark raw_tp arguments with PTR_MAYBE_NULL" (diff)
downloadlinux-838a10bd2ebfe11a60dd67687533a7cfc220cc86.tar.xz
linux-838a10bd2ebfe11a60dd67687533a7cfc220cc86.zip
bpf: Augment raw_tp arguments with PTR_MAYBE_NULL
Arguments to a raw tracepoint are tagged as trusted, which carries the semantics that the pointer will be non-NULL. However, in certain cases, a raw tracepoint argument may end up being NULL. More context about this issue is available in [0]. Thus, there is a discrepancy between the reality, that raw_tp arguments can actually be NULL, and the verifier's knowledge, that they are never NULL, causing explicit NULL check branch to be dead code eliminated. A previous attempt [1], i.e. the second fixed commit, was made to simulate symbolic execution as if in most accesses, the argument is a non-NULL raw_tp, except for conditional jumps. This tried to suppress branch prediction while preserving compatibility, but surfaced issues with production programs that were difficult to solve without increasing verifier complexity. A more complete discussion of issues and fixes is available at [2]. Fix this by maintaining an explicit list of tracepoints where the arguments are known to be NULL, and mark the positional arguments as PTR_MAYBE_NULL. Additionally, capture the tracepoints where arguments are known to be ERR_PTR, and mark these arguments as scalar values to prevent potential dereference. Each hex digit is used to encode NULL-ness (0x1) or ERR_PTR-ness (0x2), shifted by the zero-indexed argument number x 4. This can be represented as follows: 1st arg: 0x1 2nd arg: 0x10 3rd arg: 0x100 ... and so on (likewise for ERR_PTR case). In the future, an automated pass will be used to produce such a list, or insert __nullable annotations automatically for tracepoints. Each compilation unit will be analyzed and results will be collated to find whether a tracepoint pointer is definitely not null, maybe null, or an unknown state where verifier conservatively marks it PTR_MAYBE_NULL. A proof of concept of this tool from Eduard is available at [3]. Note that in case we don't find a specification in the raw_tp_null_args array and the tracepoint belongs to a kernel module, we will conservatively mark the arguments as PTR_MAYBE_NULL. This is because unlike for in-tree modules, out-of-tree module tracepoints may pass NULL freely to the tracepoint. We don't protect against such tracepoints passing ERR_PTR (which is uncommon anyway), lest we mark all such arguments as SCALAR_VALUE. While we are it, let's adjust the test raw_tp_null to not perform dereference of the skb->mark, as that won't be allowed anymore, and make it more robust by using inline assembly to test the dead code elimination behavior, which should still stay the same. [0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@jlelli-thinkpadt14gen4.remote.csb [1]: https://lore.kernel.org/all/20241104171959.2938862-1-memxor@gmail.com [2]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@gmail.com [3]: https://github.com/eddyz87/llvm-project/tree/nullness-for-tracepoint-params Reported-by: Juri Lelli <juri.lelli@redhat.com> # original bug Reported-by: Manu Bretelle <chantra@meta.com> # bugs in masking fix Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs") Fixes: cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL") Reviewed-by: Eduard Zingerman <eddyz87@gmail.com> Co-developed-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Link: https://lore.kernel.org/r/20241213221929.3495062-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c138
1 files changed, 138 insertions, 0 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c4aa304028ce..e5a5f023cedd 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6439,6 +6439,101 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
return off;
}
+struct bpf_raw_tp_null_args {
+ const char *func;
+ u64 mask;
+};
+
+static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
+ /* sched */
+ { "sched_pi_setprio", 0x10 },
+ /* ... from sched_numa_pair_template event class */
+ { "sched_stick_numa", 0x100 },
+ { "sched_swap_numa", 0x100 },
+ /* afs */
+ { "afs_make_fs_call", 0x10 },
+ { "afs_make_fs_calli", 0x10 },
+ { "afs_make_fs_call1", 0x10 },
+ { "afs_make_fs_call2", 0x10 },
+ { "afs_protocol_error", 0x1 },
+ { "afs_flock_ev", 0x10 },
+ /* cachefiles */
+ { "cachefiles_lookup", 0x1 | 0x200 },
+ { "cachefiles_unlink", 0x1 },
+ { "cachefiles_rename", 0x1 },
+ { "cachefiles_prep_read", 0x1 },
+ { "cachefiles_mark_active", 0x1 },
+ { "cachefiles_mark_failed", 0x1 },
+ { "cachefiles_mark_inactive", 0x1 },
+ { "cachefiles_vfs_error", 0x1 },
+ { "cachefiles_io_error", 0x1 },
+ { "cachefiles_ondemand_open", 0x1 },
+ { "cachefiles_ondemand_copen", 0x1 },
+ { "cachefiles_ondemand_close", 0x1 },
+ { "cachefiles_ondemand_read", 0x1 },
+ { "cachefiles_ondemand_cread", 0x1 },
+ { "cachefiles_ondemand_fd_write", 0x1 },
+ { "cachefiles_ondemand_fd_release", 0x1 },
+ /* ext4, from ext4__mballoc event class */
+ { "ext4_mballoc_discard", 0x10 },
+ { "ext4_mballoc_free", 0x10 },
+ /* fib */
+ { "fib_table_lookup", 0x100 },
+ /* filelock */
+ /* ... from filelock_lock event class */
+ { "posix_lock_inode", 0x10 },
+ { "fcntl_setlk", 0x10 },
+ { "locks_remove_posix", 0x10 },
+ { "flock_lock_inode", 0x10 },
+ /* ... from filelock_lease event class */
+ { "break_lease_noblock", 0x10 },
+ { "break_lease_block", 0x10 },
+ { "break_lease_unblock", 0x10 },
+ { "generic_delete_lease", 0x10 },
+ { "time_out_leases", 0x10 },
+ /* host1x */
+ { "host1x_cdma_push_gather", 0x10000 },
+ /* huge_memory */
+ { "mm_khugepaged_scan_pmd", 0x10 },
+ { "mm_collapse_huge_page_isolate", 0x1 },
+ { "mm_khugepaged_scan_file", 0x10 },
+ { "mm_khugepaged_collapse_file", 0x10 },
+ /* kmem */
+ { "mm_page_alloc", 0x1 },
+ { "mm_page_pcpu_drain", 0x1 },
+ /* .. from mm_page event class */
+ { "mm_page_alloc_zone_locked", 0x1 },
+ /* netfs */
+ { "netfs_failure", 0x10 },
+ /* power */
+ { "device_pm_callback_start", 0x10 },
+ /* qdisc */
+ { "qdisc_dequeue", 0x1000 },
+ /* rxrpc */
+ { "rxrpc_recvdata", 0x1 },
+ { "rxrpc_resend", 0x10 },
+ /* sunrpc */
+ { "xs_stream_read_data", 0x1 },
+ /* ... from xprt_cong_event event class */
+ { "xprt_reserve_cong", 0x10 },
+ { "xprt_release_cong", 0x10 },
+ { "xprt_get_cong", 0x10 },
+ { "xprt_put_cong", 0x10 },
+ /* tcp */
+ { "tcp_send_reset", 0x11 },
+ /* tegra_apb_dma */
+ { "tegra_dma_tx_status", 0x100 },
+ /* timer_migration */
+ { "tmigr_update_events", 0x1 },
+ /* writeback, from writeback_folio_template event class */
+ { "writeback_dirty_folio", 0x10 },
+ { "folio_wait_writeback", 0x10 },
+ /* rdma */
+ { "mr_integ_alloc", 0x2000 },
+ /* bpf_testmod */
+ { "bpf_testmod_test_read", 0x0 },
+};
+
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -6449,6 +6544,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
+ bool ptr_err_raw_tp = false;
const char *tag_value;
u32 nr_args, arg;
int i, ret;
@@ -6597,6 +6693,39 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
info->reg_type |= PTR_MAYBE_NULL;
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
+ struct btf *btf = prog->aux->attach_btf;
+ const struct btf_type *t;
+ const char *tname;
+
+ /* BTF lookups cannot fail, return false on error */
+ t = btf_type_by_id(btf, prog->aux->attach_btf_id);
+ if (!t)
+ return false;
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname)
+ return false;
+ /* Checked by bpf_check_attach_target */
+ tname += sizeof("btf_trace_") - 1;
+ for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) {
+ /* Is this a func with potential NULL args? */
+ if (strcmp(tname, raw_tp_null_args[i].func))
+ continue;
+ if (raw_tp_null_args[i].mask & (0x1 << (arg * 4)))
+ info->reg_type |= PTR_MAYBE_NULL;
+ /* Is the current arg IS_ERR? */
+ if (raw_tp_null_args[i].mask & (0x2 << (arg * 4)))
+ ptr_err_raw_tp = true;
+ break;
+ }
+ /* If we don't know NULL-ness specification and the tracepoint
+ * is coming from a loadable module, be conservative and mark
+ * argument as PTR_MAYBE_NULL.
+ */
+ if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf))
+ info->reg_type |= PTR_MAYBE_NULL;
+ }
+
if (tgt_prog) {
enum bpf_prog_type tgt_type;
@@ -6641,6 +6770,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
tname, arg, info->btf_id, btf_type_str(t),
__btf_name_by_offset(btf, t->name_off));
+
+ /* Perform all checks on the validity of type for this argument, but if
+ * we know it can be IS_ERR at runtime, scrub pointer type and mark as
+ * scalar.
+ */
+ if (ptr_err_raw_tp) {
+ bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg);
+ info->reg_type = SCALAR_VALUE;
+ }
return true;
}
EXPORT_SYMBOL_GPL(btf_ctx_access);