| | | |
|---|---|---|
| author | Daniel Salzman <daniel.salzman@nic.cz> | 2022-12-17 12:43:44 +0100 |
| committer | Daniel Salzman <daniel.salzman@nic.cz> | 2022-12-19 20:45:25 +0100 |
| commit | 2637da194b927e9dc6e7dcc6b661c1c41c2ddfd1 (patch) | |
| tree | b82bf9d9becd807a48545d631b0624b522d28e57 | |
| parent | tests-extra: set BIND to use full format for its zone files (diff) | |
| download | knot-2637da194b927e9dc6e7dcc6b661c1c41c2ddfd1.tar.xz, knot-2637da194b927e9dc6e7dcc6b661c1c41c2ddfd1.zip | |
contrib: remove embedded libbpf 0.0.6, which no longer works with BTF and old kernels
Almost every distribution already ships a newer libbpf in its repositories.
53 files changed, 21 insertions(+), 25635 deletions(-)
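Since the build now relies purely on a system libbpf (see the configure.ac hunk below), the userspace loading path is the stock libbpf object API. As a hedged illustration only, not code from this commit: a minimal sketch of opening and attaching an XDP object against a system libbpf, assuming libbpf >= 0.8 for `bpf_xdp_attach()`; the object path, the program name `count_packets` (matching the BPF-side sketch at the end of this page), and the interface name are illustrative assumptions.

```c
/* Minimal sketch: load an XDP object with a *system* libbpf, which the
 * build now requires instead of the embedded 0.0.6 copy. All names here
 * (object path, program name, interface) are illustrative assumptions. */
#include <net/if.h>
#include <stdio.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj = bpf_object__open_file("xdp_prog.o", NULL);
	if (libbpf_get_error(obj))
		return 1;

	if (bpf_object__load(obj))	/* relocates against kernel BTF (CO-RE) */
		goto err;

	struct bpf_program *prog =
		bpf_object__find_program_by_name(obj, "count_packets");
	int ifindex = if_nametoindex("eth0");
	if (!prog || ifindex == 0)
		goto err;

	/* bpf_xdp_attach() is the modern attach API (libbpf >= 0.8); the
	 * embedded 0.0.6 snapshot predates it. */
	if (bpf_xdp_attach(ifindex, bpf_program__fd(prog), 0, NULL))
		goto err;

	puts("XDP program attached");
	return 0;
err:
	bpf_object__close(obj);
	return 1;
}
```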
diff --git a/Knot.files b/Knot.files
index ee922b06a..150040f77 100644
--- a/Knot.files
+++ b/Knot.files
@@ -25,47 +25,6 @@ src/contrib/getline.c
 src/contrib/getline.h
 src/contrib/json.c
 src/contrib/json.h
-src/contrib/libbpf/bpf/bpf.c
-src/contrib/libbpf/bpf/bpf.h
-src/contrib/libbpf/bpf/bpf_core_read.h
-src/contrib/libbpf/bpf/bpf_endian.h
-src/contrib/libbpf/bpf/bpf_helper_defs.h
-src/contrib/libbpf/bpf/bpf_helpers.h
-src/contrib/libbpf/bpf/bpf_prog_linfo.c
-src/contrib/libbpf/bpf/bpf_tracing.h
-src/contrib/libbpf/bpf/btf.c
-src/contrib/libbpf/bpf/btf.h
-src/contrib/libbpf/bpf/btf_dump.c
-src/contrib/libbpf/bpf/hashmap.c
-src/contrib/libbpf/bpf/hashmap.h
-src/contrib/libbpf/bpf/libbpf.c
-src/contrib/libbpf/bpf/libbpf.h
-src/contrib/libbpf/bpf/libbpf_errno.c
-src/contrib/libbpf/bpf/libbpf_internal.h
-src/contrib/libbpf/bpf/libbpf_probes.c
-src/contrib/libbpf/bpf/libbpf_util.h
-src/contrib/libbpf/bpf/netlink.c
-src/contrib/libbpf/bpf/nlattr.c
-src/contrib/libbpf/bpf/nlattr.h
-src/contrib/libbpf/bpf/str_error.c
-src/contrib/libbpf/bpf/str_error.h
-src/contrib/libbpf/bpf/xsk.c
-src/contrib/libbpf/bpf/xsk.h
-src/contrib/libbpf/include/asm/barrier.h
-src/contrib/libbpf/include/linux/compiler.h
-src/contrib/libbpf/include/linux/err.h
-src/contrib/libbpf/include/linux/filter.h
-src/contrib/libbpf/include/linux/kernel.h
-src/contrib/libbpf/include/linux/list.h
-src/contrib/libbpf/include/linux/overflow.h
-src/contrib/libbpf/include/linux/ring_buffer.h
-src/contrib/libbpf/include/linux/types.h
-src/contrib/libbpf/include/uapi/linux/bpf.h
-src/contrib/libbpf/include/uapi/linux/bpf_common.h
-src/contrib/libbpf/include/uapi/linux/btf.h
-src/contrib/libbpf/include/uapi/linux/if_link.h
-src/contrib/libbpf/include/uapi/linux/if_xdp.h
-src/contrib/libbpf/include/uapi/linux/netlink.h
 src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c
 src/contrib/libngtcp2/ngtcp2/crypto/shared.c
 src/contrib/libngtcp2/ngtcp2/crypto/shared.h
diff --git a/Knot.includes b/Knot.includes
index b93f4ee3b..4fbaa6ebd 100644
--- a/Knot.includes
+++ b/Knot.includes
@@ -1,13 +1,6 @@
 src
 src/contrib
 src/contrib/dnstap
-src/contrib/libbpf
-src/contrib/libbpf/bpf
-src/contrib/libbpf/include
-src/contrib/libbpf/include/asm
-src/contrib/libbpf/include/linux
-src/contrib/libbpf/include/uapi
-src/contrib/libbpf/include/uapi/linux
 src/contrib/libngtcp2
 src/contrib/libngtcp2/ngtcp2
 src/contrib/libngtcp2/ngtcp2/crypto
diff --git a/configure.ac b/configure.ac
index 8fecd828a..5a415f683 100644
--- a/configure.ac
+++ b/configure.ac
@@ -202,29 +202,18 @@ AC_ARG_ENABLE([xdp],
     [], [enable_xdp=auto])
 AS_CASE([$enable_xdp],
-  [auto], [PKG_CHECK_MODULES([libbpf], [libbpf >= 0.0.6], [enable_xdp=yes], [enable_xdp=no])],
-  [yes],  [PKG_CHECK_MODULES([libbpf], [libbpf >= 0.0.6], [enable_xdp=yes],
-            [PKG_CHECK_MODULES([libelf], [libelf],
-              [AC_CHECK_FUNC([reallocarray],
-                [enable_xdp=embedded
-                 embedded_libbpf_CFLAGS="-I\$(top_srcdir)/src/contrib/libbpf/include -I\$(top_srcdir)/src/contrib/libbpf/include/uapi"
-                 embedded_libbpf_LIBS=$libelf_LIBS
-                 libbpf_CFLAGS="-I\$(top_srcdir)/src/contrib/libbpf -I\$(top_srcdir)/src/contrib/libbpf/include/uapi"],
-                [enable_xdp=no
-                 AC_MSG_WARN([reallocarray not available])])],
-              [AC_MSG_ERROR([libelf is required])])])],
+  [auto], [PKG_CHECK_MODULES([libbpf], [libbpf], [enable_xdp=yes], [enable_xdp=no])],
+  [yes],  [PKG_CHECK_MODULES([libbpf], [libbpf], [enable_xdp=yes], [
+            AC_MSG_WARN([libbpf not available])])],
   [no], [],
   [*], [AC_MSG_ERROR([Invalid value of --enable-xdp.])]
 )
 
-AM_CONDITIONAL([EMBEDDED_LIBBPF], [test "$enable_xdp" = "embedded"])
 AM_CONDITIONAL([ENABLE_XDP], [test "$enable_xdp" != "no"])
-AC_SUBST([embedded_libbpf_CFLAGS])
-AC_SUBST([embedded_libbpf_LIBS])
 
 AS_IF([test "$enable_xdp" == "yes"], [
   PKG_CHECK_MODULES([libxdp], [libxdp], [enable_xdp=libxdp], [enable_xdp=yes])
   AS_IF([test "$enable_xdp" == "libxdp"], [
-    AC_DEFINE([USE_LIBXDP], [1], [Use external libxdp and libbpf.])
+    AC_DEFINE([USE_LIBXDP], [1], [Use libxdp.])
     libbpf_CFLAGS="$libbpf_CFLAGS $libxdp_CFLAGS"
     libbpf_LIBS="$libbpf_LIBS $libxdp_LIBS"
   ])]
diff --git a/distro/pkg/deb/control b/distro/pkg/deb/control
index 8750c2192..a576ab732 100644
--- a/distro/pkg/deb/control
+++ b/distro/pkg/deb/control
@@ -13,6 +13,7 @@ Build-Depends:
  automake,
  debhelper (>= 11),
  dh-python,
+ libbpf-dev,
  libcap-ng-dev,
  libedit-dev,
  libelf-dev,
diff --git a/distro/pkg/deb/copyright b/distro/pkg/deb/copyright
index 7e0f90f92..794b280e4 100644
--- a/distro/pkg/deb/copyright
+++ b/distro/pkg/deb/copyright
@@ -35,18 +35,6 @@ Copyright: 2014, Farsight Security, Inc. <software@farsightsecurity.com>
            2011-2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
 License: GPL-3+
 
-Files: src/contrib/libbpf/*
-Copyright: 2013-2015 Alexei Starovoitov <ast@kernel.org>
-           2015 Wang Nan <wangnan0@huawei.com>
-           2015 Huawei Inc.
-           2017 Nicira, Inc.
-           2019 Isovalent, Inc.
-           2019 Netronome Systems, Inc.
-           2003-2013 Thomas Graf <tgraf@suug.ch>
-           2018-2019 Intel Corporation.
-           2018-2019 Facebook
-License: LGPL-2.1
-
 Files: src/contrib/libngtcp2/*
 Copyright: 2016-2022 ngtcp2 contributors
            2012-2017 nghttp2 contributors
@@ -128,23 +116,6 @@ License: LGPL-2.0
  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  02110-1301, USA.
 
-License: LGPL-2.1
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 2.1 of the License, or
- (at your option) any later version.
- .
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
- .
- You should have received a copy of the GNU Lesser General Public License
- along with this program; If not, see <http://www.gnu.org/licenses/>.
- .
- On Debian systems, the complete text of the GNU Lesser General Public
- License version 2.1 can be found in `/usr/share/common-licenses/LGPL-2.1'.
-
 License: 0BSD
  Permission to use, copy, modify, and distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
diff --git a/distro/pkg/deb/rules b/distro/pkg/deb/rules
index bfa0bbab2..8792f3ec4 100755
--- a/distro/pkg/deb/rules
+++ b/distro/pkg/deb/rules
@@ -80,6 +80,8 @@ override_dh_auto_install-arch:
 		echo "XDP disabled"; \
 		touch $(CURDIR)/debian/tmp/usr/share/man/man8/kxdpgun.8; \
 		printf '#!/bin/sh\n\necho "kxdpgun not available"\n' > $(CURDIR)/debian/tmp/usr/sbin/kxdpgun; \
+		echo "Stripping the XDP symbols"; \
+		sed -i -E '/knot_xdp_|knot_eth_|knot_tcp_[crst]|knot_xquic_|KNOT_XDP_/d' $(LIBKNOT_SYMBOLS); \
 	fi
 
 override_dh_auto_install-indep:
diff --git a/distro/pkg/rpm/knot.spec b/distro/pkg/rpm/knot.spec
index d9f627f54..3800b305a 100644
--- a/distro/pkg/rpm/knot.spec
+++ b/distro/pkg/rpm/knot.spec
@@ -32,7 +32,7 @@ BuildRequires:  libtool
 BuildRequires:  make
 BuildRequires:  gcc
 BuildRequires:  pkgconfig(liburcu)
-BuildRequires:  pkgconfig(gnutls) >= 3.3
+BuildRequires:  pkgconfig(gnutls)
 BuildRequires:  pkgconfig(libedit)
 
 # Optional dependencies
@@ -47,6 +47,8 @@ BuildRequires:  pkgconfig(libfstrm)
 BuildRequires:  pkgconfig(libprotobuf-c)
 # geoip dependencies
 BuildRequires:  pkgconfig(libmaxminddb)
+# XDP dependencies
+BuildRequires:  pkgconfig(libbpf)
 
 # Distro-dependent dependencies
 %if 0%{?suse_version}
@@ -54,33 +56,20 @@ BuildRequires:  python3-Sphinx
 BuildRequires:  lmdb-devel
 BuildRequires:  protobuf-c
 Requires(pre):  pwdutils
+%if 0%{?sle_version} != 150400
+BuildRequires:  pkgconfig(libxdp)
 %endif
-%if 0%{?rhel} && 0%{?rhel} <= 7
-BuildRequires:  python-sphinx
-BuildRequires:  lmdb-devel
 %endif
-%if 0%{?fedora} || 0%{?rhel} > 7
+%if 0%{?fedora} || 0%{?rhel}
 BuildRequires:  python3-sphinx
 BuildRequires:  pkgconfig(lmdb)
-%endif
-
-%if 0%{?centos} == 7 || 0%{?rhel} == 7
-%define configure_xdp --enable-xdp=no
-%else
-%define use_xdp 1
-%if 0%{?rhel} == 8 || 0%{?suse_version}
-# Use the embedded libbpf
-%define use_xdp 1
-%define configure_xdp --enable-xdp=yes --enable-quic=yes
-BuildRequires:  pkgconfig(libelf)
-%else
-# XDP is auto-enabled when libbpf is present
-%define configure_xdp --enable-quic=yes
-BuildRequires:  pkgconfig(libbpf) >= 0.0.6
-%if 0%{?fedora} >= 36
+%if 0%{?fedora}
 BuildRequires:  pkgconfig(libxdp)
 %endif
 %endif
+
+%if 0%{?rhel} >= 9 || 0%{?suse_version} || 0%{?fedora}
+%define configure_quic --enable-quic=yes
 %endif
 
 Requires(post):  systemd %{_sbindir}/runuser
@@ -176,7 +165,7 @@ CFLAGS="%{optflags} -DNDEBUG -Wno-unused"
     --with-moduledir=%{_libdir}/knot/modules-%{BASE_VERSION} \
     --with-storage=/var/lib/knot \
     %{?configure_db_sizes} \
-    %{?configure_xdp} \
+    %{?configure_quic} \
     --disable-static \
     --enable-dnstap=yes \
     --with-module-dnstap=shared \
@@ -288,10 +277,8 @@ getent passwd knot >/dev/null || \
 %{_bindir}/kdig
 %{_bindir}/khost
 %{_bindir}/knsupdate
-%if 0%{?use_xdp}
 %{_sbindir}/kxdpgun
 %{_mandir}/man8/kxdpgun.*
-%endif
 %{_mandir}/man1/kdig.*
 %{_mandir}/man1/khost.*
 %{_mandir}/man1/knsupdate.*
diff --git a/src/contrib/Makefile.inc b/src/contrib/Makefile.inc
index fe376f26f..2ca6aae01 100644
--- a/src/contrib/Makefile.inc
+++ b/src/contrib/Makefile.inc
@@ -13,8 +13,6 @@ EXTRA_DIST += \
 	contrib/licenses/0BSD \
 	contrib/licenses/BSD-3-Clause \
 	contrib/licenses/LGPL-2.0 \
-	contrib/licenses/LGPL-2.1 \
-	contrib/libbpf/LICENSE \
 	contrib/libngtcp2/LICENSE \
 	contrib/openbsd/LICENSE \
 	contrib/ucw/LICENSE \
@@ -83,57 +81,6 @@ libcontrib_la_SOURCES = \
 	contrib/vpool/vpool.c \
 	contrib/vpool/vpool.h
 
-if EMBEDDED_LIBBPF
-noinst_LTLIBRARIES += libembbpf.la
-
-libembbpf_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(embedded_libbpf_CFLAGS)
-libembbpf_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
-libembbpf_LIBS = libembbpf.la $(embedded_libbpf_LIBS)
-
-libembbpf_la_SOURCES = \
-	contrib/libbpf/include/asm/barrier.h \
-	contrib/libbpf/include/linux/compiler.h \
-	contrib/libbpf/include/linux/err.h \
-	contrib/libbpf/include/linux/filter.h \
-	contrib/libbpf/include/linux/kernel.h \
-	contrib/libbpf/include/linux/list.h \
-	contrib/libbpf/include/linux/overflow.h \
-	contrib/libbpf/include/linux/ring_buffer.h \
-	contrib/libbpf/include/linux/types.h \
-	contrib/libbpf/include/uapi/linux/bpf_common.h \
-	contrib/libbpf/include/uapi/linux/bpf.h \
-	contrib/libbpf/include/uapi/linux/btf.h \
-	contrib/libbpf/include/uapi/linux/if_link.h \
-	contrib/libbpf/include/uapi/linux/if_xdp.h \
-	contrib/libbpf/include/uapi/linux/netlink.h \
-	contrib/libbpf/bpf/bpf.c \
-	contrib/libbpf/bpf/bpf.h \
-	contrib/libbpf/bpf/bpf_core_read.h \
-	contrib/libbpf/bpf/bpf_endian.h \
-	contrib/libbpf/bpf/bpf_helper_defs.h \
-	contrib/libbpf/bpf/bpf_helpers.h \
-	contrib/libbpf/bpf/bpf_prog_linfo.c \
-	contrib/libbpf/bpf/bpf_tracing.h \
-	contrib/libbpf/bpf/btf.c \
-	contrib/libbpf/bpf/btf.h \
-	contrib/libbpf/bpf/btf_dump.c \
-	contrib/libbpf/bpf/hashmap.c \
-	contrib/libbpf/bpf/hashmap.h \
-	contrib/libbpf/bpf/libbpf.c \
-	contrib/libbpf/bpf/libbpf.h \
-	contrib/libbpf/bpf/libbpf_errno.c \
-	contrib/libbpf/bpf/libbpf_internal.h \
-	contrib/libbpf/bpf/libbpf_probes.c \
-	contrib/libbpf/bpf/libbpf_util.h \
-	contrib/libbpf/bpf/netlink.c \
-	contrib/libbpf/bpf/nlattr.c \
-	contrib/libbpf/bpf/nlattr.h \
-	contrib/libbpf/bpf/str_error.c \
-	contrib/libbpf/bpf/str_error.h \
-	contrib/libbpf/bpf/xsk.c \
-	contrib/libbpf/bpf/xsk.h
-endif EMBEDDED_LIBBPF
-
 if HAVE_LIBDNSTAP
 
 noinst_LTLIBRARIES += libdnstap.la
diff --git a/src/contrib/libbpf/LICENSE b/src/contrib/libbpf/LICENSE
deleted file mode 100644
index 149c7b0ed..000000000
--- a/src/contrib/libbpf/LICENSE
+++ /dev/null
@@ -1 +0,0 @@
-../licenses/LGPL-2.1
\ No newline at end of file diff --git a/src/contrib/libbpf/bpf/bpf.c b/src/contrib/libbpf/bpf/bpf.c deleted file mode 100644 index 98596e153..000000000 --- a/src/contrib/libbpf/bpf/bpf.c +++ /dev/null @@ -1,710 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * common eBPF ELF operations. - * - * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - */ - -#include <stdlib.h> -#include <string.h> -#include <memory.h> -#include <unistd.h> -#include <asm/unistd.h> -#include <errno.h> -#include <linux/bpf.h> -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" - -/* - * When building perf, unistd.h is overridden. __NR_bpf is - * required to be defined explicitly. - */ -#ifndef __NR_bpf -# if defined(__i386__) -# define __NR_bpf 357 -# elif defined(__x86_64__) -# define __NR_bpf 321 -# elif defined(__aarch64__) -# define __NR_bpf 280 -# elif defined(__sparc__) -# define __NR_bpf 349 -# elif defined(__s390__) -# define __NR_bpf 351 -# elif defined(__arc__) -# define __NR_bpf 280 -# else -# error __NR_bpf not defined. libbpf does not support your arch. 
-# endif -#endif - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64) (unsigned long) ptr; -} - -static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, - unsigned int size) -{ - return syscall(__NR_bpf, cmd, attr, size); -} - -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) -{ - int fd; - - do { - fd = sys_bpf(BPF_PROG_LOAD, attr, size); - } while (fd < 0 && errno == EAGAIN); - - return fd; -} - -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) -{ - union bpf_attr attr; - - memset(&attr, '\0', sizeof(attr)); - - attr.map_type = create_attr->map_type; - attr.key_size = create_attr->key_size; - attr.value_size = create_attr->value_size; - attr.max_entries = create_attr->max_entries; - attr.map_flags = create_attr->map_flags; - if (create_attr->name) - memcpy(attr.map_name, create_attr->name, - min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); - attr.numa_node = create_attr->numa_node; - attr.btf_fd = create_attr->btf_fd; - attr.btf_key_type_id = create_attr->btf_key_type_id; - attr.btf_value_type_id = create_attr->btf_value_type_id; - attr.map_ifindex = create_attr->map_ifindex; - attr.inner_map_fd = create_attr->inner_map_fd; - - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); -} - -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node) -{ - struct bpf_create_map_attr map_attr = {}; - - map_attr.name = name; - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; - if (node >= 0) { - map_attr.numa_node = node; - map_attr.map_flags |= BPF_F_NUMA_NODE; - } - - return bpf_create_map_xattr(&map_attr); -} - -int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags) -{ - struct bpf_create_map_attr map_attr = {}; - - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; - - return bpf_create_map_xattr(&map_attr); -} - -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags) -{ - struct bpf_create_map_attr map_attr = {}; - - map_attr.name = name; - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; - - return bpf_create_map_xattr(&map_attr); -} - -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node) -{ - union bpf_attr attr; - - memset(&attr, '\0', sizeof(attr)); - - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = 4; - attr.inner_map_fd = inner_map_fd; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - if (name) - memcpy(attr.map_name, name, - min(strlen(name), BPF_OBJ_NAME_LEN - 1)); - - if (node >= 0) { - attr.map_flags |= BPF_F_NUMA_NODE; - attr.numa_node = node; - } - - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); -} - -int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags) -{ - return bpf_create_map_in_map_node(map_type, name, key_size, - inner_map_fd, max_entries, map_flags, - -1); -} - -static 
void * -alloc_zero_tailing_info(const void *orecord, __u32 cnt, - __u32 actual_rec_size, __u32 expected_rec_size) -{ - __u64 info_len = (__u64)actual_rec_size * cnt; - void *info, *nrecord; - int i; - - info = malloc(info_len); - if (!info) - return NULL; - - /* zero out bytes kernel does not understand */ - nrecord = info; - for (i = 0; i < cnt; i++) { - memcpy(nrecord, orecord, expected_rec_size); - memset(nrecord + expected_rec_size, 0, - actual_rec_size - expected_rec_size); - orecord += actual_rec_size; - nrecord += actual_rec_size; - } - - return info; -} - -int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz) -{ - void *finfo = NULL, *linfo = NULL; - union bpf_attr attr; - __u32 log_level; - int fd; - - if (!load_attr || !log_buf != !log_buf_sz) - return -EINVAL; - - log_level = load_attr->log_level; - if (log_level > (4 | 2 | 1) || (log_level && !log_buf)) - return -EINVAL; - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = load_attr->prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - if (attr.prog_type == BPF_PROG_TYPE_TRACING) { - attr.attach_btf_id = load_attr->attach_btf_id; - attr.attach_prog_fd = load_attr->attach_prog_fd; - } else { - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = load_attr->kern_version; - } - attr.insn_cnt = (__u32)load_attr->insns_cnt; - attr.insns = ptr_to_u64(load_attr->insns); - attr.license = ptr_to_u64(load_attr->license); - - attr.log_level = log_level; - if (log_level) { - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_buf_sz; - } else { - attr.log_buf = ptr_to_u64(NULL); - attr.log_size = 0; - } - - attr.prog_btf_fd = load_attr->prog_btf_fd; - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - attr.func_info = ptr_to_u64(load_attr->func_info); - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - attr.line_info = ptr_to_u64(load_attr->line_info); - if (load_attr->name) - memcpy(attr.prog_name, load_attr->name, - min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); - attr.prog_flags = load_attr->prog_flags; - - fd = sys_bpf_prog_load(&attr, sizeof(attr)); - if (fd >= 0) - return fd; - - /* After bpf_prog_load, the kernel may modify certain attributes - * to give user space a hint how to deal with loading failure. - * Check to see whether we can make some changes and load again. 
- */ - while (errno == E2BIG && (!finfo || !linfo)) { - if (!finfo && attr.func_info_cnt && - attr.func_info_rec_size < load_attr->func_info_rec_size) { - /* try with corrected func info records */ - finfo = alloc_zero_tailing_info(load_attr->func_info, - load_attr->func_info_cnt, - load_attr->func_info_rec_size, - attr.func_info_rec_size); - if (!finfo) - goto done; - - attr.func_info = ptr_to_u64(finfo); - attr.func_info_rec_size = load_attr->func_info_rec_size; - } else if (!linfo && attr.line_info_cnt && - attr.line_info_rec_size < - load_attr->line_info_rec_size) { - linfo = alloc_zero_tailing_info(load_attr->line_info, - load_attr->line_info_cnt, - load_attr->line_info_rec_size, - attr.line_info_rec_size); - if (!linfo) - goto done; - - attr.line_info = ptr_to_u64(linfo); - attr.line_info_rec_size = load_attr->line_info_rec_size; - } else { - break; - } - - fd = sys_bpf_prog_load(&attr, sizeof(attr)); - - if (fd >= 0) - goto done; - } - - if (log_level || !log_buf) - goto done; - - /* Try again with log */ - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_buf_sz; - attr.log_level = 1; - log_buf[0] = 0; - fd = sys_bpf_prog_load(&attr, sizeof(attr)); -done: - free(finfo); - free(linfo); - return fd; -} - -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz) -{ - struct bpf_load_program_attr load_attr; - - memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = 0; - load_attr.name = NULL; - load_attr.insns = insns; - load_attr.insns_cnt = insns_cnt; - load_attr.license = license; - load_attr.kern_version = kern_version; - - return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); -} - -int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, __u32 prog_flags, const char *license, - __u32 kern_version, char *log_buf, size_t log_buf_sz, - int log_level) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = type; - attr.insn_cnt = (__u32)insns_cnt; - attr.insns = ptr_to_u64(insns); - attr.license = ptr_to_u64(license); - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_buf_sz; - attr.log_level = log_level; - log_buf[0] = 0; - attr.kern_version = kern_version; - attr.prog_flags = prog_flags; - - return sys_bpf_prog_load(&attr, sizeof(attr)); -} - -int bpf_map_update_elem(int fd, const void *key, const void *value, - __u64 flags) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_fd = fd; - attr.key = ptr_to_u64(key); - attr.value = ptr_to_u64(value); - attr.flags = flags; - - return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_lookup_elem(int fd, const void *key, void *value) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_fd = fd; - attr.key = ptr_to_u64(key); - attr.value = ptr_to_u64(value); - - return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_fd = fd; - attr.key = ptr_to_u64(key); - attr.value = ptr_to_u64(value); - attr.flags = flags; - - return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_fd = fd; - attr.key = 
ptr_to_u64(key); - attr.value = ptr_to_u64(value); - - return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_delete_elem(int fd, const void *key) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_fd = fd; - attr.key = ptr_to_u64(key); - - return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); -} - -int bpf_map_get_next_key(int fd, const void *key, void *next_key) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_fd = fd; - attr.key = ptr_to_u64(key); - attr.next_key = ptr_to_u64(next_key); - - return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); -} - -int bpf_map_freeze(int fd) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_fd = fd; - - return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); -} - -int bpf_obj_pin(int fd, const char *pathname) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.pathname = ptr_to_u64((void *)pathname); - attr.bpf_fd = fd; - - return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); -} - -int bpf_obj_get(const char *pathname) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.pathname = ptr_to_u64((void *)pathname); - - return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); -} - -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, - unsigned int flags) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd; - attr.attach_type = type; - attr.attach_flags = flags; - - return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); -} - -int bpf_prog_detach(int target_fd, enum bpf_attach_type type) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_type = type; - - return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); -} - -int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd; - attr.attach_type = type; - - return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); -} - -int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, - __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) -{ - union bpf_attr attr; - int ret; - - memset(&attr, 0, sizeof(attr)); - attr.query.target_fd = target_fd; - attr.query.attach_type = type; - attr.query.query_flags = query_flags; - attr.query.prog_cnt = *prog_cnt; - attr.query.prog_ids = ptr_to_u64(prog_ids); - - ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); - if (attach_flags) - *attach_flags = attr.query.attach_flags; - *prog_cnt = attr.query.prog_cnt; - return ret; -} - -int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, - void *data_out, __u32 *size_out, __u32 *retval, - __u32 *duration) -{ - union bpf_attr attr; - int ret; - - memset(&attr, 0, sizeof(attr)); - attr.test.prog_fd = prog_fd; - attr.test.data_in = ptr_to_u64(data); - attr.test.data_out = ptr_to_u64(data_out); - attr.test.data_size_in = size; - attr.test.repeat = repeat; - - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); - if (size_out) - *size_out = attr.test.data_size_out; - if (retval) - *retval = attr.test.retval; - if (duration) - *duration = attr.test.duration; - return ret; -} - -int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) -{ - union bpf_attr attr; - int ret; - - if (!test_attr->data_out && test_attr->data_size_out > 0) - return -EINVAL; - - memset(&attr, 0, sizeof(attr)); 
- attr.test.prog_fd = test_attr->prog_fd; - attr.test.data_in = ptr_to_u64(test_attr->data_in); - attr.test.data_out = ptr_to_u64(test_attr->data_out); - attr.test.data_size_in = test_attr->data_size_in; - attr.test.data_size_out = test_attr->data_size_out; - attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in); - attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out); - attr.test.ctx_size_in = test_attr->ctx_size_in; - attr.test.ctx_size_out = test_attr->ctx_size_out; - attr.test.repeat = test_attr->repeat; - - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); - test_attr->data_size_out = attr.test.data_size_out; - test_attr->ctx_size_out = attr.test.ctx_size_out; - test_attr->retval = attr.test.retval; - test_attr->duration = attr.test.duration; - return ret; -} - -static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) -{ - union bpf_attr attr; - int err; - - memset(&attr, 0, sizeof(attr)); - attr.start_id = start_id; - - err = sys_bpf(cmd, &attr, sizeof(attr)); - if (!err) - *next_id = attr.next_id; - - return err; -} - -int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) -{ - return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID); -} - -int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) -{ - return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID); -} - -int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id) -{ - return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID); -} - -int bpf_prog_get_fd_by_id(__u32 id) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.prog_id = id; - - return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); -} - -int bpf_map_get_fd_by_id(__u32 id) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.map_id = id; - - return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); -} - -int bpf_btf_get_fd_by_id(__u32 id) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.btf_id = id; - - return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); -} - -int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) -{ - union bpf_attr attr; - int err; - - memset(&attr, 0, sizeof(attr)); - attr.info.bpf_fd = prog_fd; - attr.info.info_len = *info_len; - attr.info.info = ptr_to_u64(info); - - err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); - if (!err) - *info_len = attr.info.info_len; - - return err; -} - -int bpf_raw_tracepoint_open(const char *name, int prog_fd) -{ - union bpf_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.raw_tracepoint.name = ptr_to_u64(name); - attr.raw_tracepoint.prog_fd = prog_fd; - - return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); -} - -int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, - bool do_log) -{ - union bpf_attr attr = {}; - int fd; - - attr.btf = ptr_to_u64(btf); - attr.btf_size = btf_size; - -retry: - if (do_log && log_buf && log_buf_size) { - attr.btf_log_level = 1; - attr.btf_log_size = log_buf_size; - attr.btf_log_buf = ptr_to_u64(log_buf); - } - - fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); - if (fd == -1 && !do_log && log_buf && log_buf_size) { - do_log = true; - goto retry; - } - - return fd; -} - -int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, - __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, - __u64 *probe_addr) -{ - union bpf_attr attr = {}; - int err; - - attr.task_fd_query.pid = pid; - attr.task_fd_query.fd = fd; - attr.task_fd_query.flags = flags; - attr.task_fd_query.buf = ptr_to_u64(buf); - 
attr.task_fd_query.buf_len = *buf_len; - - err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); - *buf_len = attr.task_fd_query.buf_len; - *prog_id = attr.task_fd_query.prog_id; - *fd_type = attr.task_fd_query.fd_type; - *probe_offset = attr.task_fd_query.probe_offset; - *probe_addr = attr.task_fd_query.probe_addr; - - return err; -} diff --git a/src/contrib/libbpf/bpf/bpf.h b/src/contrib/libbpf/bpf/bpf.h deleted file mode 100644 index 3c791fa8e..000000000 --- a/src/contrib/libbpf/bpf/bpf.h +++ /dev/null @@ -1,184 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * common eBPF ELF operations. - * - * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - */ -#ifndef __LIBBPF_BPF_H -#define __LIBBPF_BPF_H - -#include <linux/bpf.h> -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - -struct bpf_create_map_attr { - const char *name; - enum bpf_map_type map_type; - __u32 map_flags; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 numa_node; - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 map_ifindex; - __u32 inner_map_fd; -}; - -LIBBPF_API int -bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); -LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags, int node); -LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags); -LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags); -LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags, int node); -LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags); - -struct bpf_load_program_attr { - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - const char *name; - const struct bpf_insn *insns; - size_t insns_cnt; - const char *license; - union { - __u32 kern_version; - __u32 attach_prog_fd; - }; - union { - __u32 prog_ifindex; - __u32 attach_btf_id; - }; - __u32 prog_btf_fd; - __u32 func_info_rec_size; - const void *func_info; - __u32 func_info_cnt; - __u32 line_info_rec_size; - const void *line_info; - __u32 line_info_cnt; - __u32 log_level; - __u32 prog_flags; -}; - -/* Flags to direct loading requirements */ -#define MAPS_RELAX_COMPAT 0x01 - -/* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 
8) /* verifier maximum in kernels <= 5.1 */ -LIBBPF_API int -bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); -LIBBPF_API int bpf_load_program(enum bpf_prog_type type, - const struct bpf_insn *insns, size_t insns_cnt, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz); -LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, - const struct bpf_insn *insns, - size_t insns_cnt, __u32 prog_flags, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, - int log_level); - -LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, - __u64 flags); - -LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); -LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, - __u64 flags); -LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, - void *value); -LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); -LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); -LIBBPF_API int bpf_map_freeze(int fd); -LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); -LIBBPF_API int bpf_obj_get(const char *pathname); -LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, - enum bpf_attach_type type, unsigned int flags); -LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); -LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, - enum bpf_attach_type type); - -struct bpf_prog_test_run_attr { - int prog_fd; - int repeat; - const void *data_in; - __u32 data_size_in; - void *data_out; /* optional */ - __u32 data_size_out; /* in: max length of data_out - * out: length of data_out */ - __u32 retval; /* out: return code of the BPF program */ - __u32 duration; /* out: average per repetition in ns */ - const void *ctx_in; /* optional */ - __u32 ctx_size_in; - void *ctx_out; /* optional */ - __u32 ctx_size_out; /* in: max length of ctx_out - * out: length of cxt_out */ -}; - -LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); - -/* - * bpf_prog_test_run does not check that data_out is large enough. Consider - * using bpf_prog_test_run_xattr instead. 
- */ -LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, - __u32 size, void *data_out, __u32 *size_out, - __u32 *retval, __u32 *duration); -LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); -LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); -LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); -LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); -LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); -LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); -LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); -LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, - __u32 query_flags, __u32 *attach_flags, - __u32 *prog_ids, __u32 *prog_cnt); -LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); -LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, - __u32 log_buf_size, bool do_log); -LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, - __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, - __u64 *probe_offset, __u64 *probe_addr); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* __LIBBPF_BPF_H */ diff --git a/src/contrib/libbpf/bpf/bpf_core_read.h b/src/contrib/libbpf/bpf/bpf_core_read.h deleted file mode 100644 index 7009dc90e..000000000 --- a/src/contrib/libbpf/bpf/bpf_core_read.h +++ /dev/null @@ -1,263 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_CORE_READ_H__ -#define __BPF_CORE_READ_H__ - -/* - * enum bpf_field_info_kind is passed as a second argument into - * __builtin_preserve_field_info() built-in to get a specific aspect of - * a field, captured as a first argument. __builtin_preserve_field_info(field, - * info_kind) returns __u32 integer and produces BTF field relocation, which - * is understood and processed by libbpf during BPF object loading. See - * selftests/bpf for examples. - */ -enum bpf_field_info_kind { - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, - BPF_FIELD_LSHIFT_U64 = 4, - BPF_FIELD_RSHIFT_U64 = 5, -}; - -#define __CORE_RELO(src, field, info) \ - __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst, \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) -#else -/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so - * for big-endian we need to adjust destination pointer accordingly, based on - * field byte size - */ -#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) -#endif - -/* - * Extract bitfield, identified by s->field, and return its value as u64. - * All this is done in relocatable manner, so bitfield changes such as - * signedness, bit size, offset changes, this will be handled automatically. - * This version of macro is using bpf_probe_read() to read underlying integer - * storage. Macro functions as an expression and its return type is - * bpf_probe_read()'s return value: 0, on success, <0 on error. 
- */ -#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ - unsigned long long val = 0; \ - \ - __CORE_BITFIELD_PROBE_READ(&val, s, field); \ - val <<= __CORE_RELO(s, field, LSHIFT_U64); \ - if (__CORE_RELO(s, field, SIGNED)) \ - val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ - else \ - val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ - val; \ -}) - -/* - * Extract bitfield, identified by s->field, and return its value as u64. - * This version of macro is using direct memory reads and should be used from - * BPF program types that support such functionality (e.g., typed raw - * tracepoints). - */ -#define BPF_CORE_READ_BITFIELD(s, field) ({ \ - const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ - unsigned long long val; \ - \ - switch (__CORE_RELO(s, field, BYTE_SIZE)) { \ - case 1: val = *(const unsigned char *)p; \ - case 2: val = *(const unsigned short *)p; \ - case 4: val = *(const unsigned int *)p; \ - case 8: val = *(const unsigned long long *)p; \ - } \ - val <<= __CORE_RELO(s, field, LSHIFT_U64); \ - if (__CORE_RELO(s, field, SIGNED)) \ - val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ - else \ - val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ - val; \ -}) - -/* - * Convenience macro to check that field actually exists in target kernel's. - * Returns: - * 1, if matching field is present in target kernel; - * 0, if no matching field found. - */ -#define bpf_core_field_exists(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) - -/* - * Convenience macro to get byte size of a field. Works for integers, - * struct/unions, pointers, arrays, and enums. - */ -#define bpf_core_field_size(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) - -/* - * bpf_core_read() abstracts away bpf_probe_read() call and captures offset - * relocation for source address using __builtin_preserve_access_index() - * built-in, provided by Clang. - * - * __builtin_preserve_access_index() takes as an argument an expression of - * taking an address of a field within struct/union. It makes compiler emit - * a relocation, which records BTF type ID describing root struct/union and an - * accessor string which describes exact embedded field that was used to take - * an address. See detailed description of this relocation format and - * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. - * - * This relocation allows libbpf to adjust BPF instruction to use correct - * actual field offset, based on target kernel BTF type that matches original - * (local) BTF, used to record relocation. - */ -#define bpf_core_read(dst, sz, src) \ - bpf_probe_read(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) - -/* - * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() - * additionally emitting BPF CO-RE field relocation for specified source - * argument. - */ -#define bpf_core_read_str(dst, sz, src) \ - bpf_probe_read_str(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) - -#define ___concat(a, b) a ## b -#define ___apply(fn, n) ___concat(fn, n) -#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N - -/* - * return number of provided arguments; used for switch-based variadic macro - * definitions (see ___last, ___arrow, etc below) - */ -#define ___narg(...) 
___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -/* - * return 0 if no arguments are passed, N - otherwise; used for - * recursively-defined macros to specify termination (0) case, and generic - * (N) case (e.g., ___read_ptrs, ___core_read) - */ -#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) - -#define ___last1(x) x -#define ___last2(a, x) x -#define ___last3(a, b, x) x -#define ___last4(a, b, c, x) x -#define ___last5(a, b, c, d, x) x -#define ___last6(a, b, c, d, e, x) x -#define ___last7(a, b, c, d, e, f, x) x -#define ___last8(a, b, c, d, e, f, g, x) x -#define ___last9(a, b, c, d, e, f, g, h, x) x -#define ___last10(a, b, c, d, e, f, g, h, i, x) x -#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__) - -#define ___nolast2(a, _) a -#define ___nolast3(a, b, _) a, b -#define ___nolast4(a, b, c, _) a, b, c -#define ___nolast5(a, b, c, d, _) a, b, c, d -#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e -#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f -#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g -#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h -#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i -#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__) - -#define ___arrow1(a) a -#define ___arrow2(a, b) a->b -#define ___arrow3(a, b, c) a->b->c -#define ___arrow4(a, b, c, d) a->b->c->d -#define ___arrow5(a, b, c, d, e) a->b->c->d->e -#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f -#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g -#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h -#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i -#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j -#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) - -#define ___type(...) typeof(___arrow(__VA_ARGS__)) - -#define ___read(read_fn, dst, src_type, src, accessor) \ - read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) - -/* "recursively" read a sequence of inner pointers using local __t var */ -#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); -#define ___rd_last(...) \ - ___read(bpf_core_read, &__t, \ - ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); -#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) -#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___read_ptrs(src, ...) \ - ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) - -#define ___core_read0(fn, dst, src, a) \ - ___read(fn, dst, ___type(src), src, a); -#define ___core_readN(fn, dst, src, ...) \ - ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ - ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ - ___last(__VA_ARGS__)); -#define ___core_read(fn, dst, src, a, ...) 
\ - ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ - src, a, ##__VA_ARGS__) - -/* - * BPF_CORE_READ_INTO() is a more performance-conscious variant of - * BPF_CORE_READ(), in which final field is read into user-provided storage. - * See BPF_CORE_READ() below for more details on general usage. - */ -#define BPF_CORE_READ_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ - }) - -/* - * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as - * BPF_CORE_READ() for intermediate pointers, but then executes (and returns - * corresponding error code) bpf_core_read_str() for final string read. - */ -#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ - }) - -/* - * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially - * when there are few pointer chasing steps. - * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like: - * int x = s->a.b.c->d.e->f->g; - * can be succinctly achieved using BPF_CORE_READ as: - * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); - * - * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF - * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: - * 1. const void *__t = s->a.b.c; - * 2. __t = __t->d.e; - * 3. __t = __t->f; - * 4. return __t->g; - * - * Equivalence is logical, because there is a heavy type casting/preservation - * involved, as well as all the reads are happening through bpf_probe_read() - * calls using __builtin_preserve_access_index() to emit CO-RE relocations. - * - * N.B. Only up to 9 "field accessors" are supported, which should be more - * than enough for any practical purpose. - */ -#define BPF_CORE_READ(src, a, ...) \ - ({ \ - ___type(src, a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ - __r; \ - }) - -#endif - diff --git a/src/contrib/libbpf/bpf/bpf_endian.h b/src/contrib/libbpf/bpf/bpf_endian.h deleted file mode 100644 index fbe280084..000000000 --- a/src/contrib/libbpf/bpf/bpf_endian.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_ENDIAN__ -#define __BPF_ENDIAN__ - -#include <linux/stddef.h> -#include <linux/swab.h> - -/* LLVM's BPF target selects the endianness of the CPU - * it compiles on, or the user specifies (bpfel/bpfeb), - * respectively. The used __BYTE_ORDER__ is defined by - * the compiler, we cannot rely on __BYTE_ORDER from - * libc headers, since it doesn't reflect the actual - * requested byte order. - * - * Note, LLVM's BPF target has different __builtin_bswapX() - * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE - * in bpfel and bpfeb case, which means below, that we map - * to cpu_to_be16(). We could use it unconditionally in BPF - * case, but better not rely on it, so that this header here - * can be used from application and BPF program side, which - * use different targets. 
- */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -# define __bpf_ntohs(x) __builtin_bswap16(x) -# define __bpf_htons(x) __builtin_bswap16(x) -# define __bpf_constant_ntohs(x) ___constant_swab16(x) -# define __bpf_constant_htons(x) ___constant_swab16(x) -# define __bpf_ntohl(x) __builtin_bswap32(x) -# define __bpf_htonl(x) __builtin_bswap32(x) -# define __bpf_constant_ntohl(x) ___constant_swab32(x) -# define __bpf_constant_htonl(x) ___constant_swab32(x) -# define __bpf_be64_to_cpu(x) __builtin_bswap64(x) -# define __bpf_cpu_to_be64(x) __builtin_bswap64(x) -# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x) -# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x) -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define __bpf_ntohs(x) (x) -# define __bpf_htons(x) (x) -# define __bpf_constant_ntohs(x) (x) -# define __bpf_constant_htons(x) (x) -# define __bpf_ntohl(x) (x) -# define __bpf_htonl(x) (x) -# define __bpf_constant_ntohl(x) (x) -# define __bpf_constant_htonl(x) (x) -# define __bpf_be64_to_cpu(x) (x) -# define __bpf_cpu_to_be64(x) (x) -# define __bpf_constant_be64_to_cpu(x) (x) -# define __bpf_constant_cpu_to_be64(x) (x) -#else -# error "Fix your compiler's __BYTE_ORDER__?!" -#endif - -#define bpf_htons(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_htons(x) : __bpf_htons(x)) -#define bpf_ntohs(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_ntohs(x) : __bpf_ntohs(x)) -#define bpf_htonl(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_htonl(x) : __bpf_htonl(x)) -#define bpf_ntohl(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_ntohl(x) : __bpf_ntohl(x)) -#define bpf_cpu_to_be64(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) -#define bpf_be64_to_cpu(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) - -#endif /* __BPF_ENDIAN__ */ diff --git a/src/contrib/libbpf/bpf/bpf_helper_defs.h b/src/contrib/libbpf/bpf/bpf_helper_defs.h deleted file mode 100644 index 1f357f667..000000000 --- a/src/contrib/libbpf/bpf/bpf_helper_defs.h +++ /dev/null @@ -1,2759 +0,0 @@ -/* This is auto-generated file. See bpf_helpers_doc.py for details. */ - -/* Forward declarations of BPF structs */ -struct bpf_fib_lookup; -struct bpf_perf_event_data; -struct bpf_perf_event_value; -struct bpf_sock; -struct bpf_sock_addr; -struct bpf_sock_ops; -struct bpf_sock_tuple; -struct bpf_spin_lock; -struct bpf_sysctl; -struct bpf_tcp_sock; -struct bpf_tunnel_key; -struct bpf_xfrm_state; -struct pt_regs; -struct sk_reuseport_md; -struct sockaddr; -struct tcphdr; -struct __sk_buff; -struct sk_msg_md; -struct xdp_md; - -/* - * bpf_map_lookup_elem - * - * Perform a lookup in *map* for an entry associated to *key*. - * - * Returns - * Map value associated to *key*, or **NULL** if no entry was - * found. - */ -static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) 1; - -/* - * bpf_map_update_elem - * - * Add or update the value of the entry associated to *key* in - * *map* with *value*. *flags* is one of: - * - * **BPF_NOEXIST** - * The entry for *key* must not exist in the map. - * **BPF_EXIST** - * The entry for *key* must already exist in the map. - * **BPF_ANY** - * No condition on the existence of the entry for *key*. - * - * Flag value **BPF_NOEXIST** cannot be used for maps of types - * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all - * elements always exist), the helper would return an error. - * - * Returns - * 0 on success, or a negative error in case of failure. 
- */ -static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *) 2; - -/* - * bpf_map_delete_elem - * - * Delete entry with *key* from *map*. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3; - -/* - * bpf_probe_read - * - * For tracing programs, safely attempt to read *size* bytes from - * kernel space address *unsafe_ptr* and store the data in *dst*. - * - * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() - * instead. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 4; - -/* - * bpf_ktime_get_ns - * - * Return the time elapsed since system boot, in nanoseconds. - * - * Returns - * Current *ktime*. - */ -static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5; - -/* - * bpf_trace_printk - * - * This helper is a "printk()-like" facility for debugging. It - * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if - * available. It can take up to three additional **u64** - * arguments (as an eBPF helpers, the total number of arguments is - * limited to five). - * - * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. - * The format of the trace is customizable, and the exact output - * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the - * *README* file under the same directory). However, it usually - * defaults to something like: - * - * :: - * - * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> - * - * In the above: - * - * * ``telnet`` is the name of the current task. - * * ``470`` is the PID of the current task. - * * ``001`` is the CPU number on which the task is - * running. - * * In ``.N..``, each character refers to a set of - * options (whether irqs are enabled, scheduling - * options, whether hard/softirqs are running, level of - * preempt_disabled respectively). **N** means that - * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** - * are set. - * * ``419421.045894`` is a timestamp. - * * ``0x00000001`` is a fake value used by BPF for the - * instruction pointer register. - * * ``<formatted msg>`` is the message formatted with - * *fmt*. - * - * The conversion specifiers supported by *fmt* are similar, but - * more limited than for printk(). They are **%d**, **%i**, - * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, - * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size - * of field, padding with zeroes, etc.) is available, and the - * helper will return **-EINVAL** (but print nothing) if it - * encounters an unknown specifier. - * - * Also, note that **bpf_trace_printk**\ () is slow, and should - * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and - * states that the helper should not be used "for production use" - * the first time this helper is used (or more precisely, when - * **trace_printk**\ () buffers are allocated). For passing values - * to user space, perf events should be preferred. - * - * Returns - * The number of bytes written to the buffer, or a negative error - * in case of failure. 
- */ -static int (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6; - -/* - * bpf_get_prandom_u32 - * - * Get a pseudo-random number. - * - * From a security point of view, this helper uses its own - * pseudo-random internal state, and cannot be used to infer the - * seed of other random functions in the kernel. However, it is - * essential to note that the generator used by the helper is not - * cryptographically secure. - * - * Returns - * A random 32-bit unsigned value. - */ -static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7; - -/* - * bpf_get_smp_processor_id - * - * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with preemption disabled, which means that the - * SMP processor id is stable during all the execution of the - * program. - * - * Returns - * The SMP id of the processor running the program. - */ -static __u32 (*bpf_get_smp_processor_id)(void) = (void *) 8; - -/* - * bpf_skb_store_bytes - * - * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len, __u64 flags) = (void *) 9; - -/* - * bpf_l3_csum_replace - * - * Recompute the layer 3 (e.g. IP) checksum for the packet - * associated to *skb*. Computation is incremental, so the helper - * must know the former value of the header field that was - * modified (*from*), the new value of this field (*to*), and the - * number of bytes (2 or 4) for this field, stored in *size*. - * Alternatively, it is possible to store the difference between - * the previous and the new values of the header field in *to*, by - * setting *from* and *size* to 0. For both methods, *offset* - * indicates the location of the IP checksum within the packet. - * - * This helper works in combination with **bpf_csum_diff**\ (), - * which does not update the checksum in-place, but offers more - * flexibility and can handle sizes larger than 2 or 4 for the - * checksum to update. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 size) = (void *) 10; - -/* - * bpf_l4_csum_replace - * - * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the - * packet associated to *skb*. Computation is incremental, so the - * helper must know the former value of the header field that was - * modified (*from*), the new value of this field (*to*), and the - * number of bytes (2 or 4) for this field, stored on the lowest - * four bits of *flags*. 
Alternatively, it is possible to store
- * the difference between the previous and the new values of the
- * header field in *to*, by setting *from* and the four lowest
- * bits of *flags* to 0. For both methods, *offset* indicates the
- * location of the IP checksum within the packet. In addition to
- * the size of the field, actual flags can be added to *flags*
- * (bitwise OR). With **BPF_F_MARK_MANGLED_0**, a null checksum is left
- * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
- * for updates resulting in a null checksum the value is set to
- * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
- *
- * This helper works in combination with **bpf_csum_diff**\ (),
- * which does not update the checksum in-place, but offers more
- * flexibility and can handle sizes larger than 2 or 4 for the
- * checksum to update.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 flags) = (void *) 11;
-
-/*
- * bpf_tail_call
- *
- * This special helper is used to trigger a "tail call", or in
- * other words, to jump into another eBPF program. The same stack
- * frame is used (but values on stack and in registers for the
- * caller are not accessible to the callee). This mechanism allows
- * for program chaining, either for raising the maximum number of
- * available eBPF instructions, or to execute given programs in
- * conditional blocks. For security reasons, there is an upper
- * limit to the number of successive tail calls that can be
- * performed.
- *
- * Upon call of this helper, the program attempts to jump into a
- * program referenced at index *index* in *prog_array_map*, a
- * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
- * *ctx*, a pointer to the context.
- *
- * If the call succeeds, the kernel immediately runs the first
- * instruction of the new program. This is not a function call,
- * and it never returns to the previous program. If the call
- * fails, then the helper has no effect, and the caller continues
- * to run its subsequent instructions. A call can fail if the
- * destination program for the jump does not exist (i.e. *index*
- * is greater than the number of entries in *prog_array_map*), or
- * if the maximum number of tail calls has been reached for this
- * chain of programs. This limit is defined in the kernel by the
- * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
- * which is currently set to 32.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) = (void *) 12;
-
-/*
- * bpf_clone_redirect
- *
- * Clone and redirect the packet associated to *skb* to another
- * net device of index *ifindex*. Both ingress and egress
- * interfaces can be used for redirection. The **BPF_F_INGRESS**
- * value in *flags* is used to make the distinction (ingress path
- * is selected if the flag is present, egress path otherwise).
- * This is the only flag supported for now.
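- *
- * Illustrative sketch for a TC program (the ifindex value 2 is an
- * assumption; TC_ACT_OK comes from linux/pkt_cls.h):
- *
- * ::
- *
- *	// mirror the packet to the egress side of ifindex 2,
- *	// then let the original continue on its way
- *	bpf_clone_redirect(skb, 2, 0);
- *	return TC_ACT_OK;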
- *
- * In comparison with the **bpf_redirect**\ () helper,
- * **bpf_clone_redirect**\ () has the associated cost of
- * duplicating the packet buffer, but this can be executed out of
- * the eBPF program. Conversely, **bpf_redirect**\ () is more
- * efficient, but it is handled through an action code where the
- * redirection happens only after the eBPF program has returned.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 flags) = (void *) 13;
-
-/*
- * bpf_get_current_pid_tgid
- *
- *
- * Returns
- * A 64-bit integer containing the current tgid and pid, and
- * created as such:
- * *current_task*\ **->tgid << 32 \|**
- * *current_task*\ **->pid**.
- */
-static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14;
-
-/*
- * bpf_get_current_uid_gid
- *
- *
- * Returns
- * A 64-bit integer containing the current GID and UID, and
- * created as such: *current_gid* **<< 32 \|** *current_uid*.
- */
-static __u64 (*bpf_get_current_uid_gid)(void) = (void *) 15;
-
-/*
- * bpf_get_current_comm
- *
- * Copy the **comm** attribute of the current task into *buf* of
- * *size_of_buf*. The **comm** attribute contains the name of
- * the executable (excluding the path) for the current task. The
- * *size_of_buf* must be strictly positive. On success, the
- * helper makes sure that the *buf* is NUL-terminated. On failure,
- * it is filled with zeroes.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *) 16;
-
-/*
- * bpf_get_cgroup_classid
- *
- * Retrieve the classid for the current task, i.e. for the net_cls
- * cgroup to which *skb* belongs.
- *
- * This helper can be used on the TC egress path, but not on ingress.
- *
- * The net_cls cgroup provides an interface to tag network packets
- * based on a user-provided identifier for all traffic coming from
- * the tasks belonging to the related cgroup. See also the related
- * kernel documentation, available from the Linux sources in file
- * *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
- *
- * The Linux kernel has two versions for cgroups: there are
- * cgroups v1 and cgroups v2. Both are available to users, who can
- * use a mixture of them, but note that the net_cls cgroup is for
- * cgroup v1 only. This makes it incompatible with BPF programs
- * run on cgroups, which is a cgroup-v2-only feature (a socket can
- * only hold data for one version of cgroups at a time).
- *
- * This helper is only available if the kernel was compiled with
- * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
- * "**y**" or to "**m**".
- *
- * Returns
- * The classid, or 0 for the default unconfigured classid.
- */
-static __u32 (*bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *) 17;
-
-/*
- * bpf_skb_vlan_push
- *
- * Push a *vlan_tci* (VLAN tag control information) of protocol
- * *vlan_proto* to the packet associated to *skb*, then update
- * the checksum. Note that if *vlan_proto* is different from
- * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
- * be **ETH_P_8021Q**.
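- *
- * Illustrative sketch (ETH_P_8021Q is assumed to come from
- * linux/if_ether.h; bpf_htons() is provided by bpf_endian.h):
- *
- * ::
- *
- *	// tag the packet with VLAN ID 100
- *	bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 100);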
- * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto, __u16 vlan_tci) = (void *) 18; - -/* - * bpf_skb_vlan_pop - * - * Pop a VLAN header from the packet associated to *skb*. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *) 19; - -/* - * bpf_skb_get_tunnel_key - * - * Get tunnel metadata. This helper takes a pointer *key* to an - * empty **struct bpf_tunnel_key** of **size**, that will be - * filled with tunnel metadata for the packet associated to *skb*. - * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which - * indicates that the tunnel is based on IPv6 protocol instead of - * IPv4. - * - * The **struct bpf_tunnel_key** is an object that generalizes the - * principal parameters used by various tunneling protocols into a - * single struct. This way, it can be used to easily make a - * decision based on the contents of the encapsulation header, - * "summarized" in this struct. In particular, it holds the IP - * address of the remote end (IPv4 or IPv6, depending on the case) - * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, - * this struct exposes the *key*\ **->tunnel_id**, which is - * generally mapped to a VNI (Virtual Network Identifier), making - * it programmable together with the **bpf_skb_set_tunnel_key**\ - * () helper. - * - * Let's imagine that the following code is part of a program - * attached to the TC ingress interface, on one end of a GRE - * tunnel, and is supposed to filter out all messages coming from - * remote ends with IPv4 address other than 10.0.0.1: - * - * :: - * - * int ret; - * struct bpf_tunnel_key key = {}; - * - * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); - * if (ret < 0) - * return TC_ACT_SHOT; // drop packet - * - * if (key.remote_ipv4 != 0x0a000001) - * return TC_ACT_SHOT; // drop packet - * - * return TC_ACT_OK; // accept packet - * - * This interface can also be used with all encapsulation devices - * that can operate in "collect metadata" mode: instead of having - * one network device per specific configuration, the "collect - * metadata" mode only requires a single device where the - * configuration can be extracted from this helper. - * - * This can be used together with various tunnels such as VXLan, - * Geneve, GRE or IP in IP (IPIP). - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 20; - -/* - * bpf_skb_set_tunnel_key - * - * Populate tunnel metadata for packet associated to *skb.* The - * tunnel metadata is set to the contents of *key*, of *size*. 
The - * *flags* can be set to a combination of the following values: - * - * **BPF_F_TUNINFO_IPV6** - * Indicate that the tunnel is based on IPv6 protocol - * instead of IPv4. - * **BPF_F_ZERO_CSUM_TX** - * For IPv4 packets, add a flag to tunnel metadata - * indicating that checksum computation should be skipped - * and checksum set to zeroes. - * **BPF_F_DONT_FRAGMENT** - * Add a flag to tunnel metadata indicating that the - * packet should not be fragmented. - * **BPF_F_SEQ_NUMBER** - * Add a flag to tunnel metadata indicating that a - * sequence number should be added to tunnel header before - * sending the packet. This flag was added for GRE - * encapsulation, but might be used with other protocols - * as well in the future. - * - * Here is a typical usage on the transmit path: - * - * :: - * - * struct bpf_tunnel_key key; - * populate key ... - * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); - * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); - * - * See also the description of the **bpf_skb_get_tunnel_key**\ () - * helper for additional information. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 21; - -/* - * bpf_perf_event_read - * - * Read the value of a perf event counter. This helper relies on a - * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of - * the perf event counter is selected when *map* is updated with - * perf event file descriptors. The *map* is an array whose size - * is the number of available CPUs, and each cell contains a value - * relative to one CPU. The value to retrieve is indicated by - * *flags*, that contains the index of the CPU to look up, masked - * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to - * **BPF_F_CURRENT_CPU** to indicate that the value for the - * current CPU should be retrieved. - * - * Note that before Linux 4.13, only hardware perf event can be - * retrieved. - * - * Also, be aware that the newer helper - * **bpf_perf_event_read_value**\ () is recommended over - * **bpf_perf_event_read**\ () in general. The latter has some ABI - * quirks where error and counter value are used as a return code - * (which is wrong to do since ranges may overlap). This issue is - * fixed with **bpf_perf_event_read_value**\ (), which at the same - * time provides more features over the **bpf_perf_event_read**\ - * () interface. Please refer to the description of - * **bpf_perf_event_read_value**\ () for details. - * - * Returns - * The value of the perf event counter read from the map, or a - * negative error code in case of failure. - */ -static __u64 (*bpf_perf_event_read)(void *map, __u64 flags) = (void *) 22; - -/* - * bpf_redirect - * - * Redirect the packet to another net device of index *ifindex*. - * This helper is somewhat similar to **bpf_clone_redirect**\ - * (), except that the packet is not cloned, which provides - * increased performance. - * - * Except for XDP, both ingress and egress interfaces can be used - * for redirection. The **BPF_F_INGRESS** value in *flags* is used - * to make the distinction (ingress path is selected if the flag - * is present, egress path otherwise). Currently, XDP only - * supports redirection to the egress interface, and accepts no - * flag at all. - * - * The same effect can be attained with the more generic - * **bpf_redirect_map**\ (), which requires specific maps to be - * used but offers better performance. 
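- *
- * Illustrative sketch for an XDP program (the ifindex value 3 is
- * an assumption):
- *
- * ::
- *
- *	// transmit the frame through ifindex 3; flags must be 0 for XDP
- *	return bpf_redirect(3, 0);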
- *
- * Returns
- * For XDP, the helper returns **XDP_REDIRECT** on success or
- * **XDP_ABORTED** on error. For other program types, the values
- * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
- * error.
- */
-static int (*bpf_redirect)(__u32 ifindex, __u64 flags) = (void *) 23;
-
-/*
- * bpf_get_route_realm
- *
- * Retrieve the realm of the route, that is to say the
- * **tclassid** field of the destination for the *skb*. The
- * identifier retrieved is a user-provided tag, similar to the
- * one used with the net_cls cgroup (see description for
- * **bpf_get_cgroup_classid**\ () helper), but here this tag is
- * held by a route (a destination entry), not by a task.
- *
- * Retrieving this identifier works with the clsact TC egress hook
- * (see also **tc-bpf(8)**), or alternatively on conventional
- * classful egress qdiscs, but not on TC ingress path. In case of
- * clsact TC egress hook, this has the advantage that, internally,
- * the destination entry has not been dropped yet in the transmit
- * path. Therefore, the destination entry does not need to be
- * artificially held via **netif_keep_dst**\ () for a classful
- * qdisc until the *skb* is freed.
- *
- * This helper is available only if the kernel was compiled with
- * the **CONFIG_IP_ROUTE_CLASSID** configuration option.
- *
- * Returns
- * The realm of the route for the packet associated to *skb*, or 0
- * if none was found.
- */
-static __u32 (*bpf_get_route_realm)(struct __sk_buff *skb) = (void *) 24;
-
-/*
- * bpf_perf_event_output
- *
- * Write raw *data* blob into a special BPF perf event held by
- * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
- * event must have the following attributes: **PERF_SAMPLE_RAW**
- * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
- * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
- *
- * The *flags* are used to indicate the index in *map* for which
- * the value must be put, masked with **BPF_F_INDEX_MASK**.
- * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
- * to indicate that the index of the current CPU core should be
- * used.
- *
- * The value to write, of *size*, is passed through eBPF stack and
- * pointed by *data*.
- *
- * The context of the program *ctx* also needs to be passed to the
- * helper.
- *
- * In user space, a program willing to read the values needs to
- * call **perf_event_open**\ () on the perf event (either for
- * one or for all CPUs) and to store the file descriptor into the
- * *map*. This must be done before the eBPF program can send data
- * into it. An example is available in file
- * *samples/bpf/trace_output_user.c* in the Linux kernel source
- * tree (the eBPF program counterpart is in
- * *samples/bpf/trace_output_kern.c*).
- *
- * **bpf_perf_event_output**\ () achieves better performance
- * than **bpf_trace_printk**\ () for sharing data with user
- * space, and is much better suited to streaming data from eBPF
- * programs.
- *
- * Note that this helper is not restricted to tracing use cases
- * and can be used with programs attached to TC or XDP as well,
- * where it allows for passing data to user space listeners. Data
- * can be:
- *
- * * Only custom structs,
- * * Only the packet payload, or
- * * A combination of both.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
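- *
- * A minimal sketch for a TC program (illustrative only; the
- * *events* map and the event struct are assumptions):
- *
- * ::
- *
- *	struct event { __u32 len; } ev = { .len = skb->len };
- *
- *	bpf_perf_event_output(skb, &events, BPF_F_CURRENT_CPU,
- *	                      &ev, sizeof(ev));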
- */ -static int (*bpf_perf_event_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 25; - -/* - * bpf_skb_load_bytes - * - * This helper was provided as an easy way to load data from a - * packet. It can be used to load *len* bytes from *offset* from - * the packet associated to *skb*, into the buffer pointed by - * *to*. - * - * Since Linux 4.7, usage of this helper has mostly been replaced - * by "direct packet access", enabling packet data to be - * manipulated with *skb*\ **->data** and *skb*\ **->data_end** - * pointing respectively to the first byte of packet data and to - * the byte after the last byte of packet data. However, it - * remains useful if one wishes to read large quantities of data - * at once from a packet into the eBPF stack. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to, __u32 len) = (void *) 26; - -/* - * bpf_get_stackid - * - * Walk a user or a kernel stack and return its id. To achieve - * this, the helper needs *ctx*, which is a pointer to the context - * on which the tracing program is executed, and a pointer to a - * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. - * - * The last argument, *flags*, holds the number of stack frames to - * skip (from 0 to 255), masked with - * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set - * a combination of the following flags: - * - * **BPF_F_USER_STACK** - * Collect a user space stack instead of a kernel stack. - * **BPF_F_FAST_STACK_CMP** - * Compare stacks by hash only. - * **BPF_F_REUSE_STACKID** - * If two different stacks hash into the same *stackid*, - * discard the old one. - * - * The stack id retrieved is a 32 bit long integer handle which - * can be further combined with other data (including other stack - * ids) and used as a key into maps. This can be useful for - * generating a variety of graphs (such as flame graphs or off-cpu - * graphs). - * - * For walking a stack, this helper is an improvement over - * **bpf_probe_read**\ (), which can be used with unrolled loops - * but is not efficient and consumes a lot of eBPF instructions. - * Instead, **bpf_get_stackid**\ () can collect up to - * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that - * this limit can be controlled with the **sysctl** program, and - * that it should be manually increased in order to profile long - * user stacks (such as stacks for Java programs). To do so, use: - * - * :: - * - * # sysctl kernel.perf_event_max_stack=<new value> - * - * Returns - * The positive or null stack id on success, or a negative error - * in case of failure. - */ -static int (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *) 27; - -/* - * bpf_csum_diff - * - * Compute a checksum difference, from the raw buffer pointed by - * *from*, of length *from_size* (that must be a multiple of 4), - * towards the raw buffer pointed by *to*, of size *to_size* - * (same remark). An optional *seed* can be added to the value - * (this can be cascaded, the seed may come from a previous call - * to the helper). - * - * This is flexible enough to be used in several ways: - * - * * With *from_size* == 0, *to_size* > 0 and *seed* set to - * checksum, it can be used when pushing new data. - * * With *from_size* > 0, *to_size* == 0 and *seed* set to - * checksum, it can be used when removing data from a packet. - * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it - * can be used to compute a diff. 
Note that *from_size* and - * *to_size* do not need to be equal. - * - * This helper can be used in combination with - * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to - * which one can feed in the difference computed with - * **bpf_csum_diff**\ (). - * - * Returns - * The checksum result, or a negative error code in case of - * failure. - */ -static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed) = (void *) 28; - -/* - * bpf_skb_get_tunnel_opt - * - * Retrieve tunnel options metadata for the packet associated to - * *skb*, and store the raw tunnel option data to the buffer *opt* - * of *size*. - * - * This helper can be used with encapsulation devices that can - * operate in "collect metadata" mode (please refer to the related - * note in the description of **bpf_skb_get_tunnel_key**\ () for - * more details). A particular example where this can be used is - * in combination with the Geneve encapsulation protocol, where it - * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) - * and retrieving arbitrary TLVs (Type-Length-Value headers) from - * the eBPF program. This allows for full customization of these - * headers. - * - * Returns - * The size of the option data retrieved. - */ -static int (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 29; - -/* - * bpf_skb_set_tunnel_opt - * - * Set tunnel options metadata for the packet associated to *skb* - * to the option data contained in the raw buffer *opt* of *size*. - * - * See also the description of the **bpf_skb_get_tunnel_opt**\ () - * helper for additional information. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 30; - -/* - * bpf_skb_change_proto - * - * Change the protocol of the *skb* to *proto*. Currently - * supported are transition from IPv4 to IPv6, and from IPv6 to - * IPv4. The helper takes care of the groundwork for the - * transition, including resizing the socket buffer. The eBPF - * program is expected to fill the new headers, if any, via - * **skb_store_bytes**\ () and to recompute the checksums with - * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ - * (). The main case for this helper is to perform NAT64 - * operations out of an eBPF program. - * - * Internally, the GSO type is marked as dodgy so that headers are - * checked and segments are recalculated by the GSO/GRO engine. - * The size for GSO target is adapted as well. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto, __u64 flags) = (void *) 31; - -/* - * bpf_skb_change_type - * - * Change the packet type for the packet associated to *skb*. This - * comes down to setting *skb*\ **->pkt_type** to *type*, except - * the eBPF program does not have a write access to *skb*\ - * **->pkt_type** beside this helper. Using a helper here allows - * for graceful handling of errors. 
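- *
- * Illustrative sketch (PACKET_HOST is assumed to come from
- * linux/if_packet.h):
- *
- * ::
- *
- *	// deliver the frame locally instead of redirecting it
- *	bpf_skb_change_type(skb, PACKET_HOST);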
- *
- * The major use case is to change incoming *skb*s to
- * **PACKET_HOST** in a programmatic way instead of having to
- * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
- * example.
- *
- * Note that *type* only allows certain values. At this time, they
- * are:
- *
- * **PACKET_HOST**
- * Packet is for us.
- * **PACKET_BROADCAST**
- * Send packet to all.
- * **PACKET_MULTICAST**
- * Send packet to group.
- * **PACKET_OTHERHOST**
- * Send packet to someone else.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_skb_change_type)(struct __sk_buff *skb, __u32 type) = (void *) 32;
-
-/*
- * bpf_skb_under_cgroup
- *
- * Check whether *skb* is a descendant of the cgroup2 held by
- * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
- *
- * Returns
- * The return value depends on the result of the test, and can be:
- *
- * * 0, if the *skb* failed the cgroup2 descendant test.
- * * 1, if the *skb* succeeded the cgroup2 descendant test.
- * * A negative error code, if an error occurred.
- */
-static int (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map, __u32 index) = (void *) 33;
-
-/*
- * bpf_get_hash_recalc
- *
- * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
- * not set, in particular if the hash was cleared due to mangling,
- * recompute this hash. Later accesses to the hash can be done
- * directly with *skb*\ **->hash**.
- *
- * Calling **bpf_set_hash_invalid**\ (), changing a packet
- * protocol with **bpf_skb_change_proto**\ (), or calling
- * **bpf_skb_store_bytes**\ () with the
- * **BPF_F_INVALIDATE_HASH** flag are actions susceptible to clear
- * the hash and to trigger a new computation for the next call to
- * **bpf_get_hash_recalc**\ ().
- *
- * Returns
- * The 32-bit hash.
- */
-static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34;
-
-/*
- * bpf_get_current_task
- *
- *
- * Returns
- * A pointer to the current task struct.
- */
-static __u64 (*bpf_get_current_task)(void) = (void *) 35;
-
-/*
- * bpf_probe_write_user
- *
- * Attempt in a safe way to write *len* bytes from the buffer
- * *src* to *dst* in memory. It only works for threads that are in
- * user context, and *dst* must be a valid user space address.
- *
- * This helper should not be used to implement any kind of
- * security mechanism because of TOC-TOU attacks, but rather to
- * debug, divert, and manipulate execution of semi-cooperative
- * processes.
- *
- * Keep in mind that this feature is meant for experiments, and it
- * has a risk of crashing the system and running programs.
- * Therefore, when an eBPF program using this helper is attached,
- * a warning including PID and process name is printed to kernel
- * logs.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (void *) 36;
-
-/*
- * bpf_current_task_under_cgroup
- *
- * Check whether the probe is being run in the context of a given
- * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
- * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
- *
- * Returns
- * The return value depends on the result of the test, and can be:
- *
- * * 0, if the current task belongs to the cgroup2.
- * * 1, if the current task does not belong to the cgroup2.
- * * A negative error code, if an error occurred.
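- *
- * A minimal sketch (illustrative only; *cgroup_map* is an assumed
- * **BPF_MAP_TYPE_CGROUP_ARRAY** populated from user space, and
- * the result is interpreted per the return codes above):
- *
- * ::
- *
- *	int ret = bpf_current_task_under_cgroup(&cgroup_map, 0);
- *	if (ret < 0)
- *		return 0; // lookup error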
- */
-static int (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37;
-
-/*
- * bpf_skb_change_tail
- *
- * Resize (trim or grow) the packet associated to *skb* to the
- * new *len*. The *flags* are reserved for future usage, and must
- * be left at zero.
- *
- * The basic idea is that the helper performs the needed work to
- * change the size of the packet, then the eBPF program rewrites
- * the rest via helpers like **bpf_skb_store_bytes**\ (),
- * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
- * and others. This helper is a slow path utility intended for
- * replies with control messages. Because it is targeted at the
- * slow path, the helper itself can afford to be slow: it
- * implicitly linearizes, unclones and drops offloads from the
- * *skb*.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 38;
-
-/*
- * bpf_skb_pull_data
- *
- * Pull in non-linear data in case the *skb* is non-linear and not
- * all of *len* is part of the linear section. Make *len* bytes
- * from *skb* readable and writable. If a zero value is passed for
- * *len*, then the whole length of the *skb* is pulled.
- *
- * This helper is only needed for reading and writing with direct
- * packet access.
- *
- * For direct packet access, testing that offsets to access
- * are within packet boundaries (test on *skb*\ **->data_end**) is
- * susceptible to fail if offsets are invalid, or if the requested
- * data is in non-linear parts of the *skb*. On failure the
- * program can just bail out, or in the case of a non-linear
- * buffer, use a helper to make the data available. The
- * **bpf_skb_load_bytes**\ () helper is a first solution to access
- * the data. Another one consists in using **bpf_skb_pull_data**
- * to pull in the non-linear parts once, then retest and
- * eventually access the data.
- *
- * At the same time, this also makes sure the *skb* is uncloned,
- * which is a necessary condition for direct write. As this needs
- * to be an invariant for the write part only, the verifier
- * detects writes and adds a prologue that is calling
- * **bpf_skb_pull_data()** to effectively unclone the *skb* from
- * the very beginning in case it is indeed cloned.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_skb_pull_data)(struct __sk_buff *skb, __u32 len) = (void *) 39;
-
-/*
- * bpf_csum_update
- *
- * Add the checksum *csum* into *skb*\ **->csum** in case the
- * driver has supplied a checksum for the entire packet into that
- * field. Return an error otherwise. This helper is intended to be
- * used in combination with **bpf_csum_diff**\ (), in particular
- * when the checksum needs to be updated after data has been
- * written into the packet through direct packet access.
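- *
- * A minimal sketch (illustrative only; *old_ip* and *new_ip* are
- * assumed __be32 values holding a header field before and after a
- * rewrite done via direct packet access):
- *
- * ::
- *
- *	__s64 diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);
- *	if (diff >= 0)
- *		bpf_csum_update(skb, (__wsum)diff);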
- * - * Returns - * The checksum on success, or a negative error code in case of - * failure. - */ -static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 40; - -/* - * bpf_set_hash_invalid - * - * Invalidate the current *skb*\ **->hash**. It can be used after - * mangling on headers through direct packet access, in order to - * indicate that the hash is outdated and to trigger a - * recalculation the next time the kernel tries to access this - * hash or when the **bpf_get_hash_recalc**\ () helper is called. - * - */ -static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41; - -/* - * bpf_get_numa_node_id - * - * Return the id of the current NUMA node. The primary use case - * for this helper is the selection of sockets for the local NUMA - * node, when the program is attached to sockets using the - * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), - * but the helper is also available to other eBPF program types, - * similarly to **bpf_get_smp_processor_id**\ (). - * - * Returns - * The id of current NUMA node. - */ -static int (*bpf_get_numa_node_id)(void) = (void *) 42; - -/* - * bpf_skb_change_head - * - * Grows headroom of packet associated to *skb* and adjusts the - * offset of the MAC header accordingly, adding *len* bytes of - * space. It automatically extends and reallocates memory as - * required. - * - * This helper can be used on a layer 3 *skb* to push a MAC header - * for redirection into a layer 2 device. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_change_head)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 43; - -/* - * bpf_xdp_adjust_head - * - * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that - * it is possible to use a negative value for *delta*. This helper - * can be used to prepare the packet for pushing or popping - * headers. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 44; - -/* - * bpf_probe_read_str - * - * Copy a NUL terminated string from an unsafe kernel address - * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for - * more details. - * - * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str() - * instead. - * - * Returns - * On success, the strictly positive length of the string, - * including the trailing NUL character. On error, a negative - * value. - */ -static int (*bpf_probe_read_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 45; - -/* - * bpf_get_socket_cookie - * - * If the **struct sk_buff** pointed by *skb* has a known socket, - * retrieve the cookie (generated by the kernel) of this socket. - * If no cookie has been set yet, generate a new cookie. 
Once
- * generated, the socket cookie remains stable for the life of the
- * socket. This helper can be useful for monitoring per-socket
- * networking traffic statistics as it provides a global socket
- * identifier that can be assumed unique.
- *
- * Returns
- * An 8-byte long non-decreasing number on success, or 0 if the
- * socket field is missing inside *skb*.
- */
-static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46;
-
-/*
- * bpf_get_socket_uid
- *
- *
- * Returns
- * The owner UID of the socket associated to *skb*. If the socket
- * is **NULL**, or if it is not a full socket (i.e. if it is a
- * time-wait or a request socket instead), **overflowuid** value
- * is returned (note that **overflowuid** might also be the actual
- * UID value for the socket).
- */
-static __u32 (*bpf_get_socket_uid)(struct __sk_buff *skb) = (void *) 47;
-
-/*
- * bpf_set_hash
- *
- * Set the full hash for *skb* (set the field *skb*\ **->hash**)
- * to value *hash*.
- *
- * Returns
- * 0
- */
-static __u32 (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48;
-
-/*
- * bpf_setsockopt
- *
- * Emulate a call to **setsockopt()** on the socket associated to
- * *bpf_socket*, which must be a full socket. The *level* at
- * which the option resides and the name *optname* of the option
- * must be specified, see **setsockopt(2)** for more information.
- * The option value of length *optlen* is pointed by *optval*.
- *
- * This helper actually implements a subset of **setsockopt()**.
- * It supports the following *level*\ s:
- *
- * * **SOL_SOCKET**, which supports the following *optname*\ s:
- * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
- * * **IPPROTO_TCP**, which supports the following *optname*\ s:
- * **TCP_CONGESTION**, **TCP_BPF_IW**,
- * **TCP_BPF_SNDCWND_CLAMP**.
- * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_setsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49;
-
-/*
- * bpf_skb_adjust_room
- *
- * Grow or shrink the room for data in the packet associated to
- * *skb* by *len_diff*, and according to the selected *mode*.
- *
- * There are two supported modes at this time:
- *
- * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- * (room space is added or removed below the layer 2 header).
- *
- * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * (room space is added or removed below the layer 3 header).
- *
- * The following flags are supported at this time:
- *
- * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
- * Adjusting mss in this way is not allowed for datagrams.
- *
- * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**,
- * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
- * Any new space is reserved to hold a tunnel header.
- * Configure skb offsets and other fields accordingly.
- *
- * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**,
- * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
- * Use with ENCAP_L3 flags to further specify the tunnel type.
- *
- * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*):
- * Use with ENCAP_L3/L4 flags to further specify the tunnel
- * type; *len* is the length of the inner MAC header.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer.
Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 mode, __u64 flags) = (void *) 50; - -/* - * bpf_redirect_map - * - * Redirect the packet to the endpoint referenced by *map* at - * index *key*. Depending on its type, this *map* can contain - * references to net devices (for forwarding packets through other - * ports), or to CPUs (for redirecting XDP frames to another CPU; - * but this is only implemented for native XDP (with driver - * support) as of this writing). - * - * The lower two bits of *flags* are used as the return code if - * the map lookup fails. This is so that the return value can be - * one of the XDP program return codes up to XDP_TX, as chosen by - * the caller. Any higher bits in the *flags* argument must be - * unset. - * - * When used to redirect packets to net devices, this helper - * provides a high performance increase over **bpf_redirect**\ (). - * This is due to various implementation details of the underlying - * mechanisms, one of which is the fact that **bpf_redirect_map**\ - * () tries to send packet as a "bulk" to the device. - * - * Returns - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. - */ -static int (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51; - -/* - * bpf_sk_redirect_map - * - * Redirect the packet to the socket referenced by *map* (of type - * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and - * egress interfaces can be used for redirection. The - * **BPF_F_INGRESS** value in *flags* is used to make the - * distinction (ingress path is selected if the flag is present, - * egress path otherwise). This is the only flag supported for now. - * - * Returns - * **SK_PASS** on success, or **SK_DROP** on error. - */ -static int (*bpf_sk_redirect_map)(struct __sk_buff *skb, void *map, __u32 key, __u64 flags) = (void *) 52; - -/* - * bpf_sock_map_update - * - * Add an entry to, or update a *map* referencing sockets. The - * *skops* is used as a new value for the entry associated to - * *key*. *flags* is one of: - * - * **BPF_NOEXIST** - * The entry for *key* must not exist in the map. - * **BPF_EXIST** - * The entry for *key* must already exist in the map. - * **BPF_ANY** - * No condition on the existence of the entry for *key*. - * - * If the *map* has eBPF programs (parser and verdict), those will - * be inherited by the socket being added. If the socket is - * already attached to eBPF programs, this results in an error. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_sock_map_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 53; - -/* - * bpf_xdp_adjust_meta - * - * Adjust the address pointed by *xdp_md*\ **->data_meta** by - * *delta* (which can be positive or negative). Note that this - * operation modifies the address stored in *xdp_md*\ **->data**, - * so the latter must be loaded only after the helper has been - * called. - * - * The use of *xdp_md*\ **->data_meta** is optional and programs - * are not required to use it. The rationale is that when the - * packet is processed with XDP (e.g. 
as DoS filter), it is
- * possible to push further meta data along with it before passing
- * to the stack, and to give the guarantee that an ingress eBPF
- * program attached as a TC classifier on the same device can pick
- * this up for further post-processing. Since TC works with socket
- * buffers, it remains possible to set from XDP the **mark** or
- * **priority** pointers, or other pointers for the socket buffer.
- * Having this scratch space generic and programmable allows for
- * more flexibility as the user is free to store whatever meta
- * data they need.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54;
-
-/*
- * bpf_perf_event_read_value
- *
- * Read the value of a perf event counter, and store it into *buf*
- * of size *buf_size*. This helper relies on a *map* of type
- * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
- * counter is selected when *map* is updated with perf event file
- * descriptors. The *map* is an array whose size is the number of
- * available CPUs, and each cell contains a value relative to one
- * CPU. The value to retrieve is indicated by *flags*, that
- * contains the index of the CPU to look up, masked with
- * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
- * **BPF_F_CURRENT_CPU** to indicate that the value for the
- * current CPU should be retrieved.
- *
- * This helper behaves in a way close to the
- * **bpf_perf_event_read**\ () helper, save that instead of
- * just returning the value observed, it fills the *buf*
- * structure. This allows for additional data to be retrieved: in
- * particular, the enabled and running times (in *buf*\
- * **->enabled** and *buf*\ **->running**, respectively) are
- * copied. In general, **bpf_perf_event_read_value**\ () is
- * recommended over **bpf_perf_event_read**\ (), which has some
- * ABI issues and provides fewer functionalities.
- *
- * These values are interesting, because hardware PMU (Performance
- * Monitoring Unit) counters are limited resources. When there are
- * more PMU based perf events opened than available counters, the
- * kernel will multiplex these events so each event gets a certain
- * percentage (but not all) of the PMU time. If multiplexing
- * happens, the number of samples or the counter value will not
- * match the case where no multiplexing occurs. This makes
- * comparison between different runs difficult.
- * Typically, the counter value should be normalized before
- * comparing to other experiments. The usual normalization is done
- * as follows.
- *
- * ::
- *
- * normalized_counter = counter * t_enabled / t_running
- *
- * Where t_enabled is the time enabled for the event and t_running
- * is the time running for the event since the last normalization.
- * The enabled and running times are accumulated since the perf
- * event open. To achieve a scaling factor between two invocations
- * of an eBPF program, users can use the CPU id as the key (which
- * is typical for perf array usage model) to remember the previous
- * value and do the calculation inside the eBPF program.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
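- *
- * A sketch of the normalization above (illustrative only; the
- * *counters* map is an assumed **BPF_MAP_TYPE_PERF_EVENT_ARRAY**):
- *
- * ::
- *
- *	__u64 normalized = 0;
- *	struct bpf_perf_event_value v = {};
- *
- *	if (bpf_perf_event_read_value(&counters, BPF_F_CURRENT_CPU,
- *	                              &v, sizeof(v)) == 0 && v.running)
- *		normalized = v.counter * v.enabled / v.running;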
- */
-static int (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 55;
-
-/*
- * bpf_perf_prog_read_value
- *
- * For an eBPF program attached to a perf event, retrieve the
- * value of the event counter associated to *ctx* and store it in
- * the structure pointed by *buf* and of size *buf_size*. Enabled
- * and running times are also stored in the structure (see
- * description of helper **bpf_perf_event_read_value**\ () for
- * more details).
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 56;
-
-/*
- * bpf_getsockopt
- *
- * Emulate a call to **getsockopt()** on the socket associated to
- * *bpf_socket*, which must be a full socket. The *level* at
- * which the option resides and the name *optname* of the option
- * must be specified, see **getsockopt(2)** for more information.
- * The retrieved value is stored in the structure pointed by
- * *optval* and of length *optlen*.
- *
- * This helper actually implements a subset of **getsockopt()**.
- * It supports the following *level*\ s:
- *
- * * **IPPROTO_TCP**, which supports *optname*
- * **TCP_CONGESTION**.
- * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_getsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57;
-
-/*
- * bpf_override_return
- *
- * Used for error injection, this helper uses kprobes to override
- * the return value of the probed function, and to set it to *rc*.
- * The first argument is the context *regs* on which the kprobe
- * works.
- *
- * This helper works by setting the PC (program counter)
- * to an override function which is run in place of the original
- * probed function. This means the probed function is not run at
- * all. The replacement function just returns with the required
- * value.
- *
- * This helper has security implications, and thus is subject to
- * restrictions. It is only available if the kernel was compiled
- * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
- * option, and in this case it only works on functions tagged with
- * **ALLOW_ERROR_INJECTION** in the kernel code.
- *
- * Also, the helper is only available for the architectures having
- * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
- * the x86 architecture is the only one to support this feature.
- *
- * Returns
- * 0
- */
-static int (*bpf_override_return)(struct pt_regs *regs, __u64 rc) = (void *) 58;
-
-/*
- * bpf_sock_ops_cb_flags_set
- *
- * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
- * for the full TCP socket associated to *bpf_sock_ops* to
- * *argval*.
- *
- * The primary use of this field is to determine if there should
- * be calls to eBPF programs of type
- * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
- * code. A program of the same type can change its value, per
- * connection and as necessary, when the connection is
- * established. This field is directly accessible for reading, but
- * this helper must be used for updates in order to return an
- * error if an eBPF program tries to set a callback that is not
- * supported in the current kernel.
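- *
- * A minimal sketch enabling one more callback while keeping the
- * flags already set (illustrative only; *skops* is the
- * **struct bpf_sock_ops** context of the program):
- *
- * ::
- *
- *	bpf_sock_ops_cb_flags_set(skops,
- *	                          skops->bpf_sock_ops_cb_flags |
- *	                          BPF_SOCK_OPS_STATE_CB_FLAG);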
- * - * *argval* is a flag array which can combine these flags: - * - * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) - * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) - * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) - * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) - * - * Therefore, this function can be used to clear a callback flag by - * setting the appropriate bit to zero. e.g. to disable the RTO - * callback: - * - * **bpf_sock_ops_cb_flags_set(bpf_sock,** - * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** - * - * Here are some examples of where one could call such eBPF - * program: - * - * * When RTO fires. - * * When a packet is retransmitted. - * * When the connection terminates. - * * When a packet is sent. - * * When a packet is received. - * - * Returns - * Code **-EINVAL** if the socket is not a full TCP socket; - * otherwise, a positive number containing the bits that could not - * be set is returned (which comes down to 0 if all bits were set - * as required). - */ -static int (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *bpf_sock, int argval) = (void *) 59; - -/* - * bpf_msg_redirect_map - * - * This helper is used in programs implementing policies at the - * socket level. If the message *msg* is allowed to pass (i.e. if - * the verdict eBPF program returns **SK_PASS**), redirect it to - * the socket referenced by *map* (of type - * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and - * egress interfaces can be used for redirection. The - * **BPF_F_INGRESS** value in *flags* is used to make the - * distinction (ingress path is selected if the flag is present, - * egress path otherwise). This is the only flag supported for now. - * - * Returns - * **SK_PASS** on success, or **SK_DROP** on error. - */ -static int (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map, __u32 key, __u64 flags) = (void *) 60; - -/* - * bpf_msg_apply_bytes - * - * For socket policies, apply the verdict of the eBPF program to - * the next *bytes* (number of bytes) of message *msg*. - * - * For example, this helper can be used in the following cases: - * - * * A single **sendmsg**\ () or **sendfile**\ () system call - * contains multiple logical messages that the eBPF program is - * supposed to read and for which it should apply a verdict. - * * An eBPF program only cares to read the first *bytes* of a - * *msg*. If the message has a large payload, then setting up - * and calling the eBPF program repeatedly for all bytes, even - * though the verdict is already known, would create unnecessary - * overhead. - * - * When called from within an eBPF program, the helper sets a - * counter internal to the BPF infrastructure, that is used to - * apply the last verdict to the next *bytes*. If *bytes* is - * smaller than the current data being processed from a - * **sendmsg**\ () or **sendfile**\ () system call, the first - * *bytes* will be sent and the eBPF program will be re-run with - * the pointer for start of data pointing to byte number *bytes* - * **+ 1**. If *bytes* is larger than the current data being - * processed, then the eBPF verdict will be applied to multiple - * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are - * consumed. - * - * Note that if a socket closes with the internal counter holding - * a non-zero value, this is not a problem because data is not - * being buffered for *bytes* and is sent as it is received. 
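- *
- * A minimal sketch (illustrative only; the byte count 128 is an
- * arbitrary example):
- *
- * ::
- *
- *	// apply this verdict to the next 128 bytes as well
- *	bpf_msg_apply_bytes(msg, 128);
- *	return SK_PASS;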
- *
- * Returns
- * 0
- */
-static int (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 61;
-
-/*
- * bpf_msg_cork_bytes
- *
- * For socket policies, prevent the execution of the verdict eBPF
- * program for message *msg* until *bytes* (byte number) have been
- * accumulated.
- *
- * This can be used when one needs a specific number of bytes
- * before a verdict can be assigned, even if the data spans
- * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
- * case would be a user calling **sendmsg**\ () repeatedly with
- * 1-byte long message segments. Obviously, this is bad for
- * performance, but it is still valid. If the eBPF program needs
- * *bytes* bytes to validate a header, this helper can be used to
- * prevent the eBPF program from being called again until *bytes*
- * have been accumulated.
- *
- * Returns
- * 0
- */
-static int (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 62;
-
-/*
- * bpf_msg_pull_data
- *
- * For socket policies, pull in non-linear data from user space
- * for *msg* and set pointers *msg*\ **->data** and *msg*\
- * **->data_end** to *start* and *end* byte offsets into *msg*,
- * respectively.
- *
- * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
- * *msg* it can only parse data that the (**data**, **data_end**)
- * pointers have already consumed. For **sendmsg**\ () hooks this
- * is likely the first scatterlist element. But for calls relying
- * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
- * be the range (**0**, **0**) because the data is shared with
- * user space and by default the objective is to avoid allowing
- * user space to modify data while (or after) eBPF verdict is
- * being decided. This helper can be used to pull in data and to
- * set the start and end pointer to given values. Data will be
- * copied if necessary (i.e. if data was not linear and if start
- * and end pointers do not point to the same chunk).
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * All values for *flags* are reserved for future usage, and must
- * be left at zero.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, __u64 flags) = (void *) 63;
-
-/*
- * bpf_bind
- *
- * Bind the socket associated to *ctx* to the address pointed by
- * *addr*, of length *addr_len*. This allows for making outgoing
- * connections from the desired IP address, which can be useful for
- * example when all processes inside a cgroup should use one
- * single IP address on a host that has multiple IPs configured.
- *
- * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
- * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
- * **AF_INET6**). Looking for a free port to bind to can be
- * expensive, therefore binding to port is not permitted by the
- * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
- * must be set to zero.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) = (void *) 64;
-
-/*
- * bpf_xdp_adjust_tail
- *
- * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes.
It is - * only possible to shrink the packet as of this writing, - * therefore *delta* must be a negative integer. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_xdp_adjust_tail)(struct xdp_md *xdp_md, int delta) = (void *) 65; - -/* - * bpf_skb_get_xfrm_state - * - * Retrieve the XFRM state (IP transform framework, see also - * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. - * - * The retrieved value is stored in the **struct bpf_xfrm_state** - * pointed by *xfrm_state* and of length *size*. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * This helper is available only if the kernel was compiled with - * **CONFIG_XFRM** configuration option. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct bpf_xfrm_state *xfrm_state, __u32 size, __u64 flags) = (void *) 66; - -/* - * bpf_get_stack - * - * Return a user or a kernel stack in bpf program provided buffer. - * To achieve this, the helper needs *ctx*, which is a pointer - * to the context on which the tracing program is executed. - * To store the stacktrace, the bpf program provides *buf* with - * a nonnegative *size*. - * - * The last argument, *flags*, holds the number of stack frames to - * skip (from 0 to 255), masked with - * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set - * the following flags: - * - * **BPF_F_USER_STACK** - * Collect a user space stack instead of a kernel stack. - * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. - * - * **bpf_get_stack**\ () can collect up to - * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject - * to sufficient large buffer size. Note that - * this limit can be controlled with the **sysctl** program, and - * that it should be manually increased in order to profile long - * user stacks (such as stacks for Java programs). To do so, use: - * - * :: - * - * # sysctl kernel.perf_event_max_stack=<new value> - * - * Returns - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. - */ -static int (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67; - -/* - * bpf_skb_load_bytes_relative - * - * This helper is similar to **bpf_skb_load_bytes**\ () in that - * it provides an easy way to load *len* bytes from *offset* - * from the packet associated to *skb*, into the buffer pointed - * by *to*. The difference to **bpf_skb_load_bytes**\ () is that - * a fifth argument *start_header* exists in order to select a - * base offset to start from. *start_header* can be one of: - * - * **BPF_HDR_START_MAC** - * Base offset to load data from is *skb*'s mac header. - * **BPF_HDR_START_NET** - * Base offset to load data from is *skb*'s network header. 
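The stack-collection helper above lends itself to a short illustration. A hypothetical sketch, assuming the usual <linux/bpf.h>, <linux/ptrace.h> and bpf_helpers.h includes; the probe point and buffer size are illustrative, not taken from these sources::

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include "bpf_helpers.h"

    SEC("kprobe/sys_write")
    int capture_stack(struct pt_regs *ctx)
    {
        __u64 stack[64];
        int n;

        /* Skip no frames; collect user-space frames only. */
        n = bpf_get_stack(ctx, stack, sizeof(stack), BPF_F_USER_STACK);
        if (n < 0)
            return 0;   /* no user context, or another failure */

        /* stack[] now holds n bytes of instruction pointers; they
         * could be shipped out via bpf_perf_event_output(). */
        return 0;
    }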
- * - * In general, "direct packet access" is the preferred method to - * access packet data, however, this helper is in particular useful - * in socket filters where *skb*\ **->data** does not always point - * to the start of the mac header and where "direct packet access" - * is not available. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *to, __u32 len, __u32 start_header) = (void *) 68; - -/* - * bpf_fib_lookup - * - * Do FIB lookup in kernel tables using parameters in *params*. - * If lookup is successful and result shows packet is to be - * forwarded, the neighbor tables are searched for the nexthop. - * If successful (ie., FIB lookup shows forwarding and nexthop - * is resolved), the nexthop address is returned in ipv4_dst - * or ipv6_dst based on family, smac is set to mac address of - * egress device, dmac is set to nexthop mac address, rt_metric - * is set to metric from route (IPv4/IPv6 only), and ifindex - * is set to the device index of the nexthop from the FIB lookup. - * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: - * - * **BPF_FIB_LOOKUP_DIRECT** - * Do a direct table lookup vs full lookup using FIB - * rules. - * **BPF_FIB_LOOKUP_OUTPUT** - * Perform lookup from an egress perspective (default is - * ingress). - * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * - * Returns - * * < 0 if any input argument is invalid - * * 0 on success (packet is forwarded, nexthop neighbor exists) - * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the - * packet is not forwarded or needs assist from full stack - */ -static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) 69; - -/* - * bpf_sock_hash_update - * - * Add an entry to, or update a sockhash *map* referencing sockets. - * The *skops* is used as a new value for the entry associated to - * *key*. *flags* is one of: - * - * **BPF_NOEXIST** - * The entry for *key* must not exist in the map. - * **BPF_EXIST** - * The entry for *key* must already exist in the map. - * **BPF_ANY** - * No condition on the existence of the entry for *key*. - * - * If the *map* has eBPF programs (parser and verdict), those will - * be inherited by the socket being added. If the socket is - * already attached to eBPF programs, this results in an error. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_sock_hash_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 70; - -/* - * bpf_msg_redirect_hash - * - * This helper is used in programs implementing policies at the - * socket level. If the message *msg* is allowed to pass (i.e. if - * the verdict eBPF program returns **SK_PASS**), redirect it to - * the socket referenced by *map* (of type - * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and - * egress interfaces can be used for redirection. The - * **BPF_F_INGRESS** value in *flags* is used to make the - * distinction (ingress path is selected if the flag is present, - * egress path otherwise). This is the only flag supported for now. - * - * Returns - * **SK_PASS** on success, or **SK_DROP** on error. 
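The FIB helper documented above is typically paired with **bpf_redirect**\ (). A hypothetical XDP sketch with the header parsing elided; the addresses are placeholders and the program name is illustrative::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("xdp")
    int xdp_fwd(struct xdp_md *ctx)
    {
        struct bpf_fib_lookup fib = {};
        int rc;

        fib.family   = 2;   /* AF_INET */
        fib.ifindex  = ctx->ingress_ifindex;
        fib.ipv4_src = 0;   /* would come from the parsed IP header */
        fib.ipv4_dst = 0;   /* would come from the parsed IP header */

        rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
        if (rc == BPF_FIB_LKUP_RET_SUCCESS)
            /* fib.smac/fib.dmac would be copied into the Ethernet
             * header here before redirecting to the egress device. */
            return bpf_redirect(fib.ifindex, 0);

        return XDP_PASS;    /* not forwarded: let the stack decide */
    }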
- */
-static int (*bpf_msg_redirect_hash)(struct sk_msg_md *msg, void *map, void *key, __u64 flags) = (void *) 71;
-
-/*
- * bpf_sk_redirect_hash
- *
- * This helper is used in programs implementing policies at the
- * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- * if the verdict eBPF program returns **SK_PASS**), redirect it
- * to the socket referenced by *map* (of type
- * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
- * egress interfaces can be used for redirection. The
- * **BPF_F_INGRESS** value in *flags* is used to make the
- * distinction (ingress path is selected if the flag is present,
- * egress otherwise). This is the only flag supported for now.
- *
- * Returns
- * **SK_PASS** on success, or **SK_DROP** on error.
- */
-static int (*bpf_sk_redirect_hash)(struct __sk_buff *skb, void *map, void *key, __u64 flags) = (void *) 72;
-
-/*
- * bpf_lwt_push_encap
- *
- * Encapsulate the packet associated to *skb* within a Layer 3
- * protocol header. This header is provided in the buffer at
- * address *hdr*, with *len* its size in bytes. *type* indicates
- * the protocol of the header and can be one of:
- *
- * **BPF_LWT_ENCAP_SEG6**
- * IPv6 encapsulation with Segment Routing Header
- * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
- * the IPv6 header is computed by the kernel.
- * **BPF_LWT_ENCAP_SEG6_INLINE**
- * Only works if *skb* contains an IPv6 packet. Insert a
- * Segment Routing Header (**struct ipv6_sr_hdr**) inside
- * the IPv6 header.
- * **BPF_LWT_ENCAP_IP**
- * IP encapsulation (GRE/GUE/IPIP/etc). The outer header
- * must be IPv4 or IPv6, followed by zero or more
- * additional headers, up to **LWT_BPF_MAX_HEADROOM**
- * total bytes in all prepended headers. Please note that
- * if **skb_is_gso**\ (*skb*) is true, no more than two
- * headers can be prepended, and the inner header, if
- * present, should be either GRE or UDP/GUE.
- *
- * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs
- * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can
- * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and
- * **BPF_PROG_TYPE_LWT_XMIT**.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type, void *hdr, __u32 len) = (void *) 73;
-
-/*
- * bpf_lwt_seg6_store_bytes
- *
- * Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. Only the flags, tag and TLVs
- * inside the outermost IPv6 Segment Routing Header can be
- * modified through this helper.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
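A hypothetical sketch of the sockhash verdict pattern that **bpf_msg_redirect_hash**\ () above enables, using the legacy **struct bpf_map_def** style defined by this embedded bpf_helpers.h; the map name and key scheme are illustrative::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") sock_hash = {
        .type        = BPF_MAP_TYPE_SOCKHASH,
        .key_size    = sizeof(__u32),
        .value_size  = sizeof(__u64),
        .max_entries = 64,
    };

    SEC("sk_msg")
    int msg_verdict(struct sk_msg_md *msg)
    {
        __u32 key = msg->remote_port;   /* any key scheme will do */

        /* SK_PASS if the message was redirected, SK_DROP otherwise. */
        return bpf_msg_redirect_hash(msg, &sock_hash, &key, BPF_F_INGRESS);
    }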
- */
-static int (*bpf_lwt_seg6_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len) = (void *) 74;
-
-/*
- * bpf_lwt_seg6_adjust_srh
- *
- * Adjust the size allocated to TLVs in the outermost IPv6
- * Segment Routing Header contained in the packet associated to
- * *skb*, at position *offset* by *delta* bytes. Only offsets
- * after the segments are accepted. *delta* can be positive
- * (growing) as well as negative (shrinking).
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_lwt_seg6_adjust_srh)(struct __sk_buff *skb, __u32 offset, __s32 delta) = (void *) 75;
-
-/*
- * bpf_lwt_seg6_action
- *
- * Apply an IPv6 Segment Routing action of type *action* to the
- * packet associated to *skb*. Each action takes a parameter
- * contained at address *param*, and of length *param_len* bytes.
- * *action* can be one of:
- *
- * **SEG6_LOCAL_ACTION_END_X**
- * End.X action: Endpoint with Layer-3 cross-connect.
- * Type of *param*: **struct in6_addr**.
- * **SEG6_LOCAL_ACTION_END_T**
- * End.T action: Endpoint with specific IPv6 table lookup.
- * Type of *param*: **int**.
- * **SEG6_LOCAL_ACTION_END_B6**
- * End.B6 action: Endpoint bound to an SRv6 policy.
- * Type of *param*: **struct ipv6_sr_hdr**.
- * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
- * End.B6.Encap action: Endpoint bound to an SRv6
- * encapsulation policy.
- * Type of *param*: **struct ipv6_sr_hdr**.
- *
- * A call to this helper is susceptible to change the underlying
- * packet buffer. Therefore, at load time, all checks on pointers
- * previously done by the verifier are invalidated and must be
- * performed again, if the helper is used in combination with
- * direct packet access.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_lwt_seg6_action)(struct __sk_buff *skb, __u32 action, void *param, __u32 param_len) = (void *) 76;
-
-/*
- * bpf_rc_repeat
- *
- * This helper is used in programs implementing IR decoding, to
- * report a successfully decoded repeat key message. This delays
- * the generation of a key up event for the previously generated
- * key down event.
- *
- * Some IR protocols like NEC have a special IR message for
- * repeating the last button, for when a button is held down.
- *
- * The *ctx* should point to the lirc sample as passed into
- * the program.
- *
- * This helper is only available if the kernel was compiled with
- * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
- * "**y**".
- *
- * Returns
- * 0
- */
-static int (*bpf_rc_repeat)(void *ctx) = (void *) 77;
-
-/*
- * bpf_rc_keydown
- *
- * This helper is used in programs implementing IR decoding, to
- * report a successfully decoded key press with *scancode*,
- * *toggle* value in the given *protocol*. The scancode will be
- * translated to a keycode using the rc keymap, and reported as
- * an input key down event. After a period a key up event is
- * generated. This period can be extended by calling either
- * **bpf_rc_keydown**\ () again with the same values, or calling
- * **bpf_rc_repeat**\ ().
- *
- * Some protocols include a toggle bit, in case the button was
- * released and pressed again between consecutive scancodes.
- *
- * The *ctx* should point to the lirc sample as passed into
- * the program.
- *
- * The *protocol* is the decoded protocol number (see
- * **enum rc_proto** for some predefined values).
- *
- * This helper is only available if the kernel was compiled with
- * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
- * "**y**".
- *
- * Returns
- * 0
- */
-static int (*bpf_rc_keydown)(void *ctx, __u32 protocol, __u64 scancode, __u32 toggle) = (void *) 78;
-
-/*
- * bpf_skb_cgroup_id
- *
- * Return the cgroup v2 id of the socket associated with the *skb*.
- * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
- * helper for cgroup v1 by providing a tag resp. identifier that
- * can be matched on or used for map lookups e.g. to implement
- * policy. The cgroup v2 id of a given path in the hierarchy is
- * exposed in user space through the f_handle API in order to get
- * to the same 64-bit id.
- *
- * This helper can be used on TC egress path, but not on ingress,
- * and is available only if the kernel was compiled with the
- * **CONFIG_SOCK_CGROUP_DATA** configuration option.
- *
- * Returns
- * The id is returned or 0 in case the id could not be retrieved.
- */
-static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79;
-
-/*
- * bpf_get_current_cgroup_id
- *
- *
- * Returns
- * A 64-bit integer containing the current cgroup id based
- * on the cgroup within which the current task is running.
- */
-static __u64 (*bpf_get_current_cgroup_id)(void) = (void *) 80;
-
-/*
- * bpf_get_local_storage
- *
- * Get the pointer to the local storage area.
- * The type and the size of the local storage are defined
- * by the *map* argument.
- * The *flags* meaning is specific for each map type,
- * and has to be 0 for cgroup local storage.
- *
- * Depending on the BPF program type, a local storage area
- * can be shared between multiple instances of the BPF program,
- * running simultaneously.
- *
- * Users must take care of the synchronization themselves,
- * for example by using the **BPF_STX_XADD** instruction to alter
- * the shared data.
- *
- * Returns
- * A pointer to the local storage area.
- */
-static void *(*bpf_get_local_storage)(void *map, __u64 flags) = (void *) 81;
-
-/*
- * bpf_sk_select_reuseport
- *
- * Select a **SO_REUSEPORT** socket from a
- * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
- * It checks that the selected socket matches the incoming
- * request in the socket buffer.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_sk_select_reuseport)(struct sk_reuseport_md *reuse, void *map, void *key, __u64 flags) = (void *) 82;
-
-/*
- * bpf_skb_ancestor_cgroup_id
- *
- * Return id of cgroup v2 that is ancestor of cgroup associated
- * with the *skb* at the *ancestor_level*. The root cgroup is at
- * *ancestor_level* zero and each step down the hierarchy
- * increments the level. If *ancestor_level* == level of cgroup
- * associated with *skb*, then return value will be same as that
- * of **bpf_skb_cgroup_id**\ ().
- *
- * The helper is useful to implement policies based on cgroups
- * that are higher in the hierarchy than the immediate cgroup
- * associated with *skb*.
- *
- * The format of the returned id and the helper limitations are
- * the same as in **bpf_skb_cgroup_id**\ ().
- *
- * Returns
- * The id is returned or 0 in case the id could not be retrieved.
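A hypothetical sketch of **bpf_get_local_storage**\ () with cgroup local storage, counting egress packets per cgroup; all names are illustrative::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") pkt_cnt = {
        .type        = BPF_MAP_TYPE_CGROUP_STORAGE,
        .key_size    = sizeof(struct bpf_cgroup_storage_key),
        .value_size  = sizeof(__u64),
        .max_entries = 0,   /* cgroup storage takes no entry count */
    };

    SEC("cgroup_skb/egress")
    int count_egress(struct __sk_buff *skb)
    {
        /* flags must be 0 for cgroup local storage (see above). */
        __u64 *cnt = bpf_get_local_storage(&pkt_cnt, 0);

        /* Instances of the program may run concurrently, so
         * synchronize the update with an atomic add. */
        __sync_fetch_and_add(cnt, 1);
        return 1;   /* allow the packet */
    }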
- */
-static __u64 (*bpf_skb_ancestor_cgroup_id)(struct __sk_buff *skb, int ancestor_level) = (void *) 83;
-
-/*
- * bpf_sk_lookup_tcp
- *
- * Look for TCP socket matching *tuple*, optionally in a child
- * network namespace *netns*. The return value must be checked,
- * and if non-**NULL**, released via **bpf_sk_release**\ ().
- *
- * The *ctx* should point to the context of the program, such as
- * the skb or socket (depending on the hook in use). This is used
- * to determine the base network namespace for the lookup.
- *
- * *tuple_size* must be one of:
- *
- * **sizeof**\ (*tuple*\ **->ipv4**)
- * Look for an IPv4 socket.
- * **sizeof**\ (*tuple*\ **->ipv6**)
- * Look for an IPv6 socket.
- *
- * If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx*
- * will be used. For the TC hooks, this is the netns of the device
- * in the skb. For socket hooks, this is the netns of the socket.
- * If *netns* is any other signed 32-bit value greater than or
- * equal to zero then it specifies the ID of the netns relative to
- * the netns associated with the *ctx*. *netns* values beyond the
- * range of 32-bit integers are reserved for future use.
- *
- * All values for *flags* are reserved for future usage, and must
- * be left at zero.
- *
- * This helper is available only if the kernel was compiled with
- * **CONFIG_NET** configuration option.
- *
- * Returns
- * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
- * For sockets with reuseport option, the **struct bpf_sock**
- * result is from *reuse*\ **->socks**\ [] using the hash of the
- * tuple.
- */
-static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 84;
-
-/*
- * bpf_sk_lookup_udp
- *
- * Look for UDP socket matching *tuple*, optionally in a child
- * network namespace *netns*. The return value must be checked,
- * and if non-**NULL**, released via **bpf_sk_release**\ ().
- *
- * The *ctx* should point to the context of the program, such as
- * the skb or socket (depending on the hook in use). This is used
- * to determine the base network namespace for the lookup.
- *
- * *tuple_size* must be one of:
- *
- * **sizeof**\ (*tuple*\ **->ipv4**)
- * Look for an IPv4 socket.
- * **sizeof**\ (*tuple*\ **->ipv6**)
- * Look for an IPv6 socket.
- *
- * If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx*
- * will be used. For the TC hooks, this is the netns of the device
- * in the skb. For socket hooks, this is the netns of the socket.
- * If *netns* is any other signed 32-bit value greater than or
- * equal to zero then it specifies the ID of the netns relative to
- * the netns associated with the *ctx*. *netns* values beyond the
- * range of 32-bit integers are reserved for future use.
- *
- * All values for *flags* are reserved for future usage, and must
- * be left at zero.
- *
- * This helper is available only if the kernel was compiled with
- * **CONFIG_NET** configuration option.
- *
- * Returns
- * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
- * For sockets with reuseport option, the **struct bpf_sock**
- * result is from *reuse*\ **->socks**\ [] using the hash of the
- * tuple.
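A hypothetical sketch of the lookup-then-release contract that the two socket-lookup helpers above impose; the tuple values are placeholders that a real program would fill from parsed headers::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("xdp")
    int find_socket(struct xdp_md *ctx)
    {
        struct bpf_sock_tuple tuple = {};
        struct bpf_sock *sk;

        tuple.ipv4.saddr = 0;   /* placeholders: real values come */
        tuple.ipv4.daddr = 0;   /* from the parsed IPv4/TCP headers */
        tuple.ipv4.sport = 0;
        tuple.ipv4.dport = 0;

        sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
                               BPF_F_CURRENT_NETNS, 0);
        if (sk) {
            /* ... inspect sk->state, sk->family, ... */
            bpf_sk_release(sk);   /* mandatory for non-NULL results */
        }
        return XDP_PASS;
    }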
- */
-static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 85;
-
-/*
- * bpf_sk_release
- *
- * Release the reference held by *sock*. *sock* must be a
- * non-**NULL** pointer that was returned from
- * **bpf_sk_lookup_xxx**\ ().
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_sk_release)(struct bpf_sock *sock) = (void *) 86;
-
-/*
- * bpf_map_push_elem
- *
- * Push an element *value* into *map*. *flags* is one of:
- *
- * **BPF_EXIST**
- * If the queue/stack is full, the oldest element is
- * removed to make room for this.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_map_push_elem)(void *map, const void *value, __u64 flags) = (void *) 87;
-
-/*
- * bpf_map_pop_elem
- *
- * Pop an element from *map*.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_map_pop_elem)(void *map, void *value) = (void *) 88;
-
-/*
- * bpf_map_peek_elem
- *
- * Get an element from *map* without removing it.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_map_peek_elem)(void *map, void *value) = (void *) 89;
-
-/*
- * bpf_msg_push_data
- *
- * For socket policies, insert *len* bytes into *msg* at offset
- * *start*.
- *
- * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
- * *msg* it may want to insert metadata or options into the *msg*.
- * This can later be read and used by any of the lower layer BPF
- * hooks.
- *
- * This helper may fail under memory pressure (if a malloc
- * fails); in these cases the BPF program will get an appropriate
- * error and will need to handle it.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_msg_push_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 90;
-
-/*
- * bpf_msg_pop_data
- *
- * Remove *len* bytes from a *msg* starting at byte *start*.
- * This may result in **ENOMEM** errors under certain situations if
- * an allocation and copy are required due to a full ring buffer.
- * However, the helper will try to avoid doing the allocation
- * if possible. Other errors can occur if input parameters are
- * invalid, either due to the *start* byte not being a valid part
- * of the *msg* payload and/or the *pop* value being too large.
- *
- * Returns
- * 0 on success, or a negative error in case of failure.
- */
-static int (*bpf_msg_pop_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 91;
-
-/*
- * bpf_rc_pointer_rel
- *
- * This helper is used in programs implementing IR decoding, to
- * report a successfully decoded pointer movement.
- *
- * The *ctx* should point to the lirc sample as passed into
- * the program.
- *
- * This helper is only available if the kernel was compiled with
- * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
- * "**y**".
- *
- * Returns
- * 0
- */
-static int (*bpf_rc_pointer_rel)(void *ctx, __s32 rel_x, __s32 rel_y) = (void *) 92;
-
-/*
- * bpf_spin_lock
- *
- * Acquire a spinlock represented by the pointer *lock*, which is
- * stored as part of a value of a map. Taking the lock allows one
- * to safely update the rest of the fields in that value. The
- * spinlock can (and must) later be released with a call to
- * **bpf_spin_unlock**\ (\ *lock*\ ).
- *
- * Spinlocks in BPF programs come with a number of restrictions
- * and constraints:
- *
- * * **bpf_spin_lock** objects are only allowed inside maps of
- * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
- * list could be extended in the future).
- * * BTF description of the map is mandatory.
- * * The BPF program can take ONE lock at a time, since taking two
- * or more could cause deadlocks.
- * * Only one **struct bpf_spin_lock** is allowed per map element.
- * * When the lock is taken, calls (either BPF to BPF or helpers)
- * are not allowed.
- * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
- * allowed inside a spinlock-ed region.
- * * The BPF program MUST call **bpf_spin_unlock**\ () to release
- * the lock, on all execution paths, before it returns.
- * * The BPF program can access **struct bpf_spin_lock** only via
- * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
- * helpers. Loading or storing data into the **struct
- * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
- * * To use the **bpf_spin_lock**\ () helper, the BTF description
- * of the map value must be a struct and have a **struct
- * bpf_spin_lock** *anyname*\ **;** field at the top level.
- * A nested lock inside another struct is not allowed.
- * * The **struct bpf_spin_lock** *lock* field in a map value must
- * be aligned on a multiple of 4 bytes in that value.
- * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
- * the **bpf_spin_lock** field to user space.
- * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
- * a BPF program, does not update the **bpf_spin_lock** field.
- * * **bpf_spin_lock** cannot be on the stack or inside a
- * networking packet (it can only be inside of a map value).
- * * **bpf_spin_lock** is available to root only.
- * * Tracing programs and socket filter programs cannot use
- * **bpf_spin_lock**\ () due to insufficient preemption checks
- * (but this may change in the future).
- * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
- *
- * Returns
- * 0
- */
-static int (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) 93;
-
-/*
- * bpf_spin_unlock
- *
- * Release the *lock* previously locked by a call to
- * **bpf_spin_lock**\ (\ *lock*\ ).
- *
- * Returns
- * 0
- */
-static int (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) 94;
-
-/*
- * bpf_sk_fullsock
- *
- * This helper gets a **struct bpf_sock** pointer such
- * that all the fields in this **bpf_sock** can be accessed.
- *
- * Returns
- * A **struct bpf_sock** pointer on success, or **NULL** in
- * case of failure.
- */
-static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) 95;
-
-/*
- * bpf_tcp_sock
- *
- * This helper gets a **struct bpf_tcp_sock** pointer from a
- * **struct bpf_sock** pointer.
- *
- * Returns
- * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
- * case of failure.
- */
-static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) 96;
-
-/*
- * bpf_skb_ecn_set_ce
- *
- * Set ECN (Explicit Congestion Notification) field of IP header
- * to **CE** (Congestion Encountered) if current value is **ECT**
- * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
- * and IPv4.
- *
- * Returns
- * 1 if the **CE** flag is set (either by the current helper call
- * or because it was already present), 0 if it is not set.
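A hypothetical sketch of a value protected by **bpf_spin_lock**\ () that observes the restrictions listed above (BTF-described map, one top-level lock, unlock on every path, no helper calls inside the critical section); it uses the __uint/__type macros from this embedded bpf_helpers.h, and all names are illustrative::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct counter_val {
        struct bpf_spin_lock lock;   /* top-level field, 4-byte aligned */
        __u64 packets;
        __u64 bytes;
    };

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, __u32);
        __type(value, struct counter_val);
    } stats SEC(".maps");

    SEC("classifier")
    int count(struct __sk_buff *skb)
    {
        __u32 key = 0;
        struct counter_val *val = bpf_map_lookup_elem(&stats, &key);

        if (val) {
            bpf_spin_lock(&val->lock);
            val->packets += 1;           /* both fields are updated */
            val->bytes += skb->len;      /* consistently, under the lock */
            bpf_spin_unlock(&val->lock); /* on every path before return */
        }
        return 0;   /* TC_ACT_OK */
    }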
- */
-static int (*bpf_skb_ecn_set_ce)(struct __sk_buff *skb) = (void *) 97;
-
-/*
- * bpf_get_listener_sock
- *
- * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
- * **bpf_sk_release**\ () is unnecessary and not allowed.
- *
- * Returns
- * A **struct bpf_sock** pointer on success, or **NULL** in
- * case of failure.
- */
-static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) 98;
-
-/*
- * bpf_skc_lookup_tcp
- *
- * Look for TCP socket matching *tuple*, optionally in a child
- * network namespace *netns*. The return value must be checked,
- * and if non-**NULL**, released via **bpf_sk_release**\ ().
- *
- * This function is identical to **bpf_sk_lookup_tcp**\ (), except
- * that it also returns timewait or request sockets. Use
- * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the
- * full structure.
- *
- * This helper is available only if the kernel was compiled with
- * **CONFIG_NET** configuration option.
- *
- * Returns
- * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
- * For sockets with reuseport option, the **struct bpf_sock**
- * result is from *reuse*\ **->socks**\ [] using the hash of the
- * tuple.
- */
-static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 99;
-
-/*
- * bpf_tcp_check_syncookie
- *
- * Check whether *iph* and *th* contain a valid SYN cookie ACK for
- * the listening socket in *sk*.
- *
- * *iph* points to the start of the IPv4 or IPv6 header, while
- * *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
- *
- * *th* points to the start of the TCP header, while *th_len*
- * contains **sizeof**\ (**struct tcphdr**).
- *
- *
- * Returns
- * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
- * error otherwise.
- */
-static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 100;
-
-/*
- * bpf_sysctl_get_name
- *
- * Get the name of the sysctl in /proc/sys/ and copy it into the
- * buffer *buf* of size *buf_len* provided by the program.
- *
- * The buffer is always NUL terminated, unless it's zero-sized.
- *
- * If *flags* is zero, the full name (e.g. "net/ipv4/tcp_mem") is
- * copied. Use the **BPF_F_SYSCTL_BASE_NAME** flag to copy the
- * base name only (e.g. "tcp_mem").
- *
- * Returns
- * Number of characters copied (not including the trailing NUL).
- *
- * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
- * the truncated name in this case).
- */
-static int (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len, __u64 flags) = (void *) 101;
-
-/*
- * bpf_sysctl_get_current_value
- *
- * Get the current value of the sysctl as it is presented in
- * /proc/sys (incl. newline, etc), and copy it as a string into
- * the buffer *buf* of size *buf_len* provided by the program.
- *
- * The whole value is copied, no matter what file position user
- * space issued e.g. sys_read at.
- *
- * The buffer is always NUL terminated, unless it's zero-sized.
- *
- * Returns
- * Number of characters copied (not including the trailing NUL).
- *
- * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
- * the truncated value in this case).
- *
- * **-EINVAL** if the current value was unavailable, e.g. because
- * the sysctl is uninitialized and read returns -EIO for it.
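A hypothetical sketch of a **BPF_PROG_TYPE_CGROUP_SYSCTL** program built on the sysctl helpers above; the buffer size and the permissive policy are illustrative::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("cgroup/sysctl")
    int sysctl_audit(struct bpf_sysctl *ctx)
    {
        char name[64];
        int len;

        /* Base name only, e.g. "tcp_mem" instead of "net/ipv4/tcp_mem". */
        len = bpf_sysctl_get_name(ctx, name, sizeof(name),
                                  BPF_F_SYSCTL_BASE_NAME);
        if (len < 0)
            return 1;   /* -E2BIG: name was truncated; allow anyway */

        /* name[] could now be matched against an allow/deny list. */
        return 1;       /* 1 = allow the sysctl access, 0 = reject */
    }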
- */
-static int (*bpf_sysctl_get_current_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 102;
-
-/*
- * bpf_sysctl_get_new_value
- *
- * Get the new value being written by user space to the sysctl
- * (before the actual write happens) and copy it as a string into
- * the buffer *buf* of size *buf_len* provided by the program.
- *
- * User space may write the new value at a file position > 0.
- *
- * The buffer is always NUL terminated, unless it's zero-sized.
- *
- * Returns
- * Number of characters copied (not including the trailing NUL).
- *
- * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
- * the truncated value in this case).
- *
- * **-EINVAL** if the sysctl is being read.
- */
-static int (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 103;
-
-/*
- * bpf_sysctl_set_new_value
- *
- * Override the new value being written by user space to the
- * sysctl with the value provided by the program in the buffer
- * *buf* of size *buf_len*.
- *
- * *buf* should contain a string in the same form as provided by
- * user space on sysctl write.
- *
- * User space may write the new value at a file position > 0. To
- * override the whole sysctl value, the file position should be
- * set to zero.
- *
- * Returns
- * 0 on success.
- *
- * **-E2BIG** if the *buf_len* is too big.
- *
- * **-EINVAL** if the sysctl is being read.
- */
-static int (*bpf_sysctl_set_new_value)(struct bpf_sysctl *ctx, const char *buf, unsigned long buf_len) = (void *) 104;
-
-/*
- * bpf_strtol
- *
- * Convert the initial part of the string from buffer *buf* of
- * size *buf_len* to a long integer according to the given base
- * and save the result in *res*.
- *
- * The string may begin with an arbitrary amount of white space
- * (as determined by **isspace**\ (3)) followed by a single
- * optional '**-**' sign.
- *
- * The five least significant bits of *flags* encode the base;
- * other bits are currently unused.
- *
- * The base must be either 8, 10, 16 or 0 to detect it
- * automatically, similar to user space **strtol**\ (3).
- *
- * Returns
- * Number of characters consumed on success. Must be positive but
- * no more than *buf_len*.
- *
- * **-EINVAL** if no valid digits were found or an unsupported
- * base was provided.
- *
- * **-ERANGE** if the resulting value was out of range.
- */
-static int (*bpf_strtol)(const char *buf, unsigned long buf_len, __u64 flags, long *res) = (void *) 105;
-
-/*
- * bpf_strtoul
- *
- * Convert the initial part of the string from buffer *buf* of
- * size *buf_len* to an unsigned long integer according to the
- * given base and save the result in *res*.
- *
- * The string may begin with an arbitrary amount of white space
- * (as determined by **isspace**\ (3)).
- *
- * The five least significant bits of *flags* encode the base;
- * other bits are currently unused.
- *
- * The base must be either 8, 10, 16 or 0 to detect it
- * automatically, similar to user space **strtoul**\ (3).
- *
- * Returns
- * Number of characters consumed on success. Must be positive but
- * no more than *buf_len*.
- *
- * **-EINVAL** if no valid digits were found or an unsupported
- * base was provided.
- *
- * **-ERANGE** if the resulting value was out of range.
- */
-static int (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags, unsigned long *res) = (void *) 106;
-
-/*
- * bpf_sk_storage_get
- *
- * Get a bpf-local-storage from a *sk*.
- *
- * Logically, it could be thought of as getting the value from
- * a *map* with *sk* as the **key**.
From this
- * perspective, the usage is not much different from
- * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except that this
- * helper enforces the key must be a full socket and the map must
- * be a **BPF_MAP_TYPE_SK_STORAGE**.
- *
- * Underneath, the value is stored locally at *sk* instead of
- * the *map*. The *map* is used as the bpf-local-storage
- * "type". The bpf-local-storage "type" (i.e. the *map*) is
- * searched against all bpf-local-storages residing at *sk*.
- *
- * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
- * used such that a new bpf-local-storage will be
- * created if one does not exist. *value* can be used
- * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify
- * the initial value of a bpf-local-storage. If *value* is
- * **NULL**, the new bpf-local-storage will be zero initialized.
- *
- * Returns
- * A bpf-local-storage pointer is returned on success.
- *
- * **NULL** if not found or there was an error in adding
- * a new bpf-local-storage.
- */
-static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, void *value, __u64 flags) = (void *) 107;
-
-/*
- * bpf_sk_storage_delete
- *
- * Delete a bpf-local-storage from a *sk*.
- *
- * Returns
- * 0 on success.
- *
- * **-ENOENT** if the bpf-local-storage cannot be found.
- */
-static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = (void *) 108;
-
-/*
- * bpf_send_signal
- *
- * Send signal *sig* to the current task.
- *
- * Returns
- * 0 on success or successfully queued.
- *
- * **-EBUSY** if the work queue under nmi is full.
- *
- * **-EINVAL** if *sig* is invalid.
- *
- * **-EPERM** if no permission to send the *sig*.
- *
- * **-EAGAIN** if the bpf program can try again.
- */
-static int (*bpf_send_signal)(__u32 sig) = (void *) 109;
-
-/*
- * bpf_tcp_gen_syncookie
- *
- * Try to issue a SYN cookie for the packet with corresponding
- * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
- *
- * *iph* points to the start of the IPv4 or IPv6 header, while
- * *iph_len* contains **sizeof**\ (**struct iphdr**) or
- * **sizeof**\ (**struct ip6hdr**).
- *
- * *th* points to the start of the TCP header, while *th_len*
- * contains the length of the TCP header.
- *
- *
- * Returns
- * On success, the lower 32 bits hold the generated SYN cookie,
- * followed by 16 bits which hold the MSS value for that cookie,
- * and the top 16 bits are unused.
- *
- * On failure, the returned value is one of the following:
- *
- * **-EINVAL** SYN cookie cannot be issued due to error
- *
- * **-ENOENT** SYN cookie should not be issued (no SYN flood)
- *
- * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
- *
- * **-EPROTONOSUPPORT** IP packet version is not 4 or 6
- */
-static __s64 (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 110;
-
-/*
- * bpf_skb_output
- *
- * Write raw *data* blob into a special BPF perf event held by
- * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
- * event must have the following attributes: **PERF_SAMPLE_RAW**
- * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
- * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
- *
- * The *flags* are used to indicate the index in *map* for which
- * the value must be put, masked with **BPF_F_INDEX_MASK**.
- * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
- * to indicate that the index of the current CPU core should be
- * used.
- * - * The value to write, of *size*, is passed through eBPF stack and - * pointed by *data*. - * - * *ctx* is a pointer to in-kernel struct sk_buff. - * - * This helper is similar to **bpf_perf_event_output**\ () but - * restricted to raw_tracepoint bpf programs. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_skb_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 111; - -/* - * bpf_probe_read_user - * - * Safely attempt to read *size* bytes from user space address - * *unsafe_ptr* and store the data in *dst*. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112; - -/* - * bpf_probe_read_kernel - * - * Safely attempt to read *size* bytes from kernel space address - * *unsafe_ptr* and store the data in *dst*. - * - * Returns - * 0 on success, or a negative error in case of failure. - */ -static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113; - -/* - * bpf_probe_read_user_str - * - * Copy a NUL terminated string from an unsafe user address - * *unsafe_ptr* to *dst*. The *size* should include the - * terminating NUL byte. In case the string length is smaller than - * *size*, the target is not padded with further NUL bytes. If the - * string length is larger than *size*, just *size*-1 bytes are - * copied and the last byte is set to NUL. - * - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly to get its length at runtime. See - * the following snippet: - * - * :: - * - * SEC("kprobe/sys_open") - * void bpf_sys_open(struct pt_regs *ctx) - * { - * char buf[PATHLEN]; // PATHLEN is defined to 256 - * int res = bpf_probe_read_user_str(buf, sizeof(buf), - * ctx->di); - * - * // Consume buf, for example push it to - * // userspace via bpf_perf_event_output(); we - * // can use res (the string length) as event - * // size, after checking its boundaries. - * } - * - * In comparison, using **bpf_probe_read_user()** helper here - * instead to read the string would require to estimate the length - * at compile time, and would often result in copying more memory - * than necessary. - * - * Another useful use case is when parsing individual process - * arguments or individual environment variables navigating - * *current*\ **->mm->arg_start** and *current*\ - * **->mm->env_start**: using this helper and the return value, - * one can quickly iterate at the right offset of the memory area. - * - * Returns - * On success, the strictly positive length of the string, - * including the trailing NUL character. On error, a negative - * value. - */ -static int (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 114; - -/* - * bpf_probe_read_kernel_str - * - * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* - * to *dst*. Same semantics as with bpf_probe_read_user_str() apply. - * - * Returns - * On success, the strictly positive length of the string, including - * the trailing NUL character. On error, a negative value. 
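A hypothetical sketch of the two-step pattern the kernel-space variants enable: first read a struct member that is itself a pointer, then the string behind it. The probed function, the pared-down struct mirror and the PT_REGS_PARM2() macro (from the bpf_tracing.h shown further below) are illustrative assumptions::

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include "bpf_helpers.h"
    #include "bpf_tracing.h"

    /* Minimal local mirror of the kernel's struct filename: only the
     * one member this sketch dereferences. */
    struct filename {
        const char *name;
    };

    SEC("kprobe/do_unlinkat")
    int trace_unlink(struct pt_regs *ctx)
    {
        struct filename *fn = (struct filename *)PT_REGS_PARM2(ctx);
        const char *p;
        char path[64];

        /* First read the pointer stored in the kernel struct... */
        bpf_probe_read_kernel(&p, sizeof(p), &fn->name);
        /* ...then the NUL-terminated string it points to. */
        bpf_probe_read_kernel_str(path, sizeof(path), p);
        return 0;
    }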
- */ -static int (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115; - - diff --git a/src/contrib/libbpf/bpf/bpf_helpers.h b/src/contrib/libbpf/bpf/bpf_helpers.h deleted file mode 100644 index 0c7d28292..000000000 --- a/src/contrib/libbpf/bpf/bpf_helpers.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_HELPERS__ -#define __BPF_HELPERS__ - -#include "bpf_helper_defs.h" - -#define __uint(name, val) int (*name)[val] -#define __type(name, val) typeof(val) *name - -/* Helper macro to print out debug messages */ -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - -/* - * Helper macro to place programs, maps, license in - * different sections in elf_bpf file. Section names - * are interpreted by elf_bpf loader - */ -#define SEC(NAME) __attribute__((section(NAME), used)) - -#ifndef __always_inline -#define __always_inline __attribute__((always_inline)) -#endif - -/* - * Helper structure used by eBPF C program - * to describe BPF map attributes to libbpf loader - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -}; - -enum libbpf_pin_type { - LIBBPF_PIN_NONE, - /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ - LIBBPF_PIN_BY_NAME, -}; - -#endif diff --git a/src/contrib/libbpf/bpf/bpf_prog_linfo.c b/src/contrib/libbpf/bpf/bpf_prog_linfo.c deleted file mode 100644 index 3ed1a27b5..000000000 --- a/src/contrib/libbpf/bpf/bpf_prog_linfo.c +++ /dev/null @@ -1,246 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2018 Facebook */ - -#include <string.h> -#include <stdlib.h> -#include <linux/err.h> -#include <linux/bpf.h> -#include "libbpf.h" -#include "libbpf_internal.h" - -struct bpf_prog_linfo { - void *raw_linfo; - void *raw_jited_linfo; - __u32 *nr_jited_linfo_per_func; - __u32 *jited_linfo_func_idx; - __u32 nr_linfo; - __u32 nr_jited_func; - __u32 rec_size; - __u32 jited_rec_size; -}; - -static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, - const __u64 *ksym_func, const __u32 *ksym_len) -{ - __u32 nr_jited_func, nr_linfo; - const void *raw_jited_linfo; - const __u64 *jited_linfo; - __u64 last_jited_linfo; - /* - * Index to raw_jited_linfo: - * i: Index for searching the next ksym_func - * prev_i: Index to the last found ksym_func - */ - __u32 i, prev_i; - __u32 f; /* Index to ksym_func */ - - raw_jited_linfo = prog_linfo->raw_jited_linfo; - jited_linfo = raw_jited_linfo; - if (ksym_func[0] != *jited_linfo) - goto errout; - - prog_linfo->jited_linfo_func_idx[0] = 0; - nr_jited_func = prog_linfo->nr_jited_func; - nr_linfo = prog_linfo->nr_linfo; - - for (prev_i = 0, i = 1, f = 1; - i < nr_linfo && f < nr_jited_func; - i++) { - raw_jited_linfo += prog_linfo->jited_rec_size; - last_jited_linfo = *jited_linfo; - jited_linfo = raw_jited_linfo; - - if (ksym_func[f] == *jited_linfo) { - prog_linfo->jited_linfo_func_idx[f] = i; - - /* Sanity check */ - if (last_jited_linfo - ksym_func[f - 1] + 1 > - ksym_len[f - 1]) - goto errout; - - prog_linfo->nr_jited_linfo_per_func[f - 1] = - i - prev_i; - prev_i = i; - - /* - * The ksym_func[f] is found in jited_linfo. - * Look for the next one. 
- */ - f++; - } else if (*jited_linfo <= last_jited_linfo) { - /* Ensure the addr is increasing _within_ a func */ - goto errout; - } - } - - if (f != nr_jited_func) - goto errout; - - prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = - nr_linfo - prev_i; - - return 0; - -errout: - return -EINVAL; -} - -void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) -{ - if (!prog_linfo) - return; - - free(prog_linfo->raw_linfo); - free(prog_linfo->raw_jited_linfo); - free(prog_linfo->nr_jited_linfo_per_func); - free(prog_linfo->jited_linfo_func_idx); - free(prog_linfo); -} - -struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) -{ - struct bpf_prog_linfo *prog_linfo; - __u32 nr_linfo, nr_jited_func; - __u64 data_sz; - - nr_linfo = info->nr_line_info; - - if (!nr_linfo) - return NULL; - - /* - * The min size that bpf_prog_linfo has to access for - * searching purpose. - */ - if (info->line_info_rec_size < - offsetof(struct bpf_line_info, file_name_off)) - return NULL; - - prog_linfo = calloc(1, sizeof(*prog_linfo)); - if (!prog_linfo) - return NULL; - - /* Copy xlated line_info */ - prog_linfo->nr_linfo = nr_linfo; - prog_linfo->rec_size = info->line_info_rec_size; - data_sz = (__u64)nr_linfo * prog_linfo->rec_size; - prog_linfo->raw_linfo = malloc(data_sz); - if (!prog_linfo->raw_linfo) - goto err_free; - memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz); - - nr_jited_func = info->nr_jited_ksyms; - if (!nr_jited_func || - !info->jited_line_info || - info->nr_jited_line_info != nr_linfo || - info->jited_line_info_rec_size < sizeof(__u64) || - info->nr_jited_func_lens != nr_jited_func || - !info->jited_ksyms || - !info->jited_func_lens) - /* Not enough info to provide jited_line_info */ - return prog_linfo; - - /* Copy jited_line_info */ - prog_linfo->nr_jited_func = nr_jited_func; - prog_linfo->jited_rec_size = info->jited_line_info_rec_size; - data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size; - prog_linfo->raw_jited_linfo = malloc(data_sz); - if (!prog_linfo->raw_jited_linfo) - goto err_free; - memcpy(prog_linfo->raw_jited_linfo, - (void *)(long)info->jited_line_info, data_sz); - - /* Number of jited_line_info per jited func */ - prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * - sizeof(__u32)); - if (!prog_linfo->nr_jited_linfo_per_func) - goto err_free; - - /* - * For each jited func, - * the start idx to the "linfo" and "jited_linfo" array, - */ - prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * - sizeof(__u32)); - if (!prog_linfo->jited_linfo_func_idx) - goto err_free; - - if (dissect_jited_func(prog_linfo, - (__u64 *)(long)info->jited_ksyms, - (__u32 *)(long)info->jited_func_lens)) - goto err_free; - - return prog_linfo; - -err_free: - bpf_prog_linfo__free(prog_linfo); - return NULL; -} - -const struct bpf_line_info * -bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, - __u64 addr, __u32 func_idx, __u32 nr_skip) -{ - __u32 jited_rec_size, rec_size, nr_linfo, start, i; - const void *raw_jited_linfo, *raw_linfo; - const __u64 *jited_linfo; - - if (func_idx >= prog_linfo->nr_jited_func) - return NULL; - - nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; - if (nr_skip >= nr_linfo) - return NULL; - - start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; - jited_rec_size = prog_linfo->jited_rec_size; - raw_jited_linfo = prog_linfo->raw_jited_linfo + - (start * jited_rec_size); - jited_linfo = raw_jited_linfo; - if (addr < *jited_linfo) - return NULL; - - nr_linfo -= nr_skip; - 
rec_size = prog_linfo->rec_size; - raw_linfo = prog_linfo->raw_linfo + (start * rec_size); - for (i = 0; i < nr_linfo; i++) { - if (addr < *jited_linfo) - break; - - raw_linfo += rec_size; - raw_jited_linfo += jited_rec_size; - jited_linfo = raw_jited_linfo; - } - - return raw_linfo - rec_size; -} - -const struct bpf_line_info * -bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, - __u32 insn_off, __u32 nr_skip) -{ - const struct bpf_line_info *linfo; - __u32 rec_size, nr_linfo, i; - const void *raw_linfo; - - nr_linfo = prog_linfo->nr_linfo; - if (nr_skip >= nr_linfo) - return NULL; - - rec_size = prog_linfo->rec_size; - raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); - linfo = raw_linfo; - if (insn_off < linfo->insn_off) - return NULL; - - nr_linfo -= nr_skip; - for (i = 0; i < nr_linfo; i++) { - if (insn_off < linfo->insn_off) - break; - - raw_linfo += rec_size; - linfo = raw_linfo; - } - - return raw_linfo - rec_size; -} diff --git a/src/contrib/libbpf/bpf/bpf_tracing.h b/src/contrib/libbpf/bpf/bpf_tracing.h deleted file mode 100644 index b0dafe8b4..000000000 --- a/src/contrib/libbpf/bpf/bpf_tracing.h +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_TRACING_H__ -#define __BPF_TRACING_H__ - -/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ -#if defined(__TARGET_ARCH_x86) - #define bpf_target_x86 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_s390) - #define bpf_target_s390 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_arm) - #define bpf_target_arm - #define bpf_target_defined -#elif defined(__TARGET_ARCH_arm64) - #define bpf_target_arm64 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_mips) - #define bpf_target_mips - #define bpf_target_defined -#elif defined(__TARGET_ARCH_powerpc) - #define bpf_target_powerpc - #define bpf_target_defined -#elif defined(__TARGET_ARCH_sparc) - #define bpf_target_sparc - #define bpf_target_defined -#else - #undef bpf_target_defined -#endif - -/* Fall back to what the compiler says */ -#ifndef bpf_target_defined -#if defined(__x86_64__) - #define bpf_target_x86 -#elif defined(__s390__) - #define bpf_target_s390 -#elif defined(__arm__) - #define bpf_target_arm -#elif defined(__aarch64__) - #define bpf_target_arm64 -#elif defined(__mips__) - #define bpf_target_mips -#elif defined(__powerpc__) - #define bpf_target_powerpc -#elif defined(__sparc__) - #define bpf_target_sparc -#endif -#endif - -#if defined(bpf_target_x86) - -#ifdef __KERNEL__ -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) ((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) ((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) -#else -#ifdef __i386__ -/* i386 kernel is built with -mregparm=3 */ -#define PT_REGS_PARM1(x) ((x)->eax) -#define PT_REGS_PARM2(x) ((x)->edx) -#define PT_REGS_PARM3(x) ((x)->ecx) -#define PT_REGS_PARM4(x) 0 -#define PT_REGS_PARM5(x) 0 -#define PT_REGS_RET(x) ((x)->esp) -#define PT_REGS_FP(x) ((x)->ebp) -#define PT_REGS_RC(x) ((x)->eax) -#define PT_REGS_SP(x) ((x)->esp) -#define PT_REGS_IP(x) ((x)->eip) -#else -#define PT_REGS_PARM1(x) ((x)->rdi) -#define PT_REGS_PARM2(x) ((x)->rsi) -#define PT_REGS_PARM3(x) ((x)->rdx) -#define PT_REGS_PARM4(x) ((x)->rcx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->rsp) -#define PT_REGS_FP(x) 
((x)->rbp) -#define PT_REGS_RC(x) ((x)->rax) -#define PT_REGS_SP(x) ((x)->rsp) -#define PT_REGS_IP(x) ((x)->rip) -#endif -#endif - -#elif defined(bpf_target_s390) - -/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_S390 const volatile user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) -#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) -#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) -#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) -#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) -#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) -#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) - -#elif defined(bpf_target_arm) - -#define PT_REGS_PARM1(x) ((x)->uregs[0]) -#define PT_REGS_PARM2(x) ((x)->uregs[1]) -#define PT_REGS_PARM3(x) ((x)->uregs[2]) -#define PT_REGS_PARM4(x) ((x)->uregs[3]) -#define PT_REGS_PARM5(x) ((x)->uregs[4]) -#define PT_REGS_RET(x) ((x)->uregs[14]) -#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->uregs[0]) -#define PT_REGS_SP(x) ((x)->uregs[13]) -#define PT_REGS_IP(x) ((x)->uregs[12]) - -#elif defined(bpf_target_arm64) - -/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_ARM64 const volatile struct user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) -#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) -#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) -#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) -#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) -#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) - -#elif defined(bpf_target_mips) - -#define PT_REGS_PARM1(x) ((x)->regs[4]) -#define PT_REGS_PARM2(x) ((x)->regs[5]) -#define PT_REGS_PARM3(x) ((x)->regs[6]) -#define PT_REGS_PARM4(x) ((x)->regs[7]) -#define PT_REGS_PARM5(x) ((x)->regs[8]) -#define PT_REGS_RET(x) ((x)->regs[31]) -#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[1]) -#define PT_REGS_SP(x) ((x)->regs[29]) -#define PT_REGS_IP(x) ((x)->cp0_epc) - -#elif defined(bpf_target_powerpc) - -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#elif defined(bpf_target_sparc) - -#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) - -/* Should this also be a bpf_target check for the sparc case? 
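The point of the PT_REGS_* dispatch above is that one program source works unchanged on every architecture this header knows about. A hypothetical sketch; the probed symbol is illustrative::

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include "bpf_helpers.h"
    #include "bpf_tracing.h"

    SEC("kprobe/ip_rcv")
    int on_ip_rcv(struct pt_regs *ctx)
    {
        /* First function argument, whatever the calling convention:
         * rdi on x86_64, regs[0] on arm64, gprs[2] on s390, ... */
        void *skb = (void *)PT_REGS_PARM1(ctx);

        bpf_printk("ip_rcv skb=%p\n", skb);
        return 0;
    }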
*/ -#if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#else -#define PT_REGS_IP(x) ((x)->pc) -#endif - -#endif - -#if defined(bpf_target_powerpc) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_sparc) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#else -#define BPF_KPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) -#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) -#endif - -#endif diff --git a/src/contrib/libbpf/bpf/btf.c b/src/contrib/libbpf/bpf/btf.c deleted file mode 100644 index 88efa2bb7..000000000 --- a/src/contrib/libbpf/bpf/btf.c +++ /dev/null @@ -1,2884 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2018 Facebook */ - -#include <endian.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include <linux/err.h> -#include <linux/btf.h> -#include <gelf.h> -#include "btf.h" -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" -#include "hashmap.h" - -#define BTF_MAX_NR_TYPES 0x7fffffff -#define BTF_MAX_STR_OFFSET 0x7fffffff - -static struct btf_type btf_void; - -struct btf { - union { - struct btf_header *hdr; - void *data; - }; - struct btf_type **types; - const char *strings; - void *nohdr_data; - __u32 nr_types; - __u32 types_size; - __u32 data_size; - int fd; -}; - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64) (unsigned long) ptr; -} - -static int btf_add_type(struct btf *btf, struct btf_type *t) -{ - if (btf->types_size - btf->nr_types < 2) { - struct btf_type **new_types; - __u32 expand_by, new_size; - - if (btf->types_size == BTF_MAX_NR_TYPES) - return -E2BIG; - - expand_by = max(btf->types_size >> 2, 16); - new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); - - new_types = realloc(btf->types, sizeof(*new_types) * new_size); - if (!new_types) - return -ENOMEM; - - if (btf->nr_types == 0) - new_types[0] = &btf_void; - - btf->types = new_types; - btf->types_size = new_size; - } - - btf->types[++(btf->nr_types)] = t; - - return 0; -} - -static int btf_parse_hdr(struct btf *btf) -{ - const struct btf_header *hdr = btf->hdr; - __u32 meta_left; - - if (btf->data_size < sizeof(struct btf_header)) { - pr_debug("BTF header not found\n"); - return -EINVAL; - } - - if (hdr->magic != BTF_MAGIC) { - pr_debug("Invalid BTF magic:%x\n", hdr->magic); - return -EINVAL; - } - - if (hdr->version != BTF_VERSION) { - pr_debug("Unsupported BTF version:%u\n", hdr->version); - return -ENOTSUP; - } - - if (hdr->flags) { - pr_debug("Unsupported BTF flags:%x\n", hdr->flags); - return -ENOTSUP; - } - - meta_left = btf->data_size - sizeof(*hdr); - if (!meta_left) { - pr_debug("BTF has no data\n"); - return -EINVAL; - } - - if (meta_left < hdr->type_off) { - pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); - return -EINVAL; - } - - if (meta_left < hdr->str_off) { - pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); - return -EINVAL; - } - - if (hdr->type_off >= hdr->str_off) { - pr_debug("BTF type section offset >= string section offset. 
No type?\n"); - return -EINVAL; - } - - if (hdr->type_off & 0x02) { - pr_debug("BTF type section is not aligned to 4 bytes\n"); - return -EINVAL; - } - - btf->nohdr_data = btf->hdr + 1; - - return 0; -} - -static int btf_parse_str_sec(struct btf *btf) -{ - const struct btf_header *hdr = btf->hdr; - const char *start = btf->nohdr_data + hdr->str_off; - const char *end = start + btf->hdr->str_len; - - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || - start[0] || end[-1]) { - pr_debug("Invalid BTF string section\n"); - return -EINVAL; - } - - btf->strings = start; - - return 0; -} - -static int btf_type_size(struct btf_type *t) -{ - int base_size = sizeof(struct btf_type); - __u16 vlen = btf_vlen(t); - - switch (btf_kind(t)) { - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - return base_size; - case BTF_KIND_INT: - return base_size + sizeof(__u32); - case BTF_KIND_ENUM: - return base_size + vlen * sizeof(struct btf_enum); - case BTF_KIND_ARRAY: - return base_size + sizeof(struct btf_array); - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - return base_size + vlen * sizeof(struct btf_member); - case BTF_KIND_FUNC_PROTO: - return base_size + vlen * sizeof(struct btf_param); - case BTF_KIND_VAR: - return base_size + sizeof(struct btf_var); - case BTF_KIND_DATASEC: - return base_size + vlen * sizeof(struct btf_var_secinfo); - default: - pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); - return -EINVAL; - } -} - -static int btf_parse_type_sec(struct btf *btf) -{ - struct btf_header *hdr = btf->hdr; - void *nohdr_data = btf->nohdr_data; - void *next_type = nohdr_data + hdr->type_off; - void *end_type = nohdr_data + hdr->str_off; - - while (next_type < end_type) { - struct btf_type *t = next_type; - int type_size; - int err; - - type_size = btf_type_size(t); - if (type_size < 0) - return type_size; - next_type += type_size; - err = btf_add_type(btf, t); - if (err) - return err; - } - - return 0; -} - -__u32 btf__get_nr_types(const struct btf *btf) -{ - return btf->nr_types; -} - -const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) -{ - if (type_id > btf->nr_types) - return NULL; - - return btf->types[type_id]; -} - -static bool btf_type_is_void(const struct btf_type *t) -{ - return t == &btf_void || btf_is_fwd(t); -} - -static bool btf_type_is_void_or_null(const struct btf_type *t) -{ - return !t || btf_type_is_void(t); -} - -#define MAX_RESOLVE_DEPTH 32 - -__s64 btf__resolve_size(const struct btf *btf, __u32 type_id) -{ - const struct btf_array *array; - const struct btf_type *t; - __u32 nelems = 1; - __s64 size = -1; - int i; - - t = btf__type_by_id(btf, type_id); - for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); - i++) { - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - case BTF_KIND_DATASEC: - size = t->size; - goto done; - case BTF_KIND_PTR: - size = sizeof(void *); - goto done; - case BTF_KIND_TYPEDEF: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - case BTF_KIND_VAR: - type_id = t->type; - break; - case BTF_KIND_ARRAY: - array = btf_array(t); - if (nelems && array->nelems > UINT32_MAX / nelems) - return -E2BIG; - nelems *= array->nelems; - type_id = array->type; - break; - default: - return -EINVAL; - } - - t = btf__type_by_id(btf, type_id); - } - -done: - if (size < 0) - return -EINVAL; - if (nelems && size > UINT32_MAX / 
nelems) - return -E2BIG; - - return nelems * size; -} - -int btf__resolve_type(const struct btf *btf, __u32 type_id) -{ - const struct btf_type *t; - int depth = 0; - - t = btf__type_by_id(btf, type_id); - while (depth < MAX_RESOLVE_DEPTH && - !btf_type_is_void_or_null(t) && - (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) { - type_id = t->type; - t = btf__type_by_id(btf, type_id); - depth++; - } - - if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t)) - return -EINVAL; - - return type_id; -} - -__s32 btf__find_by_name(const struct btf *btf, const char *type_name) -{ - __u32 i; - - if (!strcmp(type_name, "void")) - return 0; - - for (i = 1; i <= btf->nr_types; i++) { - const struct btf_type *t = btf->types[i]; - const char *name = btf__name_by_offset(btf, t->name_off); - - if (name && !strcmp(type_name, name)) - return i; - } - - return -ENOENT; -} - -__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, - __u32 kind) -{ - __u32 i; - - if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) - return 0; - - for (i = 1; i <= btf->nr_types; i++) { - const struct btf_type *t = btf->types[i]; - const char *name; - - if (btf_kind(t) != kind) - continue; - name = btf__name_by_offset(btf, t->name_off); - if (name && !strcmp(type_name, name)) - return i; - } - - return -ENOENT; -} - -void btf__free(struct btf *btf) -{ - if (!btf) - return; - - if (btf->fd != -1) - close(btf->fd); - - free(btf->data); - free(btf->types); - free(btf); -} - -struct btf *btf__new(__u8 *data, __u32 size) -{ - struct btf *btf; - int err; - - btf = calloc(1, sizeof(struct btf)); - if (!btf) - return ERR_PTR(-ENOMEM); - - btf->fd = -1; - - btf->data = malloc(size); - if (!btf->data) { - err = -ENOMEM; - goto done; - } - - memcpy(btf->data, data, size); - btf->data_size = size; - - err = btf_parse_hdr(btf); - if (err) - goto done; - - err = btf_parse_str_sec(btf); - if (err) - goto done; - - err = btf_parse_type_sec(btf); - -done: - if (err) { - btf__free(btf); - return ERR_PTR(err); - } - - return btf; -} - -static bool btf_check_endianness(const GElf_Ehdr *ehdr) -{ -#if __BYTE_ORDER == __LITTLE_ENDIAN - return ehdr->e_ident[EI_DATA] == ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN - return ehdr->e_ident[EI_DATA] == ELFDATA2MSB; -#else -# error "Unrecognized __BYTE_ORDER__" -#endif -} - -struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) -{ - Elf_Data *btf_data = NULL, *btf_ext_data = NULL; - int err = 0, fd = -1, idx = 0; - struct btf *btf = NULL; - Elf_Scn *scn = NULL; - Elf *elf = NULL; - GElf_Ehdr ehdr; - - if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warn("failed to init libelf for %s\n", path); - return ERR_PTR(-LIBBPF_ERRNO__LIBELF); - } - - fd = open(path, O_RDONLY); - if (fd < 0) { - err = -errno; - pr_warn("failed to open %s: %s\n", path, strerror(errno)); - return ERR_PTR(err); - } - - err = -LIBBPF_ERRNO__FORMAT; - - elf = elf_begin(fd, ELF_C_READ, NULL); - if (!elf) { - pr_warn("failed to open %s as ELF file\n", path); - goto done; - } - if (!gelf_getehdr(elf, &ehdr)) { - pr_warn("failed to get EHDR from %s\n", path); - goto done; - } - if (!btf_check_endianness(&ehdr)) { - pr_warn("non-native ELF endianness is not supported\n"); - goto done; - } - if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { - pr_warn("failed to get e_shstrndx from %s\n", path); - goto done; - } - - while ((scn = elf_nextscn(elf, scn)) != NULL) { - GElf_Shdr sh; - char *name; - - idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - pr_warn("failed to get section(%d) 
header from %s\n", - idx, path); - goto done; - } - name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); - if (!name) { - pr_warn("failed to get section(%d) name from %s\n", - idx, path); - goto done; - } - if (strcmp(name, BTF_ELF_SEC) == 0) { - btf_data = elf_getdata(scn, 0); - if (!btf_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } - continue; - } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { - btf_ext_data = elf_getdata(scn, 0); - if (!btf_ext_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } - continue; - } - } - - err = 0; - - if (!btf_data) { - err = -ENOENT; - goto done; - } - btf = btf__new(btf_data->d_buf, btf_data->d_size); - if (IS_ERR(btf)) - goto done; - - if (btf_ext && btf_ext_data) { - *btf_ext = btf_ext__new(btf_ext_data->d_buf, - btf_ext_data->d_size); - if (IS_ERR(*btf_ext)) - goto done; - } else if (btf_ext) { - *btf_ext = NULL; - } -done: - if (elf) - elf_end(elf); - close(fd); - - if (err) - return ERR_PTR(err); - /* - * btf is always parsed before btf_ext, so no need to clean up - * btf_ext, if btf loading failed - */ - if (IS_ERR(btf)) - return btf; - if (btf_ext && IS_ERR(*btf_ext)) { - btf__free(btf); - err = PTR_ERR(*btf_ext); - return ERR_PTR(err); - } - return btf; -} - -static int compare_vsi_off(const void *_a, const void *_b) -{ - const struct btf_var_secinfo *a = _a; - const struct btf_var_secinfo *b = _b; - - return a->offset - b->offset; -} - -static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, - struct btf_type *t) -{ - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; - struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; - - if (!name) { - pr_debug("No name found in string section for DATASEC kind.\n"); - return -ENOENT; - } - - ret = bpf_object__section_size(obj, name, &size); - if (ret || !size || (t->size && t->size != size)) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } - - t->size = size; - - for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { - t_var = btf__type_by_id(btf, vsi->type); - var = btf_var(t_var); - - if (!btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); - return -EINVAL; - } - - if (var->linkage == BTF_VAR_STATIC) - continue; - - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); - return -ENOENT; - } - - ret = bpf_object__variable_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); - return -ENOENT; - } - - vsi->offset = off; - } - - qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); - return 0; -} - -int btf__finalize_data(struct bpf_object *obj, struct btf *btf) -{ - int err = 0; - __u32 i; - - for (i = 1; i <= btf->nr_types; i++) { - struct btf_type *t = btf->types[i]; - - /* Loader needs to fix up some of the things compiler - * couldn't get its hands on while emitting BTF. This - * is section size and global variable offset. We use - * the info from the ELF itself for this purpose. 
- */ - if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); - if (err) - break; - } - } - - return err; -} - -int btf__load(struct btf *btf) -{ - __u32 log_buf_size = BPF_LOG_BUF_SIZE; - char *log_buf = NULL; - int err = 0; - - if (btf->fd >= 0) - return -EEXIST; - - log_buf = malloc(log_buf_size); - if (!log_buf) - return -ENOMEM; - - *log_buf = 0; - - btf->fd = bpf_load_btf(btf->data, btf->data_size, - log_buf, log_buf_size, false); - if (btf->fd < 0) { - err = -errno; - pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); - if (*log_buf) - pr_warn("%s\n", log_buf); - goto done; - } - -done: - free(log_buf); - return err; -} - -int btf__fd(const struct btf *btf) -{ - return btf->fd; -} - -const void *btf__get_raw_data(const struct btf *btf, __u32 *size) -{ - *size = btf->data_size; - return btf->data; -} - -const char *btf__name_by_offset(const struct btf *btf, __u32 offset) -{ - if (offset < btf->hdr->str_len) - return &btf->strings[offset]; - else - return NULL; -} - -int btf__get_from_id(__u32 id, struct btf **btf) -{ - struct bpf_btf_info btf_info = { 0 }; - __u32 len = sizeof(btf_info); - __u32 last_size; - int btf_fd; - void *ptr; - int err; - - err = 0; - *btf = NULL; - btf_fd = bpf_btf_get_fd_by_id(id); - if (btf_fd < 0) - return 0; - - /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so - * let's start with a sane default - 4KiB here - and resize it only if - * bpf_obj_get_info_by_fd() needs a bigger buffer. - */ - btf_info.btf_size = 4096; - last_size = btf_info.btf_size; - ptr = malloc(last_size); - if (!ptr) { - err = -ENOMEM; - goto exit_free; - } - - memset(ptr, 0, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - - if (!err && btf_info.btf_size > last_size) { - void *temp_ptr; - - last_size = btf_info.btf_size; - temp_ptr = realloc(ptr, last_size); - if (!temp_ptr) { - err = -ENOMEM; - goto exit_free; - } - ptr = temp_ptr; - memset(ptr, 0, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - } - - if (err || btf_info.btf_size > last_size) { - err = errno; - goto exit_free; - } - - *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size); - if (IS_ERR(*btf)) { - err = PTR_ERR(*btf); - *btf = NULL; - } - -exit_free: - close(btf_fd); - free(ptr); - - return err; -} - -int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, - __u32 expected_key_size, __u32 expected_value_size, - __u32 *key_type_id, __u32 *value_type_id) -{ - const struct btf_type *container_type; - const struct btf_member *key, *value; - const size_t max_name = 256; - char container_name[max_name]; - __s64 key_size, value_size; - __s32 container_id; - - if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == - max_name) { - pr_warn("map:%s length of '____btf_map_%s' is too long\n", - map_name, map_name); - return -EINVAL; - } - - container_id = btf__find_by_name(btf, container_name); - if (container_id < 0) { - pr_debug("map:%s container_name:%s cannot be found in BTF. 
Missing BPF_ANNOTATE_KV_PAIR?\n", - map_name, container_name); - return container_id; - } - - container_type = btf__type_by_id(btf, container_id); - if (!container_type) { - pr_warn("map:%s cannot find BTF type for container_id:%u\n", - map_name, container_id); - return -EINVAL; - } - - if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { - pr_warn("map:%s container_name:%s is an invalid container struct\n", - map_name, container_name); - return -EINVAL; - } - - key = btf_members(container_type); - value = key + 1; - - key_size = btf__resolve_size(btf, key->type); - if (key_size < 0) { - pr_warn("map:%s invalid BTF key_type_size\n", map_name); - return key_size; - } - - if (expected_key_size != key_size) { - pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map_name, (__u32)key_size, expected_key_size); - return -EINVAL; - } - - value_size = btf__resolve_size(btf, value->type); - if (value_size < 0) { - pr_warn("map:%s invalid BTF value_type_size\n", map_name); - return value_size; - } - - if (expected_value_size != value_size) { - pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map_name, (__u32)value_size, expected_value_size); - return -EINVAL; - } - - *key_type_id = key->type; - *value_type_id = value->type; - - return 0; -} - -struct btf_ext_sec_setup_param { - __u32 off; - __u32 len; - __u32 min_rec_size; - struct btf_ext_info *ext_info; - const char *desc; -}; - -static int btf_ext_setup_info(struct btf_ext *btf_ext, - struct btf_ext_sec_setup_param *ext_sec) -{ - const struct btf_ext_info_sec *sinfo; - struct btf_ext_info *ext_info; - __u32 info_left, record_size; - /* The start of the info sec (including the __u32 record_size). */ - void *info; - - if (ext_sec->len == 0) - return 0; - - if (ext_sec->off & 0x03) { - pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", - ext_sec->desc); - return -EINVAL; - } - - info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; - info_left = ext_sec->len; - - if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) { - pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", - ext_sec->desc, ext_sec->off, ext_sec->len); - return -EINVAL; - } - - /* At least a record size */ - if (info_left < sizeof(__u32)) { - pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); - return -EINVAL; - } - - /* The record size needs to meet the minimum standard */ - record_size = *(__u32 *)info; - if (record_size < ext_sec->min_rec_size || - record_size & 0x03) { - pr_debug("%s section in .BTF.ext has invalid record size %u\n", - ext_sec->desc, record_size); - return -EINVAL; - } - - sinfo = info + sizeof(__u32); - info_left -= sizeof(__u32); - - /* If no records, return failure now so .BTF.ext won't be used. 
*/ -	if (!info_left) { -		pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); -		return -EINVAL; -	} - -	while (info_left) { -		unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); -		__u64 total_record_size; -		__u32 num_records; - -		if (info_left < sec_hdrlen) { -			pr_debug("%s section header is not found in .BTF.ext\n", -				 ext_sec->desc); -			return -EINVAL; -		} - -		num_records = sinfo->num_info; -		if (num_records == 0) { -			pr_debug("%s section has incorrect num_records in .BTF.ext\n", -				 ext_sec->desc); -			return -EINVAL; -		} - -		total_record_size = sec_hdrlen + -				    (__u64)num_records * record_size; -		if (info_left < total_record_size) { -			pr_debug("%s section has incorrect num_records in .BTF.ext\n", -				 ext_sec->desc); -			return -EINVAL; -		} - -		info_left -= total_record_size; -		sinfo = (void *)sinfo + total_record_size; -	} - -	ext_info = ext_sec->ext_info; -	ext_info->len = ext_sec->len - sizeof(__u32); -	ext_info->rec_size = record_size; -	ext_info->info = info + sizeof(__u32); - -	return 0; -} - -static int btf_ext_setup_func_info(struct btf_ext *btf_ext) -{ -	struct btf_ext_sec_setup_param param = { -		.off = btf_ext->hdr->func_info_off, -		.len = btf_ext->hdr->func_info_len, -		.min_rec_size = sizeof(struct bpf_func_info_min), -		.ext_info = &btf_ext->func_info, -		.desc = "func_info" -	}; - -	return btf_ext_setup_info(btf_ext, &param); -} - -static int btf_ext_setup_line_info(struct btf_ext *btf_ext) -{ -	struct btf_ext_sec_setup_param param = { -		.off = btf_ext->hdr->line_info_off, -		.len = btf_ext->hdr->line_info_len, -		.min_rec_size = sizeof(struct bpf_line_info_min), -		.ext_info = &btf_ext->line_info, -		.desc = "line_info", -	}; - -	return btf_ext_setup_info(btf_ext, &param); -} - -static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext) -{ -	struct btf_ext_sec_setup_param param = { -		.off = btf_ext->hdr->field_reloc_off, -		.len = btf_ext->hdr->field_reloc_len, -		.min_rec_size = sizeof(struct bpf_field_reloc), -		.ext_info = &btf_ext->field_reloc_info, -		.desc = "field_reloc", -	}; - -	return btf_ext_setup_info(btf_ext, &param); -} - -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) -{ -	const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - -	if (data_size < offsetofend(struct btf_ext_header, hdr_len) || -	    data_size < hdr->hdr_len) { -		pr_debug("BTF.ext header not found"); -		return -EINVAL; -	} - -	if (hdr->magic != BTF_MAGIC) { -		pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); -		return -EINVAL; -	} - -	if (hdr->version != BTF_VERSION) { -		pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); -		return -ENOTSUP; -	} - -	if (hdr->flags) { -		pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags); -		return -ENOTSUP; -	} - -	if (data_size == hdr->hdr_len) { -		pr_debug("BTF.ext has no data\n"); -		return -EINVAL; -	} - -	return 0; -} - -void btf_ext__free(struct btf_ext *btf_ext) -{ -	if (!btf_ext) -		return; -	free(btf_ext->data); -	free(btf_ext); -} - -struct btf_ext *btf_ext__new(__u8 *data, __u32 size) -{ -	struct btf_ext *btf_ext; -	int err; - -	err = btf_ext_parse_hdr(data, size); -	if (err) -		return ERR_PTR(err); - -	btf_ext = calloc(1, sizeof(struct btf_ext)); -	if (!btf_ext) -		return ERR_PTR(-ENOMEM); - -	btf_ext->data_size = size; -	btf_ext->data = malloc(size); -	if (!btf_ext->data) { -		err = -ENOMEM; -		goto done; -	} -	memcpy(btf_ext->data, data, size); - -	if (btf_ext->hdr->hdr_len < -	    offsetofend(struct btf_ext_header, line_info_len)) -		goto done; -	err = btf_ext_setup_func_info(btf_ext); -	if (err) -		goto done; - -	err = 
btf_ext_setup_line_info(btf_ext); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < - offsetofend(struct btf_ext_header, field_reloc_len)) - goto done; - err = btf_ext_setup_field_reloc(btf_ext); - if (err) - goto done; - -done: - if (err) { - btf_ext__free(btf_ext); - return ERR_PTR(err); - } - - return btf_ext; -} - -const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) -{ - *size = btf_ext->data_size; - return btf_ext->data; -} - -static int btf_ext_reloc_info(const struct btf *btf, - const struct btf_ext_info *ext_info, - const char *sec_name, __u32 insns_cnt, - void **info, __u32 *cnt) -{ - __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); - __u32 i, record_size, existing_len, records_len; - struct btf_ext_info_sec *sinfo; - const char *info_sec_name; - __u64 remain_len; - void *data; - - record_size = ext_info->rec_size; - sinfo = ext_info->info; - remain_len = ext_info->len; - while (remain_len > 0) { - records_len = sinfo->num_info * record_size; - info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); - if (strcmp(info_sec_name, sec_name)) { - remain_len -= sec_hdrlen + records_len; - sinfo = (void *)sinfo + sec_hdrlen + records_len; - continue; - } - - existing_len = (*cnt) * record_size; - data = realloc(*info, existing_len + records_len); - if (!data) - return -ENOMEM; - - memcpy(data + existing_len, sinfo->data, records_len); - /* adjust insn_off only, the rest data will be passed - * to the kernel. - */ - for (i = 0; i < sinfo->num_info; i++) { - __u32 *insn_off; - - insn_off = data + existing_len + (i * record_size); - *insn_off = *insn_off / sizeof(struct bpf_insn) + - insns_cnt; - } - *info = data; - *cnt += sinfo->num_info; - return 0; - } - - return -ENOENT; -} - -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt) -{ - return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, - insns_cnt, func_info, cnt); -} - -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt) -{ - return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, - insns_cnt, line_info, cnt); -} - -__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) -{ - return btf_ext->func_info.rec_size; -} - -__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) -{ - return btf_ext->line_info.rec_size; -} - -struct btf_dedup; - -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts); -static void btf_dedup_free(struct btf_dedup *d); -static int btf_dedup_strings(struct btf_dedup *d); -static int btf_dedup_prim_types(struct btf_dedup *d); -static int btf_dedup_struct_types(struct btf_dedup *d); -static int btf_dedup_ref_types(struct btf_dedup *d); -static int btf_dedup_compact_types(struct btf_dedup *d); -static int btf_dedup_remap_types(struct btf_dedup *d); - -/* - * Deduplicate BTF types and strings. - * - * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF - * section with all BTF type descriptors and string data. It overwrites that - * memory in-place with deduplicated types and strings without any loss of - * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section - * is provided, all the strings referenced from .BTF.ext section are honored - * and updated to point to the right offsets after deduplication. 
- * - * If the function returns with an error, type/string data might be garbled and should - * be discarded. - * - * A more verbose and detailed description of both the problem btf_dedup is - * solving and of the solution can be found at: - * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html - * - * Problem description and justification - * ===================================== - * - * BTF type information is typically emitted either as a result of conversion - * from DWARF to BTF or directly by the compiler. In both cases, each compilation - * unit contains information about a subset of all the types that are used - * in an application. These subsets are frequently overlapping and contain a lot - * of duplicated information when later concatenated together into a single - * binary. This algorithm ensures that each unique type is represented by a single - * BTF type descriptor, greatly reducing the resulting size of BTF data. - * - * Compilation unit isolation and subsequent duplication of data is not the only - * problem. The same type hierarchy (e.g., a struct and all the types that the struct - * references) in different compilation units can be represented in BTF to - * various degrees of completeness (or, rather, incompleteness) due to - * struct/union forward declarations. - * - * Let's take a look at an example that we'll use to better understand the - * problem (and solution). Suppose we have two compilation units, each using the - * same `struct S`, but each of them having incomplete type information about - * the struct's fields: - * - * // CU #1: - * struct S; - * struct A { - * int a; - * struct A* self; - * struct S* parent; - * }; - * struct B; - * struct S { - * struct A* a_ptr; - * struct B* b_ptr; - * }; - * - * // CU #2: - * struct S; - * struct A; - * struct B { - * int b; - * struct B* self; - * struct S* parent; - * }; - * struct S { - * struct A* a_ptr; - * struct B* b_ptr; - * }; - * - * In case of CU #1, BTF data will know only that `struct B` exists (but no - * more), but will know the complete type information about `struct A`. While - * for CU #2, it will know full type information about `struct B`, but will - * only know about the forward declaration of `struct A` (in BTF terms, it will - * have a `BTF_KIND_FWD` type descriptor with name `A`). - * - * This compilation unit isolation means that it's possible that there is no - * single CU with complete type information describing structs `S`, `A`, and - * `B`. Also, we might get tons of duplicated and redundant type information. - * - * An additional complication we need to keep in mind comes from the fact that - * types, in general, can form graphs containing cycles, not just DAGs. - * - * While the algorithm does deduplication, it also merges and resolves type - * information (unless disabled through `struct btf_dedup_opts`), whenever possible. - * E.g., in the example above with two compilation units having partial type - * information for structs `A` and `B`, the output of the algorithm will emit - * a single copy of each BTF type that describes structs `A`, `B`, and `S` - * (as well as type information for `int` and pointers), as if they were defined - * in a single compilation unit as: - * - * struct A { - * int a; - * struct A* self; - * struct S* parent; - * }; - * struct B { - * int b; - * struct B* self; - * struct S* parent; - * }; - * struct S { - * struct A* a_ptr; - * struct B* b_ptr; - * }; - * - * Algorithm summary - * ================= - * - * Algorithm completes its work in 6 separate passes: - * - * 1. 
Strings deduplication. - * 2. Primitive types deduplication (int, enum, fwd). - * 3. Struct/union types deduplication. - * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func - * protos, and const/volatile/restrict modifiers). - * 5. Types compaction. - * 6. Types remapping. - * - * Algorithm determines canonical type descriptor, which is a single - * representative type for each truly unique type. This canonical type is the - * one that will go into final deduplicated BTF type information. For - * struct/unions, it is also the type that algorithm will merge additional type - * information into (while resolving FWDs), as it discovers it from data in - * other CUs. Each input BTF type eventually gets either mapped to itself, if - * that type is canonical, or to some other type, if that type is equivalent - * and was chosen as canonical representative. This mapping is stored in - * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that - * FWD type got resolved to. - * - * To facilitate fast discovery of canonical types, we also maintain canonical - * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash - * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types - * that match that signature. With sufficiently good choice of type signature - * hashing function, we can limit number of canonical types for each unique type - * signature to a very small number, allowing to find canonical type for any - * duplicated type very quickly. - * - * Struct/union deduplication is the most critical part and algorithm for - * deduplicating structs/unions is described in greater details in comments for - * `btf_dedup_is_equiv` function. - */ -int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts) -{ - struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); - int err; - - if (IS_ERR(d)) { - pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); - return -EINVAL; - } - - err = btf_dedup_strings(d); - if (err < 0) { - pr_debug("btf_dedup_strings failed:%d\n", err); - goto done; - } - err = btf_dedup_prim_types(d); - if (err < 0) { - pr_debug("btf_dedup_prim_types failed:%d\n", err); - goto done; - } - err = btf_dedup_struct_types(d); - if (err < 0) { - pr_debug("btf_dedup_struct_types failed:%d\n", err); - goto done; - } - err = btf_dedup_ref_types(d); - if (err < 0) { - pr_debug("btf_dedup_ref_types failed:%d\n", err); - goto done; - } - err = btf_dedup_compact_types(d); - if (err < 0) { - pr_debug("btf_dedup_compact_types failed:%d\n", err); - goto done; - } - err = btf_dedup_remap_types(d); - if (err < 0) { - pr_debug("btf_dedup_remap_types failed:%d\n", err); - goto done; - } - -done: - btf_dedup_free(d); - return err; -} - -#define BTF_UNPROCESSED_ID ((__u32)-1) -#define BTF_IN_PROGRESS_ID ((__u32)-2) - -struct btf_dedup { - /* .BTF section to be deduped in-place */ - struct btf *btf; - /* - * Optional .BTF.ext section. When provided, any strings referenced - * from it will be taken into account when deduping strings - */ - struct btf_ext *btf_ext; - /* - * This is a map from any type's signature hash to a list of possible - * canonical representative type candidates. Hash collisions are - * ignored, so even types of various kinds can share same list of - * candidates, which is fine because we rely on subsequent - * btf_xxx_equal() checks to authoritatively verify type equality. 
- */ - struct hashmap *dedup_table; - /* Canonical types map */ - __u32 *map; - /* Hypothetical mapping, used during type graph equivalence checks */ - __u32 *hypot_map; - __u32 *hypot_list; - size_t hypot_cnt; - size_t hypot_cap; - /* Various option modifying behavior of algorithm */ - struct btf_dedup_opts opts; -}; - -struct btf_str_ptr { - const char *str; - __u32 new_off; - bool used; -}; - -struct btf_str_ptrs { - struct btf_str_ptr *ptrs; - const char *data; - __u32 cnt; - __u32 cap; -}; - -static long hash_combine(long h, long value) -{ - return h * 31 + value; -} - -#define for_each_dedup_cand(d, node, hash) \ - hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) - -static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id) -{ - return hashmap__append(d->dedup_table, - (void *)hash, (void *)(long)type_id); -} - -static int btf_dedup_hypot_map_add(struct btf_dedup *d, - __u32 from_id, __u32 to_id) -{ - if (d->hypot_cnt == d->hypot_cap) { - __u32 *new_list; - - d->hypot_cap += max(16, d->hypot_cap / 2); - new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); - if (!new_list) - return -ENOMEM; - d->hypot_list = new_list; - } - d->hypot_list[d->hypot_cnt++] = from_id; - d->hypot_map[from_id] = to_id; - return 0; -} - -static void btf_dedup_clear_hypot_map(struct btf_dedup *d) -{ - int i; - - for (i = 0; i < d->hypot_cnt; i++) - d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; - d->hypot_cnt = 0; -} - -static void btf_dedup_free(struct btf_dedup *d) -{ - hashmap__free(d->dedup_table); - d->dedup_table = NULL; - - free(d->map); - d->map = NULL; - - free(d->hypot_map); - d->hypot_map = NULL; - - free(d->hypot_list); - d->hypot_list = NULL; - - free(d); -} - -static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx) -{ - return (size_t)key; -} - -static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx) -{ - return 0; -} - -static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) -{ - return k1 == k2; -} - -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts) -{ - struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); - hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; - int i, err = 0; - - if (!d) - return ERR_PTR(-ENOMEM); - - d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; - /* dedup_table_size is now used only to force collisions in tests */ - if (opts && opts->dedup_table_size == 1) - hash_fn = btf_dedup_collision_hash_fn; - - d->btf = btf; - d->btf_ext = btf_ext; - - d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); - if (IS_ERR(d->dedup_table)) { - err = PTR_ERR(d->dedup_table); - d->dedup_table = NULL; - goto done; - } - - d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); - if (!d->map) { - err = -ENOMEM; - goto done; - } - /* special BTF "void" type is made canonical immediately */ - d->map[0] = 0; - for (i = 1; i <= btf->nr_types; i++) { - struct btf_type *t = d->btf->types[i]; - - /* VAR and DATASEC are never deduped and are self-canonical */ - if (btf_is_var(t) || btf_is_datasec(t)) - d->map[i] = i; - else - d->map[i] = BTF_UNPROCESSED_ID; - } - - d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); - if (!d->hypot_map) { - err = -ENOMEM; - goto done; - } - for (i = 0; i <= btf->nr_types; i++) - d->hypot_map[i] = BTF_UNPROCESSED_ID; - -done: - if (err) { - btf_dedup_free(d); - return ERR_PTR(err); - } - - return d; -} - -typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx); - 
-/* - * Iterate over all possible places in .BTF and .BTF.ext that can reference - * string and pass pointer to it to a provided callback `fn`. - */ -static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) -{ - void *line_data_cur, *line_data_end; - int i, j, r, rec_size; - struct btf_type *t; - - for (i = 1; i <= d->btf->nr_types; i++) { - t = d->btf->types[i]; - r = fn(&t->name_off, ctx); - if (r) - return r; - - switch (btf_kind(t)) { - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); - - for (j = 0; j < vlen; j++) { - r = fn(&m->name_off, ctx); - if (r) - return r; - m++; - } - break; - } - case BTF_KIND_ENUM: { - struct btf_enum *m = btf_enum(t); - __u16 vlen = btf_vlen(t); - - for (j = 0; j < vlen; j++) { - r = fn(&m->name_off, ctx); - if (r) - return r; - m++; - } - break; - } - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - __u16 vlen = btf_vlen(t); - - for (j = 0; j < vlen; j++) { - r = fn(&m->name_off, ctx); - if (r) - return r; - m++; - } - break; - } - default: - break; - } - } - - if (!d->btf_ext) - return 0; - - line_data_cur = d->btf_ext->line_info.info; - line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len; - rec_size = d->btf_ext->line_info.rec_size; - - while (line_data_cur < line_data_end) { - struct btf_ext_info_sec *sec = line_data_cur; - struct bpf_line_info_min *line_info; - __u32 num_info = sec->num_info; - - r = fn(&sec->sec_name_off, ctx); - if (r) - return r; - - line_data_cur += sizeof(struct btf_ext_info_sec); - for (i = 0; i < num_info; i++) { - line_info = line_data_cur; - r = fn(&line_info->file_name_off, ctx); - if (r) - return r; - r = fn(&line_info->line_off, ctx); - if (r) - return r; - line_data_cur += rec_size; - } - } - - return 0; -} - -static int str_sort_by_content(const void *a1, const void *a2) -{ - const struct btf_str_ptr *p1 = a1; - const struct btf_str_ptr *p2 = a2; - - return strcmp(p1->str, p2->str); -} - -static int str_sort_by_offset(const void *a1, const void *a2) -{ - const struct btf_str_ptr *p1 = a1; - const struct btf_str_ptr *p2 = a2; - - if (p1->str != p2->str) - return p1->str < p2->str ? -1 : 1; - return 0; -} - -static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) -{ - const struct btf_str_ptr *p = pelem; - - if (str_ptr != p->str) - return (const char *)str_ptr < p->str ? -1 : 1; - return 0; -} - -static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) -{ - struct btf_str_ptrs *strs; - struct btf_str_ptr *s; - - if (*str_off_ptr == 0) - return 0; - - strs = ctx; - s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, - sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); - if (!s) - return -EINVAL; - s->used = true; - return 0; -} - -static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) -{ - struct btf_str_ptrs *strs; - struct btf_str_ptr *s; - - if (*str_off_ptr == 0) - return 0; - - strs = ctx; - s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, - sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); - if (!s) - return -EINVAL; - *str_off_ptr = s->new_off; - return 0; -} - -/* - * Dedup string and filter out those that are not referenced from either .BTF - * or .BTF.ext (if provided) sections. 
- * - * This is done by building an index of all strings in BTF's string section, - * then iterating over all entities that can reference strings (e.g., type - * names, struct field names, .BTF.ext line info, etc) and marking corresponding - * strings as used. After that all used strings are deduped and compacted into - * sequential blob of memory and new offsets are calculated. Then all the string - * references are iterated again and rewritten using new offsets. - */ -static int btf_dedup_strings(struct btf_dedup *d) -{ -	const struct btf_header *hdr = d->btf->hdr; -	char *start = (char *)d->btf->nohdr_data + hdr->str_off; -	char *end = start + d->btf->hdr->str_len; -	char *p = start, *tmp_strs = NULL; -	struct btf_str_ptrs strs = { -		.cnt = 0, -		.cap = 0, -		.ptrs = NULL, -		.data = start, -	}; -	int i, j, err = 0, grp_idx; -	bool grp_used; - -	/* build index of all strings */ -	while (p < end) { -		if (strs.cnt + 1 > strs.cap) { -			struct btf_str_ptr *new_ptrs; - -			strs.cap += max(strs.cnt / 2, 16); -			new_ptrs = realloc(strs.ptrs, -					   sizeof(strs.ptrs[0]) * strs.cap); -			if (!new_ptrs) { -				err = -ENOMEM; -				goto done; -			} -			strs.ptrs = new_ptrs; -		} - -		strs.ptrs[strs.cnt].str = p; -		strs.ptrs[strs.cnt].used = false; - -		p += strlen(p) + 1; -		strs.cnt++; -	} - -	/* temporary storage for deduplicated strings */ -	tmp_strs = malloc(d->btf->hdr->str_len); -	if (!tmp_strs) { -		err = -ENOMEM; -		goto done; -	} - -	/* mark all used strings */ -	strs.ptrs[0].used = true; -	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); -	if (err) -		goto done; - -	/* sort strings by content, so that we can identify duplicates */ -	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); - -	/* -	 * iterate groups of equal strings and if any instance in a group was -	 * referenced, emit single instance and remember new offset -	 */ -	p = tmp_strs; -	grp_idx = 0; -	grp_used = strs.ptrs[0].used; -	/* iterate past end to avoid code duplication after loop */ -	for (i = 1; i <= strs.cnt; i++) { -		/* -		 * when i == strs.cnt, we want to skip string comparison and go -		 * straight to handling last group of strings (otherwise we'd -		 * need to handle last group after the loop w/ duplicated code) -		 */ -		if (i < strs.cnt && -		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { -			grp_used = grp_used || strs.ptrs[i].used; -			continue; -		} - -		/* -		 * this check would have been required after the loop to handle -		 * last group of strings, but due to <= condition in a loop -		 * we avoid that duplication -		 */ -		if (grp_used) { -			int new_off = p - tmp_strs; -			__u32 len = strlen(strs.ptrs[grp_idx].str); - -			memmove(p, strs.ptrs[grp_idx].str, len + 1); -			for (j = grp_idx; j < i; j++) -				strs.ptrs[j].new_off = new_off; -			p += len + 1; -		} - -		if (i < strs.cnt) { -			grp_idx = i; -			grp_used = strs.ptrs[i].used; -		} -	} - -	/* replace original strings with deduped ones */ -	d->btf->hdr->str_len = p - tmp_strs; -	memmove(start, tmp_strs, d->btf->hdr->str_len); -	end = start + d->btf->hdr->str_len; - -	/* restore original order for further binary search lookups */ -	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); - -	/* remap string offsets */ -	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); -	if (err) -		goto done; - -	d->btf->hdr->str_len = end - start; - -done: -	free(tmp_strs); -	free(strs.ptrs); -	return err; -} - -static long btf_hash_common(struct btf_type *t) -{ -	long h; - -	h = hash_combine(0, t->name_off); -	h = hash_combine(h, t->info); -	h = hash_combine(h, t->size); -	return h; -} - 
-static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) -{ -	return t1->name_off == t2->name_off && -	       t1->info == t2->info && -	       t1->size == t2->size; -} - -/* Calculate type signature hash of INT. */ -static long btf_hash_int(struct btf_type *t) -{ -	__u32 info = *(__u32 *)(t + 1); -	long h; - -	h = btf_hash_common(t); -	h = hash_combine(h, info); -	return h; -} - -/* Check structural equality of two INTs. */ -static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) -{ -	__u32 info1, info2; - -	if (!btf_equal_common(t1, t2)) -		return false; -	info1 = *(__u32 *)(t1 + 1); -	info2 = *(__u32 *)(t2 + 1); -	return info1 == info2; -} - -/* Calculate type signature hash of ENUM. */ -static long btf_hash_enum(struct btf_type *t) -{ -	long h; - -	/* don't hash vlen and enum members to support enum fwd resolving */ -	h = hash_combine(0, t->name_off); -	h = hash_combine(h, t->info & ~0xffff); -	h = hash_combine(h, t->size); -	return h; -} - -/* Check structural equality of two ENUMs. */ -static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) -{ -	const struct btf_enum *m1, *m2; -	__u16 vlen; -	int i; - -	if (!btf_equal_common(t1, t2)) -		return false; - -	vlen = btf_vlen(t1); -	m1 = btf_enum(t1); -	m2 = btf_enum(t2); -	for (i = 0; i < vlen; i++) { -		if (m1->name_off != m2->name_off || m1->val != m2->val) -			return false; -		m1++; -		m2++; -	} -	return true; -} - -static inline bool btf_is_enum_fwd(struct btf_type *t) -{ -	return btf_is_enum(t) && btf_vlen(t) == 0; -} - -static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) -{ -	if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) -		return btf_equal_enum(t1, t2); -	/* ignore vlen when comparing */ -	return t1->name_off == t2->name_off && -	       (t1->info & ~0xffff) == (t2->info & ~0xffff) && -	       t1->size == t2->size; -} - -/* - * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, - * as referenced type IDs equivalence is established separately during type - * graph equivalence check algorithm. - */ -static long btf_hash_struct(struct btf_type *t) -{ -	const struct btf_member *member = btf_members(t); -	__u32 vlen = btf_vlen(t); -	long h = btf_hash_common(t); -	int i; - -	for (i = 0; i < vlen; i++) { -		h = hash_combine(h, member->name_off); -		h = hash_combine(h, member->offset); -		/* no hashing of referenced type ID, it can be unresolved yet */ -		member++; -	} -	return h; -} - -/* - * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced - * type IDs. This check is performed during type graph equivalence check and - * referenced types equivalence is checked separately. - */ -static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) -{ -	const struct btf_member *m1, *m2; -	__u16 vlen; -	int i; - -	if (!btf_equal_common(t1, t2)) -		return false; - -	vlen = btf_vlen(t1); -	m1 = btf_members(t1); -	m2 = btf_members(t2); -	for (i = 0; i < vlen; i++) { -		if (m1->name_off != m2->name_off || m1->offset != m2->offset) -			return false; -		m1++; -		m2++; -	} -	return true; -} - -/* - * Calculate type signature hash of ARRAY, including referenced type IDs, - * under assumption that they were already resolved to canonical type IDs and - * are not going to change. 
- */ -static long btf_hash_array(struct btf_type *t) -{ - const struct btf_array *info = btf_array(t); - long h = btf_hash_common(t); - - h = hash_combine(h, info->type); - h = hash_combine(h, info->index_type); - h = hash_combine(h, info->nelems); - return h; -} - -/* - * Check exact equality of two ARRAYs, taking into account referenced - * type IDs, under assumption that they were already resolved to canonical - * type IDs and are not going to change. - * This function is called during reference types deduplication to compare - * ARRAY to potential canonical representative. - */ -static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) -{ - const struct btf_array *info1, *info2; - - if (!btf_equal_common(t1, t2)) - return false; - - info1 = btf_array(t1); - info2 = btf_array(t2); - return info1->type == info2->type && - info1->index_type == info2->index_type && - info1->nelems == info2->nelems; -} - -/* - * Check structural compatibility of two ARRAYs, ignoring referenced type - * IDs. This check is performed during type graph equivalence check and - * referenced types equivalence is checked separately. - */ -static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) -{ - if (!btf_equal_common(t1, t2)) - return false; - - return btf_array(t1)->nelems == btf_array(t2)->nelems; -} - -/* - * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, - * under assumption that they were already resolved to canonical type IDs and - * are not going to change. - */ -static long btf_hash_fnproto(struct btf_type *t) -{ - const struct btf_param *member = btf_params(t); - __u16 vlen = btf_vlen(t); - long h = btf_hash_common(t); - int i; - - for (i = 0; i < vlen; i++) { - h = hash_combine(h, member->name_off); - h = hash_combine(h, member->type); - member++; - } - return h; -} - -/* - * Check exact equality of two FUNC_PROTOs, taking into account referenced - * type IDs, under assumption that they were already resolved to canonical - * type IDs and are not going to change. - * This function is called during reference types deduplication to compare - * FUNC_PROTO to potential canonical representative. - */ -static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) -{ - const struct btf_param *m1, *m2; - __u16 vlen; - int i; - - if (!btf_equal_common(t1, t2)) - return false; - - vlen = btf_vlen(t1); - m1 = btf_params(t1); - m2 = btf_params(t2); - for (i = 0; i < vlen; i++) { - if (m1->name_off != m2->name_off || m1->type != m2->type) - return false; - m1++; - m2++; - } - return true; -} - -/* - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type - * IDs. This check is performed during type graph equivalence check and - * referenced types equivalence is checked separately. - */ -static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) -{ - const struct btf_param *m1, *m2; - __u16 vlen; - int i; - - /* skip return type ID */ - if (t1->name_off != t2->name_off || t1->info != t2->info) - return false; - - vlen = btf_vlen(t1); - m1 = btf_params(t1); - m2 = btf_params(t2); - for (i = 0; i < vlen; i++) { - if (m1->name_off != m2->name_off) - return false; - m1++; - m2++; - } - return true; -} - -/* - * Deduplicate primitive types, that can't reference other types, by calculating - * their type signature hash and comparing them with any possible canonical - * candidate. If no canonical candidate matches, type itself is marked as - * canonical and is added into `btf_dedup->dedup_table` as another candidate. 
- */ -static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) -{ - struct btf_type *t = d->btf->types[type_id]; - struct hashmap_entry *hash_entry; - struct btf_type *cand; - /* if we don't find equivalent type, then we are canonical */ - __u32 new_id = type_id; - __u32 cand_id; - long h; - - switch (btf_kind(t)) { - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_ARRAY: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_FUNC: - case BTF_KIND_FUNC_PROTO: - case BTF_KIND_VAR: - case BTF_KIND_DATASEC: - return 0; - - case BTF_KIND_INT: - h = btf_hash_int(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; - if (btf_equal_int(t, cand)) { - new_id = cand_id; - break; - } - } - break; - - case BTF_KIND_ENUM: - h = btf_hash_enum(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; - if (btf_equal_enum(t, cand)) { - new_id = cand_id; - break; - } - if (d->opts.dont_resolve_fwds) - continue; - if (btf_compat_enum(t, cand)) { - if (btf_is_enum_fwd(t)) { - /* resolve fwd to full enum */ - new_id = cand_id; - break; - } - /* resolve canonical enum fwd to full enum */ - d->map[cand_id] = type_id; - } - } - break; - - case BTF_KIND_FWD: - h = btf_hash_common(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; - if (btf_equal_common(t, cand)) { - new_id = cand_id; - break; - } - } - break; - - default: - return -EINVAL; - } - - d->map[type_id] = new_id; - if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) - return -ENOMEM; - - return 0; -} - -static int btf_dedup_prim_types(struct btf_dedup *d) -{ - int i, err; - - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_prim_type(d, i); - if (err) - return err; - } - return 0; -} - -/* - * Check whether type is already mapped into canonical one (could be to itself). - */ -static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) -{ - return d->map[type_id] <= BTF_MAX_NR_TYPES; -} - -/* - * Resolve type ID into its canonical type ID, if any; otherwise return original - * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow - * STRUCT/UNION link and resolve it into canonical type ID as well. - */ -static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) -{ - while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) - type_id = d->map[type_id]; - return type_id; -} - -/* - * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original - * type ID. - */ -static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) -{ - __u32 orig_type_id = type_id; - - if (!btf_is_fwd(d->btf->types[type_id])) - return type_id; - - while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) - type_id = d->map[type_id]; - - if (!btf_is_fwd(d->btf->types[type_id])) - return type_id; - - return orig_type_id; -} - - -static inline __u16 btf_fwd_kind(struct btf_type *t) -{ - return btf_kflag(t) ? 
BTF_KIND_UNION : BTF_KIND_STRUCT; -} - -/* - * Check equivalence of BTF type graph formed by candidate struct/union (we'll - * call it "candidate graph" in this description for brevity) to a type graph - * formed by (potential) canonical struct/union ("canonical graph" for brevity - * here, though keep in mind that not all types in canonical graph are - * necessarily canonical representatives themselves, some of them might be - * duplicates or their uniqueness might not have been established yet). - * Returns: - * - >0, if type graphs are equivalent; - * - 0, if not equivalent; - * - <0, on error. - * - * Algorithm performs side-by-side DFS traversal of both type graphs and checks - * equivalence of BTF types at each step. If at any point BTF types in candidate - * and canonical graphs are not compatible structurally, whole graphs are - * incompatible. If types are structurally equivalent (i.e., all information - * except referenced type IDs is exactly the same), a mapping from `canon_id` to - * a `cand_id` is recorded in the hypothetical mapping (`btf_dedup->hypot_map`). - * If a type references other types, then those referenced types are checked - * for equivalence recursively. - * - * During DFS traversal, if we find that for current `canon_id` type we - * already have some mapping in hypothetical map, we check for two possible - * situations: - * - `canon_id` is mapped to exactly the same type as `cand_id`. This will - * happen when type graphs have cycles. In this case we assume those two - * types are equivalent. - * - `canon_id` is mapped to a different type. This is a contradiction in our - * hypothetical mapping, because the same graph in the canonical graph corresponds - * to two different types in the candidate graph, which for equivalent type - * graphs shouldn't happen. This condition terminates the equivalence check - * with a negative result. - * - * If type graph traversal exhausts the types to check and finds no contradiction, - * then the type graphs are equivalent. - * - * When checking types for equivalence, there is one special case: FWD types. - * If FWD type resolution is allowed and one of the types (either from canonical - * or candidate graph) is FWD and the other is STRUCT/UNION (depending on FWD's kind - * flag) and their names match, the hypothetical mapping is updated to point from - * FWD to STRUCT/UNION. If the graphs are successfully determined to be equivalent, - * this mapping will be used to record the FWD -> STRUCT/UNION mapping permanently. - * - * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution, - * if there are two identically named (or anonymous) structs/unions that are - * compatible structurally, one of which has a FWD field, while the other is a concrete - * STRUCT/UNION, but according to C sources they are different structs/unions - * that are referencing different types with the same name. This is extremely - * unlikely to happen, but the btf_dedup API allows disabling FWD resolution if - * this logic is causing problems. - * - * Doing FWD resolution means that both candidate and/or canonical graphs can - * consist of portions of the graph that come from multiple compilation units. - * This is due to the fact that types within a single compilation unit are always - * deduplicated and FWDs are already resolved, if the referenced struct/union - * definition is available. So, if we had an unresolved FWD and found a corresponding - * STRUCT/UNION, they will be from different compilation units. 
This - * consequently means that when we "link" FWD to corresponding STRUCT/UNION, - * type graph will likely have at least two different BTF types that describe - * same type (e.g., most probably there will be two different BTF types for the - * same 'int' primitive type) and could even have "overlapping" parts of type - * graph that describe same subset of types. - * - * This in turn means that our assumption that each type in canonical graph - * must correspond to exactly one type in candidate graph might not hold - * anymore and will make it harder to detect contradictions using hypothetical - * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION - * resolution only in canonical graph. FWDs in candidate graphs are never - * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs - * that can occur: - * - Both types in canonical and candidate graphs are FWDs. If they are - * structurally equivalent, then they can either be both resolved to the - * same STRUCT/UNION or not resolved at all. In both cases they are - * equivalent and there is no need to resolve FWD on candidate side. - * - Both types in canonical and candidate graphs are concrete STRUCT/UNION, - * so nothing to resolve as well, algorithm will check equivalence anyway. - * - Type in canonical graph is FWD, while type in candidate is concrete - * STRUCT/UNION. In this case candidate graph comes from single compilation - * unit, so there is exactly one BTF type for each unique C type. After - * resolving FWD into STRUCT/UNION, there might be more than one BTF type - * in canonical graph mapping to single BTF type in candidate graph, but - * because hypothetical mapping maps from canonical to candidate types, it's - * alright, and we still maintain the property of having single `canon_id` - * mapping to single `cand_id` (there could be two different `canon_id` - * mapped to the same `cand_id`, but it's not contradictory). - * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate - * graph is FWD. In this case we are just going to check compatibility of - * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll - * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to - * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs - * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from - * canonical graph. 
- */ -static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, - __u32 canon_id) -{ - struct btf_type *cand_type; - struct btf_type *canon_type; - __u32 hypot_type_id; - __u16 cand_kind; - __u16 canon_kind; - int i, eq; - - /* if both resolve to the same canonical, they must be equivalent */ - if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id)) - return 1; - - canon_id = resolve_fwd_id(d, canon_id); - - hypot_type_id = d->hypot_map[canon_id]; - if (hypot_type_id <= BTF_MAX_NR_TYPES) - return hypot_type_id == cand_id; - - if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) - return -ENOMEM; - - cand_type = d->btf->types[cand_id]; - canon_type = d->btf->types[canon_id]; - cand_kind = btf_kind(cand_type); - canon_kind = btf_kind(canon_type); - - if (cand_type->name_off != canon_type->name_off) - return 0; - - /* FWD <--> STRUCT/UNION equivalence check, if enabled */ - if (!d->opts.dont_resolve_fwds - && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) - && cand_kind != canon_kind) { - __u16 real_kind; - __u16 fwd_kind; - - if (cand_kind == BTF_KIND_FWD) { - real_kind = canon_kind; - fwd_kind = btf_fwd_kind(cand_type); - } else { - real_kind = cand_kind; - fwd_kind = btf_fwd_kind(canon_type); - } - return fwd_kind == real_kind; - } - - if (cand_kind != canon_kind) - return 0; - - switch (cand_kind) { - case BTF_KIND_INT: - return btf_equal_int(cand_type, canon_type); - - case BTF_KIND_ENUM: - if (d->opts.dont_resolve_fwds) - return btf_equal_enum(cand_type, canon_type); - else - return btf_compat_enum(cand_type, canon_type); - - case BTF_KIND_FWD: - return btf_equal_common(cand_type, canon_type); - - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - if (cand_type->info != canon_type->info) - return 0; - return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); - - case BTF_KIND_ARRAY: { - const struct btf_array *cand_arr, *canon_arr; - - if (!btf_compat_array(cand_type, canon_type)) - return 0; - cand_arr = btf_array(cand_type); - canon_arr = btf_array(canon_type); - eq = btf_dedup_is_equiv(d, - cand_arr->index_type, canon_arr->index_type); - if (eq <= 0) - return eq; - return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); - } - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - const struct btf_member *cand_m, *canon_m; - __u16 vlen; - - if (!btf_shallow_equal_struct(cand_type, canon_type)) - return 0; - vlen = btf_vlen(cand_type); - cand_m = btf_members(cand_type); - canon_m = btf_members(canon_type); - for (i = 0; i < vlen; i++) { - eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); - if (eq <= 0) - return eq; - cand_m++; - canon_m++; - } - - return 1; - } - - case BTF_KIND_FUNC_PROTO: { - const struct btf_param *cand_p, *canon_p; - __u16 vlen; - - if (!btf_compat_fnproto(cand_type, canon_type)) - return 0; - eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); - if (eq <= 0) - return eq; - vlen = btf_vlen(cand_type); - cand_p = btf_params(cand_type); - canon_p = btf_params(canon_type); - for (i = 0; i < vlen; i++) { - eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); - if (eq <= 0) - return eq; - cand_p++; - canon_p++; - } - return 1; - } - - default: - return -EINVAL; - } - return 0; -} - -/* - * Use hypothetical mapping, produced by successful type graph equivalence - * check, to augment existing struct/union canonical mapping, where possible. 
- * - * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record - * FWD -> STRUCT/UNION correspondence. FWD resolution is bidirectional: - * it doesn't matter if FWD type was part of canonical graph or candidate one, - * we are recording the mapping anyway. As opposed to the carefulness required - * for struct/union correspondence mapping (described below), this is not a concern - * for FWD resolution, as by the time FWD types (reference types) are - * deduplicated, all structs/unions will already have been deduped. - * - * Recording STRUCT/UNION mapping is purely a performance optimization and is - * not required for correctness. It needs to be done carefully to ensure that - * a struct/union from the candidate's type graph is not mapped onto a corresponding - * struct/union from the canonical type graph that itself hasn't been resolved into a - * canonical representative. The only guarantee we have is that the canonical - * struct/union was determined as canonical and that won't change. But any - * types referenced through that struct/union's fields might not have been - * resolved yet, so in such a case it's too early to establish any kind of - * correspondence between structs/unions. - * - * No canonical correspondence is derived for primitive types (they are already - * fully deduplicated anyway) or reference types (they rely on - * stability of struct/union canonical relationship for equivalence checks). - */ -static void btf_dedup_merge_hypot_map(struct btf_dedup *d) -{ - __u32 cand_type_id, targ_type_id; - __u16 t_kind, c_kind; - __u32 t_id, c_id; - int i; - - for (i = 0; i < d->hypot_cnt; i++) { - cand_type_id = d->hypot_list[i]; - targ_type_id = d->hypot_map[cand_type_id]; - t_id = resolve_type_id(d, targ_type_id); - c_id = resolve_type_id(d, cand_type_id); - t_kind = btf_kind(d->btf->types[t_id]); - c_kind = btf_kind(d->btf->types[c_id]); - /* - * Resolve FWD into STRUCT/UNION. - * It's ok to resolve FWD into STRUCT/UNION that's not yet - * mapped to canonical representative (as opposed to - * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because - * eventually that struct is going to be mapped and all resolved - * FWDs will automatically resolve to correct canonical - * representative. This will happen before ref type deduping, - * which critically depends on stability of these mappings. This - * stability is not a requirement for STRUCT/UNION equivalence - * checks, though. - */ - if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) - d->map[c_id] = t_id; - else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) - d->map[t_id] = c_id; - - if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && - c_kind != BTF_KIND_FWD && - is_type_mapped(d, c_id) && - !is_type_mapped(d, t_id)) { - /* - * as a perf optimization, we can map struct/union - * that's part of type graph we just verified for - * equivalence. We can only do that for a struct/union - * that already has a canonical representative, though. - */ - d->map[t_id] = c_id; - } - } -} - -/* - * Deduplicate struct/union types. - * - * For each struct/union type its type signature hash is calculated, taking - * into account the type's name, size, and the number, order and names of its fields, but - * ignoring the type IDs referenced from fields, because those might not be fully - * deduped until after the reference type deduplication phase. This type hash - * is used to iterate over all potential canonical types sharing the same hash.
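The shape of such a signature hash is worth making concrete. The real btf_hash_struct() is not part of this hunk; a simplified sketch of the idea, using helpers declared in the (also removed) btf.h further below, could look like:

    /* Sketch only: mix in the struct's name, size, and each member's name
     * and bit offset, but deliberately NOT m->type, because referenced
     * type IDs are not deduplicated yet at this point. */
    static long shallow_struct_hash(const struct btf_type *t)
    {
            const struct btf_member *m = btf_members(t);
            int i, vlen = btf_vlen(t);
            long h = t->name_off * 31 + t->size;

            h = h * 31 + vlen;
            for (i = 0; i < vlen; i++, m++)
                    h = (h * 31 + m->name_off) * 31 + m->offset;
            return h;
    }

Any two structs that could possibly dedup to one another land in the same hash bucket; the expensive type graph equivalence check then makes the final call.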
- * For each canonical candidate we check whether the type graphs that they form - * (through referenced types in fields and so on) are equivalent, using the algorithm - * implemented in `btf_dedup_is_equiv`. If such equivalence is found and - * BTF_KIND_FWD resolution is allowed, then the hypothetical mapping - * (btf_dedup->hypot_map) produced by the aforementioned type graph equivalence - * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to - * potentially map other structs/unions to their canonical representatives, - * if such relationship hasn't yet been established. This speeds up the algorithm - * by eliminating some of the duplicate work. - * - * If no matching canonical representative was found, the struct/union is marked - * as canonical for itself and is added into the btf_dedup->dedup_table hash map - * for further lookups. - */ -static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) -{ - struct btf_type *cand_type, *t; - struct hashmap_entry *hash_entry; - /* if we don't find equivalent type, then we are canonical */ - __u32 new_id = type_id; - __u16 kind; - long h; - - /* already deduped or is in process of deduping (loop detected) */ - if (d->map[type_id] <= BTF_MAX_NR_TYPES) - return 0; - - t = d->btf->types[type_id]; - kind = btf_kind(t); - - if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) - return 0; - - h = btf_hash_struct(t); - for_each_dedup_cand(d, hash_entry, h) { - __u32 cand_id = (__u32)(long)hash_entry->value; - int eq; - - /* - * Even though btf_dedup_is_equiv() checks for - * btf_shallow_equal_struct() internally when checking two - * structs (unions) for equivalence, we need to guard here - * against picking a matching FWD type as a dedup candidate. - * This can happen due to a hash collision. In such a case, just - * relying on btf_dedup_is_equiv() would lead to potentially - * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because - * FWD and compatible STRUCT/UNION are considered equivalent. - */ - cand_type = d->btf->types[cand_id]; - if (!btf_shallow_equal_struct(t, cand_type)) - continue; - - btf_dedup_clear_hypot_map(d); - eq = btf_dedup_is_equiv(d, type_id, cand_id); - if (eq < 0) - return eq; - if (!eq) - continue; - new_id = cand_id; - btf_dedup_merge_hypot_map(d); - break; - } - - d->map[type_id] = new_id; - if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) - return -ENOMEM; - - return 0; -} - -static int btf_dedup_struct_types(struct btf_dedup *d) -{ - int i, err; - - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_struct_type(d, i); - if (err) - return err; - } - return 0; -} - -/* - * Deduplicate reference type. - * - * Once all primitive and struct/union types have been deduplicated, we can easily - * deduplicate all other (reference) BTF types. This is done in two steps: - * - * 1. Resolve all referenced type IDs into their canonical type IDs. This - * resolution can be done either immediately for primitive or struct/union types - * (because they were deduped in the previous two phases) or recursively for - * reference types. Recursion will always terminate at either a primitive or a - * struct/union type, at which point we can "unwind" the chain of reference types - * one by one. There is no danger of encountering cycles because in the C type - * system the only way to form a type cycle is through a struct/union, so any chain - * of reference types, even those taking part in a type cycle, will inevitably - * reach a struct/union at some point. - * - * 2.
Once all referenced type IDs are resolved into canonical ones, the BTF type - * becomes "stable", in the sense that no further deduplication will cause - * any changes to it. With that, it's now possible to calculate the type's signature - * hash (this time taking into account referenced type IDs) and loop over all - * potential canonical representatives. If no match was found, the current type - * will become the canonical representative of itself and will be added into - * btf_dedup->dedup_table as another possible canonical representative. - */ -static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) -{ - struct hashmap_entry *hash_entry; - __u32 new_id = type_id, cand_id; - struct btf_type *t, *cand; - /* if we don't find equivalent type, then we are representative type */ - int ref_type_id; - long h; - - if (d->map[type_id] == BTF_IN_PROGRESS_ID) - return -ELOOP; - if (d->map[type_id] <= BTF_MAX_NR_TYPES) - return resolve_type_id(d, type_id); - - t = d->btf->types[type_id]; - d->map[type_id] = BTF_IN_PROGRESS_ID; - - switch (btf_kind(t)) { - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - ref_type_id = btf_dedup_ref_type(d, t->type); - if (ref_type_id < 0) - return ref_type_id; - t->type = ref_type_id; - - h = btf_hash_common(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; - if (btf_equal_common(t, cand)) { - new_id = cand_id; - break; - } - } - break; - - case BTF_KIND_ARRAY: { - struct btf_array *info = btf_array(t); - - ref_type_id = btf_dedup_ref_type(d, info->type); - if (ref_type_id < 0) - return ref_type_id; - info->type = ref_type_id; - - ref_type_id = btf_dedup_ref_type(d, info->index_type); - if (ref_type_id < 0) - return ref_type_id; - info->index_type = ref_type_id; - - h = btf_hash_array(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; - if (btf_equal_array(t, cand)) { - new_id = cand_id; - break; - } - } - break; - } - - case BTF_KIND_FUNC_PROTO: { - struct btf_param *param; - __u16 vlen; - int i; - - ref_type_id = btf_dedup_ref_type(d, t->type); - if (ref_type_id < 0) - return ref_type_id; - t->type = ref_type_id; - - vlen = btf_vlen(t); - param = btf_params(t); - for (i = 0; i < vlen; i++) { - ref_type_id = btf_dedup_ref_type(d, param->type); - if (ref_type_id < 0) - return ref_type_id; - param->type = ref_type_id; - param++; - } - - h = btf_hash_fnproto(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = d->btf->types[cand_id]; - if (btf_equal_fnproto(t, cand)) { - new_id = cand_id; - break; - } - } - break; - } - - default: - return -EINVAL; - } - - d->map[type_id] = new_id; - if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) - return -ENOMEM; - - return new_id; -} - -static int btf_dedup_ref_types(struct btf_dedup *d) -{ - int i, err; - - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_ref_type(d, i); - if (err < 0) - return err; - } - /* we won't need d->dedup_table anymore */ - hashmap__free(d->dedup_table); - d->dedup_table = NULL; - return 0; -} - -/* - * Compact types. - * - * After we have established for each type its corresponding canonical representative - * type, we can now eliminate types that are not canonical and leave only - * canonical ones laid out sequentially in memory by copying them over - * duplicates.
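The reference-type pass above is worth a concrete example before the compaction details. Consider `const int *` appearing in two compilation units (type IDs invented for illustration):

    const int *p;   /* BTF chain in each CU: [ptr] -> [const] -> [int] */

Once `int` has a single canonical ID, resolving referenced IDs makes the two CONST records byte-identical, so the hash lookup in btf_dedup_ref_type() maps the second one onto the first; the same then happens one level up for the two PTR records, unwinding the chain exactly as step 1 describes.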
During compaction btf_dedup->hypot_map array is reused to store - * a map from original type ID to a new compacted type ID, which will be used - * during next phase to "fix up" type IDs, referenced from struct/union and - * reference types. - */ -static int btf_dedup_compact_types(struct btf_dedup *d) -{ - struct btf_type **new_types; - __u32 next_type_id = 1; - char *types_start, *p; - int i, len; - - /* we are going to reuse hypot_map to store compaction remapping */ - d->hypot_map[0] = 0; - for (i = 1; i <= d->btf->nr_types; i++) - d->hypot_map[i] = BTF_UNPROCESSED_ID; - - types_start = d->btf->nohdr_data + d->btf->hdr->type_off; - p = types_start; - - for (i = 1; i <= d->btf->nr_types; i++) { - if (d->map[i] != i) - continue; - - len = btf_type_size(d->btf->types[i]); - if (len < 0) - return len; - - memmove(p, d->btf->types[i], len); - d->hypot_map[i] = next_type_id; - d->btf->types[next_type_id] = (struct btf_type *)p; - p += len; - next_type_id++; - } - - /* shrink struct btf's internal types index and update btf_header */ - d->btf->nr_types = next_type_id - 1; - d->btf->types_size = d->btf->nr_types; - d->btf->hdr->type_len = p - types_start; - new_types = realloc(d->btf->types, - (1 + d->btf->nr_types) * sizeof(struct btf_type *)); - if (!new_types) - return -ENOMEM; - d->btf->types = new_types; - - /* make sure string section follows type information without gaps */ - d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; - memmove(p, d->btf->strings, d->btf->hdr->str_len); - d->btf->strings = p; - p += d->btf->hdr->str_len; - - d->btf->data_size = p - (char *)d->btf->data; - return 0; -} - -/* - * Figure out final (deduplicated and compacted) type ID for provided original - * `type_id` by first resolving it into corresponding canonical type ID and - * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, - * which is populated during compaction phase. - */ -static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) -{ - __u32 resolved_type_id, new_type_id; - - resolved_type_id = resolve_type_id(d, type_id); - new_type_id = d->hypot_map[resolved_type_id]; - if (new_type_id > BTF_MAX_NR_TYPES) - return -EINVAL; - return new_type_id; -} - -/* - * Remap referenced type IDs into deduped type IDs. - * - * After BTF types are deduplicated and compacted, their final type IDs may - * differ from original ones. The map from original to a corresponding - * deduped type ID is stored in btf_dedup->hypot_map and is populated during - * compaction phase. During remapping phase we are rewriting all type IDs - * referenced from any BTF type (e.g., struct fields, func proto args, etc) to - * their final deduped type IDs. 
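Putting the two mappings together, the final translation of any original type ID is a two-step lookup. A minimal sketch (bare arrays instead of the real structs, purely illustrative):

    /* Resolve to the canonical representative via `map`, then translate
     * to the post-compaction numbering via `hypot_map`. */
    static __u32 final_type_id(const __u32 *map, const __u32 *hypot_map, __u32 id)
    {
            while (map[id] != id)        /* follow canonical mapping      */
                    id = map[id];
            return hypot_map[id];        /* compacted, deduped type ID    */
    }

This mirrors what btf_dedup_remap_type_id() above does via resolve_type_id() and the reused hypot_map.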
- */ -static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) -{ - struct btf_type *t = d->btf->types[type_id]; - int i, r; - - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_ENUM: - break; - - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - r = btf_dedup_remap_type_id(d, t->type); - if (r < 0) - return r; - t->type = r; - break; - - case BTF_KIND_ARRAY: { - struct btf_array *arr_info = btf_array(t); - - r = btf_dedup_remap_type_id(d, arr_info->type); - if (r < 0) - return r; - arr_info->type = r; - r = btf_dedup_remap_type_id(d, arr_info->index_type); - if (r < 0) - return r; - arr_info->index_type = r; - break; - } - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *member = btf_members(t); - __u16 vlen = btf_vlen(t); - - for (i = 0; i < vlen; i++) { - r = btf_dedup_remap_type_id(d, member->type); - if (r < 0) - return r; - member->type = r; - member++; - } - break; - } - - case BTF_KIND_FUNC_PROTO: { - struct btf_param *param = btf_params(t); - __u16 vlen = btf_vlen(t); - - r = btf_dedup_remap_type_id(d, t->type); - if (r < 0) - return r; - t->type = r; - - for (i = 0; i < vlen; i++) { - r = btf_dedup_remap_type_id(d, param->type); - if (r < 0) - return r; - param->type = r; - param++; - } - break; - } - - case BTF_KIND_DATASEC: { - struct btf_var_secinfo *var = btf_var_secinfos(t); - __u16 vlen = btf_vlen(t); - - for (i = 0; i < vlen; i++) { - r = btf_dedup_remap_type_id(d, var->type); - if (r < 0) - return r; - var->type = r; - var++; - } - break; - } - - default: - return -EINVAL; - } - - return 0; -} - -static int btf_dedup_remap_types(struct btf_dedup *d) -{ - int i, r; - - for (i = 1; i <= d->btf->nr_types; i++) { - r = btf_dedup_remap_type(d, i); - if (r < 0) - return r; - } - return 0; -} diff --git a/src/contrib/libbpf/bpf/btf.h b/src/contrib/libbpf/bpf/btf.h deleted file mode 100644 index d9ac73a02..000000000 --- a/src/contrib/libbpf/bpf/btf.h +++ /dev/null @@ -1,311 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -/* Copyright (c) 2018 Facebook */ - -#ifndef __LIBBPF_BTF_H -#define __LIBBPF_BTF_H - -#include <stdarg.h> -#include <linux/btf.h> -#include <linux/types.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - -#define BTF_ELF_SEC ".BTF" -#define BTF_EXT_ELF_SEC ".BTF.ext" -#define MAPS_ELF_SEC ".maps" - -struct btf; -struct btf_ext; -struct btf_type; - -struct bpf_object; - -/* - * The .BTF.ext ELF section layout defined as - * struct btf_ext_header - * func_info subsection - * - * The func_info subsection layout: - * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 - * a list of bpf_func_info records for section #1 - * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h - * but may not be identical - * struct btf_sec_func_info for section #2 - * a list of bpf_func_info records for section #2 - * ...... - * - * Note that the bpf_func_info record size in .BTF.ext may not - * be the same as the one defined in include/uapi/linux/bpf.h. - * The loader should ensure that record_size meets minimum - * requirement and pass the record as is to the kernel. The - * kernel will handle the func_info properly based on its contents. 
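The declarations that follow formed the public surface of this embedded libbpf copy. A minimal, illustrative consumer of the parse/dedup/dump entry points declared just below might have looked roughly like this (error handling elided; these calls return ERR_PTR-style values in this vintage, and the usual bpf/btf.h include path is assumed):

    #include <stdio.h>
    #include <bpf/btf.h>

    static void print_cb(void *ctx, const char *fmt, va_list args)
    {
            vfprintf(stdout, fmt, args);
    }

    /* Parse .BTF from an ELF object, deduplicate it, and dump all types
     * back out as compilable C. */
    static void dump_c_types(const char *path)
    {
            struct btf_ext *ext = NULL;
            struct btf *btf = btf__parse_elf(path, &ext);
            struct btf_dump *d;
            __u32 id, n;

            btf__dedup(btf, ext, NULL);          /* default dedup options */
            d = btf_dump__new(btf, ext, NULL, print_cb);
            n = btf__get_nr_types(btf);
            for (id = 1; id <= n; id++)
                    btf_dump__dump_type(d, id);

            btf_dump__free(d);
            btf_ext__free(ext);
            btf__free(btf);
    }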
- */ -struct btf_ext_header { - __u16 magic; - __u8 version; - __u8 flags; - __u32 hdr_len; - - /* All offsets are in bytes relative to the end of this header */ - __u32 func_info_off; - __u32 func_info_len; - __u32 line_info_off; - __u32 line_info_len; - - /* optional part of .BTF.ext header */ - __u32 field_reloc_off; - __u32 field_reloc_len; -}; - -LIBBPF_API void btf__free(struct btf *btf); -LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); -LIBBPF_API struct btf *btf__parse_elf(const char *path, - struct btf_ext **btf_ext); -LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); -LIBBPF_API int btf__load(struct btf *btf); -LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, - const char *type_name); -LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, - const char *type_name, __u32 kind); -LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); -LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, - __u32 id); -LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); -LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); -LIBBPF_API int btf__fd(const struct btf *btf); -LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); -LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); -LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, - __u32 expected_key_size, - __u32 expected_value_size, - __u32 *key_type_id, __u32 *value_type_id); - -LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); -LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); -LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, - __u32 *size); -LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt); -LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt); -LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); - -struct btf_dedup_opts { - unsigned int dedup_table_size; - bool dont_resolve_fwds; -}; - -LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts); - -struct btf_dump; - -struct btf_dump_opts { - void *ctx; -}; - -typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); - -LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn); -LIBBPF_API void btf_dump__free(struct btf_dump *d); - -LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); - -/* - * A set of helpers for easier BTF types handling - */ -static inline __u16 btf_kind(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info); -} - -static inline __u16 btf_vlen(const struct btf_type *t) -{ - return BTF_INFO_VLEN(t->info); -} - -static inline bool btf_kflag(const struct btf_type *t) -{ - return BTF_INFO_KFLAG(t->info); -} - -static inline bool btf_is_int(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_INT; -} - -static inline bool btf_is_ptr(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_PTR; -} - -static inline bool btf_is_array(const 
struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_ARRAY; -} - -static inline bool btf_is_struct(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_STRUCT; -} - -static inline bool btf_is_union(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_UNION; -} - -static inline bool btf_is_composite(const struct btf_type *t) -{ - __u16 kind = btf_kind(t); - - return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; -} - -static inline bool btf_is_enum(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_ENUM; -} - -static inline bool btf_is_fwd(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_FWD; -} - -static inline bool btf_is_typedef(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_TYPEDEF; -} - -static inline bool btf_is_volatile(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_VOLATILE; -} - -static inline bool btf_is_const(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_CONST; -} - -static inline bool btf_is_restrict(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_RESTRICT; -} - -static inline bool btf_is_mod(const struct btf_type *t) -{ - __u16 kind = btf_kind(t); - - return kind == BTF_KIND_VOLATILE || - kind == BTF_KIND_CONST || - kind == BTF_KIND_RESTRICT; -} - -static inline bool btf_is_func(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_FUNC; -} - -static inline bool btf_is_func_proto(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_FUNC_PROTO; -} - -static inline bool btf_is_var(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_VAR; -} - -static inline bool btf_is_datasec(const struct btf_type *t) -{ - return btf_kind(t) == BTF_KIND_DATASEC; -} - -static inline __u8 btf_int_encoding(const struct btf_type *t) -{ - return BTF_INT_ENCODING(*(__u32 *)(t + 1)); -} - -static inline __u8 btf_int_offset(const struct btf_type *t) -{ - return BTF_INT_OFFSET(*(__u32 *)(t + 1)); -} - -static inline __u8 btf_int_bits(const struct btf_type *t) -{ - return BTF_INT_BITS(*(__u32 *)(t + 1)); -} - -static inline struct btf_array *btf_array(const struct btf_type *t) -{ - return (struct btf_array *)(t + 1); -} - -static inline struct btf_enum *btf_enum(const struct btf_type *t) -{ - return (struct btf_enum *)(t + 1); -} - -static inline struct btf_member *btf_members(const struct btf_type *t) -{ - return (struct btf_member *)(t + 1); -} - -/* Get bit offset of a member with specified index. */ -static inline __u32 btf_member_bit_offset(const struct btf_type *t, - __u32 member_idx) -{ - const struct btf_member *m = btf_members(t) + member_idx; - bool kflag = btf_kflag(t); - - return kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset; -} -/* - * Get bitfield size of a member, assuming t is BTF_KIND_STRUCT or - * BTF_KIND_UNION. If member is not a bitfield, zero is returned. - */ -static inline __u32 btf_member_bitfield_size(const struct btf_type *t, - __u32 member_idx) -{ - const struct btf_member *m = btf_members(t) + member_idx; - bool kflag = btf_kflag(t); - - return kflag ? 
BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; -} - -static inline struct btf_param *btf_params(const struct btf_type *t) -{ - return (struct btf_param *)(t + 1); -} - -static inline struct btf_var *btf_var(const struct btf_type *t) -{ - return (struct btf_var *)(t + 1); -} - -static inline struct btf_var_secinfo * -btf_var_secinfos(const struct btf_type *t) -{ - return (struct btf_var_secinfo *)(t + 1); -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* __LIBBPF_BTF_H */ diff --git a/src/contrib/libbpf/bpf/btf_dump.c b/src/contrib/libbpf/bpf/btf_dump.c deleted file mode 100644 index cb126d8fc..000000000 --- a/src/contrib/libbpf/bpf/btf_dump.c +++ /dev/null @@ -1,1386 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * BTF-to-C type converter. - * - * Copyright (c) 2019 Facebook - */ - -#include <stdbool.h> -#include <stddef.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <linux/err.h> -#include <linux/btf.h> -#include "btf.h" -#include "hashmap.h" -#include "libbpf.h" -#include "libbpf_internal.h" - -static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; -static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; - -static const char *pfx(int lvl) -{ - return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl]; -} - -enum btf_dump_type_order_state { - NOT_ORDERED, - ORDERING, - ORDERED, -}; - -enum btf_dump_type_emit_state { - NOT_EMITTED, - EMITTING, - EMITTED, -}; - -/* per-type auxiliary state */ -struct btf_dump_type_aux_state { - /* topological sorting state */ - enum btf_dump_type_order_state order_state: 2; - /* emitting state used to determine the need for forward declaration */ - enum btf_dump_type_emit_state emit_state: 2; - /* whether forward declaration was already emitted */ - __u8 fwd_emitted: 1; - /* whether unique non-duplicate name was already assigned */ - __u8 name_resolved: 1; - /* whether type is referenced from any other type */ - __u8 referenced: 1; -}; - -struct btf_dump { - const struct btf *btf; - const struct btf_ext *btf_ext; - btf_dump_printf_fn_t printf_fn; - struct btf_dump_opts opts; - - /* per-type auxiliary state */ - struct btf_dump_type_aux_state *type_states; - /* per-type optional cached unique name, must be freed, if present */ - const char **cached_names; - - /* topo-sorted list of dependent type definitions */ - __u32 *emit_queue; - int emit_queue_cap; - int emit_queue_cnt; - - /* - * stack of type declarations (e.g., chain of modifiers, arrays, - * funcs, etc) - */ - __u32 *decl_stack; - int decl_stack_cap; - int decl_stack_cnt; - - /* maps struct/union/enum name to a number of name occurrences */ - struct hashmap *type_names; - /* - * maps typedef identifiers and enum value names to a number of such - * name occurrences - */ - struct hashmap *ident_names; -}; - -static size_t str_hash_fn(const void *key, void *ctx) -{ - const char *s = key; - size_t h = 0; - - while (*s) { - h = h * 31 + *s; - s++; - } - return h; -} - -static bool str_equal_fn(const void *a, const void *b, void *ctx) -{ - return strcmp(a, b) == 0; -} - -static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) -{ - return btf__name_by_offset(d->btf, name_off); -} - -static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) 
-{ - va_list args; - - va_start(args, fmt); - d->printf_fn(d->opts.ctx, fmt, args); - va_end(args); -} - -struct btf_dump *btf_dump__new(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn) -{ - struct btf_dump *d; - int err; - - d = calloc(1, sizeof(struct btf_dump)); - if (!d) - return ERR_PTR(-ENOMEM); - - d->btf = btf; - d->btf_ext = btf_ext; - d->printf_fn = printf_fn; - d->opts.ctx = opts ? opts->ctx : NULL; - - d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); - if (IS_ERR(d->type_names)) { - err = PTR_ERR(d->type_names); - d->type_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); - } - d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); - if (IS_ERR(d->ident_names)) { - err = PTR_ERR(d->ident_names); - d->ident_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); - } - - return d; -} - -void btf_dump__free(struct btf_dump *d) -{ - int i, cnt; - - if (!d) - return; - - free(d->type_states); - if (d->cached_names) { - /* any set cached name is owned by us and should be freed */ - for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) { - if (d->cached_names[i]) - free((void *)d->cached_names[i]); - } - } - free(d->cached_names); - free(d->emit_queue); - free(d->decl_stack); - hashmap__free(d->type_names); - hashmap__free(d->ident_names); - - free(d); -} - -static int btf_dump_mark_referenced(struct btf_dump *d); -static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); -static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); - -/* - * Dump BTF type in compilable C syntax, including all the dependent types - * necessary for compilation. If some of the dependent types were already - * emitted as part of a previous btf_dump__dump_type() invocation for another - * type, they won't be emitted again. This API allows callers to filter out - * BTF types according to user-defined criteria and emit only the minimal - * subset of types necessary to compile everything. Full struct/union - * definitions will still be emitted, even if the only usage is through a - * pointer and could be satisfied with just a forward declaration. - * - * Dumping is done in two high-level passes: - * 1. Topologically sort type definitions to satisfy C rules of compilation. - * 2. Emit type definitions in C syntax. - * - * Returns 0 on success; <0, otherwise. - */ -int btf_dump__dump_type(struct btf_dump *d, __u32 id) -{ - int err, i; - - if (id > btf__get_nr_types(d->btf)) - return -EINVAL; - - /* type states are lazily allocated, as they might not be needed */ - if (!d->type_states) { - d->type_states = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->type_states[0])); - if (!d->type_states) - return -ENOMEM; - d->cached_names = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->cached_names[0])); - if (!d->cached_names) - return -ENOMEM; - - /* VOID is special */ - d->type_states[0].order_state = ORDERED; - d->type_states[0].emit_state = EMITTED; - - /* eagerly determine referenced types for anon enums */ - err = btf_dump_mark_referenced(d); - if (err) - return err; - } - - d->emit_queue_cnt = 0; - err = btf_dump_order_type(d, id, false); - if (err < 0) - return err; - - for (i = 0; i < d->emit_queue_cnt; i++) - btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/); - - return 0; -} - -/* - * Mark all types that are referenced from any other type.
This is used to - * determine top-level anonymous enums that need to be emitted as - * independent type declarations. - * Anonymous enums come in two flavors: either embedded in a struct's field - * definition, in which case they have to be declared inline as part of the field's - * type declaration; or as a top-level anonymous enum, typically used for - * declaring global constants. It's impossible to distinguish between the two - * without knowing whether a given enum type was referenced from another type: - * a top-level anonymous enum won't be referenced by anything, while an embedded - * one will. - */ -static int btf_dump_mark_referenced(struct btf_dump *d) -{ - int i, j, n = btf__get_nr_types(d->btf); - const struct btf_type *t; - __u16 vlen; - - for (i = 1; i <= n; i++) { - t = btf__type_by_id(d->btf, i); - vlen = btf_vlen(t); - - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_ENUM: - case BTF_KIND_FWD: - break; - - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - d->type_states[t->type].referenced = 1; - break; - - case BTF_KIND_ARRAY: { - const struct btf_array *a = btf_array(t); - - d->type_states[a->index_type].referenced = 1; - d->type_states[a->type].referenced = 1; - break; - } - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - const struct btf_member *m = btf_members(t); - - for (j = 0; j < vlen; j++, m++) - d->type_states[m->type].referenced = 1; - break; - } - case BTF_KIND_FUNC_PROTO: { - const struct btf_param *p = btf_params(t); - - for (j = 0; j < vlen; j++, p++) - d->type_states[p->type].referenced = 1; - break; - } - case BTF_KIND_DATASEC: { - const struct btf_var_secinfo *v = btf_var_secinfos(t); - - for (j = 0; j < vlen; j++, v++) - d->type_states[v->type].referenced = 1; - break; - } - default: - return -EINVAL; - } - } - return 0; -} -static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) -{ - __u32 *new_queue; - size_t new_cap; - - if (d->emit_queue_cnt >= d->emit_queue_cap) { - new_cap = max(16, d->emit_queue_cap * 3 / 2); - new_queue = realloc(d->emit_queue, - new_cap * sizeof(new_queue[0])); - if (!new_queue) - return -ENOMEM; - d->emit_queue = new_queue; - d->emit_queue_cap = new_cap; - } - - d->emit_queue[d->emit_queue_cnt++] = id; - return 0; -} - -/* - * Determine order of emitting dependent types and specified type to satisfy - * C compilation rules. This is done through topological sorting with an - * additional complication which comes from C rules. The main idea for C is - * that if some type is "embedded" into a struct/union, its size needs to be - * known at the time of definition of the containing type. E.g., for: - * - * struct A {}; - * struct B { struct A x; }; - * - * struct A *HAS* to be defined before struct B, because it's "embedded", - * i.e., it is part of struct B layout. But in the following case: - * - * struct A; - * struct B { struct A *x; }; - * struct A {}; - * - * it's enough to just have a forward declaration of struct A at the time of - * struct B definition, as struct B has a pointer to struct A, so the size of - * field x is known without knowing struct A size: it's sizeof(void *).
- * - * Unfortunately, there are some trickier cases we need to handle, e.g.: - * - * struct A {}; // if this was forward-declaration: compilation error - * struct B { - * struct { // anonymous struct - * struct A y; - * } *x; - * }; - * - * In this case, struct B's field x is a pointer, so its size is known - * regardless of the size of (anonymous) struct it points to. But because this - * struct is anonymous and thus defined inline inside struct B, *and* it - * embeds struct A, the compiler requires the full definition of struct A to be known - * before struct B can be defined. This creates a transitive dependency - * between struct A and struct B. If struct A was forward-declared before - * struct B definition and fully defined after struct B definition, that would - * trigger a compilation error. - * - * All this means that while we are doing topological sorting on the BTF type - * graph, we need to determine relationships between different types (graph - * nodes): - * - weak link (relationship) between X and Y, if Y *CAN* be - * forward-declared at the point of X definition; - * - strong link, if Y *HAS* to be fully-defined before X can be defined. - * - * The rule is as follows. Given a chain of BTF types from X to Y, if there is - * BTF_KIND_PTR type in the chain and at least one non-anonymous type - * Z (excluding X, including Y), then link is weak. Otherwise, it's strong. - * Weak/strong relationship is determined recursively during DFS traversal and - * is returned as a result from btf_dump_order_type(). - * - * btf_dump_order_type() is trying to avoid unnecessary forward declarations, - * but it does not guarantee that no extraneous forward declarations will be - * emitted. - * - * To avoid extra work, the algorithm marks some BTF types as ORDERED when - * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT, - * ARRAY, FUNC_PROTO), as the weak/strong semantics for those depend on the - * entire graph path, so depending on where one came from to reach that BTF type, it - * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM, - * once they are processed, there is no need to do it again, so they are - * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces a - * weak link, unless the subsequently referenced STRUCT/UNION/ENUM is anonymous. But - * in any case, once those are processed, no need to do it again, as the - * result won't change. - * - * Returns: - * - 1, if type is part of strong link (so there are strong topological - * ordering requirements); - * - 0, if type is part of weak link (so it can be satisfied through a forward - * declaration); - * - <0, on error (e.g., unsatisfiable type loop detected). - */ -static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) -{ - /* - * Order state is used to detect strong link cycles, but only for BTF - * kinds that are or could be an independent definition (i.e., - * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays, - * func_protos, modifiers are just means to get to these definitions. - * Int/void don't need definitions, they are assumed to be always - * properly defined. We also ignore datasec, var, and funcs for now. - * So for all non-defining kinds, we never even set ordering state, - * for defining kinds we set ORDERING and subsequently ORDERED if it - * forms a strong link.
- */ - struct btf_dump_type_aux_state *tstate = &d->type_states[id]; - const struct btf_type *t; - __u16 vlen; - int err, i; - - /* return true, letting typedefs know that it's ok to be emitted */ - if (tstate->order_state == ORDERED) - return 1; - - t = btf__type_by_id(d->btf, id); - - if (tstate->order_state == ORDERING) { - /* type loop, but resolvable through fwd declaration */ - if (btf_is_composite(t) && through_ptr && t->name_off != 0) - return 0; - pr_warn("unsatisfiable type cycle, id:[%u]\n", id); - return -ELOOP; - } - - switch (btf_kind(t)) { - case BTF_KIND_INT: - tstate->order_state = ORDERED; - return 0; - - case BTF_KIND_PTR: - err = btf_dump_order_type(d, t->type, true); - tstate->order_state = ORDERED; - return err; - - case BTF_KIND_ARRAY: - return btf_dump_order_type(d, btf_array(t)->type, through_ptr); - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - const struct btf_member *m = btf_members(t); - /* - * struct/union is part of strong link, only if it's embedded - * (so no ptr in a path) or it's anonymous (so has to be - * defined inline, even if declared through ptr) - */ - if (through_ptr && t->name_off != 0) - return 0; - - tstate->order_state = ORDERING; - - vlen = btf_vlen(t); - for (i = 0; i < vlen; i++, m++) { - err = btf_dump_order_type(d, m->type, false); - if (err < 0) - return err; - } - - if (t->name_off != 0) { - err = btf_dump_add_emit_queue_id(d, id); - if (err < 0) - return err; - } - - tstate->order_state = ORDERED; - return 1; - } - case BTF_KIND_ENUM: - case BTF_KIND_FWD: - /* - * non-anonymous or non-referenced enums are top-level - * declarations and should be emitted. Same logic can be - * applied to FWDs, it won't hurt anyways. - */ - if (t->name_off != 0 || !tstate->referenced) { - err = btf_dump_add_emit_queue_id(d, id); - if (err) - return err; - } - tstate->order_state = ORDERED; - return 1; - - case BTF_KIND_TYPEDEF: { - int is_strong; - - is_strong = btf_dump_order_type(d, t->type, through_ptr); - if (is_strong < 0) - return is_strong; - - /* typedef is similar to struct/union w.r.t. 
fwd-decls */ - if (through_ptr && !is_strong) - return 0; - - /* typedef is always a named definition */ - err = btf_dump_add_emit_queue_id(d, id); - if (err) - return err; - - d->type_states[id].order_state = ORDERED; - return 1; - } - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - return btf_dump_order_type(d, t->type, through_ptr); - - case BTF_KIND_FUNC_PROTO: { - const struct btf_param *p = btf_params(t); - bool is_strong; - - err = btf_dump_order_type(d, t->type, through_ptr); - if (err < 0) - return err; - is_strong = err > 0; - - vlen = btf_vlen(t); - for (i = 0; i < vlen; i++, p++) { - err = btf_dump_order_type(d, p->type, through_ptr); - if (err < 0) - return err; - if (err > 0) - is_strong = true; - } - return is_strong; - } - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - case BTF_KIND_DATASEC: - d->type_states[id].order_state = ORDERED; - return 0; - - default: - return -EINVAL; - } -} - -static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, - const struct btf_type *t); -static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, int lvl); - -static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, - const struct btf_type *t); -static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, int lvl); - -static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, - const struct btf_type *t); - -static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, int lvl); - -/* a local view into a shared stack */ -struct id_stack { - const __u32 *ids; - int cnt; -}; - -static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, - const char *fname, int lvl); -static void btf_dump_emit_type_chain(struct btf_dump *d, - struct id_stack *decl_stack, - const char *fname, int lvl); - -static const char *btf_dump_type_name(struct btf_dump *d, __u32 id); -static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id); -static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, - const char *orig_name); - -static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id) -{ - const struct btf_type *t = btf__type_by_id(d->btf, id); - - /* __builtin_va_list is a compiler built-in, which causes compilation - * errors when compiling with a different compiler than the one used to compile - * the original code (e.g., GCC to compile kernel, Clang to use generated - * C header from BTF). As it is a built-in, it should already be defined - * properly internally in the compiler. - */ - if (t->name_off == 0) - return false; - return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0; -} - -/* - * Emit C-syntax definitions of types from chains of BTF types. - * - * High-level determination of necessary forward declarations is handled - * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type - * declarations/definitions in C syntax are handled by a combo of - * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to - * corresponding btf_dump_emit_*_{def,fwd}() functions. - * - * We also keep track of "containing struct/union type ID" to determine when - * we reference it from inside and thus can avoid emitting an unnecessary forward - * declaration.
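The containing-type check is easiest to see with a self-referential struct (hypothetical type, not from this code base):

    struct list {
            struct list *next;   /* the member chain leads back to the
                                  * containing type (id == cont_id), so no
                                  * separate `struct list;` fwd-decl is needed */
    };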
- * - * This algorithm is designed in such a way that even if some error occurs - * (either technical, e.g., out of memory, or logical, i.e., malformed BTF - * that doesn't fully comply with C rules), the algorithm will try to proceed - * and produce as much meaningful output as possible. - */ -static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) -{ - struct btf_dump_type_aux_state *tstate = &d->type_states[id]; - bool top_level_def = cont_id == 0; - const struct btf_type *t; - __u16 kind; - - if (tstate->emit_state == EMITTED) - return; - - t = btf__type_by_id(d->btf, id); - kind = btf_kind(t); - - if (tstate->emit_state == EMITTING) { - if (tstate->fwd_emitted) - return; - - switch (kind) { - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - /* - * if we are referencing a struct/union that we are - * part of - then no need for fwd declaration - */ - if (id == cont_id) - return; - if (t->name_off == 0) { - pr_warn("anonymous struct/union loop, id:[%u]\n", - id); - return; - } - btf_dump_emit_struct_fwd(d, id, t); - btf_dump_printf(d, ";\n\n"); - tstate->fwd_emitted = 1; - break; - case BTF_KIND_TYPEDEF: - /* - * for typedef fwd_emitted means typedef definition - * was emitted, but it can be used only for "weak" - * references through a pointer, not for embedding - */ - if (!btf_dump_is_blacklisted(d, id)) { - btf_dump_emit_typedef_def(d, id, t, 0); - btf_dump_printf(d, ";\n\n"); - } - tstate->fwd_emitted = 1; - break; - default: - break; - } - - return; - } - - switch (kind) { - case BTF_KIND_INT: - tstate->emit_state = EMITTED; - break; - case BTF_KIND_ENUM: - if (top_level_def) { - btf_dump_emit_enum_def(d, id, t, 0); - btf_dump_printf(d, ";\n\n"); - } - tstate->emit_state = EMITTED; - break; - case BTF_KIND_PTR: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - btf_dump_emit_type(d, t->type, cont_id); - break; - case BTF_KIND_ARRAY: - btf_dump_emit_type(d, btf_array(t)->type, cont_id); - break; - case BTF_KIND_FWD: - btf_dump_emit_fwd_def(d, id, t); - btf_dump_printf(d, ";\n\n"); - tstate->emit_state = EMITTED; - break; - case BTF_KIND_TYPEDEF: - tstate->emit_state = EMITTING; - btf_dump_emit_type(d, t->type, id); - /* - * typedef can serve as both a definition and a forward - * declaration; at this stage someone depends on - * typedef as a forward declaration (refers to it - * through pointer), so unless we already did it, - * emit typedef as a forward declaration - */ - if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) { - btf_dump_emit_typedef_def(d, id, t, 0); - btf_dump_printf(d, ";\n\n"); - } - tstate->emit_state = EMITTED; - break; - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - tstate->emit_state = EMITTING; - /* if it's a top-level struct/union definition or struct/union - * is anonymous, then in C we'll be emitting all fields and - * their types (as opposed to just `struct X`), so we need to - * make sure that all types referenced from struct/union - * members have the necessary forward declarations, where - * applicable - */ - if (top_level_def || t->name_off == 0) { - const struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); - int i, new_cont_id; - - new_cont_id = t->name_off == 0 ?
cont_id : id; - for (i = 0; i < vlen; i++, m++) - btf_dump_emit_type(d, m->type, new_cont_id); - } else if (!tstate->fwd_emitted && id != cont_id) { - btf_dump_emit_struct_fwd(d, id, t); - btf_dump_printf(d, ";\n\n"); - tstate->fwd_emitted = 1; - } - - if (top_level_def) { - btf_dump_emit_struct_def(d, id, t, 0); - btf_dump_printf(d, ";\n\n"); - tstate->emit_state = EMITTED; - } else { - tstate->emit_state = NOT_EMITTED; - } - break; - case BTF_KIND_FUNC_PROTO: { - const struct btf_param *p = btf_params(t); - __u16 vlen = btf_vlen(t); - int i; - - btf_dump_emit_type(d, t->type, cont_id); - for (i = 0; i < vlen; i++, p++) - btf_dump_emit_type(d, p->type, cont_id); - - break; - } - default: - break; - } -} - -static int btf_align_of(const struct btf *btf, __u32 id) -{ - const struct btf_type *t = btf__type_by_id(btf, id); - __u16 kind = btf_kind(t); - - switch (kind) { - case BTF_KIND_INT: - case BTF_KIND_ENUM: - return min(sizeof(void *), t->size); - case BTF_KIND_PTR: - return sizeof(void *); - case BTF_KIND_TYPEDEF: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - return btf_align_of(btf, t->type); - case BTF_KIND_ARRAY: - return btf_align_of(btf, btf_array(t)->type); - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - const struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); - int i, align = 1; - - for (i = 0; i < vlen; i++, m++) - align = max(align, btf_align_of(btf, m->type)); - - return align; - } - default: - pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); - return 1; - } -} - -static bool btf_is_struct_packed(const struct btf *btf, __u32 id, - const struct btf_type *t) -{ - const struct btf_member *m; - int align, i, bit_sz; - __u16 vlen; - - align = btf_align_of(btf, id); - /* size of a non-packed struct has to be a multiple of its alignment*/ - if (t->size % align) - return true; - - m = btf_members(t); - vlen = btf_vlen(t); - /* all non-bitfield fields have to be naturally aligned */ - for (i = 0; i < vlen; i++, m++) { - align = btf_align_of(btf, m->type); - bit_sz = btf_member_bitfield_size(t, i); - if (bit_sz == 0 && m->offset % (8 * align) != 0) - return true; - } - - /* - * if original struct was marked as packed, but its layout is - * naturally aligned, we'll detect that it's not packed - */ - return false; -} - -static int chip_away_bits(int total, int at_most) -{ - return total % at_most ? : at_most; -} - -static void btf_dump_emit_bit_padding(const struct btf_dump *d, - int cur_off, int m_off, int m_bit_sz, - int align, int lvl) -{ - int off_diff = m_off - cur_off; - int ptr_bits = sizeof(void *) * 8; - - if (off_diff <= 0) - /* no gap */ - return; - if (m_bit_sz == 0 && off_diff < align * 8) - /* natural padding will take care of a gap */ - return; - - while (off_diff > 0) { - const char *pad_type; - int pad_bits; - - if (ptr_bits > 32 && off_diff > 32) { - pad_type = "long"; - pad_bits = chip_away_bits(off_diff, ptr_bits); - } else if (off_diff > 16) { - pad_type = "int"; - pad_bits = chip_away_bits(off_diff, 32); - } else if (off_diff > 8) { - pad_type = "short"; - pad_bits = chip_away_bits(off_diff, 16); - } else { - pad_type = "char"; - pad_bits = chip_away_bits(off_diff, 8); - } - btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); - off_diff -= pad_bits; - } -} - -static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, - const struct btf_type *t) -{ - btf_dump_printf(d, "%s %s", - btf_is_struct(t) ? 
"struct" : "union", - btf_dump_type_name(d, id)); -} - -static void btf_dump_emit_struct_def(struct btf_dump *d, - __u32 id, - const struct btf_type *t, - int lvl) -{ - const struct btf_member *m = btf_members(t); - bool is_struct = btf_is_struct(t); - int align, i, packed, off = 0; - __u16 vlen = btf_vlen(t); - - packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; - - btf_dump_printf(d, "%s%s%s {", - is_struct ? "struct" : "union", - t->name_off ? " " : "", - btf_dump_type_name(d, id)); - - for (i = 0; i < vlen; i++, m++) { - const char *fname; - int m_off, m_sz; - - fname = btf_name_of(d, m->name_off); - m_sz = btf_member_bitfield_size(t, i); - m_off = btf_member_bit_offset(t, i); - align = packed ? 1 : btf_align_of(d->btf, m->type); - - btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); - btf_dump_printf(d, "\n%s", pfx(lvl + 1)); - btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); - - if (m_sz) { - btf_dump_printf(d, ": %d", m_sz); - off = m_off + m_sz; - } else { - m_sz = max(0, btf__resolve_size(d->btf, m->type)); - off = m_off + m_sz * 8; - } - btf_dump_printf(d, ";"); - } - - /* pad at the end, if necessary */ - if (is_struct) { - align = packed ? 1 : btf_align_of(d->btf, id); - btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, - lvl + 1); - } - - if (vlen) - btf_dump_printf(d, "\n"); - btf_dump_printf(d, "%s}", pfx(lvl)); - if (packed) - btf_dump_printf(d, " __attribute__((packed))"); -} - -static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, - const struct btf_type *t) -{ - btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); -} - -static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, - int lvl) -{ - const struct btf_enum *v = btf_enum(t); - __u16 vlen = btf_vlen(t); - const char *name; - size_t dup_cnt; - int i; - - btf_dump_printf(d, "enum%s%s", - t->name_off ? " " : "", - btf_dump_type_name(d, id)); - - if (vlen) { - btf_dump_printf(d, " {"); - for (i = 0; i < vlen; i++, v++) { - name = btf_name_of(d, v->name_off); - /* enumerators share namespace with typedef idents */ - dup_cnt = btf_dump_name_dups(d, d->ident_names, name); - if (dup_cnt > 1) { - btf_dump_printf(d, "\n%s%s___%zu = %d,", - pfx(lvl + 1), name, dup_cnt, - (__s32)v->val); - } else { - btf_dump_printf(d, "\n%s%s = %d,", - pfx(lvl + 1), name, - (__s32)v->val); - } - } - btf_dump_printf(d, "\n%s}", pfx(lvl)); - } -} - -static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, - const struct btf_type *t) -{ - const char *name = btf_dump_type_name(d, id); - - if (btf_kflag(t)) - btf_dump_printf(d, "union %s", name); - else - btf_dump_printf(d, "struct %s", name); -} - -static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, int lvl) -{ - const char *name = btf_dump_ident_name(d, id); - - /* - * Old GCC versions are emitting invalid typedef for __gnuc_va_list - * pointing to VOID. This generates warnings from btf_dump() and - * results in uncompilable header file, so we are fixing it up here - * with valid typedef into __builtin_va_list. 
- */ - if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) { - btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list"); - return; - } - - btf_dump_printf(d, "typedef "); - btf_dump_emit_type_decl(d, t->type, name, lvl); -} - -static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) -{ - __u32 *new_stack; - size_t new_cap; - - if (d->decl_stack_cnt >= d->decl_stack_cap) { - new_cap = max(16, d->decl_stack_cap * 3 / 2); - new_stack = realloc(d->decl_stack, - new_cap * sizeof(new_stack[0])); - if (!new_stack) - return -ENOMEM; - d->decl_stack = new_stack; - d->decl_stack_cap = new_cap; - } - - d->decl_stack[d->decl_stack_cnt++] = id; - - return 0; -} - -/* - * Emit type declaration (e.g., field type declaration in a struct or argument - * declaration in function prototype) in correct C syntax. - * - * For most types it's trivial, but there are a few quirky type declaration - * cases worth mentioning: - * - function prototypes (especially nesting of function prototypes); - * - arrays; - * - const/volatile/restrict for pointers vs other types. - * - * For a good discussion of *PARSING* C syntax (as a human), see - * Peter van der Linden's "Expert C Programming: Deep C Secrets", - * Ch.3 "Unscrambling Declarations in C". - * - * It won't help with BTF to C conversion much, though, as it's the opposite - * problem. So we came up with this algorithm as the reverse of van der Linden's - * parsing algorithm. It goes from structured BTF representation of type - * declaration to a valid compilable C syntax. - * - * For instance, consider this C typedef: - * typedef const int * const * arr_t[10]; - * It will be represented in BTF with this chain of BTF types: - * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int] - * - * Notice how the [const] modifier always goes before the type it modifies in the BTF type - * graph, but in C syntax, const/volatile/restrict modifiers are written to - * the right of pointers, but to the left of other types. There are also other - * quirks, like function pointers, arrays of them, functions returning other - * functions, etc. - * - * We handle that by pushing all the types to a stack, until we hit "terminal" - * type (int/enum/struct/union/fwd). Then depending on the kind of a type on - * top of the stack, modifiers are handled differently. Arrays and function pointers - * also have wildly different syntax, as does the nesting of them. See - * code for authoritative definition. - * - * To avoid allocating a new stack for each independent chain of BTF types, we - * share one bigger stack, with each chain working only on its own local view - * of a stack frame. Some care is required to "pop" stack frames after - * processing a type declaration chain.
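As a complement to the arr_t example above, a function-pointer declaration walks the same machinery (hypothetical declaration):

    void (*cb)(int);
    /* The push loop stops at VOID, leaving the decl stack as
     * [ptr, func_proto, void]. Unwinding from the terminal type, "void"
     * is printed first; func_proto then finds the pending ptr on the
     * stack and parenthesizes it as "(*cb)"; finally the parameter list
     * is appended, yielding: void (*cb)(int) */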
- */ -static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, - const char *fname, int lvl) -{ - struct id_stack decl_stack; - const struct btf_type *t; - int err, stack_start; - - stack_start = d->decl_stack_cnt; - for (;;) { - err = btf_dump_push_decl_stack_id(d, id); - if (err < 0) { - /* - * if we don't have enough memory for entire type decl - * chain, restore stack, emit warning, and try to - * proceed nevertheless - */ - pr_warn("not enough memory for decl stack:%d", err); - d->decl_stack_cnt = stack_start; - return; - } - - /* VOID */ - if (id == 0) - break; - - t = btf__type_by_id(d->btf, id); - switch (btf_kind(t)) { - case BTF_KIND_PTR: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - case BTF_KIND_FUNC_PROTO: - id = t->type; - break; - case BTF_KIND_ARRAY: - id = btf_array(t)->type; - break; - case BTF_KIND_INT: - case BTF_KIND_ENUM: - case BTF_KIND_FWD: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_TYPEDEF: - goto done; - default: - pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", - btf_kind(t), id); - goto done; - } - } -done: - /* - * We might be inside a chain of declarations (e.g., array of function - * pointers returning anonymous (so inlined) structs, having another - * array field). Each of those needs its own "stack frame" to handle - * emitting of declarations. Those stack frames are non-overlapping - * portions of shared btf_dump->decl_stack. To make it a bit nicer to - * handle this set of nested stacks, we create a view corresponding to - * our own "stack frame" and work with it as an independent stack. - * We'll need to clean up after emit_type_chain() returns, though. - */ - decl_stack.ids = d->decl_stack + stack_start; - decl_stack.cnt = d->decl_stack_cnt - stack_start; - btf_dump_emit_type_chain(d, &decl_stack, fname, lvl); - /* - * emit_type_chain() guarantees that it will pop its entire decl_stack - * frame before returning. But it works with a read-only view into - * decl_stack, so it doesn't actually pop anything from the - * perspective of shared btf_dump->decl_stack, per se. We need to - * reset decl_stack state to how it was before us to avoid it growing - * all the time. - */ - d->decl_stack_cnt = stack_start; -} - -static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) -{ - const struct btf_type *t; - __u32 id; - - while (decl_stack->cnt) { - id = decl_stack->ids[decl_stack->cnt - 1]; - t = btf__type_by_id(d->btf, id); - - switch (btf_kind(t)) { - case BTF_KIND_VOLATILE: - btf_dump_printf(d, "volatile "); - break; - case BTF_KIND_CONST: - btf_dump_printf(d, "const "); - break; - case BTF_KIND_RESTRICT: - btf_dump_printf(d, "restrict "); - break; - default: - return; - } - decl_stack->cnt--; - } -} - -static void btf_dump_emit_name(const struct btf_dump *d, - const char *name, bool last_was_ptr) -{ - bool separate = name[0] && !last_was_ptr; - - btf_dump_printf(d, "%s%s", separate ? " " : "", name); -} - -static void btf_dump_emit_type_chain(struct btf_dump *d, - struct id_stack *decls, - const char *fname, int lvl) -{ - /* - * last_was_ptr is used to determine if we need to separate pointer - * asterisk (*) from previous part of type signature with space, so - * that we get `int ***`, instead of `int * * *`. We default to true - * for cases where we have single pointer in a chain. E.g., in ptr -> - * func_proto case. 
func_proto will start a new emit_type_chain call - * with just ptr, which should be emitted as (*) or (*<fname>), so we - * don't want to prepend space for that last pointer. - */ - bool last_was_ptr = true; - const struct btf_type *t; - const char *name; - __u16 kind; - __u32 id; - - while (decls->cnt) { - id = decls->ids[--decls->cnt]; - if (id == 0) { - /* VOID is a special snowflake */ - btf_dump_emit_mods(d, decls); - btf_dump_printf(d, "void"); - last_was_ptr = false; - continue; - } - - t = btf__type_by_id(d->btf, id); - kind = btf_kind(t); - - switch (kind) { - case BTF_KIND_INT: - btf_dump_emit_mods(d, decls); - name = btf_name_of(d, t->name_off); - btf_dump_printf(d, "%s", name); - break; - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - btf_dump_emit_mods(d, decls); - /* inline anonymous struct/union */ - if (t->name_off == 0) - btf_dump_emit_struct_def(d, id, t, lvl); - else - btf_dump_emit_struct_fwd(d, id, t); - break; - case BTF_KIND_ENUM: - btf_dump_emit_mods(d, decls); - /* inline anonymous enum */ - if (t->name_off == 0) - btf_dump_emit_enum_def(d, id, t, lvl); - else - btf_dump_emit_enum_fwd(d, id, t); - break; - case BTF_KIND_FWD: - btf_dump_emit_mods(d, decls); - btf_dump_emit_fwd_def(d, id, t); - break; - case BTF_KIND_TYPEDEF: - btf_dump_emit_mods(d, decls); - btf_dump_printf(d, "%s", btf_dump_ident_name(d, id)); - break; - case BTF_KIND_PTR: - btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *"); - break; - case BTF_KIND_VOLATILE: - btf_dump_printf(d, " volatile"); - break; - case BTF_KIND_CONST: - btf_dump_printf(d, " const"); - break; - case BTF_KIND_RESTRICT: - btf_dump_printf(d, " restrict"); - break; - case BTF_KIND_ARRAY: { - const struct btf_array *a = btf_array(t); - const struct btf_type *next_t; - __u32 next_id; - bool multidim; - /* - * GCC has a bug - * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354) - * which causes it to emit extra const/volatile - * modifiers for an array, if array's element type has - * const/volatile modifiers. Clang doesn't do that. - * In general, it doesn't seem very meaningful to have - * a const/volatile modifier for array, so we are - * going to silently skip them here. - */ - while (decls->cnt) { - next_id = decls->ids[decls->cnt - 1]; - next_t = btf__type_by_id(d->btf, next_id); - if (btf_is_mod(next_t)) - decls->cnt--; - else - break; - } - - if (decls->cnt == 0) { - btf_dump_emit_name(d, fname, last_was_ptr); - btf_dump_printf(d, "[%u]", a->nelems); - return; - } - - next_id = decls->ids[decls->cnt - 1]; - next_t = btf__type_by_id(d->btf, next_id); - multidim = btf_is_array(next_t); - /* we need space if we have named non-pointer */ - if (fname[0] && !last_was_ptr) - btf_dump_printf(d, " "); - /* no parentheses for multi-dimensional array */ - if (!multidim) - btf_dump_printf(d, "("); - btf_dump_emit_type_chain(d, decls, fname, lvl); - if (!multidim) - btf_dump_printf(d, ")"); - btf_dump_printf(d, "[%u]", a->nelems); - return; - } - case BTF_KIND_FUNC_PROTO: { - const struct btf_param *p = btf_params(t); - __u16 vlen = btf_vlen(t); - int i; - - btf_dump_emit_mods(d, decls); - if (decls->cnt) { - btf_dump_printf(d, " ("); - btf_dump_emit_type_chain(d, decls, fname, lvl); - btf_dump_printf(d, ")"); - } else { - btf_dump_emit_name(d, fname, last_was_ptr); - } - btf_dump_printf(d, "("); - /* - * Clang for BPF target generates func_proto with no - * args as a func_proto with a single void arg (e.g., - * `int (*f)(void)` vs just `int (*f)()`). We are - * going to pretend there are no args for such case. 
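 * The `vlen == 1 && p->type == 0` check right below matches exactly
 * that synthesized single VOID argument.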
- */ - if (vlen == 1 && p->type == 0) { - btf_dump_printf(d, ")"); - return; - } - - for (i = 0; i < vlen; i++, p++) { - if (i > 0) - btf_dump_printf(d, ", "); - - /* last arg of type void is vararg */ - if (i == vlen - 1 && p->type == 0) { - btf_dump_printf(d, "..."); - break; - } - - name = btf_name_of(d, p->name_off); - btf_dump_emit_type_decl(d, p->type, name, lvl); - } - - btf_dump_printf(d, ")"); - return; - } - default: - pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", - kind, id); - return; - } - - last_was_ptr = kind == BTF_KIND_PTR; - } - - btf_dump_emit_name(d, fname, last_was_ptr); -} - -/* return number of duplicates (occurrences) of a given name */ -static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, - const char *orig_name) -{ - size_t dup_cnt = 0; - - hashmap__find(name_map, orig_name, (void **)&dup_cnt); - dup_cnt++; - hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); - - return dup_cnt; -} - -static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, - struct hashmap *name_map) -{ - struct btf_dump_type_aux_state *s = &d->type_states[id]; - const struct btf_type *t = btf__type_by_id(d->btf, id); - const char *orig_name = btf_name_of(d, t->name_off); - const char **cached_name = &d->cached_names[id]; - size_t dup_cnt; - - if (t->name_off == 0) - return ""; - - if (s->name_resolved) - return *cached_name ? *cached_name : orig_name; - - dup_cnt = btf_dump_name_dups(d, name_map, orig_name); - if (dup_cnt > 1) { - const size_t max_len = 256; - char new_name[max_len]; - - snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt); - *cached_name = strdup(new_name); - } - - s->name_resolved = 1; - return *cached_name ? *cached_name : orig_name; -} - -static const char *btf_dump_type_name(struct btf_dump *d, __u32 id) -{ - return btf_dump_resolve_name(d, id, d->type_names); -} - -static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) -{ - return btf_dump_resolve_name(d, id, d->ident_names); -} diff --git a/src/contrib/libbpf/bpf/hashmap.c b/src/contrib/libbpf/bpf/hashmap.c deleted file mode 100644 index 612227294..000000000 --- a/src/contrib/libbpf/bpf/hashmap.c +++ /dev/null @@ -1,229 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * Generic non-thread safe hash map implementation. 
- * - * Copyright (c) 2019 Facebook - */ -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <errno.h> -#include <linux/err.h> -#include "hashmap.h" - -/* start with 4 buckets */ -#define HASHMAP_MIN_CAP_BITS 2 - -static void hashmap_add_entry(struct hashmap_entry **pprev, - struct hashmap_entry *entry) -{ - entry->next = *pprev; - *pprev = entry; -} - -static void hashmap_del_entry(struct hashmap_entry **pprev, - struct hashmap_entry *entry) -{ - *pprev = entry->next; - entry->next = NULL; -} - -void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, - hashmap_equal_fn equal_fn, void *ctx) -{ - map->hash_fn = hash_fn; - map->equal_fn = equal_fn; - map->ctx = ctx; - - map->buckets = NULL; - map->cap = 0; - map->cap_bits = 0; - map->sz = 0; -} - -struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, - hashmap_equal_fn equal_fn, - void *ctx) -{ - struct hashmap *map = malloc(sizeof(struct hashmap)); - - if (!map) - return ERR_PTR(-ENOMEM); - hashmap__init(map, hash_fn, equal_fn, ctx); - return map; -} - -void hashmap__clear(struct hashmap *map) -{ - free(map->buckets); - map->cap = map->cap_bits = map->sz = 0; -} - -void hashmap__free(struct hashmap *map) -{ - if (!map) - return; - - hashmap__clear(map); - free(map); -} - -size_t hashmap__size(const struct hashmap *map) -{ - return map->sz; -} - -size_t hashmap__capacity(const struct hashmap *map) -{ - return map->cap; -} - -static bool hashmap_needs_to_grow(struct hashmap *map) -{ - /* grow if empty or more than 75% filled */ - return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap); -} - -static int hashmap_grow(struct hashmap *map) -{ - struct hashmap_entry **new_buckets; - struct hashmap_entry *cur, *tmp; - size_t new_cap_bits, new_cap; - size_t h; - int bkt; - - new_cap_bits = map->cap_bits + 1; - if (new_cap_bits < HASHMAP_MIN_CAP_BITS) - new_cap_bits = HASHMAP_MIN_CAP_BITS; - - new_cap = 1UL << new_cap_bits; - new_buckets = calloc(new_cap, sizeof(new_buckets[0])); - if (!new_buckets) - return -ENOMEM; - - hashmap__for_each_entry_safe(map, cur, tmp, bkt) { - h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits); - hashmap_add_entry(&new_buckets[h], cur); - } - - map->cap = new_cap; - map->cap_bits = new_cap_bits; - free(map->buckets); - map->buckets = new_buckets; - - return 0; -} - -static bool hashmap_find_entry(const struct hashmap *map, - const void *key, size_t hash, - struct hashmap_entry ***pprev, - struct hashmap_entry **entry) -{ - struct hashmap_entry *cur, **prev_ptr; - - if (!map->buckets) - return false; - - for (prev_ptr = &map->buckets[hash], cur = *prev_ptr; - cur; - prev_ptr = &cur->next, cur = cur->next) { - if (map->equal_fn(cur->key, key, map->ctx)) { - if (pprev) - *pprev = prev_ptr; - *entry = cur; - return true; - } - } - - return false; -} - -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value) -{ - struct hashmap_entry *entry; - size_t h; - int err; - - if (old_key) - *old_key = NULL; - if (old_value) - *old_value = NULL; - - h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); - if (strategy != HASHMAP_APPEND && - hashmap_find_entry(map, key, h, NULL, &entry)) { - if (old_key) - *old_key = entry->key; - if (old_value) - *old_value = entry->value; - - if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) { - entry->key = key; - entry->value = value; - return 0; - } else if (strategy == HASHMAP_ADD) { - return -EEXIST; - } - } - - if (strategy == 
HASHMAP_UPDATE) - return -ENOENT; - - if (hashmap_needs_to_grow(map)) { - err = hashmap_grow(map); - if (err) - return err; - h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); - } - - entry = malloc(sizeof(struct hashmap_entry)); - if (!entry) - return -ENOMEM; - - entry->key = key; - entry->value = value; - hashmap_add_entry(&map->buckets[h], entry); - map->sz++; - - return 0; -} - -bool hashmap__find(const struct hashmap *map, const void *key, void **value) -{ - struct hashmap_entry *entry; - size_t h; - - h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); - if (!hashmap_find_entry(map, key, h, NULL, &entry)) - return false; - - if (value) - *value = entry->value; - return true; -} - -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value) -{ - struct hashmap_entry **pprev, *entry; - size_t h; - - h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); - if (!hashmap_find_entry(map, key, h, &pprev, &entry)) - return false; - - if (old_key) - *old_key = entry->key; - if (old_value) - *old_value = entry->value; - - hashmap_del_entry(pprev, entry); - free(entry); - map->sz--; - - return true; -} - diff --git a/src/contrib/libbpf/bpf/hashmap.h b/src/contrib/libbpf/bpf/hashmap.h deleted file mode 100644 index bae8879cd..000000000 --- a/src/contrib/libbpf/bpf/hashmap.h +++ /dev/null @@ -1,178 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * Generic non-thread safe hash map implementation. - * - * Copyright (c) 2019 Facebook - */ -#ifndef __LIBBPF_HASHMAP_H -#define __LIBBPF_HASHMAP_H - -#include <stdbool.h> -#include <stddef.h> -#ifdef __GLIBC__ -#include <bits/wordsize.h> -#else -#include <bits/reg.h> -#endif -#include "libbpf_internal.h" - -static inline size_t hash_bits(size_t h, int bits) -{ - /* shuffle bits and return requested number of upper bits */ - return (h * 11400714819323198485llu) >> (__WORDSIZE - bits); -} - -typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); -typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); - -struct hashmap_entry { - const void *key; - void *value; - struct hashmap_entry *next; -}; - -struct hashmap { - hashmap_hash_fn hash_fn; - hashmap_equal_fn equal_fn; - void *ctx; - - struct hashmap_entry **buckets; - size_t cap; - size_t cap_bits; - size_t sz; -}; - -#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ - .hash_fn = (hash_fn), \ - .equal_fn = (equal_fn), \ - .ctx = (ctx), \ - .buckets = NULL, \ - .cap = 0, \ - .cap_bits = 0, \ - .sz = 0, \ -} - -void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, - hashmap_equal_fn equal_fn, void *ctx); -struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, - hashmap_equal_fn equal_fn, - void *ctx); -void hashmap__clear(struct hashmap *map); -void hashmap__free(struct hashmap *map); - -size_t hashmap__size(const struct hashmap *map); -size_t hashmap__capacity(const struct hashmap *map); - -/* - * Hashmap insertion strategy: - * - HASHMAP_ADD - only add key/value if key doesn't exist yet; - * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise, - * update value; - * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do - * nothing and return -ENOENT; - * - HASHMAP_APPEND - always add key/value pair, even if key already exists. - * This turns hashmap into a multimap by allowing multiple values to be - * associated with the same key. Most useful read API for such hashmap is - * hashmap__for_each_key_entry() iteration. 
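 *   As a small usage sketch (hypothetical consume() callback):
 *     hashmap__append(map, key, val1);
 *     hashmap__append(map, key, val2);
 *     hashmap__for_each_key_entry(map, cur, key)
 *             consume(cur->value);  // sees val2 first (newest entry)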
If hashmap__find() is still - * used, it will return last inserted key/value entry (first in a bucket - * chain). - */ -enum hashmap_insert_strategy { - HASHMAP_ADD, - HASHMAP_SET, - HASHMAP_UPDATE, - HASHMAP_APPEND, -}; - -/* - * hashmap__insert() adds key/value entry w/ various semantics, depending on - * provided strategy value. If a given key/value pair replaced already - * existing key/value pair, both old key and old value will be returned - * through old_key and old_value to allow calling code do proper memory - * management. - */ -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value); - -static inline int hashmap__add(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); -} - -static inline int hashmap__set(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_SET, - old_key, old_value); -} - -static inline int hashmap__update(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_UPDATE, - old_key, old_value); -} - -static inline int hashmap__append(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); -} - -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value); - -bool hashmap__find(const struct hashmap *map, const void *key, void **value); - -/* - * hashmap__for_each_entry - iterate over all entries in hashmap - * @map: hashmap to iterate - * @cur: struct hashmap_entry * used as a loop cursor - * @bkt: integer used as a bucket loop cursor - */ -#define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) - -/* - * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe - * against removals - * @map: hashmap to iterate - * @cur: struct hashmap_entry * used as a loop cursor - * @tmp: struct hashmap_entry * used as a temporary next cursor storage - * @bkt: integer used as a bucket loop cursor - */ -#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ - cur && ({tmp = cur->next; true; }); \ - cur = tmp) - -/* - * hashmap__for_each_key_entry - iterate over entries associated with given key - * @map: hashmap to iterate - * @cur: struct hashmap_entry * used as a loop cursor - * @key: key to iterate entries for - */ -#define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ - cur; \ - cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) - -#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? 
map->buckets[bkt] : NULL; }); \ - cur && ({ tmp = cur->next; true; }); \ - cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) - -#endif /* __LIBBPF_HASHMAP_H */ diff --git a/src/contrib/libbpf/bpf/libbpf.c b/src/contrib/libbpf/bpf/libbpf.c deleted file mode 100644 index 29d8d03aa..000000000 --- a/src/contrib/libbpf/bpf/libbpf.c +++ /dev/null @@ -1,6581 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * Common eBPF ELF object loading operations. - * - * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. - * Copyright (C) 2017 Nicira, Inc. - * Copyright (C) 2019 Isovalent, Inc. - */ - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <libgen.h> -#include <inttypes.h> -#include <string.h> -#include <unistd.h> -#include <endian.h> -#include <fcntl.h> -#include <errno.h> -#include <asm/unistd.h> -#include <linux/err.h> -#include <linux/kernel.h> -#include <linux/bpf.h> -#include <linux/btf.h> -#include <linux/filter.h> -#include <linux/list.h> -#include <linux/limits.h> -#include <linux/perf_event.h> -#include <linux/ring_buffer.h> -#include <linux/version.h> -#include <sys/epoll.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/vfs.h> -#include <sys/utsname.h> -#include <libelf.h> -#include <gelf.h> - -#include "libbpf.h" -#include "bpf.h" -#include "btf.h" -#include "str_error.h" -#include "libbpf_internal.h" -#include "hashmap.h" - -#ifndef EM_BPF -#define EM_BPF 247 -#endif - -#ifndef BPF_FS_MAGIC -#define BPF_FS_MAGIC 0xcafe4a11 -#endif - -/* vsprintf() in __base_pr() uses nonliteral format string. It may break - * compilation if user enables corresponding warning. Disable it explicitly. - */ -#pragma GCC diagnostic ignored "-Wformat-nonliteral" - -#define __printf(a, b) __attribute__((format(printf, a, b))) - -static int __base_pr(enum libbpf_print_level level, const char *format, - va_list args) -{ - if (level == LIBBPF_DEBUG) - return 0; - - return vfprintf(stderr, format, args); -} - -static libbpf_print_fn_t __libbpf_pr = __base_pr; - -libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) -{ - libbpf_print_fn_t old_print_fn = __libbpf_pr; - - __libbpf_pr = fn; - return old_print_fn; -} - -__printf(2, 3) -void libbpf_print(enum libbpf_print_level level, const char *format, ...) -{ - va_list args; - - if (!__libbpf_pr) - return; - - va_start(args, format); - __libbpf_pr(level, format, args); - va_end(args); -} - -#define STRERR_BUFSIZE 128 - -#define CHECK_ERR(action, err, out) do { \ - err = action; \ - if (err) \ - goto out; \ -} while (0) - - -/* Copied from tools/perf/util/util.h */ -#ifndef zfree -# define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) -#endif - -#ifndef zclose -# define zclose(fd) ({ \ - int ___err = 0; \ - if ((fd) >= 0) \ - ___err = close((fd)); \ - fd = -1; \ - ___err; }) -#endif - -#ifdef HAVE_LIBELF_MMAP_SUPPORT -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP -#else -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ -#endif - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64) (unsigned long) ptr; -} - -struct bpf_capabilities { - /* v4.14: kernel support for program & map names. */ - __u32 name:1; - /* v5.2: kernel support for global data sections. 
*/ - __u32 global_data:1; - /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ - __u32 btf_func:1; - /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ - __u32 btf_datasec:1; - /* BPF_F_MMAPABLE is supported for arrays */ - __u32 array_mmap:1; -}; - -/* - * bpf_prog should be a better name but it has been used in - * linux/filter.h. - */ -struct bpf_program { - /* Index in elf obj file, for relocation use. */ - int idx; - char *name; - int prog_ifindex; - char *section_name; - /* section_name with / replaced by _; makes recursive pinning - * in bpf_object__pin_programs easier - */ - char *pin_name; - struct bpf_insn *insns; - size_t insns_cnt, main_prog_cnt; - enum bpf_prog_type type; - - struct reloc_desc { - enum { - RELO_LD64, - RELO_CALL, - RELO_DATA, - } type; - int insn_idx; - int map_idx; - int sym_off; - } *reloc_desc; - int nr_reloc; - int log_level; - - struct { - int nr; - int *fds; - } instances; - bpf_program_prep_t preprocessor; - - struct bpf_object *obj; - void *priv; - bpf_program_clear_priv_t clear_priv; - - enum bpf_attach_type expected_attach_type; - __u32 attach_btf_id; - __u32 attach_prog_fd; - void *func_info; - __u32 func_info_rec_size; - __u32 func_info_cnt; - - struct bpf_capabilities *caps; - - void *line_info; - __u32 line_info_rec_size; - __u32 line_info_cnt; - __u32 prog_flags; -}; - -enum libbpf_map_type { - LIBBPF_MAP_UNSPEC, - LIBBPF_MAP_DATA, - LIBBPF_MAP_BSS, - LIBBPF_MAP_RODATA, -}; - -static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = ".data", - [LIBBPF_MAP_BSS] = ".bss", - [LIBBPF_MAP_RODATA] = ".rodata", -}; - -struct bpf_map { - int fd; - char *name; - int sec_idx; - size_t sec_offset; - int map_ifindex; - int inner_map_fd; - struct bpf_map_def def; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - void *priv; - bpf_map_clear_priv_t clear_priv; - enum libbpf_map_type libbpf_type; - char *pin_path; - bool pinned; - bool reused; -}; - -struct bpf_secdata { - void *rodata; - void *data; -}; - -static LIST_HEAD(bpf_objects_list); - -struct bpf_object { - char name[BPF_OBJ_NAME_LEN]; - char license[64]; - __u32 kern_version; - - struct bpf_program *programs; - size_t nr_programs; - struct bpf_map *maps; - size_t nr_maps; - size_t maps_cap; - struct bpf_secdata sections; - - bool loaded; - bool has_pseudo_calls; - bool relaxed_core_relocs; - - /* - * Information when doing elf related work. Only valid if fd - * is valid. - */ - struct { - int fd; - const void *obj_buf; - size_t obj_buf_sz; - Elf *elf; - GElf_Ehdr ehdr; - Elf_Data *symbols; - Elf_Data *data; - Elf_Data *rodata; - Elf_Data *bss; - size_t strtabidx; - struct { - GElf_Shdr shdr; - Elf_Data *data; - } *reloc_sects; - int nr_reloc_sects; - int maps_shndx; - int btf_maps_shndx; - int text_shndx; - int data_shndx; - int rodata_shndx; - int bss_shndx; - } efile; - /* - * All loaded bpf_object is linked in a list, which is - * hidden to caller. bpf_objects__<func> handlers deal with - * all objects. - */ - struct list_head list; - - struct btf *btf; - struct btf_ext *btf_ext; - - void *priv; - bpf_object_clear_priv_t clear_priv; - - struct bpf_capabilities caps; - - char path[]; -}; -#define obj_elf_valid(o) ((o)->efile.elf) - -void bpf_program__unload(struct bpf_program *prog) -{ - int i; - - if (!prog) - return; - - /* - * If the object is opened but the program was never loaded, - * it is possible that prog->instances.nr == -1. 
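 * To summarize the cases handled below: nr > 0 means that many loaded
 * instances to close, nr == -1 means the program was never loaded, and
 * any other value is an internal accounting error that only gets
 * warned about.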
- */ - if (prog->instances.nr > 0) { - for (i = 0; i < prog->instances.nr; i++) - zclose(prog->instances.fds[i]); - } else if (prog->instances.nr != -1) { - pr_warn("Internal error: instances.nr is %d\n", - prog->instances.nr); - } - - prog->instances.nr = -1; - zfree(&prog->instances.fds); - - zfree(&prog->func_info); - zfree(&prog->line_info); -} - -static void bpf_program__exit(struct bpf_program *prog) -{ - if (!prog) - return; - - if (prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = NULL; - prog->clear_priv = NULL; - - bpf_program__unload(prog); - zfree(&prog->name); - zfree(&prog->section_name); - zfree(&prog->pin_name); - zfree(&prog->insns); - zfree(&prog->reloc_desc); - - prog->nr_reloc = 0; - prog->insns_cnt = 0; - prog->idx = -1; -} - -static char *__bpf_program__pin_name(struct bpf_program *prog) -{ - char *name, *p; - - name = p = strdup(prog->section_name); - while ((p = strchr(p, '/'))) - *p = '_'; - - return name; -} - -static int -bpf_program__init(void *data, size_t size, char *section_name, int idx, - struct bpf_program *prog) -{ - const size_t bpf_insn_sz = sizeof(struct bpf_insn); - - if (size == 0 || size % bpf_insn_sz) { - pr_warn("corrupted section '%s', size: %zu\n", - section_name, size); - return -EINVAL; - } - - memset(prog, 0, sizeof(*prog)); - - prog->section_name = strdup(section_name); - if (!prog->section_name) { - pr_warn("failed to alloc name for prog under section(%d) %s\n", - idx, section_name); - goto errout; - } - - prog->pin_name = __bpf_program__pin_name(prog); - if (!prog->pin_name) { - pr_warn("failed to alloc pin name for prog under section(%d) %s\n", - idx, section_name); - goto errout; - } - - prog->insns = malloc(size); - if (!prog->insns) { - pr_warn("failed to alloc insns for prog under section %s\n", - section_name); - goto errout; - } - prog->insns_cnt = size / bpf_insn_sz; - memcpy(prog->insns, data, size); - prog->idx = idx; - prog->instances.fds = NULL; - prog->instances.nr = -1; - prog->type = BPF_PROG_TYPE_UNSPEC; - - return 0; -errout: - bpf_program__exit(prog); - return -ENOMEM; -} - -static int -bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - char *section_name, int idx) -{ - struct bpf_program prog, *progs; - int nr_progs, err; - - err = bpf_program__init(data, size, section_name, idx, &prog); - if (err) - return err; - - prog.caps = &obj->caps; - progs = obj->programs; - nr_progs = obj->nr_programs; - - progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); - if (!progs) { - /* - * In this case the original obj->programs - * is still valid, so don't need special treat for - * bpf_close_object(). 
- */ - pr_warn("failed to alloc a new program under section '%s'\n", - section_name); - bpf_program__exit(&prog); - return -ENOMEM; - } - - pr_debug("found program %s\n", prog.section_name); - obj->programs = progs; - obj->nr_programs = nr_progs + 1; - prog.obj = obj; - progs[nr_progs] = prog; - return 0; -} - -static int -bpf_object__init_prog_names(struct bpf_object *obj) -{ - Elf_Data *symbols = obj->efile.symbols; - struct bpf_program *prog; - size_t pi, si; - - for (pi = 0; pi < obj->nr_programs; pi++) { - const char *name = NULL; - - prog = &obj->programs[pi]; - - for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; - si++) { - GElf_Sym sym; - - if (!gelf_getsym(symbols, si, &sym)) - continue; - if (sym.st_shndx != prog->idx) - continue; - if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) - continue; - - name = elf_strptr(obj->efile.elf, - obj->efile.strtabidx, - sym.st_name); - if (!name) { - pr_warn("failed to get sym name string for prog %s\n", - prog->section_name); - return -LIBBPF_ERRNO__LIBELF; - } - } - - if (!name && prog->idx == obj->efile.text_shndx) - name = ".text"; - - if (!name) { - pr_warn("failed to find sym for prog %s\n", - prog->section_name); - return -EINVAL; - } - - prog->name = strdup(name); - if (!prog->name) { - pr_warn("failed to allocate memory for prog sym %s\n", - name); - return -ENOMEM; - } - } - - return 0; -} - -static __u32 get_kernel_version(void) -{ - __u32 major, minor, patch; - struct utsname info; - - uname(&info); - if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) - return 0; - return KERNEL_VERSION(major, minor, patch); -} - -static struct bpf_object *bpf_object__new(const char *path, - const void *obj_buf, - size_t obj_buf_sz, - const char *obj_name) -{ - struct bpf_object *obj; - char *end; - - obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); - if (!obj) { - pr_warn("alloc memory failed for %s\n", path); - return ERR_PTR(-ENOMEM); - } - - strcpy(obj->path, path); - if (obj_name) { - strncpy(obj->name, obj_name, sizeof(obj->name) - 1); - obj->name[sizeof(obj->name) - 1] = 0; - } else { - /* Using basename() GNU version which doesn't modify arg. */ - strncpy(obj->name, basename((void *)path), - sizeof(obj->name) - 1); - end = strchr(obj->name, '.'); - if (end) - *end = 0; - } - - obj->efile.fd = -1; - /* - * Caller of this function should also call - * bpf_object__elf_finish() after data collection to return - * obj_buf to user. If not, we should duplicate the buffer to - * avoid user freeing them before elf finish. 
- */ - obj->efile.obj_buf = obj_buf; - obj->efile.obj_buf_sz = obj_buf_sz; - obj->efile.maps_shndx = -1; - obj->efile.btf_maps_shndx = -1; - obj->efile.data_shndx = -1; - obj->efile.rodata_shndx = -1; - obj->efile.bss_shndx = -1; - - obj->kern_version = get_kernel_version(); - obj->loaded = false; - - INIT_LIST_HEAD(&obj->list); - list_add(&obj->list, &bpf_objects_list); - return obj; -} - -static void bpf_object__elf_finish(struct bpf_object *obj) -{ - if (!obj_elf_valid(obj)) - return; - - if (obj->efile.elf) { - elf_end(obj->efile.elf); - obj->efile.elf = NULL; - } - obj->efile.symbols = NULL; - obj->efile.data = NULL; - obj->efile.rodata = NULL; - obj->efile.bss = NULL; - - zfree(&obj->efile.reloc_sects); - obj->efile.nr_reloc_sects = 0; - zclose(obj->efile.fd); - obj->efile.obj_buf = NULL; - obj->efile.obj_buf_sz = 0; -} - -static int bpf_object__elf_init(struct bpf_object *obj) -{ - int err = 0; - GElf_Ehdr *ep; - - if (obj_elf_valid(obj)) { - pr_warn("elf init: internal error\n"); - return -LIBBPF_ERRNO__LIBELF; - } - - if (obj->efile.obj_buf_sz > 0) { - /* - * obj_buf should have been validated by - * bpf_object__open_buffer(). - */ - obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, - obj->efile.obj_buf_sz); - } else { - obj->efile.fd = open(obj->path, O_RDONLY); - if (obj->efile.fd < 0) { - char errmsg[STRERR_BUFSIZE], *cp; - - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to open %s: %s\n", obj->path, cp); - return err; - } - - obj->efile.elf = elf_begin(obj->efile.fd, - LIBBPF_ELF_C_READ_MMAP, NULL); - } - - if (!obj->efile.elf) { - pr_warn("failed to open %s as ELF file\n", obj->path); - err = -LIBBPF_ERRNO__LIBELF; - goto errout; - } - - if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { - pr_warn("failed to get EHDR from %s\n", obj->path); - err = -LIBBPF_ERRNO__FORMAT; - goto errout; - } - ep = &obj->efile.ehdr; - - /* Old LLVM set e_machine to EM_NONE */ - if (ep->e_type != ET_REL || - (ep->e_machine && ep->e_machine != EM_BPF)) { - pr_warn("%s is not an eBPF object file\n", obj->path); - err = -LIBBPF_ERRNO__FORMAT; - goto errout; - } - - return 0; -errout: - bpf_object__elf_finish(obj); - return err; -} - -static int bpf_object__check_endianness(struct bpf_object *obj) -{ -#if __BYTE_ORDER == __LITTLE_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB) - return 0; -#elif __BYTE_ORDER == __BIG_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB) - return 0; -#else -# error "Unrecognized __BYTE_ORDER__" -#endif - pr_warn("endianness mismatch.\n"); - return -LIBBPF_ERRNO__ENDIAN; -} - -static int -bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) -{ - memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); - pr_debug("license of %s is %s\n", obj->path, obj->license); - return 0; -} - -static int -bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) -{ - __u32 kver; - - if (size != sizeof(kver)) { - pr_warn("invalid kver section in %s\n", obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - memcpy(&kver, data, sizeof(kver)); - obj->kern_version = kver; - pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); - return 0; -} - -static int compare_bpf_map(const void *_a, const void *_b) -{ - const struct bpf_map *a = _a; - const struct bpf_map *b = _b; - - if (a->sec_idx != b->sec_idx) - return a->sec_idx - b->sec_idx; - return a->sec_offset - b->sec_offset; -} - -static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) -{ - if (type 
== BPF_MAP_TYPE_ARRAY_OF_MAPS || - type == BPF_MAP_TYPE_HASH_OF_MAPS) - return true; - return false; -} - -static int bpf_object_search_section_size(const struct bpf_object *obj, - const char *name, size_t *d_size) -{ - const GElf_Ehdr *ep = &obj->efile.ehdr; - Elf *elf = obj->efile.elf; - Elf_Scn *scn = NULL; - int idx = 0; - - while ((scn = elf_nextscn(elf, scn)) != NULL) { - const char *sec_name; - Elf_Data *data; - GElf_Shdr sh; - - idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - pr_warn("failed to get section(%d) header from %s\n", - idx, obj->path); - return -EIO; - } - - sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); - if (!sec_name) { - pr_warn("failed to get section(%d) name from %s\n", - idx, obj->path); - return -EIO; - } - - if (strcmp(name, sec_name)) - continue; - - data = elf_getdata(scn, 0); - if (!data) { - pr_warn("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); - return -EIO; - } - - *d_size = data->d_size; - return 0; - } - - return -ENOENT; -} - -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size) -{ - int ret = -ENOENT; - size_t d_size; - - *size = 0; - if (!name) { - return -EINVAL; - } else if (!strcmp(name, ".data")) { - if (obj->efile.data) - *size = obj->efile.data->d_size; - } else if (!strcmp(name, ".bss")) { - if (obj->efile.bss) - *size = obj->efile.bss->d_size; - } else if (!strcmp(name, ".rodata")) { - if (obj->efile.rodata) - *size = obj->efile.rodata->d_size; - } else { - ret = bpf_object_search_section_size(obj, name, &d_size); - if (!ret) - *size = d_size; - } - - return *size ? 0 : ret; -} - -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off) -{ - Elf_Data *symbols = obj->efile.symbols; - const char *sname; - size_t si; - - if (!name || !off) - return -EINVAL; - - for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { - GElf_Sym sym; - - if (!gelf_getsym(symbols, si, &sym)) - continue; - if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || - GELF_ST_TYPE(sym.st_info) != STT_OBJECT) - continue; - - sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); - if (!sname) { - pr_warn("failed to get sym name string for var %s\n", - name); - return -EIO; - } - if (strcmp(name, sname) == 0) { - *off = sym.st_value; - return 0; - } - } - - return -ENOENT; -} - -static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) -{ - struct bpf_map *new_maps; - size_t new_cap; - int i; - - if (obj->nr_maps < obj->maps_cap) - return &obj->maps[obj->nr_maps++]; - - new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); - if (!new_maps) { - pr_warn("alloc maps for object failed\n"); - return ERR_PTR(-ENOMEM); - } - - obj->maps_cap = new_cap; - obj->maps = new_maps; - - /* zero out new maps */ - memset(obj->maps + obj->nr_maps, 0, - (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); - /* - * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) - * when failure (zclose won't close negative fd)). 
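 * Put differently: fd 0 is a valid descriptor (stdin), so it cannot
 * serve as the "not open" sentinel; -1 can, and zclose() skips
 * negative fds.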
- */ - for (i = obj->nr_maps; i < obj->maps_cap; i++) { - obj->maps[i].fd = -1; - obj->maps[i].inner_map_fd = -1; - } - - return &obj->maps[obj->nr_maps++]; -} - -static int -bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, Elf_Data *data, void **data_buff) -{ - char map_name[BPF_OBJ_NAME_LEN]; - struct bpf_map_def *def; - struct bpf_map *map; - - map = bpf_object__add_map(obj); - if (IS_ERR(map)) - return PTR_ERR(map); - - map->libbpf_type = type; - map->sec_idx = sec_idx; - map->sec_offset = 0; - snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, - libbpf_type_to_btf_name[type]); - map->name = strdup(map_name); - if (!map->name) { - pr_warn("failed to alloc map name\n"); - return -ENOMEM; - } - - def = &map->def; - def->type = BPF_MAP_TYPE_ARRAY; - def->key_size = sizeof(int); - def->value_size = data->d_size; - def->max_entries = 1; - def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; - if (obj->caps.array_mmap) - def->map_flags |= BPF_F_MMAPABLE; - - pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", - map_name, map->sec_idx, map->sec_offset, def->map_flags); - - if (data_buff) { - *data_buff = malloc(data->d_size); - if (!*data_buff) { - zfree(&map->name); - pr_warn("failed to alloc map content buffer\n"); - return -ENOMEM; - } - memcpy(*data_buff, data->d_buf, data->d_size); - } - - pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); - return 0; -} - -static int bpf_object__init_global_data_maps(struct bpf_object *obj) -{ - int err; - - if (!obj->caps.global_data) - return 0; - /* - * Populate obj->maps with libbpf internal maps. - */ - if (obj->efile.data_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, - obj->efile.data_shndx, - obj->efile.data, - &obj->sections.data); - if (err) - return err; - } - if (obj->efile.rodata_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, - obj->efile.rodata_shndx, - obj->efile.rodata, - &obj->sections.rodata); - if (err) - return err; - } - if (obj->efile.bss_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, - obj->efile.bss_shndx, - obj->efile.bss, NULL); - if (err) - return err; - } - return 0; -} - -static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) -{ - Elf_Data *symbols = obj->efile.symbols; - int i, map_def_sz = 0, nr_maps = 0, nr_syms; - Elf_Data *data = NULL; - Elf_Scn *scn; - - if (obj->efile.maps_shndx < 0) - return 0; - - if (!symbols) - return -EINVAL; - - scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); - if (!scn || !data) { - pr_warn("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); - return -EINVAL; - } - - /* - * Count number of maps. Each map has a name. - * Array of maps is not supported: only the first element is - * considered. - * - * TODO: Detect array of map and report error. 
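 *
 * For orientation, a legacy map definition living in the "maps"
 * section looks roughly like this (a hypothetical sketch, assuming
 * the usual SEC() helper macro):
 *     struct bpf_map_def SEC("maps") my_map = {
 *             .type        = BPF_MAP_TYPE_HASH,
 *             .key_size    = sizeof(__u32),
 *             .value_size  = sizeof(__u64),
 *             .max_entries = 4096,
 *     };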
- */ - nr_syms = symbols->d_size / sizeof(GElf_Sym); - for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; - - if (!gelf_getsym(symbols, i, &sym)) - continue; - if (sym.st_shndx != obj->efile.maps_shndx) - continue; - nr_maps++; - } - /* Assume equally sized map definitions */ - pr_debug("maps in %s: %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); - - if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { - pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); - return -EINVAL; - } - map_def_sz = data->d_size / nr_maps; - - /* Fill obj->maps using data in "maps" section. */ - for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; - const char *map_name; - struct bpf_map_def *def; - struct bpf_map *map; - - if (!gelf_getsym(symbols, i, &sym)) - continue; - if (sym.st_shndx != obj->efile.maps_shndx) - continue; - - map = bpf_object__add_map(obj); - if (IS_ERR(map)) - return PTR_ERR(map); - - map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name); - if (!map_name) { - pr_warn("failed to get map #%d name sym string for obj %s\n", - i, obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - - map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym.st_shndx; - map->sec_offset = sym.st_value; - pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", - map_name, map->sec_idx, map->sec_offset); - if (sym.st_value + map_def_sz > data->d_size) { - pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", - obj->path, map_name); - return -EINVAL; - } - - map->name = strdup(map_name); - if (!map->name) { - pr_warn("failed to alloc map name\n"); - return -ENOMEM; - } - pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym.st_value); - /* - * If the definition of the map in the object file fits in - * bpf_map_def, copy it. Any extra fields in our version - * of bpf_map_def will default to zero as a result of the - * calloc above. - */ - if (map_def_sz <= sizeof(struct bpf_map_def)) { - memcpy(&map->def, def, map_def_sz); - } else { - /* - * Here the map structure being read is bigger than what - * we expect, truncate if the excess bits are all zero. - * If they are not zero, reject this map as - * incompatible. - */ - char *b; - - for (b = ((char *)def) + sizeof(struct bpf_map_def); - b < ((char *)def) + map_def_sz; b++) { - if (*b != 0) { - pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n", - obj->path, map_name); - if (strict) - return -EINVAL; - } - } - memcpy(&map->def, def, sizeof(struct bpf_map_def)); - } - } - return 0; -} - -static const struct btf_type * -skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) -{ - const struct btf_type *t = btf__type_by_id(btf, id); - - if (res_id) - *res_id = id; - - while (btf_is_mod(t) || btf_is_typedef(t)) { - if (res_id) - *res_id = t->type; - t = btf__type_by_id(btf, t->type); - } - - return t; -} - -/* - * Fetch integer attribute of BTF map definition. Such attributes are - * represented using a pointer to an array, in which dimensionality of array - * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; - * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF - * type definition, while using only sizeof(void *) space in ELF data section. 
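 *
 * For example, a hypothetical BTF-defined map using this encoding
 * (the field names are the ones the parser below looks for):
 *     struct {
 *             int (*type)[BPF_MAP_TYPE_HASH];
 *             int (*max_entries)[4096];
 *             __u32 *key;
 *             __u64 *value;
 *     } my_map SEC(".maps");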
- */ -static bool get_map_field_int(const char *map_name, const struct btf *btf, - const struct btf_type *def, - const struct btf_member *m, __u32 *res) -{ - const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); - const char *name = btf__name_by_offset(btf, m->name_off); - const struct btf_array *arr_info; - const struct btf_type *arr_t; - - if (!btf_is_ptr(t)) { - pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, btf_kind(t)); - return false; - } - - arr_t = btf__type_by_id(btf, t->type); - if (!arr_t) { - pr_warn("map '%s': attr '%s': type [%u] not found.\n", - map_name, name, t->type); - return false; - } - if (!btf_is_array(arr_t)) { - pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, btf_kind(arr_t)); - return false; - } - arr_info = btf_array(arr_t); - *res = arr_info->nelems; - return true; -} - -static int build_map_pin_path(struct bpf_map *map, const char *path) -{ - char buf[PATH_MAX]; - int err, len; - - if (!path) - path = "/sys/fs/bpf"; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); - if (len < 0) - return -EINVAL; - else if (len >= PATH_MAX) - return -ENAMETOOLONG; - - err = bpf_map__set_pin_path(map, buf); - if (err) - return err; - - return 0; -} - -static int bpf_object__init_user_btf_map(struct bpf_object *obj, - const struct btf_type *sec, - int var_idx, int sec_idx, - const Elf_Data *data, bool strict, - const char *pin_root_path) -{ - const struct btf_type *var, *def, *t; - const struct btf_var_secinfo *vi; - const struct btf_var *var_extra; - const struct btf_member *m; - const char *map_name; - struct bpf_map *map; - int vlen, i; - - vi = btf_var_secinfos(sec) + var_idx; - var = btf__type_by_id(obj->btf, vi->type); - var_extra = btf_var(var); - map_name = btf__name_by_offset(obj->btf, var->name_off); - vlen = btf_vlen(var); - - if (map_name == NULL || map_name[0] == '\0') { - pr_warn("map #%d: empty name.\n", var_idx); - return -EINVAL; - } - if ((__u64)vi->offset + vi->size > data->d_size) { - pr_warn("map '%s' BTF data is corrupted.\n", map_name); - return -EINVAL; - } - if (!btf_is_var(var)) { - pr_warn("map '%s': unexpected var kind %u.\n", - map_name, btf_kind(var)); - return -EINVAL; - } - if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && - var_extra->linkage != BTF_VAR_STATIC) { - pr_warn("map '%s': unsupported var linkage %u.\n", - map_name, var_extra->linkage); - return -EOPNOTSUPP; - } - - def = skip_mods_and_typedefs(obj->btf, var->type, NULL); - if (!btf_is_struct(def)) { - pr_warn("map '%s': unexpected def kind %u.\n", - map_name, btf_kind(var)); - return -EINVAL; - } - if (def->size > vi->size) { - pr_warn("map '%s': invalid def size.\n", map_name); - return -EINVAL; - } - - map = bpf_object__add_map(obj); - if (IS_ERR(map)) - return PTR_ERR(map); - map->name = strdup(map_name); - if (!map->name) { - pr_warn("map '%s': failed to alloc map name.\n", map_name); - return -ENOMEM; - } - map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->def.type = BPF_MAP_TYPE_UNSPEC; - map->sec_idx = sec_idx; - map->sec_offset = vi->offset; - pr_debug("map '%s': at sec_idx %d, offset %zu.\n", - map_name, map->sec_idx, map->sec_offset); - - vlen = btf_vlen(def); - m = btf_members(def); - for (i = 0; i < vlen; i++, m++) { - const char *name = btf__name_by_offset(obj->btf, m->name_off); - - if (!name) { - pr_warn("map '%s': invalid field #%d.\n", map_name, i); - return -EINVAL; - } - if (strcmp(name, "type") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, - &map->def.type)) - 
return -EINVAL; - pr_debug("map '%s': found type = %u.\n", - map_name, map->def.type); - } else if (strcmp(name, "max_entries") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, - &map->def.max_entries)) - return -EINVAL; - pr_debug("map '%s': found max_entries = %u.\n", - map_name, map->def.max_entries); - } else if (strcmp(name, "map_flags") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, - &map->def.map_flags)) - return -EINVAL; - pr_debug("map '%s': found map_flags = %u.\n", - map_name, map->def.map_flags); - } else if (strcmp(name, "key_size") == 0) { - __u32 sz; - - if (!get_map_field_int(map_name, obj->btf, def, m, - &sz)) - return -EINVAL; - pr_debug("map '%s': found key_size = %u.\n", - map_name, sz); - if (map->def.key_size && map->def.key_size != sz) { - pr_warn("map '%s': conflicting key size %u != %u.\n", - map_name, map->def.key_size, sz); - return -EINVAL; - } - map->def.key_size = sz; - } else if (strcmp(name, "key") == 0) { - __s64 sz; - - t = btf__type_by_id(obj->btf, m->type); - if (!t) { - pr_warn("map '%s': key type [%d] not found.\n", - map_name, m->type); - return -EINVAL; - } - if (!btf_is_ptr(t)) { - pr_warn("map '%s': key spec is not PTR: %u.\n", - map_name, btf_kind(t)); - return -EINVAL; - } - sz = btf__resolve_size(obj->btf, t->type); - if (sz < 0) { - pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); - return sz; - } - pr_debug("map '%s': found key [%u], sz = %lld.\n", - map_name, t->type, sz); - if (map->def.key_size && map->def.key_size != sz) { - pr_warn("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); - return -EINVAL; - } - map->def.key_size = sz; - map->btf_key_type_id = t->type; - } else if (strcmp(name, "value_size") == 0) { - __u32 sz; - - if (!get_map_field_int(map_name, obj->btf, def, m, - &sz)) - return -EINVAL; - pr_debug("map '%s': found value_size = %u.\n", - map_name, sz); - if (map->def.value_size && map->def.value_size != sz) { - pr_warn("map '%s': conflicting value size %u != %u.\n", - map_name, map->def.value_size, sz); - return -EINVAL; - } - map->def.value_size = sz; - } else if (strcmp(name, "value") == 0) { - __s64 sz; - - t = btf__type_by_id(obj->btf, m->type); - if (!t) { - pr_warn("map '%s': value type [%d] not found.\n", - map_name, m->type); - return -EINVAL; - } - if (!btf_is_ptr(t)) { - pr_warn("map '%s': value spec is not PTR: %u.\n", - map_name, btf_kind(t)); - return -EINVAL; - } - sz = btf__resolve_size(obj->btf, t->type); - if (sz < 0) { - pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); - return sz; - } - pr_debug("map '%s': found value [%u], sz = %lld.\n", - map_name, t->type, sz); - if (map->def.value_size && map->def.value_size != sz) { - pr_warn("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); - return -EINVAL; - } - map->def.value_size = sz; - map->btf_value_type_id = t->type; - } else if (strcmp(name, "pinning") == 0) { - __u32 val; - int err; - - if (!get_map_field_int(map_name, obj->btf, def, m, - &val)) - return -EINVAL; - pr_debug("map '%s': found pinning = %u.\n", - map_name, val); - - if (val != LIBBPF_PIN_NONE && - val != LIBBPF_PIN_BY_NAME) { - pr_warn("map '%s': invalid pinning value %u.\n", - map_name, val); - return -EINVAL; - } - if (val == LIBBPF_PIN_BY_NAME) { - err = build_map_pin_path(map, pin_root_path); - if (err) { - pr_warn("map '%s': couldn't build pin path.\n", - map_name); - return err; - } - } - } else { - 
if (strict) { - pr_warn("map '%s': unknown field '%s'.\n", - map_name, name); - return -ENOTSUP; - } - pr_debug("map '%s': ignoring unknown field '%s'.\n", - map_name, name); - } - } - - if (map->def.type == BPF_MAP_TYPE_UNSPEC) { - pr_warn("map '%s': map type isn't specified.\n", map_name); - return -EINVAL; - } - - return 0; -} - -static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, - const char *pin_root_path) -{ - const struct btf_type *sec = NULL; - int nr_types, i, vlen, err; - const struct btf_type *t; - const char *name; - Elf_Data *data; - Elf_Scn *scn; - - if (obj->efile.btf_maps_shndx < 0) - return 0; - - scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); - if (!scn || !data) { - pr_warn("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); - return -EINVAL; - } - - nr_types = btf__get_nr_types(obj->btf); - for (i = 1; i <= nr_types; i++) { - t = btf__type_by_id(obj->btf, i); - if (!btf_is_datasec(t)) - continue; - name = btf__name_by_offset(obj->btf, t->name_off); - if (strcmp(name, MAPS_ELF_SEC) == 0) { - sec = t; - break; - } - } - - if (!sec) { - pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); - return -ENOENT; - } - - vlen = btf_vlen(sec); - for (i = 0; i < vlen; i++) { - err = bpf_object__init_user_btf_map(obj, sec, i, - obj->efile.btf_maps_shndx, - data, strict, - pin_root_path); - if (err) - return err; - } - - return 0; -} - -static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) -{ - bool strict = !relaxed_maps; - int err; - - err = bpf_object__init_user_maps(obj, strict); - if (err) - return err; - - err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); - if (err) - return err; - - err = bpf_object__init_global_data_maps(obj); - if (err) - return err; - - if (obj->nr_maps) { - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), - compare_bpf_map); - } - return 0; -} - -static bool section_have_execinstr(struct bpf_object *obj, int idx) -{ - Elf_Scn *scn; - GElf_Shdr sh; - - scn = elf_getscn(obj->efile.elf, idx); - if (!scn) - return false; - - if (gelf_getshdr(scn, &sh) != &sh) - return false; - - if (sh.sh_flags & SHF_EXECINSTR) - return true; - - return false; -} - -static void bpf_object__sanitize_btf(struct bpf_object *obj) -{ - bool has_datasec = obj->caps.btf_datasec; - bool has_func = obj->caps.btf_func; - struct btf *btf = obj->btf; - struct btf_type *t; - int i, j, vlen; - - if (!obj->btf || (has_func && has_datasec)) - return; - - for (i = 1; i <= btf__get_nr_types(btf); i++) { - t = (struct btf_type *)btf__type_by_id(btf, i); - - if (!has_datasec && btf_is_var(t)) { - /* replace VAR with INT */ - t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); - /* - * using size = 1 is the safest choice, 4 will be too - * big and cause kernel BTF validation failure if - * original variable took less than 4 bytes - */ - t->size = 1; - *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); - } else if (!has_datasec && btf_is_datasec(t)) { - /* replace DATASEC with STRUCT */ - const struct btf_var_secinfo *v = btf_var_secinfos(t); - struct btf_member *m = btf_members(t); - struct btf_type *vt; - char *name; - - name = (char *)btf__name_by_offset(btf, t->name_off); - while (*name) { - if (*name == '.') - *name = '_'; - name++; - } - - vlen = btf_vlen(t); - t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); - for (j = 0; j < vlen; j++, v++, m++) { - /* order of field assignments is important */ - m->offset = 
v->offset * 8; - m->type = v->type; - /* preserve variable name as member name */ - vt = (void *)btf__type_by_id(btf, v->type); - m->name_off = vt->name_off; - } - } else if (!has_func && btf_is_func_proto(t)) { - /* replace FUNC_PROTO with ENUM */ - vlen = btf_vlen(t); - t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); - t->size = sizeof(__u32); /* kernel enforced */ - } else if (!has_func && btf_is_func(t)) { - /* replace FUNC with TYPEDEF */ - t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); - } - } -} - -static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) -{ - if (!obj->btf_ext) - return; - - if (!obj->caps.btf_func) { - btf_ext__free(obj->btf_ext); - obj->btf_ext = NULL; - } -} - -static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) -{ - return obj->efile.btf_maps_shndx >= 0; -} - -static int bpf_object__init_btf(struct bpf_object *obj, - Elf_Data *btf_data, - Elf_Data *btf_ext_data) -{ - bool btf_required = bpf_object__is_btf_mandatory(obj); - int err = 0; - - if (btf_data) { - obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); - if (IS_ERR(obj->btf)) { - pr_warn("Error loading ELF section %s: %d.\n", - BTF_ELF_SEC, err); - goto out; - } - err = btf__finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - goto out; - } - } - if (btf_ext_data) { - if (!obj->btf) { - pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", - BTF_EXT_ELF_SEC, BTF_ELF_SEC); - goto out; - } - obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, - btf_ext_data->d_size); - if (IS_ERR(obj->btf_ext)) { - pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); - obj->btf_ext = NULL; - goto out; - } - } -out: - if (err || IS_ERR(obj->btf)) { - if (btf_required) - err = err ? : PTR_ERR(obj->btf); - else - err = 0; - if (!IS_ERR_OR_NULL(obj->btf)) - btf__free(obj->btf); - obj->btf = NULL; - } - if (btf_required && !obj->btf) { - pr_warn("BTF is required, but is missing or corrupted.\n"); - return err == 0 ? -ENOENT : err; - } - return 0; -} - -static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) -{ - int err = 0; - - if (!obj->btf) - return 0; - - bpf_object__sanitize_btf(obj); - bpf_object__sanitize_btf_ext(obj); - - err = btf__load(obj->btf); - if (err) { - pr_warn("Error loading %s into kernel: %d.\n", - BTF_ELF_SEC, err); - btf__free(obj->btf); - obj->btf = NULL; - /* btf_ext can't exist without btf, so free it as well */ - if (obj->btf_ext) { - btf_ext__free(obj->btf_ext); - obj->btf_ext = NULL; - } - - if (bpf_object__is_btf_mandatory(obj)) - return err; - } - return 0; -} - -static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) -{ - Elf *elf = obj->efile.elf; - GElf_Ehdr *ep = &obj->efile.ehdr; - Elf_Data *btf_ext_data = NULL; - Elf_Data *btf_data = NULL; - Elf_Scn *scn = NULL; - int idx = 0, err = 0; - - /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ - if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { - pr_warn("failed to get e_shstrndx from %s\n", obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - - while ((scn = elf_nextscn(elf, scn)) != NULL) { - char *name; - GElf_Shdr sh; - Elf_Data *data; - - idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - pr_warn("failed to get section(%d) header from %s\n", - idx, obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - - name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); - if (!name) { - pr_warn("failed to get section(%d) name from %s\n", - idx, obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - - data = elf_getdata(scn, 0); - if (!data) { - pr_warn("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", - idx, name, (unsigned long)data->d_size, - (int)sh.sh_link, (unsigned long)sh.sh_flags, - (int)sh.sh_type); - - if (strcmp(name, "license") == 0) { - err = bpf_object__init_license(obj, - data->d_buf, - data->d_size); - if (err) - return err; - } else if (strcmp(name, "version") == 0) { - err = bpf_object__init_kversion(obj, - data->d_buf, - data->d_size); - if (err) - return err; - } else if (strcmp(name, "maps") == 0) { - obj->efile.maps_shndx = idx; - } else if (strcmp(name, MAPS_ELF_SEC) == 0) { - obj->efile.btf_maps_shndx = idx; - } else if (strcmp(name, BTF_ELF_SEC) == 0) { - btf_data = data; - } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { - btf_ext_data = data; - } else if (sh.sh_type == SHT_SYMTAB) { - if (obj->efile.symbols) { - pr_warn("bpf: multiple SYMTAB in %s\n", - obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - obj->efile.symbols = data; - obj->efile.strtabidx = sh.sh_link; - } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { - if (sh.sh_flags & SHF_EXECINSTR) { - if (strcmp(name, ".text") == 0) - obj->efile.text_shndx = idx; - err = bpf_object__add_program(obj, data->d_buf, - data->d_size, - name, idx); - if (err) { - char errmsg[STRERR_BUFSIZE]; - char *cp; - - cp = libbpf_strerror_r(-err, errmsg, - sizeof(errmsg)); - pr_warn("failed to alloc program %s (%s): %s", - name, obj->path, cp); - return err; - } - } else if (strcmp(name, ".data") == 0) { - obj->efile.data = data; - obj->efile.data_shndx = idx; - } else if (strcmp(name, ".rodata") == 0) { - obj->efile.rodata = data; - obj->efile.rodata_shndx = idx; - } else { - pr_debug("skip section(%d) %s\n", idx, name); - } - } else if (sh.sh_type == SHT_REL) { - int nr_sects = obj->efile.nr_reloc_sects; - void *sects = obj->efile.reloc_sects; - int sec = sh.sh_info; /* points to other section */ - - /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, sec)) { - pr_debug("skip relo %s(%d) for section(%d)\n", - name, idx, sec); - continue; - } - - sects = reallocarray(sects, nr_sects + 1, - sizeof(*obj->efile.reloc_sects)); - if (!sects) { - pr_warn("reloc_sects realloc failed\n"); - return -ENOMEM; - } - - obj->efile.reloc_sects = sects; - obj->efile.nr_reloc_sects++; - - obj->efile.reloc_sects[nr_sects].shdr = sh; - obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { - obj->efile.bss = data; - obj->efile.bss_shndx = idx; - } else { - pr_debug("skip section(%d) %s\n", idx, name); - } - } - - if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { - pr_warn("Corrupted ELF file: index of strtab invalid\n"); - return -LIBBPF_ERRNO__FORMAT; - } - err = bpf_object__init_btf(obj, btf_data, 
btf_ext_data); - if (!err) - err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); - if (!err) - err = bpf_object__sanitize_and_load_btf(obj); - if (!err) - err = bpf_object__init_prog_names(obj); - return err; -} - -static struct bpf_program * -bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) -{ - struct bpf_program *prog; - size_t i; - - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (prog->idx == idx) - return prog; - } - return NULL; -} - -struct bpf_program * -bpf_object__find_program_by_title(const struct bpf_object *obj, - const char *title) -{ - struct bpf_program *pos; - - bpf_object__for_each_program(pos, obj) { - if (pos->section_name && !strcmp(pos->section_name, title)) - return pos; - } - return NULL; -} - -static bool bpf_object__shndx_is_data(const struct bpf_object *obj, - int shndx) -{ - return shndx == obj->efile.data_shndx || - shndx == obj->efile.bss_shndx || - shndx == obj->efile.rodata_shndx; -} - -static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, - int shndx) -{ - return shndx == obj->efile.maps_shndx || - shndx == obj->efile.btf_maps_shndx; -} - -static enum libbpf_map_type -bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) -{ - if (shndx == obj->efile.data_shndx) - return LIBBPF_MAP_DATA; - else if (shndx == obj->efile.bss_shndx) - return LIBBPF_MAP_BSS; - else if (shndx == obj->efile.rodata_shndx) - return LIBBPF_MAP_RODATA; - else - return LIBBPF_MAP_UNSPEC; -} - -static int bpf_program__record_reloc(struct bpf_program *prog, - struct reloc_desc *reloc_desc, - __u32 insn_idx, const char *name, - const GElf_Sym *sym, const GElf_Rel *rel) -{ - struct bpf_insn *insn = &prog->insns[insn_idx]; - size_t map_idx, nr_maps = prog->obj->nr_maps; - struct bpf_object *obj = prog->obj; - __u32 shdr_idx = sym->st_shndx; - enum libbpf_map_type type; - struct bpf_map *map; - - /* sub-program call relocation */ - if (insn->code == (BPF_JMP | BPF_CALL)) { - if (insn->src_reg != BPF_PSEUDO_CALL) { - pr_warn("incorrect bpf_call opcode\n"); - return -LIBBPF_ERRNO__RELOC; - } - /* text_shndx can be 0, if no default "main" program exists */ - if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { - pr_warn("bad call relo against section %u\n", shdr_idx); - return -LIBBPF_ERRNO__RELOC; - } - if (sym->st_value % 8) { - pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); - return -LIBBPF_ERRNO__RELOC; - } - reloc_desc->type = RELO_CALL; - reloc_desc->insn_idx = insn_idx; - reloc_desc->sym_off = sym->st_value; - obj->has_pseudo_calls = true; - return 0; - } - - if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { - pr_warn("invalid relo for insns[%d].code 0x%x\n", - insn_idx, insn->code); - return -LIBBPF_ERRNO__RELOC; - } - if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { - pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", - name, shdr_idx); - return -LIBBPF_ERRNO__RELOC; - } - - type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); - - /* generic map reference relocation */ - if (type == LIBBPF_MAP_UNSPEC) { - if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { - pr_warn("bad map relo against section %u\n", - shdr_idx); - return -LIBBPF_ERRNO__RELOC; - } - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - map = &obj->maps[map_idx]; - if (map->libbpf_type != type || - map->sec_idx != sym->st_shndx || - map->sec_offset != sym->st_value) - continue; - pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n", - map_idx, map->name, 
map->sec_idx, - map->sec_offset, insn_idx); - break; - } - if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %llu\n", - shdr_idx, (__u64)sym->st_value); - return -LIBBPF_ERRNO__RELOC; - } - reloc_desc->type = RELO_LD64; - reloc_desc->insn_idx = insn_idx; - reloc_desc->map_idx = map_idx; - reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ - return 0; - } - - /* global data map relocation */ - if (!bpf_object__shndx_is_data(obj, shdr_idx)) { - pr_warn("bad data relo against section %u\n", shdr_idx); - return -LIBBPF_ERRNO__RELOC; - } - if (!obj->caps.global_data) { - pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", - name, insn_idx); - return -LIBBPF_ERRNO__RELOC; - } - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - map = &obj->maps[map_idx]; - if (map->libbpf_type != type) - continue; - pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n", - map_idx, map->name, map->sec_idx, map->sec_offset, - insn_idx); - break; - } - if (map_idx >= nr_maps) { - pr_warn("data relo failed to find map for sec %u\n", - shdr_idx); - return -LIBBPF_ERRNO__RELOC; - } - - reloc_desc->type = RELO_DATA; - reloc_desc->insn_idx = insn_idx; - reloc_desc->map_idx = map_idx; - reloc_desc->sym_off = sym->st_value; - return 0; -} - -static int -bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, - Elf_Data *data, struct bpf_object *obj) -{ - Elf_Data *symbols = obj->efile.symbols; - int err, i, nrels; - - pr_debug("collecting relocating info for: '%s'\n", prog->section_name); - nrels = shdr->sh_size / shdr->sh_entsize; - - prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); - if (!prog->reloc_desc) { - pr_warn("failed to alloc memory in relocation\n"); - return -ENOMEM; - } - prog->nr_reloc = nrels; - - for (i = 0; i < nrels; i++) { - const char *name; - __u32 insn_idx; - GElf_Sym sym; - GElf_Rel rel; - - if (!gelf_getrel(data, i, &rel)) { - pr_warn("relocation: failed to get %d reloc\n", i); - return -LIBBPF_ERRNO__FORMAT; - } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { - pr_warn("relocation: symbol %"PRIx64" not found\n", - GELF_R_SYM(rel.r_info)); - return -LIBBPF_ERRNO__FORMAT; - } - if (rel.r_offset % sizeof(struct bpf_insn)) - return -LIBBPF_ERRNO__FORMAT; - - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, - sym.st_name) ? : "<?>"; - - pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), - (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), - GELF_ST_BIND(sym.st_info), sym.st_name, name, - insn_idx); - - err = bpf_program__record_reloc(prog, &prog->reloc_desc[i], - insn_idx, name, &sym, &rel); - if (err) - return err; - } - return 0; -} - -static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) -{ - struct bpf_map_def *def = &map->def; - __u32 key_type_id = 0, value_type_id = 0; - int ret; - - /* if it's BTF-defined map, we don't need to search for type IDs */ - if (map->sec_idx == obj->efile.btf_maps_shndx) - return 0; - - if (!bpf_map__is_internal(map)) { - ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, - def->value_size, &key_type_id, - &value_type_id); - } else { - /* - * LLVM annotates global data differently in BTF, that is, - * only as '.data', '.bss' or '.rodata'. 
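bpf_program__record_reloc() above keys everything off the opcode of the instruction being patched: BPF_JMP|BPF_CALL with src_reg == BPF_PSEUDO_CALL marks a sub-program call, while BPF_LD|BPF_IMM|BPF_DW (the 64-bit immediate load) marks a map or global-data reference. A standalone sketch of that classification; the opcode constants are copied from <linux/bpf.h>, but the struct and enum below are simplified stand-ins:

    #include <stdint.h>
    #include <stdio.h>

    /* opcode constants as in <linux/bpf.h> */
    #define BPF_LD          0x00
    #define BPF_JMP         0x05
    #define BPF_IMM         0x00
    #define BPF_DW          0x18
    #define BPF_CALL        0x80
    #define BPF_PSEUDO_CALL 1

    /* simplified stand-in for the first bytes of struct bpf_insn */
    struct insn_hdr {
        uint8_t code;
        uint8_t regs;   /* dst_reg in low nibble, src_reg in high nibble */
    };

    enum relo_kind { RELO_CALL, RELO_LD64, RELO_INVALID };

    static enum relo_kind classify(const struct insn_hdr *insn)
    {
        uint8_t src_reg = insn->regs >> 4;

        /* sub-program call: BPF_JMP|BPF_CALL with the pseudo-call marker */
        if (insn->code == (BPF_JMP | BPF_CALL))
            return src_reg == BPF_PSEUDO_CALL ? RELO_CALL : RELO_INVALID;
        /* 64-bit immediate load: a map or global-data reference */
        if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
            return RELO_LD64;
        return RELO_INVALID;
    }

    int main(void)
    {
        struct insn_hdr call = { BPF_JMP | BPF_CALL, BPF_PSEUDO_CALL << 4 };
        struct insn_hdr ld   = { BPF_LD | BPF_IMM | BPF_DW, 0 };

        printf("%d %d\n", classify(&call), classify(&ld)); /* prints: 0 1 */
        return 0;
    }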
- */ - ret = btf__find_by_name(obj->btf, - libbpf_type_to_btf_name[map->libbpf_type]); - } - if (ret < 0) - return ret; - - map->btf_key_type_id = key_type_id; - map->btf_value_type_id = bpf_map__is_internal(map) ? - ret : value_type_id; - return 0; -} - -int bpf_map__reuse_fd(struct bpf_map *map, int fd) -{ - struct bpf_map_info info = {}; - __u32 len = sizeof(info); - int new_fd, err; - char *new_name; - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) - return err; - - new_name = strdup(info.name); - if (!new_name) - return -errno; - - new_fd = open("/", O_RDONLY | O_CLOEXEC); - if (new_fd < 0) { - err = -errno; - goto err_free_new_name; - } - - new_fd = dup3(fd, new_fd, O_CLOEXEC); - if (new_fd < 0) { - err = -errno; - goto err_close_new_fd; - } - - err = zclose(map->fd); - if (err) { - err = -errno; - goto err_close_new_fd; - } - free(map->name); - - map->fd = new_fd; - map->name = new_name; - map->def.type = info.type; - map->def.key_size = info.key_size; - map->def.value_size = info.value_size; - map->def.max_entries = info.max_entries; - map->def.map_flags = info.map_flags; - map->btf_key_type_id = info.btf_key_type_id; - map->btf_value_type_id = info.btf_value_type_id; - map->reused = true; - - return 0; - -err_close_new_fd: - close(new_fd); -err_free_new_name: - free(new_name); - return err; -} - -int bpf_map__resize(struct bpf_map *map, __u32 max_entries) -{ - if (!map || !max_entries) - return -EINVAL; - - /* If map already created, its attributes can't be changed. */ - if (map->fd >= 0) - return -EBUSY; - - map->def.max_entries = max_entries; - - return 0; -} - -static int -bpf_object__probe_name(struct bpf_object *obj) -{ - struct bpf_load_program_attr attr; - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret; - - /* make sure basic loading works */ - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - - ret = bpf_load_program_xattr(&attr, NULL, 0); - if (ret < 0) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", - __func__, cp, errno); - return -errno; - } - close(ret); - - /* now try the same program, but with the name */ - - attr.name = "test"; - ret = bpf_load_program_xattr(&attr, NULL, 0); - if (ret >= 0) { - obj->caps.name = 1; - close(ret); - } - - return 0; -} - -static int -bpf_object__probe_global_data(struct bpf_object *obj) -{ - struct bpf_load_program_attr prg_attr; - struct bpf_create_map_attr map_attr; - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map; - - memset(&map_attr, 0, sizeof(map_attr)); - map_attr.map_type = BPF_MAP_TYPE_ARRAY; - map_attr.key_size = sizeof(int); - map_attr.value_size = 32; - map_attr.max_entries = 1; - - map = bpf_create_map_xattr(&map_attr); - if (map < 0) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). 
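A side note on the fd juggling in bpf_map__reuse_fd() above: it first reserves a fresh descriptor number by opening "/" with O_CLOEXEC, then dup3()s the pinned fd over it, so the replacement descriptor is born with the close-on-exec flag and map->fd is swapped only once everything has succeeded. The trick in isolation (Linux-specific; dup3 needs _GNU_SOURCE):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* duplicate `fd` onto a brand-new descriptor with O_CLOEXEC set */
    static int dup_cloexec(int fd)
    {
        /* reserve an unused descriptor number first... */
        int new_fd = open("/", O_RDONLY | O_CLOEXEC);
        if (new_fd < 0)
            return -1;
        /* ...then atomically replace it with a CLOEXEC copy of fd */
        if (dup3(fd, new_fd, O_CLOEXEC) < 0) {
            close(new_fd);
            return -1;
        }
        return new_fd;
    }

    int main(void)
    {
        int copy = dup_cloexec(STDIN_FILENO);

        printf("stdin duplicated to fd %d\n", copy);
        if (copy >= 0)
            close(copy);
        return 0;
    }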
Couldn't create simple array map.\n", - __func__, cp, errno); - return -errno; - } - - insns[0].imm = map; - - memset(&prg_attr, 0, sizeof(prg_attr)); - prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - prg_attr.insns = insns; - prg_attr.insns_cnt = ARRAY_SIZE(insns); - prg_attr.license = "GPL"; - - ret = bpf_load_program_xattr(&prg_attr, NULL, 0); - if (ret >= 0) { - obj->caps.global_data = 1; - close(ret); - } - - close(map); - return 0; -} - -static int bpf_object__probe_btf_func(struct bpf_object *obj) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - int btf_fd; - - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - obj->caps.btf_func = 1; - close(btf_fd); - return 1; - } - - return 0; -} - -static int bpf_object__probe_btf_datasec(struct bpf_object *obj) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - int btf_fd; - - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); - if (btf_fd >= 0) { - obj->caps.btf_datasec = 1; - close(btf_fd); - return 1; - } - - return 0; -} - -static int bpf_object__probe_array_mmap(struct bpf_object *obj) -{ - struct bpf_create_map_attr attr = { - .map_type = BPF_MAP_TYPE_ARRAY, - .map_flags = BPF_F_MMAPABLE, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1, - }; - int fd; - - fd = bpf_create_map_xattr(&attr); - if (fd >= 0) { - obj->caps.array_mmap = 1; - close(fd); - return 1; - } - - return 0; -} - -static int -bpf_object__probe_caps(struct bpf_object *obj) -{ - int (*probe_fn[])(struct bpf_object *obj) = { - bpf_object__probe_name, - bpf_object__probe_global_data, - bpf_object__probe_btf_func, - bpf_object__probe_btf_datasec, - bpf_object__probe_array_mmap, - }; - int i, ret; - - for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { - ret = probe_fn[i](obj); - if (ret < 0) - pr_debug("Probe #%d failed with %d.\n", i, ret); - } - - return 0; -} - -static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) -{ - struct bpf_map_info map_info = {}; - char msg[STRERR_BUFSIZE]; - __u32 map_info_len; - - map_info_len = sizeof(map_info); - - if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { - pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); - return false; - } - - return (map_info.type == map->def.type && - map_info.key_size == map->def.key_size && - map_info.value_size == map->def.value_size && - map_info.max_entries == map->def.max_entries && - map_info.map_flags == map->def.map_flags); -} - -static int -bpf_object__reuse_map(struct bpf_map *map) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - int err, pin_fd; - - pin_fd = bpf_obj_get(map->pin_path); - if (pin_fd < 0) { - err = -errno; - if (err == -ENOENT) { - pr_debug("found no pinned map to reuse at '%s'\n", - map->pin_path); - return 0; - } - - cp = libbpf_strerror_r(-err, errmsg, 
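The probe functions being removed all follow one shape: attempt a tiny throwaway load or map create, set a capability bit on success, and treat failure as a missing feature rather than an error; bpf_object__probe_caps() then just walks a table of them. The pattern reduced to plain C (the two probes below are stand-ins, not real kernel checks):

    #include <stdio.h>

    struct caps { unsigned name:1, global_data:1; };

    /* each probe returns 1 if the feature works, 0 if not, <0 on error */
    static int probe_name(struct caps *c)
    {
        c->name = 1;            /* pretend the tiny test load succeeded */
        return 1;
    }

    static int probe_global_data(struct caps *c)
    {
        (void)c;                /* pretend the kernel rejected the probe */
        return 0;
    }

    int main(void)
    {
        int (*probe_fn[])(struct caps *) = { probe_name, probe_global_data };
        struct caps caps = { 0, 0 };

        for (size_t i = 0; i < sizeof(probe_fn) / sizeof(probe_fn[0]); i++) {
            int ret = probe_fn[i](&caps);
            if (ret < 0)
                fprintf(stderr, "probe #%zu failed: %d\n", i, ret);
        }
        printf("name=%u global_data=%u\n", caps.name, caps.global_data);
        return 0;
    }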
sizeof(errmsg)); - pr_warn("couldn't retrieve pinned map '%s': %s\n", - map->pin_path, cp); - return err; - } - - if (!map_is_reuse_compat(map, pin_fd)) { - pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", - map->pin_path); - close(pin_fd); - return -EINVAL; - } - - err = bpf_map__reuse_fd(map, pin_fd); - if (err) { - close(pin_fd); - return err; - } - map->pinned = true; - pr_debug("reused pinned map at '%s'\n", map->pin_path); - - return 0; -} - -static int -bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - int err, zero = 0; - __u8 *data; - - /* Nothing to do here since kernel already zero-initializes .bss map. */ - if (map->libbpf_type == LIBBPF_MAP_BSS) - return 0; - - data = map->libbpf_type == LIBBPF_MAP_DATA ? - obj->sections.data : obj->sections.rodata; - - err = bpf_map_update_elem(map->fd, &zero, data, 0); - /* Freeze .rodata map as read-only from syscall side. */ - if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { - err = bpf_map_freeze(map->fd); - if (err) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("Error freezing map(%s) as read-only: %s\n", - map->name, cp); - err = 0; - } - } - return err; -} - -static int -bpf_object__create_maps(struct bpf_object *obj) -{ - struct bpf_create_map_attr create_attr = {}; - int nr_cpus = 0; - unsigned int i; - int err; - - for (i = 0; i < obj->nr_maps; i++) { - struct bpf_map *map = &obj->maps[i]; - struct bpf_map_def *def = &map->def; - char *cp, errmsg[STRERR_BUFSIZE]; - int *pfd = &map->fd; - - if (map->pin_path) { - err = bpf_object__reuse_map(map); - if (err) { - pr_warn("error reusing pinned map %s\n", - map->name); - return err; - } - } - - if (map->fd >= 0) { - pr_debug("skip map create (preset) %s: fd=%d\n", - map->name, map->fd); - continue; - } - - if (obj->caps.name) - create_attr.name = map->name; - create_attr.map_ifindex = map->map_ifindex; - create_attr.map_type = def->type; - create_attr.map_flags = def->map_flags; - create_attr.key_size = def->key_size; - create_attr.value_size = def->value_size; - if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && - !def->max_entries) { - if (!nr_cpus) - nr_cpus = libbpf_num_possible_cpus(); - if (nr_cpus < 0) { - pr_warn("failed to determine number of system CPUs: %d\n", - nr_cpus); - err = nr_cpus; - goto err_out; - } - pr_debug("map '%s': setting size to %d\n", - map->name, nr_cpus); - create_attr.max_entries = nr_cpus; - } else { - create_attr.max_entries = def->max_entries; - } - create_attr.btf_fd = 0; - create_attr.btf_key_type_id = 0; - create_attr.btf_value_type_id = 0; - if (bpf_map_type__is_map_in_map(def->type) && - map->inner_map_fd >= 0) - create_attr.inner_map_fd = map->inner_map_fd; - - if (obj->btf && !bpf_map_find_btf_info(obj, map)) { - create_attr.btf_fd = btf__fd(obj->btf); - create_attr.btf_key_type_id = map->btf_key_type_id; - create_attr.btf_value_type_id = map->btf_value_type_id; - } - - *pfd = bpf_create_map_xattr(&create_attr); - if (*pfd < 0 && (create_attr.btf_key_type_id || - create_attr.btf_value_type_id)) { - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). 
Retrying without BTF.\n", - map->name, cp, err); - create_attr.btf_fd = 0; - create_attr.btf_key_type_id = 0; - create_attr.btf_value_type_id = 0; - map->btf_key_type_id = 0; - map->btf_value_type_id = 0; - *pfd = bpf_create_map_xattr(&create_attr); - } - - if (*pfd < 0) { - size_t j; - - err = -errno; -err_out: - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to create map (name: '%s'): %s(%d)\n", - map->name, cp, err); - for (j = 0; j < i; j++) - zclose(obj->maps[j].fd); - return err; - } - - if (bpf_map__is_internal(map)) { - err = bpf_object__populate_internal_map(obj, map); - if (err < 0) { - zclose(*pfd); - goto err_out; - } - } - - if (map->pin_path && !map->pinned) { - err = bpf_map__pin(map, NULL); - if (err) { - pr_warn("failed to auto-pin map name '%s' at '%s'\n", - map->name, map->pin_path); - return err; - } - } - - pr_debug("created map %s: fd=%d\n", map->name, *pfd); - } - - return 0; -} - -static int -check_btf_ext_reloc_err(struct bpf_program *prog, int err, - void *btf_prog_info, const char *info_name) -{ - if (err != -ENOENT) { - pr_warn("Error in loading %s for sec %s.\n", - info_name, prog->section_name); - return err; - } - - /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ - - if (btf_prog_info) { - /* - * Some info has already been found but has problem - * in the last btf_ext reloc. Must have to error out. - */ - pr_warn("Error in relocating %s for sec %s.\n", - info_name, prog->section_name); - return err; - } - - /* Have problem loading the very first info. Ignore the rest. */ - pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n", - info_name, prog->section_name, info_name); - return 0; -} - -static int -bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, - const char *section_name, __u32 insn_offset) -{ - int err; - - if (!insn_offset || prog->func_info) { - /* - * !insn_offset => main program - * - * For sub prog, the main program's func_info has to - * be loaded first (i.e. 
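Note the graceful-degradation step above: if map creation fails while BTF key/value type IDs were supplied, the code assumes an old kernel rejected the BTF, clears the BTF fields on both the attributes and the map, and retries exactly once. The shape of that fallback, with a hypothetical create() standing in for bpf_create_map_xattr():

    #include <errno.h>
    #include <stdio.h>

    struct create_attr { unsigned btf_key_id, btf_value_id; };

    /* hypothetical stand-in: fails whenever BTF type IDs are supplied,
     * the way an old kernel without BTF map support would */
    static int create(const struct create_attr *a)
    {
        if (a->btf_key_id || a->btf_value_id) {
            errno = EOPNOTSUPP;
            return -1;
        }
        return 42;      /* pretend map fd */
    }

    int main(void)
    {
        struct create_attr attr = { .btf_key_id = 7, .btf_value_id = 9 };

        int fd = create(&attr);
        if (fd < 0 && (attr.btf_key_id || attr.btf_value_id)) {
            fprintf(stderr, "create with BTF failed (%d), retrying without BTF\n",
                    errno);
            attr.btf_key_id = attr.btf_value_id = 0;
            fd = create(&attr);     /* second and last attempt */
        }
        printf("fd=%d\n", fd);
        return 0;
    }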
prog->func_info != NULL) - */ - err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, - section_name, insn_offset, - &prog->func_info, - &prog->func_info_cnt); - if (err) - return check_btf_ext_reloc_err(prog, err, - prog->func_info, - "bpf_func_info"); - - prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); - } - - if (!insn_offset || prog->line_info) { - err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, - section_name, insn_offset, - &prog->line_info, - &prog->line_info_cnt); - if (err) - return check_btf_ext_reloc_err(prog, err, - prog->line_info, - "bpf_line_info"); - - prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); - } - - return 0; -} - -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { - __u32 type_id; /* struct/union type or array element type */ - __u32 idx; /* field index or array index */ - const char *name; /* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { - const struct btf *btf; - /* high-level spec: named fields and array indices only */ - struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; - /* high-level spec length */ - int len; - /* raw, low-level spec: 1-to-1 with accessor spec string */ - int raw_spec[BPF_CORE_SPEC_MAX_LEN]; - /* raw spec length */ - int raw_len; - /* field bit offset represented by spec */ - __u32 bit_offset; -}; - -static bool str_is_empty(const char *s) -{ - return !s || !s[0]; -} - -/* - * Turn bpf_field_reloc into a low- and high-level spec representation, - * validating correctness along the way, as well as calculating resulting - * field bit offset, specified by accessor string. Low-level spec captures - * every single level of nestedness, including traversing anonymous - * struct/union members. High-level one only captures semantically meaningful - * "turning points": named fields and array indices. - * E.g., for this case: - * - * struct sample { - * int __unimportant; - * struct { - * int __1; - * int __2; - * int a[7]; - * }; - * }; - * - * struct sample *s = ...; - * - * int *x = &s->a[3]; // access string = '0:1:2:3' - * - * Low-level spec has 1:1 mapping with each element of access string (it's - * just a parsed access string representation): [0, 1, 2, 3]. - * - * High-level spec will capture only 3 points: - * - initial zero-index access by pointer (&s->... is the same as &s[0]...); - * - field 'a' access (corresponds to '2' in low-level spec); - * - array element #3 access (corresponds to '3' in low-level spec).
- * - */ -static int bpf_core_spec_parse(const struct btf *btf, - __u32 type_id, - const char *spec_str, - struct bpf_core_spec *spec) -{ - int access_idx, parsed_len, i; - const struct btf_type *t; - const char *name; - __u32 id; - __s64 sz; - - if (str_is_empty(spec_str) || *spec_str == ':') - return -EINVAL; - - memset(spec, 0, sizeof(*spec)); - spec->btf = btf; - - /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ - while (*spec_str) { - if (*spec_str == ':') - ++spec_str; - if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) - return -EINVAL; - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - spec_str += parsed_len; - spec->raw_spec[spec->raw_len++] = access_idx; - } - - if (spec->raw_len == 0) - return -EINVAL; - - /* first spec value is always reloc type array index */ - t = skip_mods_and_typedefs(btf, type_id, &id); - if (!t) - return -EINVAL; - - access_idx = spec->raw_spec[0]; - spec->spec[0].type_id = id; - spec->spec[0].idx = access_idx; - spec->len++; - - sz = btf__resolve_size(btf, id); - if (sz < 0) - return sz; - spec->bit_offset = access_idx * sz * 8; - - for (i = 1; i < spec->raw_len; i++) { - t = skip_mods_and_typedefs(btf, id, &id); - if (!t) - return -EINVAL; - - access_idx = spec->raw_spec[i]; - - if (btf_is_composite(t)) { - const struct btf_member *m; - __u32 bit_offset; - - if (access_idx >= btf_vlen(t)) - return -EINVAL; - - bit_offset = btf_member_bit_offset(t, access_idx); - spec->bit_offset += bit_offset; - - m = btf_members(t) + access_idx; - if (m->name_off) { - name = btf__name_by_offset(btf, m->name_off); - if (str_is_empty(name)) - return -EINVAL; - - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->spec[spec->len].name = name; - spec->len++; - } - - id = m->type; - } else if (btf_is_array(t)) { - const struct btf_array *a = btf_array(t); - - t = skip_mods_and_typedefs(btf, a->type, &id); - if (!t || access_idx >= a->nelems) - return -EINVAL; - - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->len++; - - sz = btf__resolve_size(btf, id); - if (sz < 0) - return sz; - spec->bit_offset += access_idx * sz * 8; - } else { - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", - type_id, spec_str, i, id, btf_kind(t)); - return -EINVAL; - } - } - - return 0; -} - -static bool bpf_core_is_flavor_sep(const char *s) -{ - /* check X___Y name pattern, where X and Y are not underscores */ - return s[0] != '_' && /* X */ - s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ - s[4] != '_'; /* Y */ -} - -/* Given 'some_struct_name___with_flavor' return the length of a name prefix - * before last triple underscore. Struct name part after last triple - * underscore is ignored by BPF CO-RE relocation during relocation matching. 
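The core of bpf_core_spec_parse() above is the sscanf("%d%n") loop that turns an accessor string such as "0:1:2:3" into the raw spec array. That step in isolation, with the same length limit and error returns as the deleted code:

    #include <errno.h>
    #include <stdio.h>

    #define SPEC_MAX_LEN 64

    /* parse "0:1:2:3" into raw[], storing the element count in *len */
    static int parse_raw_spec(const char *s, int *raw, int *len)
    {
        *len = 0;
        if (!s || !s[0] || s[0] == ':')
            return -EINVAL;
        while (*s) {
            int idx, parsed;

            if (*s == ':')
                s++;
            if (sscanf(s, "%d%n", &idx, &parsed) != 1)
                return -EINVAL;
            if (*len == SPEC_MAX_LEN)
                return -E2BIG;
            s += parsed;
            raw[(*len)++] = idx;
        }
        return *len ? 0 : -EINVAL;
    }

    int main(void)
    {
        int raw[SPEC_MAX_LEN], len;

        if (parse_raw_spec("0:1:2:3", raw, &len) == 0) {
            for (int i = 0; i < len; i++)
                printf("%d ", raw[i]);  /* prints: 0 1 2 3 */
            printf("\n");
        }
        return 0;
    }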
- */ -static size_t bpf_core_essential_name_len(const char *name) -{ - size_t n = strlen(name); - int i; - - for (i = n - 5; i >= 0; i--) { - if (bpf_core_is_flavor_sep(name + i)) - return i + 1; - } - return n; -} - -/* dynamically sized list of type IDs */ -struct ids_vec { - __u32 *data; - int len; -}; - -static void bpf_core_free_cands(struct ids_vec *cand_ids) -{ - free(cand_ids->data); - free(cand_ids); -} - -static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, - __u32 local_type_id, - const struct btf *targ_btf) -{ - size_t local_essent_len, targ_essent_len; - const char *local_name, *targ_name; - const struct btf_type *t; - struct ids_vec *cand_ids; - __u32 *new_ids; - int i, err, n; - - t = btf__type_by_id(local_btf, local_type_id); - if (!t) - return ERR_PTR(-EINVAL); - - local_name = btf__name_by_offset(local_btf, t->name_off); - if (str_is_empty(local_name)) - return ERR_PTR(-EINVAL); - local_essent_len = bpf_core_essential_name_len(local_name); - - cand_ids = calloc(1, sizeof(*cand_ids)); - if (!cand_ids) - return ERR_PTR(-ENOMEM); - - n = btf__get_nr_types(targ_btf); - for (i = 1; i <= n; i++) { - t = btf__type_by_id(targ_btf, i); - targ_name = btf__name_by_offset(targ_btf, t->name_off); - if (str_is_empty(targ_name)) - continue; - - targ_essent_len = bpf_core_essential_name_len(targ_name); - if (targ_essent_len != local_essent_len) - continue; - - if (strncmp(local_name, targ_name, local_essent_len) == 0) { - pr_debug("[%d] %s: found candidate [%d] %s\n", - local_type_id, local_name, i, targ_name); - new_ids = reallocarray(cand_ids->data, cand_ids->len + 1, - sizeof(*new_ids)); - if (!new_ids) { - err = -ENOMEM; - goto err_out; - } - cand_ids->data = new_ids; - cand_ids->data[cand_ids->len++] = i; - } - } - return cand_ids; -err_out: - bpf_core_free_cands(cand_ids); - return ERR_PTR(err); -} - -/* Check two types for compatibility, skipping const/volatile/restrict and - * typedefs, to ensure we are relocating compatible entities: - * - any two STRUCTs/UNIONs are compatible and can be mixed; - * - any two FWDs are compatible, if their names match (modulo flavor suffix); - * - any two PTRs are always compatible; - * - for ENUMs, names should be the same (ignoring flavor suffix) or at - * least one of enums should be anonymous; - * - for ENUMs, check sizes, names are ignored; - * - for INT, size and signedness are ignored; - * - for ARRAY, dimensionality is ignored, element types are checked for - * compatibility recursively; - * - everything else shouldn't be ever a target of relocation. - * These rules are not set in stone and probably will be adjusted as we get - * more experience with using BPF CO-RE relocations.
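Together, bpf_core_is_flavor_sep() and bpf_core_essential_name_len() above make `sample` and `sample___flavor_one` compare equal while keeping `sample2` distinct. A runnable standalone version (names_match() is an illustrative helper, not a libbpf function):

    #include <stdio.h>
    #include <string.h>

    static int is_flavor_sep(const char *s)
    {
        /* X___Y where neither X nor Y is an underscore */
        return s[0] != '_' &&
               s[1] == '_' && s[2] == '_' && s[3] == '_' &&
               s[4] != '_';
    }

    /* length of the name prefix before the last triple underscore */
    static size_t essential_name_len(const char *name)
    {
        size_t n = strlen(name);

        for (int i = (int)n - 5; i >= 0; i--)
            if (is_flavor_sep(name + i))
                return i + 1;
        return n;
    }

    static int names_match(const char *a, const char *b)
    {
        size_t la = essential_name_len(a), lb = essential_name_len(b);

        return la == lb && strncmp(a, b, la) == 0;
    }

    int main(void)
    {
        printf("%d\n", names_match("sample", "sample___flavor_one")); /* 1 */
        printf("%d\n", names_match("sample", "sample2___flavor"));    /* 0 */
        return 0;
    }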
- */ -static int bpf_core_fields_are_compat(const struct btf *local_btf, - __u32 local_id, - const struct btf *targ_btf, - __u32 targ_id) -{ - const struct btf_type *local_type, *targ_type; - -recur: - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (btf_is_composite(local_type) && btf_is_composite(targ_type)) - return 1; - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case BTF_KIND_PTR: - return 1; - case BTF_KIND_FWD: - case BTF_KIND_ENUM: { - const char *local_name, *targ_name; - size_t local_len, targ_len; - - local_name = btf__name_by_offset(local_btf, - local_type->name_off); - targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); - local_len = bpf_core_essential_name_len(local_name); - targ_len = bpf_core_essential_name_len(targ_name); - /* one of them is anonymous or both w/ same flavor-less names */ - return local_len == 0 || targ_len == 0 || - (local_len == targ_len && - strncmp(local_name, targ_name, local_len) == 0); - } - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && - btf_int_offset(targ_type) == 0; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - default: - pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", - btf_kind(local_type), local_id, targ_id); - return 0; - } -} - -/* - * Given single high-level named field accessor in local type, find - * corresponding high-level accessor for a target type. Along the way, - * maintain low-level spec for target as well. Also keep updating target - * bit offset. - * - * Searching is performed through recursive exhaustive enumeration of all - * fields of a struct/union. If there are any anonymous (embedded) - * structs/unions, they are recursively searched as well. If field with - * desired name is found, check compatibility between local and target types, - * before returning result. - * - * 1 is returned, if field is found. - * 0 is returned if no compatible field is found. - * <0 is returned on error. 
- */ -static int bpf_core_match_member(const struct btf *local_btf, - const struct bpf_core_accessor *local_acc, - const struct btf *targ_btf, - __u32 targ_id, - struct bpf_core_spec *spec, - __u32 *next_targ_id) -{ - const struct btf_type *local_type, *targ_type; - const struct btf_member *local_member, *m; - const char *local_name, *targ_name; - __u32 local_id; - int i, n, found; - - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!targ_type) - return -EINVAL; - if (!btf_is_composite(targ_type)) - return 0; - - local_id = local_acc->type_id; - local_type = btf__type_by_id(local_btf, local_id); - local_member = btf_members(local_type) + local_acc->idx; - local_name = btf__name_by_offset(local_btf, local_member->name_off); - - n = btf_vlen(targ_type); - m = btf_members(targ_type); - for (i = 0; i < n; i++, m++) { - __u32 bit_offset; - - bit_offset = btf_member_bit_offset(targ_type, i); - - /* too deep struct/union/array nesting */ - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - - /* speculate this member will be the good one */ - spec->bit_offset += bit_offset; - spec->raw_spec[spec->raw_len++] = i; - - targ_name = btf__name_by_offset(targ_btf, m->name_off); - if (str_is_empty(targ_name)) { - /* embedded struct/union, we need to go deeper */ - found = bpf_core_match_member(local_btf, local_acc, - targ_btf, m->type, - spec, next_targ_id); - if (found) /* either found or error */ - return found; - } else if (strcmp(local_name, targ_name) == 0) { - /* matching named field */ - struct bpf_core_accessor *targ_acc; - - targ_acc = &spec->spec[spec->len++]; - targ_acc->type_id = targ_id; - targ_acc->idx = i; - targ_acc->name = targ_name; - - *next_targ_id = m->type; - found = bpf_core_fields_are_compat(local_btf, - local_member->type, - targ_btf, m->type); - if (!found) - spec->len--; /* pop accessor */ - return found; - } - /* member turned out not to be what we looked for */ - spec->bit_offset -= bit_offset; - spec->raw_len--; - } - - return 0; -} - -/* - * Try to match local spec to a target type and, if successful, produce full - * target spec (high-level, low-level + bit offset). 
- */ -static int bpf_core_spec_match(struct bpf_core_spec *local_spec, - const struct btf *targ_btf, __u32 targ_id, - struct bpf_core_spec *targ_spec) -{ - const struct btf_type *targ_type; - const struct bpf_core_accessor *local_acc; - struct bpf_core_accessor *targ_acc; - int i, sz, matched; - - memset(targ_spec, 0, sizeof(*targ_spec)); - targ_spec->btf = targ_btf; - - local_acc = &local_spec->spec[0]; - targ_acc = &targ_spec->spec[0]; - - for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { - targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, - &targ_id); - if (!targ_type) - return -EINVAL; - - if (local_acc->name) { - matched = bpf_core_match_member(local_spec->btf, - local_acc, - targ_btf, targ_id, - targ_spec, &targ_id); - if (matched <= 0) - return matched; - } else { - /* for i=0, targ_id is already treated as array element - * type (because it's the original struct), for others - * we should find array element type first - */ - if (i > 0) { - const struct btf_array *a; - - if (!btf_is_array(targ_type)) - return 0; - - a = btf_array(targ_type); - if (local_acc->idx >= a->nelems) - return 0; - if (!skip_mods_and_typedefs(targ_btf, a->type, - &targ_id)) - return -EINVAL; - } - - /* too deep struct/union/array nesting */ - if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - - targ_acc->type_id = targ_id; - targ_acc->idx = local_acc->idx; - targ_acc->name = NULL; - targ_spec->len++; - targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; - targ_spec->raw_len++; - - sz = btf__resolve_size(targ_btf, targ_id); - if (sz < 0) - return sz; - targ_spec->bit_offset += local_acc->idx * sz * 8; - } - } - - return 1; -} - -static int bpf_core_calc_field_relo(const struct bpf_program *prog, - const struct bpf_field_reloc *relo, - const struct bpf_core_spec *spec, - __u32 *val, bool *validate) -{ - const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1]; - const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id); - __u32 byte_off, byte_sz, bit_off, bit_sz; - const struct btf_member *m; - const struct btf_type *mt; - bool bitfield; - __s64 sz; - - /* a[n] accessor needs special handling */ - if (!acc->name) { - if (relo->kind == BPF_FIELD_BYTE_OFFSET) { - *val = spec->bit_offset / 8; - } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { - sz = btf__resolve_size(spec->btf, acc->type_id); - if (sz < 0) - return -EINVAL; - *val = sz; - } else { - pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", - bpf_program__title(prog, false), - relo->kind, relo->insn_off / 8); - return -EINVAL; - } - if (validate) - *validate = true; - return 0; - } - - m = btf_members(t) + acc->idx; - mt = skip_mods_and_typedefs(spec->btf, m->type, NULL); - bit_off = spec->bit_offset; - bit_sz = btf_member_bitfield_size(t, acc->idx); - - bitfield = bit_sz > 0; - if (bitfield) { - byte_sz = mt->size; - byte_off = bit_off / 8 / byte_sz * byte_sz; - /* figure out smallest int size necessary for bitfield load */ - while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { - if (byte_sz >= 8) { - /* bitfield can't be read with 64-bit read */ - pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", - bpf_program__title(prog, false), - relo->kind, relo->insn_off / 8); - return -E2BIG; - } - byte_sz *= 2; - byte_off = bit_off / 8 / byte_sz * byte_sz; - } - } else { - sz = btf__resolve_size(spec->btf, m->type); - if (sz < 0) - return -EINVAL; - byte_sz = sz; - byte_off = spec->bit_offset / 8; - bit_sz = byte_sz * 8; - } - - /* for 
bitfields, all the relocatable aspects are ambiguous and we - * might disagree with compiler, so turn off validation of expected - * value, except for signedness - */ - if (validate) - *validate = !bitfield; - - switch (relo->kind) { - case BPF_FIELD_BYTE_OFFSET: - *val = byte_off; - break; - case BPF_FIELD_BYTE_SIZE: - *val = byte_sz; - break; - case BPF_FIELD_SIGNED: - /* enums will be assumed unsigned */ - *val = btf_is_enum(mt) || - (btf_int_encoding(mt) & BTF_INT_SIGNED); - if (validate) - *validate = true; /* signedness is never ambiguous */ - break; - case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN - *val = 64 - (bit_off + bit_sz - byte_off * 8); -#else - *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); -#endif - break; - case BPF_FIELD_RSHIFT_U64: - *val = 64 - bit_sz; - if (validate) - *validate = true; /* right shift is never ambiguous */ - break; - case BPF_FIELD_EXISTS: - default: - pr_warn("prog '%s': unknown relo %d at insn #%d\n", - bpf_program__title(prog, false), - relo->kind, relo->insn_off / 8); - return -EINVAL; - } - - return 0; -} - -/* - * Patch relocatable BPF instruction. - * - * Patched value is determined by relocation kind and target specification. - * For field existence relocation target spec will be NULL if field is not - * found. - * Expected insn->imm value is determined using relocation kind and local - * spec, and is checked before patching instruction. If actual insn->imm value - * is wrong, bail out with error. - * - * Currently three kinds of BPF instructions are supported: - * 1. rX = <imm> (assignment with immediate operand); - * 2. rX += <imm> (arithmetic operations with immediate operand); - */ -static int bpf_core_reloc_insn(struct bpf_program *prog, - const struct bpf_field_reloc *relo, - const struct bpf_core_spec *local_spec, - const struct bpf_core_spec *targ_spec) -{ - bool failed = false, validate = true; - __u32 orig_val, new_val; - struct bpf_insn *insn; - int insn_idx, err; - __u8 class; - - if (relo->insn_off % sizeof(struct bpf_insn)) - return -EINVAL; - insn_idx = relo->insn_off / sizeof(struct bpf_insn); - - if (relo->kind == BPF_FIELD_EXISTS) { - orig_val = 1; /* can't generate EXISTS relo w/o local field */ - new_val = targ_spec ? 1 : 0; - } else if (!targ_spec) { - failed = true; - new_val = (__u32)-1; - } else { - err = bpf_core_calc_field_relo(prog, relo, local_spec, - &orig_val, &validate); - if (err) - return err; - err = bpf_core_calc_field_relo(prog, relo, targ_spec, - &new_val, NULL); - if (err) - return err; - } - - insn = &prog->insns[insn_idx]; - class = BPF_CLASS(insn->code); - - if (class == BPF_ALU || class == BPF_ALU64) { - if (BPF_SRC(insn->code) != BPF_K) - return -EINVAL; - if (!failed && validate && insn->imm != orig_val) { - pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - insn->imm, orig_val, new_val); - return -EINVAL; - } - orig_val = insn->imm; - insn->imm = new_val; - pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - failed ? 
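The bitfield branch above is the subtle part of bpf_core_calc_field_relo(): grow the load size until a naturally-aligned read covers the whole field, then derive the left/right shifts that extract it. A simplified standalone version of that arithmetic; it starts from a 1-byte load rather than the member's base size, and shows only the little-endian shift from the #if branch above:

    #include <stdio.h>

    /* given a field at bit_off with width bit_sz, find load size and shifts */
    static void bitfield_load(unsigned bit_off, unsigned bit_sz)
    {
        unsigned byte_sz = 1;                    /* start with a 1-byte load */
        unsigned byte_off = bit_off / 8 / byte_sz * byte_sz;

        /* grow 1 -> 2 -> 4 -> 8 until the aligned load covers the field */
        while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
            byte_sz *= 2;
            byte_off = bit_off / 8 / byte_sz * byte_sz;
        }

        /* little-endian variants of the shift computations */
        unsigned lshift = 64 - (bit_off + bit_sz - byte_off * 8);
        unsigned rshift = 64 - bit_sz;

        printf("bit_off=%u bit_sz=%u -> load %u bytes at %u, lsh=%u rsh=%u\n",
               bit_off, bit_sz, byte_sz, byte_off, lshift, rshift);
    }

    int main(void)
    {
        bitfield_load(7, 5);    /* straddles a byte boundary: 2-byte load at 0 */
        bitfield_load(32, 10);  /* fits in the two bytes at offset 4 */
        return 0;
    }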
" w/ failed reloc" : "", orig_val, new_val); - } else { - pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", - bpf_program__title(prog, false), - insn_idx, insn->code, insn->src_reg, insn->dst_reg, - insn->off, insn->imm); - return -EINVAL; - } - - return 0; -} - -static struct btf *btf_load_raw(const char *path) -{ - struct btf *btf; - size_t read_cnt; - struct stat st; - void *data; - FILE *f; - - if (stat(path, &st)) - return ERR_PTR(-errno); - - data = malloc(st.st_size); - if (!data) - return ERR_PTR(-ENOMEM); - - f = fopen(path, "rb"); - if (!f) { - btf = ERR_PTR(-errno); - goto cleanup; - } - - read_cnt = fread(data, 1, st.st_size, f); - fclose(f); - if (read_cnt < st.st_size) { - btf = ERR_PTR(-EBADF); - goto cleanup; - } - - btf = btf__new(data, read_cnt); - -cleanup: - free(data); - return btf; -} - -/* - * Probe few well-known locations for vmlinux kernel image and try to load BTF - * data out of it to use for target BTF. - */ -static struct btf *bpf_core_find_kernel_btf(void) -{ - struct { - const char *path_fmt; - bool raw_btf; - } locations[] = { - /* try canonical vmlinux BTF through sysfs first */ - { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, - /* fall back to trying to find vmlinux ELF on disk otherwise */ - { "/boot/vmlinux-%1$s" }, - { "/lib/modules/%1$s/vmlinux-%1$s" }, - { "/lib/modules/%1$s/build/vmlinux" }, - { "/usr/lib/modules/%1$s/kernel/vmlinux" }, - { "/usr/lib/debug/boot/vmlinux-%1$s" }, - { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, - { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, - }; - char path[PATH_MAX + 1]; - struct utsname buf; - struct btf *btf; - int i; - - uname(&buf); - - for (i = 0; i < ARRAY_SIZE(locations); i++) { - snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); - - if (access(path, R_OK)) - continue; - - if (locations[i].raw_btf) - btf = btf_load_raw(path); - else - btf = btf__parse_elf(path, NULL); - - pr_debug("loading kernel BTF '%s': %ld\n", - path, IS_ERR(btf) ? PTR_ERR(btf) : 0); - if (IS_ERR(btf)) - continue; - - return btf; - } - - pr_warn("failed to find valid kernel BTF\n"); - return ERR_PTR(-ESRCH); -} - -/* Output spec definition in the format: - * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, - * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b - */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) -{ - const struct btf_type *t; - const char *s; - __u32 type_id; - int i; - - type_id = spec->spec[0].type_id; - t = btf__type_by_id(spec->btf, type_id); - s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s + ", type_id, s); - - for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%d%s", spec->raw_spec[i], - i == spec->raw_len - 1 ? " => " : ":"); - - libbpf_print(level, "%u.%u @ &x", - spec->bit_offset / 8, spec->bit_offset % 8); - - for (i = 0; i < spec->len; i++) { - if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); - else - libbpf_print(level, "[%u]", spec->spec[i].idx); - } - -} - -static size_t bpf_core_hash_fn(const void *key, void *ctx) -{ - return (size_t)key; -} - -static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) -{ - return k1 == k2; -} - -static void *u32_as_hash_key(__u32 x) -{ - return (void *)(uintptr_t)x; -} - -/* - * CO-RE relocate single instruction. - * - * The outline and important points of the algorithm: - * 1. For given local type, find corresponding candidate target types. 
- * Candidate type is a type with the same "essential" name, ignoring - * everything after last triple underscore (___). E.g., `sample`, - * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates - * for each other. Names with triple underscore are referred to as - * "flavors" and are useful, among other things, to allow - * specifying/supporting incompatible variations of the same kernel struct, - * which might differ between different kernel versions and/or build - * configurations. - * - * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C - * converter, when deduplicated BTF of a kernel still contains more than - * one distinct type with the same name. In that case, ___2, ___3, etc - * are appended starting from the second name conflict. But struct flavors - * are also useful to be defined "locally", in a BPF program, to extract the - * same data across incompatible changes between different kernel - * versions/configurations. For instance, to handle field renames between - * kernel versions, one can define two flavors of the struct with the - * same common name and use conditional relocations to extract that field, - * depending on target kernel version. - * 2. For each candidate type, try to match local specification to this - * candidate target type. Matching involves finding corresponding - * high-level spec accessors, meaning that all named fields should match, - * as well as all array accesses should be within the actual bounds. Also, - * types should be compatible (see bpf_core_fields_are_compat for details). - * 3. It is supported and expected that there might be multiple flavors - * matching the spec. As long as all the specs resolve to the same set of - * offsets across all candidates, there is no error. If there is any - * ambiguity, CO-RE relocation will fail. This is necessary to accommodate - * imperfections of BTF deduplication, which can cause slight duplication of - * the same BTF type, if some directly or indirectly referenced (by - * pointer) type gets resolved to different actual types in different - * object files. If such a situation occurs, deduplicated BTF will end up - * with two (or more) structurally identical types, which differ only in - * the types they refer to through a pointer. This should be OK in most - * cases and is not an error. - * 4. Candidate types search is performed by linearly scanning through all - * types in target BTF. It is anticipated that this is overall more - * efficient memory-wise and not significantly worse (if not better) - * CPU-wise compared to prebuilding a map from all local type names to - * a list of candidate type names. It's also sped up by caching the resolved - * list of matching candidates for each local "root" type ID that has at - * least one bpf_field_reloc associated with it. This list is shared - * between multiple relocations for the same type ID and is updated as some - * of the candidates are pruned due to structural incompatibility.
- */ -static int bpf_core_reloc_field(struct bpf_program *prog, - const struct bpf_field_reloc *relo, - int relo_idx, - const struct btf *local_btf, - const struct btf *targ_btf, - struct hashmap *cand_cache) -{ - const char *prog_name = bpf_program__title(prog, false); - struct bpf_core_spec local_spec, cand_spec, targ_spec; - const void *type_key = u32_as_hash_key(relo->type_id); - const struct btf_type *local_type, *cand_type; - const char *local_name, *cand_name; - struct ids_vec *cand_ids; - __u32 local_id, cand_id; - const char *spec_str; - int i, j, err; - - local_id = relo->type_id; - local_type = btf__type_by_id(local_btf, local_id); - if (!local_type) - return -EINVAL; - - local_name = btf__name_by_offset(local_btf, local_type->name_off); - if (str_is_empty(local_name)) - return -EINVAL; - - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - - err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); - if (err) { - pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", - prog_name, relo_idx, local_id, local_name, spec_str, - err); - return -EINVAL; - } - - pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx, - relo->kind); - bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); - - if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { - cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); - if (IS_ERR(cand_ids)) { - pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", - prog_name, relo_idx, local_id, local_name, - PTR_ERR(cand_ids)); - return PTR_ERR(cand_ids); - } - err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); - if (err) { - bpf_core_free_cands(cand_ids); - return err; - } - } - - for (i = 0, j = 0; i < cand_ids->len; i++) { - cand_id = cand_ids->data[i]; - cand_type = btf__type_by_id(targ_btf, cand_id); - cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); - - err = bpf_core_spec_match(&local_spec, targ_btf, - cand_id, &cand_spec); - pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", - prog_name, relo_idx, i, cand_name); - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); - libbpf_print(LIBBPF_DEBUG, ": %d\n", err); - if (err < 0) { - pr_warn("prog '%s': relo #%d: matching error: %d\n", - prog_name, relo_idx, err); - return err; - } - if (err == 0) - continue; - - if (j == 0) { - targ_spec = cand_spec; - } else if (cand_spec.bit_offset != targ_spec.bit_offset) { - /* if there are many candidates, they should all - * resolve to the same bit offset - */ - pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", - prog_name, relo_idx, cand_spec.bit_offset, - targ_spec.bit_offset); - return -EINVAL; - } - - cand_ids->data[j++] = cand_spec.spec[0].type_id; - } - - /* - * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is - * requested, it's expected that we might not find any candidates. - * In this case, if field wasn't found in any candidate, the list of - * candidates shouldn't change at all, we'll just handle relocating - * appropriately, depending on relo's kind. 
- */ - if (j > 0) - cand_ids->len = j; - - if (j == 0 && !prog->obj->relaxed_core_relocs && - relo->kind != BPF_FIELD_EXISTS) { - pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); - return -ESRCH; - } - - /* bpf_core_reloc_insn should know how to handle missing targ_spec */ - err = bpf_core_reloc_insn(prog, relo, &local_spec, - j ? &targ_spec : NULL); - if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", - prog_name, relo_idx, relo->insn_off, err); - return -EINVAL; - } - - return 0; -} - -static int -bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) -{ - const struct btf_ext_info_sec *sec; - const struct bpf_field_reloc *rec; - const struct btf_ext_info *seg; - struct hashmap_entry *entry; - struct hashmap *cand_cache = NULL; - struct bpf_program *prog; - struct btf *targ_btf; - const char *sec_name; - int i, err = 0; - - if (targ_btf_path) - targ_btf = btf__parse_elf(targ_btf_path, NULL); - else - targ_btf = bpf_core_find_kernel_btf(); - if (IS_ERR(targ_btf)) { - pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); - return PTR_ERR(targ_btf); - } - - cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); - if (IS_ERR(cand_cache)) { - err = PTR_ERR(cand_cache); - goto out; - } - - seg = &obj->btf_ext->field_reloc_info; - for_each_btf_ext_sec(seg, sec) { - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); - if (str_is_empty(sec_name)) { - err = -EINVAL; - goto out; - } - prog = bpf_object__find_program_by_title(obj, sec_name); - if (!prog) { - pr_warn("failed to find program '%s' for CO-RE offset relocation\n", - sec_name); - err = -EINVAL; - goto out; - } - - pr_debug("prog '%s': performing %d CO-RE offset relocs\n", - sec_name, sec->num_info); - - for_each_btf_ext_rec(seg, sec, i, rec) { - err = bpf_core_reloc_field(prog, rec, i, obj->btf, - targ_btf, cand_cache); - if (err) { - pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - sec_name, i, err); - goto out; - } - } - } - -out: - btf__free(targ_btf); - if (!IS_ERR_OR_NULL(cand_cache)) { - hashmap__for_each_entry(cand_cache, entry, i) { - bpf_core_free_cands(entry->value); - } - hashmap__free(cand_cache); - } - return err; -} - -static int -bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) -{ - int err = 0; - - if (obj->btf_ext->field_reloc_info.len) - err = bpf_core_reloc_fields(obj, targ_btf_path); - - return err; -} - -static int -bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, - struct reloc_desc *relo) -{ - struct bpf_insn *insn, *new_insn; - struct bpf_program *text; - size_t new_cnt; - int err; - - if (relo->type != RELO_CALL) - return -LIBBPF_ERRNO__RELOC; - - if (prog->idx == obj->efile.text_shndx) { - pr_warn("relo in .text insn %d into off %d (insn #%d)\n", - relo->insn_idx, relo->sym_off, relo->sym_off / 8); - return -LIBBPF_ERRNO__RELOC; - } - - if (prog->main_prog_cnt == 0) { - text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); - if (!text) { - pr_warn("no .text section found yet relo into text exist\n"); - return -LIBBPF_ERRNO__RELOC; - } - new_cnt = prog->insns_cnt + text->insns_cnt; - new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); - if (!new_insn) { - pr_warn("oom in prog realloc\n"); - return -ENOMEM; - } - prog->insns = new_insn; - - if (obj->btf_ext) { - err = bpf_program_reloc_btf_ext(prog, obj, - text->section_name, - prog->insns_cnt); - if (err) - return 
err; - } - - memcpy(new_insn + prog->insns_cnt, text->insns, - text->insns_cnt * sizeof(*insn)); - prog->main_prog_cnt = prog->insns_cnt; - prog->insns_cnt = new_cnt; - pr_debug("added %zd insn from %s to prog %s\n", - text->insns_cnt, text->section_name, - prog->section_name); - } - insn = &prog->insns[relo->insn_idx]; - insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx; - return 0; -} - -static int -bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) -{ - int i, err; - - if (!prog) - return 0; - - if (obj->btf_ext) { - err = bpf_program_reloc_btf_ext(prog, obj, - prog->section_name, 0); - if (err) - return err; - } - - if (!prog->reloc_desc) - return 0; - - for (i = 0; i < prog->nr_reloc; i++) { - struct reloc_desc *relo = &prog->reloc_desc[i]; - - if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { - struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - - if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warn("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; - } - - if (relo->type != RELO_DATA) { - insn[0].src_reg = BPF_PSEUDO_MAP_FD; - } else { - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[1].imm = insn[0].imm + relo->sym_off; - } - insn[0].imm = obj->maps[relo->map_idx].fd; - } else if (relo->type == RELO_CALL) { - err = bpf_program__reloc_text(prog, obj, relo); - if (err) - return err; - } - } - - zfree(&prog->reloc_desc); - prog->nr_reloc = 0; - return 0; -} - -static int -bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) -{ - struct bpf_program *prog; - size_t i; - int err; - - if (obj->btf_ext) { - err = bpf_object__relocate_core(obj, targ_btf_path); - if (err) { - pr_warn("failed to perform CO-RE relocations: %d\n", - err); - return err; - } - } - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - - err = bpf_program__relocate(prog, obj); - if (err) { - pr_warn("failed to relocate '%s'\n", prog->section_name); - return err; - } - } - return 0; -} - -static int bpf_object__collect_reloc(struct bpf_object *obj) -{ - int i, err; - - if (!obj_elf_valid(obj)) { - pr_warn("Internal error: elf object is closed\n"); - return -LIBBPF_ERRNO__INTERNAL; - } - - for (i = 0; i < obj->efile.nr_reloc_sects; i++) { - GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; - Elf_Data *data = obj->efile.reloc_sects[i].data; - int idx = shdr->sh_info; - struct bpf_program *prog; - - if (shdr->sh_type != SHT_REL) { - pr_warn("internal error at %d\n", __LINE__); - return -LIBBPF_ERRNO__INTERNAL; - } - - prog = bpf_object__find_prog_by_idx(obj, idx); - if (!prog) { - pr_warn("relocation failed: no section(%d)\n", idx); - return -LIBBPF_ERRNO__RELOC; - } - - err = bpf_program__collect_reloc(prog, shdr, data, obj); - if (err) - return err; - } - return 0; -} - -static int -load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd) -{ - struct bpf_load_program_attr load_attr; - char *cp, errmsg[STRERR_BUFSIZE]; - int log_buf_size = BPF_LOG_BUF_SIZE; - char *log_buf; - int btf_fd, ret; - - if (!insns || !insns_cnt) - return -EINVAL; - - memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = prog->type; - load_attr.expected_attach_type = prog->expected_attach_type; - if (prog->caps->name) - load_attr.name = prog->name; - load_attr.insns = insns; - load_attr.insns_cnt = insns_cnt; - load_attr.license = license; - if (prog->type == BPF_PROG_TYPE_TRACING) { - load_attr.attach_prog_fd = 
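bpf_program__reloc_text() above appends the whole .text section to the calling program once, then fixes each pseudo-call immediate with imm += sym_off/8 + main_prog_cnt - insn_idx, because a BPF call immediate counts instructions from the one following the call. A worked example of that fix-up with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned insns_cnt     = 100;       /* instructions in the caller */
        unsigned main_prog_cnt = insns_cnt; /* .text was appended here */
        unsigned sym_off       = 24;        /* callee's byte offset in .text */
        unsigned insn_idx      = 37;        /* index of the call instruction */
        int imm                = -1;        /* example starting value; the real
                                             * one comes from the object file */

        /* the callee now lives at insn main_prog_cnt + sym_off/8; a BPF
         * call immediate is relative to the instruction after the call */
        imm += sym_off / 8 + main_prog_cnt - insn_idx;

        printf("patched call imm = %d (resolves to insn %u)\n",
               imm, insn_idx + imm + 1);    /* 65 -> insn 103 = 100 + 24/8 */
        return 0;
    }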
prog->attach_prog_fd; - load_attr.attach_btf_id = prog->attach_btf_id; - } else { - load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog->prog_ifindex; - } - /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ - if (prog->obj->btf_ext) - btf_fd = bpf_object__btf_fd(prog->obj); - else - btf_fd = -1; - load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0; - load_attr.func_info = prog->func_info; - load_attr.func_info_rec_size = prog->func_info_rec_size; - load_attr.func_info_cnt = prog->func_info_cnt; - load_attr.line_info = prog->line_info; - load_attr.line_info_rec_size = prog->line_info_rec_size; - load_attr.line_info_cnt = prog->line_info_cnt; - load_attr.log_level = prog->log_level; - load_attr.prog_flags = prog->prog_flags; - -retry_load: - log_buf = malloc(log_buf_size); - if (!log_buf) - pr_warn("Alloc log buffer for bpf loader error, continue without log\n"); - - ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); - - if (ret >= 0) { - if (load_attr.log_level) - pr_debug("verifier log:\n%s", log_buf); - *pfd = ret; - ret = 0; - goto out; - } - - if (errno == ENOSPC) { - log_buf_size <<= 1; - free(log_buf); - goto retry_load; - } - ret = -errno; - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("load bpf program failed: %s\n", cp); - - if (log_buf && log_buf[0] != '\0') { - ret = -LIBBPF_ERRNO__VERIFY; - pr_warn("-- BEGIN DUMP LOG ---\n"); - pr_warn("\n%s\n", log_buf); - pr_warn("-- END LOG --\n"); - } else if (load_attr.insns_cnt >= BPF_MAXINSNS) { - pr_warn("Program too large (%zu insns), at most %d insns\n", - load_attr.insns_cnt, BPF_MAXINSNS); - ret = -LIBBPF_ERRNO__PROG2BIG; - } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { - /* Wrong program type? */ - int fd; - - load_attr.prog_type = BPF_PROG_TYPE_KPROBE; - load_attr.expected_attach_type = 0; - fd = bpf_load_program_xattr(&load_attr, NULL, 0); - if (fd >= 0) { - close(fd); - ret = -LIBBPF_ERRNO__PROGTYPE; - goto out; - } - } - -out: - free(log_buf); - return ret; -} - -int -bpf_program__load(struct bpf_program *prog, - char *license, __u32 kern_version) -{ - int err = 0, fd, i; - - if (prog->instances.nr < 0 || !prog->instances.fds) { - if (prog->preprocessor) { - pr_warn("Internal error: can't load program '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__INTERNAL; - } - - prog->instances.fds = malloc(sizeof(int)); - if (!prog->instances.fds) { - pr_warn("Not enough memory for BPF fds\n"); - return -ENOMEM; - } - prog->instances.nr = 1; - prog->instances.fds[0] = -1; - } - - if (!prog->preprocessor) { - if (prog->instances.nr != 1) { - pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n", - prog->section_name, prog->instances.nr); - } - err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_version, &fd); - if (!err) - prog->instances.fds[0] = fd; - goto out; - } - - for (i = 0; i < prog->instances.nr; i++) { - struct bpf_prog_prep_result result; - bpf_program_prep_t preprocessor = prog->preprocessor; - - memset(&result, 0, sizeof(result)); - err = preprocessor(prog, i, prog->insns, - prog->insns_cnt, &result); - if (err) { - pr_warn("Preprocessing the %dth instance of program '%s' failed\n", - i, prog->section_name); - goto out; - } - - if (!result.new_insn_ptr || !result.new_insn_cnt) { - pr_debug("Skip loading the %dth instance of program '%s'\n", - i, prog->section_name); - prog->instances.fds[i] = -1; - if (result.pfd) - *result.pfd = -1; - continue; - } - - err = load_program(prog, result.new_insn_ptr, - result.new_insn_cnt, 
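load_program() above retries with a doubled log buffer whenever the load fails with ENOSPC, so a truncated verifier log never masks the real error. The retry shape in isolation, with a hypothetical try_load() standing in for bpf_load_program_xattr():

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* hypothetical loader: needs a 1 MiB log buffer to succeed */
    static int try_load(char *log, size_t sz)
    {
        if (sz < (1 << 20)) {
            errno = ENOSPC;
            return -1;
        }
        snprintf(log, sz, "verifier: ok");
        return 0;
    }

    int main(void)
    {
        size_t sz = 1 << 16;    /* analogous to BPF_LOG_BUF_SIZE */
        char *log;
        int ret;

    retry:
        log = malloc(sz);
        if (!log)
            return 1;
        ret = try_load(log, sz);
        if (ret < 0 && errno == ENOSPC) {
            sz <<= 1;           /* double and retry, as the deleted code did */
            free(log);
            goto retry;
        }
        if (ret == 0)
            printf("loaded with a %zu-byte log: %s\n", sz, log);
        free(log);
        return ret ? 1 : 0;
    }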
- license, kern_version, &fd); - - if (err) { - pr_warn("Loading the %dth instance of program '%s' failed\n", - i, prog->section_name); - goto out; - } - - if (result.pfd) - *result.pfd = fd; - prog->instances.fds[i] = fd; - } -out: - if (err) - pr_warn("failed to load program '%s'\n", prog->section_name); - zfree(&prog->insns); - prog->insns_cnt = 0; - return err; -} - -static bool bpf_program__is_function_storage(const struct bpf_program *prog, - const struct bpf_object *obj) -{ - return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls; -} - -static int -bpf_object__load_progs(struct bpf_object *obj, int log_level) -{ - size_t i; - int err; - - for (i = 0; i < obj->nr_programs; i++) { - if (bpf_program__is_function_storage(&obj->programs[i], obj)) - continue; - obj->programs[i].log_level |= log_level; - err = bpf_program__load(&obj->programs[i], - obj->license, - obj->kern_version); - if (err) - return err; - } - return 0; -} - -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd); -static struct bpf_object * -__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) -{ - const char *pin_root_path; - struct bpf_program *prog; - struct bpf_object *obj; - const char *obj_name; - char tmp_name[64]; - bool relaxed_maps; - __u32 attach_prog_fd; - int err; - - if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warn("failed to init libelf for %s\n", - path ? : "(mem buf)"); - return ERR_PTR(-LIBBPF_ERRNO__LIBELF); - } - - if (!OPTS_VALID(opts, bpf_object_open_opts)) - return ERR_PTR(-EINVAL); - - obj_name = OPTS_GET(opts, object_name, NULL); - if (obj_buf) { - if (!obj_name) { - snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", - (unsigned long)obj_buf, - (unsigned long)obj_buf_sz); - obj_name = tmp_name; - } - path = obj_name; - pr_debug("loading object '%s' from buffer\n", obj_name); - } - - obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); - if (IS_ERR(obj)) - return obj; - - obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); - pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); - - CHECK_ERR(bpf_object__elf_init(obj), err, out); - CHECK_ERR(bpf_object__check_endianness(obj), err, out); - CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), - err, out); - CHECK_ERR(bpf_object__collect_reloc(obj), err, out); - bpf_object__elf_finish(obj); - - bpf_object__for_each_program(prog, obj) { - enum bpf_prog_type prog_type; - enum bpf_attach_type attach_type; - - err = libbpf_prog_type_by_name(prog->section_name, &prog_type, - &attach_type); - if (err == -ESRCH) - /* couldn't guess, but user might manually specify */ - continue; - if (err) - goto out; - - bpf_program__set_type(prog, prog_type); - bpf_program__set_expected_attach_type(prog, attach_type); - if (prog_type == BPF_PROG_TYPE_TRACING) { - err = libbpf_find_attach_btf_id(prog->section_name, - attach_type, - attach_prog_fd); - if (err <= 0) - goto out; - prog->attach_btf_id = err; - prog->attach_prog_fd = attach_prog_fd; - } - } - - return obj; -out: - bpf_object__close(obj); - return ERR_PTR(err); -} - -static struct bpf_object * -__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_maps = flags & MAPS_RELAX_COMPAT, - ); - - /* param 
validation */ - if (!attr->file) - return NULL; - - pr_debug("loading %s\n", attr->file); - return __bpf_object__open(attr->file, NULL, 0, &opts); -} - -struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) -{ - return __bpf_object__open_xattr(attr, 0); -} - -struct bpf_object *bpf_object__open(const char *path) -{ - struct bpf_object_open_attr attr = { - .file = path, - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; - - return bpf_object__open_xattr(&attr); -} - -struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) -{ - if (!path) - return ERR_PTR(-EINVAL); - - pr_debug("loading %s\n", path); - - return __bpf_object__open(path, NULL, 0, opts); -} - -struct bpf_object * -bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) -{ - if (!obj_buf || obj_buf_sz == 0) - return ERR_PTR(-EINVAL); - - return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts); -} - -struct bpf_object * -bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, - const char *name) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .object_name = name, - /* wrong default, but backwards-compatible */ - .relaxed_maps = true, - ); - - /* returning NULL is wrong, but backwards-compatible */ - if (!obj_buf || obj_buf_sz == 0) - return NULL; - - return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); -} - -int bpf_object__unload(struct bpf_object *obj) -{ - size_t i; - - if (!obj) - return -EINVAL; - - for (i = 0; i < obj->nr_maps; i++) - zclose(obj->maps[i].fd); - - for (i = 0; i < obj->nr_programs; i++) - bpf_program__unload(&obj->programs[i]); - - return 0; -} - -int bpf_object__load_xattr(struct bpf_object_load_attr *attr) -{ - struct bpf_object *obj; - int err, i; - - if (!attr) - return -EINVAL; - obj = attr->obj; - if (!obj) - return -EINVAL; - - if (obj->loaded) { - pr_warn("object should not be loaded twice\n"); - return -EINVAL; - } - - obj->loaded = true; - - CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); - CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); - - return 0; -out: - /* unpin any maps that were auto-pinned during load */ - for (i = 0; i < obj->nr_maps; i++) - if (obj->maps[i].pinned && !obj->maps[i].reused) - bpf_map__unpin(&obj->maps[i], NULL); - - bpf_object__unload(obj); - pr_warn("failed to load object '%s'\n", obj->path); - return err; -} - -int bpf_object__load(struct bpf_object *obj) -{ - struct bpf_object_load_attr attr = { - .obj = obj, - }; - - return bpf_object__load_xattr(&attr); -} - -static int make_parent_dir(const char *path) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - char *dname, *dir; - int err = 0; - - dname = strdup(path); - if (dname == NULL) - return -ENOMEM; - - dir = dirname(dname); - if (mkdir(dir, 0700) && errno != EEXIST) - err = -errno; - - free(dname); - if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to mkdir %s: %s\n", path, cp); - } - return err; -} - -static int check_path(const char *path) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct statfs st_fs; - char *dname, *dir; - int err = 0; - - if (path == NULL) - return -EINVAL; - - dname = strdup(path); - if (dname == NULL) - return -ENOMEM; - - dir = dirname(dname); - if (statfs(dir, &st_fs)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to statfs %s: %s\n", dir, cp); - err = -errno; - } - free(dname); - - if (!err && st_fs.f_type != BPF_FS_MAGIC) { - 
pr_warn("specified path %s is not on BPF FS\n", path); - err = -EINVAL; - } - - return err; -} - -int bpf_program__pin_instance(struct bpf_program *prog, const char *path, - int instance) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - int err; - - err = make_parent_dir(path); - if (err) - return err; - - err = check_path(path); - if (err) - return err; - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return -EINVAL; - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); - return -EINVAL; - } - - if (bpf_obj_pin(prog->instances.fds[instance], path)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to pin program: %s\n", cp); - return -errno; - } - pr_debug("pinned program '%s'\n", path); - - return 0; -} - -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, - int instance) -{ - int err; - - err = check_path(path); - if (err) - return err; - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return -EINVAL; - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); - return -EINVAL; - } - - err = unlink(path); - if (err != 0) - return -errno; - pr_debug("unpinned program '%s'\n", path); - - return 0; -} - -int bpf_program__pin(struct bpf_program *prog, const char *path) -{ - int i, err; - - err = make_parent_dir(path); - if (err) - return err; - - err = check_path(path); - if (err) - return err; - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return -EINVAL; - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", - prog->section_name); - return -EINVAL; - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program__pin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) { - err = -EINVAL; - goto err_unpin; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin; - } - - err = bpf_program__pin_instance(prog, buf, i); - if (err) - goto err_unpin; - } - - return 0; - -err_unpin: - for (i = i - 1; i >= 0; i--) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - continue; - else if (len >= PATH_MAX) - continue; - - bpf_program__unpin_instance(prog, buf, i); - } - - rmdir(path); - - return err; -} - -int bpf_program__unpin(struct bpf_program *prog, const char *path) -{ - int i, err; - - err = check_path(path); - if (err) - return err; - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return -EINVAL; - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", - prog->section_name); - return -EINVAL; - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program__unpin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - return -EINVAL; - else if (len >= PATH_MAX) - return -ENAMETOOLONG; - - err = bpf_program__unpin_instance(prog, buf, i); - if (err) - return err; - } - - err = rmdir(path); - if (err) - return -errno; - - return 0; -} - -int bpf_map__pin(struct bpf_map *map, const 
char *path) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - int err; - - if (map == NULL) { - pr_warn("invalid map pointer\n"); - return -EINVAL; - } - - if (map->pin_path) { - if (path && strcmp(path, map->pin_path)) { - pr_warn("map '%s' already has pin path '%s' different from '%s'\n", - bpf_map__name(map), map->pin_path, path); - return -EINVAL; - } else if (map->pinned) { - pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", - bpf_map__name(map), map->pin_path); - return 0; - } - } else { - if (!path) { - pr_warn("missing a path to pin map '%s' at\n", - bpf_map__name(map)); - return -EINVAL; - } else if (map->pinned) { - pr_warn("map '%s' already pinned\n", bpf_map__name(map)); - return -EEXIST; - } - - map->pin_path = strdup(path); - if (!map->pin_path) { - err = -errno; - goto out_err; - } - } - - err = make_parent_dir(map->pin_path); - if (err) - return err; - - err = check_path(map->pin_path); - if (err) - return err; - - if (bpf_obj_pin(map->fd, map->pin_path)) { - err = -errno; - goto out_err; - } - - map->pinned = true; - pr_debug("pinned map '%s'\n", map->pin_path); - - return 0; - -out_err: - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin map: %s\n", cp); - return err; -} - -int bpf_map__unpin(struct bpf_map *map, const char *path) -{ - int err; - - if (map == NULL) { - pr_warn("invalid map pointer\n"); - return -EINVAL; - } - - if (map->pin_path) { - if (path && strcmp(path, map->pin_path)) { - pr_warn("map '%s' already has pin path '%s' different from '%s'\n", - bpf_map__name(map), map->pin_path, path); - return -EINVAL; - } - path = map->pin_path; - } else if (!path) { - pr_warn("no path to unpin map '%s' from\n", - bpf_map__name(map)); - return -EINVAL; - } - - err = check_path(path); - if (err) - return err; - - err = unlink(path); - if (err != 0) - return -errno; - - map->pinned = false; - pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); - - return 0; -} - -int bpf_map__set_pin_path(struct bpf_map *map, const char *path) -{ - char *new = NULL; - - if (path) { - new = strdup(path); - if (!new) - return -errno; - } - - free(map->pin_path); - map->pin_path = new; - return 0; -} - -const char *bpf_map__get_pin_path(const struct bpf_map *map) -{ - return map->pin_path; -} - -bool bpf_map__is_pinned(const struct bpf_map *map) -{ - return map->pinned; -} - -int bpf_object__pin_maps(struct bpf_object *obj, const char *path) -{ - struct bpf_map *map; - int err; - - if (!obj) - return -ENOENT; - - if (!obj->loaded) { - pr_warn("object not yet loaded; load it first\n"); - return -ENOENT; - } - - bpf_object__for_each_map(map, obj) { - char *pin_path = NULL; - char buf[PATH_MAX]; - - if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) { - err = -EINVAL; - goto err_unpin_maps; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin_maps; - } - pin_path = buf; - } else if (!map->pin_path) { - continue; - } - - err = bpf_map__pin(map, pin_path); - if (err) - goto err_unpin_maps; - } - - return 0; - -err_unpin_maps: - while ((map = bpf_map__prev(map, obj))) { - if (!map->pin_path) - continue; - - bpf_map__unpin(map, NULL); - } - - return err; -} - -int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) -{ - struct bpf_map *map; - int err; - - if (!obj) - return -ENOENT; - - bpf_object__for_each_map(map, obj) { - char *pin_path = NULL; - char buf[PATH_MAX]; - - if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - 
bpf_map__name(map)); - if (len < 0) - return -EINVAL; - else if (len >= PATH_MAX) - return -ENAMETOOLONG; - pin_path = buf; - } else if (!map->pin_path) { - continue; - } - - err = bpf_map__unpin(map, pin_path); - if (err) - return err; - } - - return 0; -} - -int bpf_object__pin_programs(struct bpf_object *obj, const char *path) -{ - struct bpf_program *prog; - int err; - - if (!obj) - return -ENOENT; - - if (!obj->loaded) { - pr_warn("object not yet loaded; load it first\n"); - return -ENOENT; - } - - bpf_object__for_each_program(prog, obj) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) { - err = -EINVAL; - goto err_unpin_programs; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin_programs; - } - - err = bpf_program__pin(prog, buf); - if (err) - goto err_unpin_programs; - } - - return 0; - -err_unpin_programs: - while ((prog = bpf_program__prev(prog, obj))) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) - continue; - else if (len >= PATH_MAX) - continue; - - bpf_program__unpin(prog, buf); - } - - return err; -} - -int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) -{ - struct bpf_program *prog; - int err; - - if (!obj) - return -ENOENT; - - bpf_object__for_each_program(prog, obj) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) - return -EINVAL; - else if (len >= PATH_MAX) - return -ENAMETOOLONG; - - err = bpf_program__unpin(prog, buf); - if (err) - return err; - } - - return 0; -} - -int bpf_object__pin(struct bpf_object *obj, const char *path) -{ - int err; - - err = bpf_object__pin_maps(obj, path); - if (err) - return err; - - err = bpf_object__pin_programs(obj, path); - if (err) { - bpf_object__unpin_maps(obj, path); - return err; - } - - return 0; -} - -void bpf_object__close(struct bpf_object *obj) -{ - size_t i; - - if (!obj) - return; - - if (obj->clear_priv) - obj->clear_priv(obj, obj->priv); - - bpf_object__elf_finish(obj); - bpf_object__unload(obj); - btf__free(obj->btf); - btf_ext__free(obj->btf_ext); - - for (i = 0; i < obj->nr_maps; i++) { - zfree(&obj->maps[i].name); - zfree(&obj->maps[i].pin_path); - if (obj->maps[i].clear_priv) - obj->maps[i].clear_priv(&obj->maps[i], - obj->maps[i].priv); - obj->maps[i].priv = NULL; - obj->maps[i].clear_priv = NULL; - } - - zfree(&obj->sections.rodata); - zfree(&obj->sections.data); - zfree(&obj->maps); - obj->nr_maps = 0; - - if (obj->programs && obj->nr_programs) { - for (i = 0; i < obj->nr_programs; i++) - bpf_program__exit(&obj->programs[i]); - } - zfree(&obj->programs); - - list_del(&obj->list); - free(obj); -} - -struct bpf_object * -bpf_object__next(struct bpf_object *prev) -{ - struct bpf_object *next; - - if (!prev) - next = list_first_entry(&bpf_objects_list, - struct bpf_object, - list); - else - next = list_next_entry(prev, list); - - /* Empty list is noticed here so don't need checking on entry. */ - if (&next->list == &bpf_objects_list) - return NULL; - - return next; -} - -const char *bpf_object__name(const struct bpf_object *obj) -{ - return obj ? obj->name : ERR_PTR(-EINVAL); -} - -unsigned int bpf_object__kversion(const struct bpf_object *obj) -{ - return obj ? obj->kern_version : 0; -} - -struct btf *bpf_object__btf(const struct bpf_object *obj) -{ - return obj ? obj->btf : NULL; -} - -int bpf_object__btf_fd(const struct bpf_object *obj) -{ - return obj->btf ? 
btf__fd(obj->btf) : -1; -} - -int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv) -{ - if (obj->priv && obj->clear_priv) - obj->clear_priv(obj, obj->priv); - - obj->priv = priv; - obj->clear_priv = clear_priv; - return 0; -} - -void *bpf_object__priv(const struct bpf_object *obj) -{ - return obj ? obj->priv : ERR_PTR(-EINVAL); -} - -static struct bpf_program * -__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, - bool forward) -{ - size_t nr_programs = obj->nr_programs; - ssize_t idx; - - if (!nr_programs) - return NULL; - - if (!p) - /* Iter from the beginning */ - return forward ? &obj->programs[0] : - &obj->programs[nr_programs - 1]; - - if (p->obj != obj) { - pr_warn("error: program handler doesn't match object\n"); - return NULL; - } - - idx = (p - obj->programs) + (forward ? 1 : -1); - if (idx >= obj->nr_programs || idx < 0) - return NULL; - return &obj->programs[idx]; -} - -struct bpf_program * -bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) -{ - struct bpf_program *prog = prev; - - do { - prog = __bpf_program__iter(prog, obj, true); - } while (prog && bpf_program__is_function_storage(prog, obj)); - - return prog; -} - -struct bpf_program * -bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) -{ - struct bpf_program *prog = next; - - do { - prog = __bpf_program__iter(prog, obj, false); - } while (prog && bpf_program__is_function_storage(prog, obj)); - - return prog; -} - -int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv) -{ - if (prog->priv && prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = priv; - prog->clear_priv = clear_priv; - return 0; -} - -void *bpf_program__priv(const struct bpf_program *prog) -{ - return prog ? 
prog->priv : ERR_PTR(-EINVAL); -} - -void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) -{ - prog->prog_ifindex = ifindex; -} - -const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) -{ - const char *title; - - title = prog->section_name; - if (needs_copy) { - title = strdup(title); - if (!title) { - pr_warn("failed to strdup program title\n"); - return ERR_PTR(-ENOMEM); - } - } - - return title; -} - -int bpf_program__fd(const struct bpf_program *prog) -{ - return bpf_program__nth_fd(prog, 0); -} - -size_t bpf_program__size(const struct bpf_program *prog) -{ - return prog->insns_cnt * sizeof(struct bpf_insn); -} - -int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, - bpf_program_prep_t prep) -{ - int *instances_fds; - - if (nr_instances <= 0 || !prep) - return -EINVAL; - - if (prog->instances.nr > 0 || prog->instances.fds) { - pr_warn("Can't set pre-processor after loading\n"); - return -EINVAL; - } - - instances_fds = malloc(sizeof(int) * nr_instances); - if (!instances_fds) { - pr_warn("alloc memory failed for fds\n"); - return -ENOMEM; - } - - /* fill all fd with -1 */ - memset(instances_fds, -1, sizeof(int) * nr_instances); - - prog->instances.nr = nr_instances; - prog->instances.fds = instances_fds; - prog->preprocessor = prep; - return 0; -} - -int bpf_program__nth_fd(const struct bpf_program *prog, int n) -{ - int fd; - - if (!prog) - return -EINVAL; - - if (n >= prog->instances.nr || n < 0) { - pr_warn("Can't get the %dth fd from program %s: only %d instances\n", - n, prog->section_name, prog->instances.nr); - return -EINVAL; - } - - fd = prog->instances.fds[n]; - if (fd < 0) { - pr_warn("%dth instance of program '%s' is invalid\n", - n, prog->section_name); - return -ENOENT; - } - - return fd; -} - -enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog) -{ - return prog->type; -} - -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) -{ - prog->type = type; -} - -static bool bpf_program__is_type(const struct bpf_program *prog, - enum bpf_prog_type type) -{ - return prog ? (prog->type == type) : false; -} - -#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ -int bpf_program__set_##NAME(struct bpf_program *prog) \ -{ \ - if (!prog) \ - return -EINVAL; \ - bpf_program__set_type(prog, TYPE); \ - return 0; \ -} \ - \ -bool bpf_program__is_##NAME(const struct bpf_program *prog) \ -{ \ - return bpf_program__is_type(prog, TYPE); \ -} \ - -BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); -BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); -BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); -BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); -BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); -BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); -BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); -BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); -BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); - -enum bpf_attach_type -bpf_program__get_expected_attach_type(struct bpf_program *prog) -{ - return prog->expected_attach_type; -} - -void bpf_program__set_expected_attach_type(struct bpf_program *prog, - enum bpf_attach_type type) -{ - prog->expected_attach_type = type; -} - -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \ - { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype } - -/* Programs that can NOT be attached. 
*/ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) - -/* Programs that can be attached. */ -#define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype) - -/* Programs that must specify expected attach type at load time. */ -#define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype) - -/* Programs that use BTF to identify attach point */ -#define BPF_PROG_BTF(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0) - -/* Programs that can be attached but attach type can't be identified by section - * name. Kept for backward compatibility. - */ -#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) - -static const struct { - const char *sec; - size_t len; - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - bool is_attachable; - bool is_attach_btf; - enum bpf_attach_type attach_type; -} section_names[] = { - BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), - BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_RAW_TP), - BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FENTRY), - BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FEXIT), - BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), - BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), - BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), - BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), - BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), - BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), - BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB, - BPF_CGROUP_INET_INGRESS), - BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB, - BPF_CGROUP_INET_EGRESS), - BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), - BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET_SOCK_CREATE), - BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET4_POST_BIND), - BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET6_POST_BIND), - BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE, - BPF_CGROUP_DEVICE), - BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS, - BPF_CGROUP_SOCK_OPS), - BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB, - BPF_SK_SKB_STREAM_PARSER), - BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB, - BPF_SK_SKB_STREAM_VERDICT), - BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB), - BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG, - BPF_SK_MSG_VERDICT), - BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2, - BPF_LIRC_MODE2), - BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR, - BPF_FLOW_DISSECTOR), - BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_BIND), - BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_BIND), - BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_CONNECT), - 
BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_CONNECT), - BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP4_SENDMSG), - BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP6_SENDMSG), - BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP4_RECVMSG), - BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP6_RECVMSG), - BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, - BPF_CGROUP_SYSCTL), - BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, - BPF_CGROUP_GETSOCKOPT), - BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, - BPF_CGROUP_SETSOCKOPT), -}; - -#undef BPF_PROG_SEC_IMPL -#undef BPF_PROG_SEC -#undef BPF_APROG_SEC -#undef BPF_EAPROG_SEC -#undef BPF_APROG_COMPAT - -#define MAX_TYPE_NAME_SIZE 32 - -static char *libbpf_get_type_names(bool attach_type) -{ - int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; - char *buf; - - buf = malloc(len); - if (!buf) - return NULL; - - buf[0] = '\0'; - /* Forge string buf with all available names */ - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (attach_type && !section_names[i].is_attachable) - continue; - - if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { - free(buf); - return NULL; - } - strcat(buf, " "); - strcat(buf, section_names[i].sec); - } - - return buf; -} - -int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, - enum bpf_attach_type *expected_attach_type) -{ - char *type_names; - int i; - - if (!name) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) - continue; - *prog_type = section_names[i].prog_type; - *expected_attach_type = section_names[i].expected_attach_type; - return 0; - } - pr_warn("failed to guess program type from ELF section '%s'\n", name); - type_names = libbpf_get_type_names(false); - if (type_names != NULL) { - pr_info("supported section(type) names are:%s\n", type_names); - free(type_names); - } - - return -ESRCH; -} - -#define BTF_PREFIX "btf_trace_" -int libbpf_find_vmlinux_btf_id(const char *name, - enum bpf_attach_type attach_type) -{ - struct btf *btf = bpf_core_find_kernel_btf(); - char raw_tp_btf[128] = BTF_PREFIX; - char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; - const char *btf_name; - int err = -EINVAL; - __u32 kind; - - if (IS_ERR(btf)) { - pr_warn("vmlinux BTF is not found\n"); - return -EINVAL; - } - - if (attach_type == BPF_TRACE_RAW_TP) { - /* prepend "btf_trace_" prefix per kernel convention */ - strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); - btf_name = raw_tp_btf; - kind = BTF_KIND_TYPEDEF; - } else { - btf_name = name; - kind = BTF_KIND_FUNC; - } - err = btf__find_by_name_kind(btf, btf_name, kind); - btf__free(btf); - return err; -} - -static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) -{ - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info *info; - struct btf *btf = NULL; - int err = -EINVAL; - - info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); - if (IS_ERR_OR_NULL(info_linear)) { - pr_warn("failed get_prog_info_linear for FD %d\n", - attach_prog_fd); - return -EINVAL; - } - info = &info_linear->info; - if (!info->btf_id) { - pr_warn("The target program doesn't have BTF\n"); - goto out; - } - if (btf__get_from_id(info->btf_id, &btf)) { - pr_warn("Failed to get BTF of the program\n"); - goto 
out; - } - err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); - btf__free(btf); - if (err <= 0) { - pr_warn("%s is not found in prog's BTF\n", name); - goto out; - } -out: - free(info_linear); - return err; -} - -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd) -{ - int i, err; - - if (!name) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (!section_names[i].is_attach_btf) - continue; - if (strncmp(name, section_names[i].sec, section_names[i].len)) - continue; - if (attach_prog_fd) - err = libbpf_find_prog_btf_id(name + section_names[i].len, - attach_prog_fd); - else - err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, - attach_type); - if (err <= 0) - pr_warn("%s is not found in vmlinux BTF\n", name); - return err; - } - pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); - return -ESRCH; -} - -int libbpf_attach_type_by_name(const char *name, - enum bpf_attach_type *attach_type) -{ - char *type_names; - int i; - - if (!name) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) - continue; - if (!section_names[i].is_attachable) - return -EINVAL; - *attach_type = section_names[i].attach_type; - return 0; - } - pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); - type_names = libbpf_get_type_names(true); - if (type_names != NULL) { - pr_info("attachable section(type) names are:%s\n", type_names); - free(type_names); - } - - return -EINVAL; -} - -int bpf_map__fd(const struct bpf_map *map) -{ - return map ? map->fd : -EINVAL; -} - -const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) -{ - return map ? &map->def : ERR_PTR(-EINVAL); -} - -const char *bpf_map__name(const struct bpf_map *map) -{ - return map ? map->name : NULL; -} - -__u32 bpf_map__btf_key_type_id(const struct bpf_map *map) -{ - return map ? map->btf_key_type_id : 0; -} - -__u32 bpf_map__btf_value_type_id(const struct bpf_map *map) -{ - return map ? map->btf_value_type_id : 0; -} - -int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv) -{ - if (!map) - return -EINVAL; - - if (map->priv) { - if (map->clear_priv) - map->clear_priv(map, map->priv); - } - - map->priv = priv; - map->clear_priv = clear_priv; - return 0; -} - -void *bpf_map__priv(const struct bpf_map *map) -{ - return map ? 
map->priv : ERR_PTR(-EINVAL); -} - -bool bpf_map__is_offload_neutral(const struct bpf_map *map) -{ - return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; -} - -bool bpf_map__is_internal(const struct bpf_map *map) -{ - return map->libbpf_type != LIBBPF_MAP_UNSPEC; -} - -void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) -{ - map->map_ifindex = ifindex; -} - -int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) -{ - if (!bpf_map_type__is_map_in_map(map->def.type)) { - pr_warn("error: unsupported map type\n"); - return -EINVAL; - } - if (map->inner_map_fd != -1) { - pr_warn("error: inner_map_fd already specified\n"); - return -EINVAL; - } - map->inner_map_fd = fd; - return 0; -} - -static struct bpf_map * -__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) -{ - ssize_t idx; - struct bpf_map *s, *e; - - if (!obj || !obj->maps) - return NULL; - - s = obj->maps; - e = obj->maps + obj->nr_maps; - - if ((m < s) || (m >= e)) { - pr_warn("error in %s: map handler doesn't belong to object\n", - __func__); - return NULL; - } - - idx = (m - obj->maps) + i; - if (idx >= obj->nr_maps || idx < 0) - return NULL; - return &obj->maps[idx]; -} - -struct bpf_map * -bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) -{ - if (prev == NULL) - return obj->maps; - - return __bpf_map__iter(prev, obj, 1); -} - -struct bpf_map * -bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) -{ - if (next == NULL) { - if (!obj->nr_maps) - return NULL; - return obj->maps + obj->nr_maps - 1; - } - - return __bpf_map__iter(next, obj, -1); -} - -struct bpf_map * -bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) -{ - struct bpf_map *pos; - - bpf_object__for_each_map(pos, obj) { - if (pos->name && !strcmp(pos->name, name)) - return pos; - } - return NULL; -} - -int -bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) -{ - return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); -} - -struct bpf_map * -bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) -{ - return ERR_PTR(-ENOTSUP); -} - -long libbpf_get_error(const void *ptr) -{ - return PTR_ERR_OR_ZERO(ptr); -} - -int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_prog_load_attr attr; - - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; - - return bpf_prog_load_xattr(&attr, pobj, prog_fd); -} - -int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_object_open_attr open_attr = {}; - struct bpf_program *prog, *first_prog = NULL; - struct bpf_object *obj; - struct bpf_map *map; - int err; - - if (!attr) - return -EINVAL; - if (!attr->file) - return -EINVAL; - - open_attr.file = attr->file; - open_attr.prog_type = attr->prog_type; - - obj = bpf_object__open_xattr(&open_attr); - if (IS_ERR_OR_NULL(obj)) - return -ENOENT; - - bpf_object__for_each_program(prog, obj) { - enum bpf_attach_type attach_type = attr->expected_attach_type; - /* - * to preserve backwards compatibility, bpf_prog_load treats - * attr->prog_type, if specified, as an override to whatever - * bpf_object__open guessed - */ - if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - bpf_program__set_type(prog, attr->prog_type); - bpf_program__set_expected_attach_type(prog, - attach_type); - } - if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { - 
/* - * we haven't guessed from section name and user - * didn't provide a fallback type, too bad... - */ - bpf_object__close(obj); - return -EINVAL; - } - - prog->prog_ifindex = attr->ifindex; - prog->log_level = attr->log_level; - prog->prog_flags = attr->prog_flags; - if (!first_prog) - first_prog = prog; - } - - bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) - map->map_ifindex = attr->ifindex; - } - - if (!first_prog) { - pr_warn("object file doesn't contain bpf program\n"); - bpf_object__close(obj); - return -ENOENT; - } - - err = bpf_object__load(obj); - if (err) { - bpf_object__close(obj); - return -EINVAL; - } - - *pobj = obj; - *prog_fd = bpf_program__fd(first_prog); - return 0; -} - -struct bpf_link { - int (*destroy)(struct bpf_link *link); -}; - -int bpf_link__destroy(struct bpf_link *link) -{ - int err; - - if (!link) - return 0; - - err = link->destroy(link); - free(link); - - return err; -} - -struct bpf_link_fd { - struct bpf_link link; /* has to be at the top of struct */ - int fd; /* hook FD */ -}; - -static int bpf_link__destroy_perf_event(struct bpf_link *link) -{ - struct bpf_link_fd *l = (void *)link; - int err; - - err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0); - if (err) - err = -errno; - - close(l->fd); - return err; -} - -struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, - int pfd) -{ - char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; - int prog_fd, err; - - if (pfd < 0) { - pr_warn("program '%s': invalid perf event FD %d\n", - bpf_program__title(prog, false), pfd); - return ERR_PTR(-EINVAL); - } - prog_fd = bpf_program__fd(prog); - if (prog_fd < 0) { - pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n", - bpf_program__title(prog, false)); - return ERR_PTR(-EINVAL); - } - - link = malloc(sizeof(*link)); - if (!link) - return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_perf_event; - link->fd = pfd; - - if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { - err = -errno; - free(link); - pr_warn("program '%s': failed to attach to pfd %d: %s\n", - bpf_program__title(prog, false), pfd, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return ERR_PTR(err); - } - if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { - err = -errno; - free(link); - pr_warn("program '%s': failed to enable pfd %d: %s\n", - bpf_program__title(prog, false), pfd, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return ERR_PTR(err); - } - return (struct bpf_link *)link; -} - -/* - * this function is expected to parse integer in the range of [0, 2^31-1] from - * given file using scanf format string fmt. If actual parsed value is - * negative, the result might be indistinguishable from error - */ -static int parse_uint_from_file(const char *file, const char *fmt) -{ - char buf[STRERR_BUFSIZE]; - int err, ret; - FILE *f; - - f = fopen(file, "r"); - if (!f) { - err = -errno; - pr_debug("failed to open '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); - return err; - } - err = fscanf(f, fmt, &ret); - if (err != 1) { - err = err == EOF ? 
-EIO : -errno; - pr_debug("failed to parse '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); - fclose(f); - return err; - } - fclose(f); - return ret; -} - -static int determine_kprobe_perf_type(void) -{ - const char *file = "/sys/bus/event_source/devices/kprobe/type"; - - return parse_uint_from_file(file, "%d\n"); -} - -static int determine_uprobe_perf_type(void) -{ - const char *file = "/sys/bus/event_source/devices/uprobe/type"; - - return parse_uint_from_file(file, "%d\n"); -} - -static int determine_kprobe_retprobe_bit(void) -{ - const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; - - return parse_uint_from_file(file, "config:%d\n"); -} - -static int determine_uprobe_retprobe_bit(void) -{ - const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; - - return parse_uint_from_file(file, "config:%d\n"); -} - -static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, - uint64_t offset, int pid) -{ - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; - int type, pfd, err; - - type = uprobe ? determine_uprobe_perf_type() - : determine_kprobe_perf_type(); - if (type < 0) { - pr_warn("failed to determine %s perf type: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); - return type; - } - if (retprobe) { - int bit = uprobe ? determine_uprobe_retprobe_bit() - : determine_kprobe_retprobe_bit(); - - if (bit < 0) { - pr_warn("failed to determine %s retprobe bit: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); - return bit; - } - attr.config |= 1 << bit; - } - attr.size = sizeof(attr); - attr.type = type; - attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ - attr.config2 = offset; /* kprobe_addr or probe_offset */ - - /* pid filter is meaningful only for uprobes */ - pfd = syscall(__NR_perf_event_open, &attr, - pid < 0 ? -1 : pid /* pid */, - pid == -1 ? 0 : -1 /* cpu */, - -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("%s perf_event_open() failed: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - return pfd; -} - -struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, - bool retprobe, - const char *func_name) -{ - char errmsg[STRERR_BUFSIZE]; - struct bpf_link *link; - int pfd, err; - - pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, - 0 /* offset */, -1 /* pid */); - if (pfd < 0) { - pr_warn("program '%s': failed to create %s '%s' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "kretprobe" : "kprobe", func_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); - } - link = bpf_program__attach_perf_event(prog, pfd); - if (IS_ERR(link)) { - close(pfd); - err = PTR_ERR(link); - pr_warn("program '%s': failed to attach to %s '%s': %s\n", - bpf_program__title(prog, false), - retprobe ? 
"kretprobe" : "kprobe", func_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return link; - } - return link; -} - -struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, - bool retprobe, pid_t pid, - const char *binary_path, - size_t func_offset) -{ - char errmsg[STRERR_BUFSIZE]; - struct bpf_link *link; - int pfd, err; - - pfd = perf_event_open_probe(true /* uprobe */, retprobe, - binary_path, func_offset, pid); - if (pfd < 0) { - pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", - binary_path, func_offset, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); - } - link = bpf_program__attach_perf_event(prog, pfd); - if (IS_ERR(link)) { - close(pfd); - err = PTR_ERR(link); - pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", - binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return link; - } - return link; -} - -static int determine_tracepoint_id(const char *tp_category, - const char *tp_name) -{ - char file[PATH_MAX]; - int ret; - - ret = snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - tp_category, tp_name); - if (ret < 0) - return -errno; - if (ret >= sizeof(file)) { - pr_debug("tracepoint %s/%s path is too long\n", - tp_category, tp_name); - return -E2BIG; - } - return parse_uint_from_file(file, "%d\n"); -} - -static int perf_event_open_tracepoint(const char *tp_category, - const char *tp_name) -{ - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; - int tp_id, pfd, err; - - tp_id = determine_tracepoint_id(tp_category, tp_name); - if (tp_id < 0) { - pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", - tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); - return tp_id; - } - - attr.type = PERF_TYPE_TRACEPOINT; - attr.size = sizeof(attr); - attr.config = tp_id; - - pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, - -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", - tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - return pfd; -} - -struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, - const char *tp_category, - const char *tp_name) -{ - char errmsg[STRERR_BUFSIZE]; - struct bpf_link *link; - int pfd, err; - - pfd = perf_event_open_tracepoint(tp_category, tp_name); - if (pfd < 0) { - pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); - } - link = bpf_program__attach_perf_event(prog, pfd); - if (IS_ERR(link)) { - close(pfd); - err = PTR_ERR(link); - pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return link; - } - return link; -} - -static int bpf_link__destroy_fd(struct bpf_link *link) -{ - struct bpf_link_fd *l = (void *)link; - - return close(l->fd); -} - -struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, - const char *tp_name) -{ - char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; - int prog_fd, pfd; - - prog_fd = bpf_program__fd(prog); - if (prog_fd < 
0) { - pr_warn("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); - return ERR_PTR(-EINVAL); - } - - link = malloc(sizeof(*link)); - if (!link) - return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; - - pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); - if (pfd < 0) { - pfd = -errno; - free(link); - pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n", - bpf_program__title(prog, false), tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); - } - link->fd = pfd; - return (struct bpf_link *)link; -} - -struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) -{ - char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; - int prog_fd, pfd; - - prog_fd = bpf_program__fd(prog); - if (prog_fd < 0) { - pr_warn("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); - return ERR_PTR(-EINVAL); - } - - link = malloc(sizeof(*link)); - if (!link) - return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; - - pfd = bpf_raw_tracepoint_open(NULL, prog_fd); - if (pfd < 0) { - pfd = -errno; - free(link); - pr_warn("program '%s': failed to attach to trace: %s\n", - bpf_program__title(prog, false), - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); - } - link->fd = pfd; - return (struct bpf_link *)link; -} - -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data) -{ - struct perf_event_mmap_page *header = mmap_mem; - __u64 data_head = ring_buffer_read_head(header); - __u64 data_tail = header->data_tail; - void *base = ((__u8 *)header) + page_size; - int ret = LIBBPF_PERF_EVENT_CONT; - struct perf_event_header *ehdr; - size_t ehdr_size; - - while (data_head != data_tail) { - ehdr = base + (data_tail & (mmap_size - 1)); - ehdr_size = ehdr->size; - - if (((void *)ehdr) + ehdr_size > base + mmap_size) { - void *copy_start = ehdr; - size_t len_first = base + mmap_size - copy_start; - size_t len_secnd = ehdr_size - len_first; - - if (*copy_size < ehdr_size) { - free(*copy_mem); - *copy_mem = malloc(ehdr_size); - if (!*copy_mem) { - *copy_size = 0; - ret = LIBBPF_PERF_EVENT_ERROR; - break; - } - *copy_size = ehdr_size; - } - - memcpy(*copy_mem, copy_start, len_first); - memcpy(*copy_mem + len_first, base, len_secnd); - ehdr = *copy_mem; - } - - ret = fn(ehdr, private_data); - data_tail += ehdr_size; - if (ret != LIBBPF_PERF_EVENT_CONT) - break; - } - - ring_buffer_write_tail(header, data_tail); - return ret; -} - -struct perf_buffer; - -struct perf_buffer_params { - struct perf_event_attr *attr; - /* if event_cb is specified, it takes precedence */ - perf_buffer_event_fn event_cb; - /* sample_cb and lost_cb are higher-level common-case callbacks */ - perf_buffer_sample_fn sample_cb; - perf_buffer_lost_fn lost_cb; - void *ctx; - int cpu_cnt; - int *cpus; - int *map_keys; -}; - -struct perf_cpu_buf { - struct perf_buffer *pb; - void *base; /* mmap()'ed memory */ - void *buf; /* for reconstructing segmented data */ - size_t buf_size; - int fd; - int cpu; - int map_key; -}; - -struct perf_buffer { - perf_buffer_event_fn event_cb; - perf_buffer_sample_fn sample_cb; - perf_buffer_lost_fn lost_cb; - void *ctx; /* passed into callbacks */ - - size_t page_size; - size_t mmap_size; - struct perf_cpu_buf **cpu_bufs; - struct epoll_event *events; - int cpu_cnt; - int epoll_fd; /* perf event FD */ - int map_fd; /* 
BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ -}; - -static void perf_buffer__free_cpu_buf(struct perf_buffer *pb, - struct perf_cpu_buf *cpu_buf) -{ - if (!cpu_buf) - return; - if (cpu_buf->base && - munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) - pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); - if (cpu_buf->fd >= 0) { - ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0); - close(cpu_buf->fd); - } - free(cpu_buf->buf); - free(cpu_buf); -} - -void perf_buffer__free(struct perf_buffer *pb) -{ - int i; - - if (!pb) - return; - if (pb->cpu_bufs) { - for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) { - struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; - - bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); - perf_buffer__free_cpu_buf(pb, cpu_buf); - } - free(pb->cpu_bufs); - } - if (pb->epoll_fd >= 0) - close(pb->epoll_fd); - free(pb->events); - free(pb); -} - -static struct perf_cpu_buf * -perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, - int cpu, int map_key) -{ - struct perf_cpu_buf *cpu_buf; - char msg[STRERR_BUFSIZE]; - int err; - - cpu_buf = calloc(1, sizeof(*cpu_buf)); - if (!cpu_buf) - return ERR_PTR(-ENOMEM); - - cpu_buf->pb = pb; - cpu_buf->cpu = cpu; - cpu_buf->map_key = map_key; - - cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu, - -1, PERF_FLAG_FD_CLOEXEC); - if (cpu_buf->fd < 0) { - err = -errno; - pr_warn("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); - goto error; - } - - cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size, - PROT_READ | PROT_WRITE, MAP_SHARED, - cpu_buf->fd, 0); - if (cpu_buf->base == MAP_FAILED) { - cpu_buf->base = NULL; - err = -errno; - pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); - goto error; - } - - if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { - err = -errno; - pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); - goto error; - } - - return cpu_buf; - -error: - perf_buffer__free_cpu_buf(pb, cpu_buf); - return (struct perf_cpu_buf *)ERR_PTR(err); -} - -static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, - struct perf_buffer_params *p); - -struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts) -{ - struct perf_buffer_params p = {}; - struct perf_event_attr attr = { 0, }; - - attr.config = PERF_COUNT_SW_BPF_OUTPUT; - attr.type = PERF_TYPE_SOFTWARE; - attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = 1; - attr.wakeup_events = 1; - - p.attr = &attr; - p.sample_cb = opts ? opts->sample_cb : NULL; - p.lost_cb = opts ? opts->lost_cb : NULL; - p.ctx = opts ? 
opts->ctx : NULL; - - return __perf_buffer__new(map_fd, page_cnt, &p); -} - -struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts) -{ - struct perf_buffer_params p = {}; - - p.attr = opts->attr; - p.event_cb = opts->event_cb; - p.ctx = opts->ctx; - p.cpu_cnt = opts->cpu_cnt; - p.cpus = opts->cpus; - p.map_keys = opts->map_keys; - - return __perf_buffer__new(map_fd, page_cnt, &p); -} - -static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, - struct perf_buffer_params *p) -{ - struct bpf_map_info map = {}; - char msg[STRERR_BUFSIZE]; - struct perf_buffer *pb; - __u32 map_info_len; - int err, i; - - if (page_cnt & (page_cnt - 1)) { - pr_warn("page count should be power of two, but is %zu\n", - page_cnt); - return ERR_PTR(-EINVAL); - } - - map_info_len = sizeof(map); - err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len); - if (err) { - err = -errno; - pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); - return ERR_PTR(err); - } - - if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { - pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", - map.name); - return ERR_PTR(-EINVAL); - } - - pb = calloc(1, sizeof(*pb)); - if (!pb) - return ERR_PTR(-ENOMEM); - - pb->event_cb = p->event_cb; - pb->sample_cb = p->sample_cb; - pb->lost_cb = p->lost_cb; - pb->ctx = p->ctx; - - pb->page_size = getpagesize(); - pb->mmap_size = pb->page_size * page_cnt; - pb->map_fd = map_fd; - - pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (pb->epoll_fd < 0) { - err = -errno; - pr_warn("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); - goto error; - } - - if (p->cpu_cnt > 0) { - pb->cpu_cnt = p->cpu_cnt; - } else { - pb->cpu_cnt = libbpf_num_possible_cpus(); - if (pb->cpu_cnt < 0) { - err = pb->cpu_cnt; - goto error; - } - if (map.max_entries < pb->cpu_cnt) - pb->cpu_cnt = map.max_entries; - } - - pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); - if (!pb->events) { - err = -ENOMEM; - pr_warn("failed to allocate events: out of memory\n"); - goto error; - } - pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); - if (!pb->cpu_bufs) { - err = -ENOMEM; - pr_warn("failed to allocate buffers: out of memory\n"); - goto error; - } - - for (i = 0; i < pb->cpu_cnt; i++) { - struct perf_cpu_buf *cpu_buf; - int cpu, map_key; - - cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; - map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i; - - cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); - if (IS_ERR(cpu_buf)) { - err = PTR_ERR(cpu_buf); - goto error; - } - - pb->cpu_bufs[i] = cpu_buf; - - err = bpf_map_update_elem(pb->map_fd, &map_key, - &cpu_buf->fd, 0); - if (err) { - err = -errno; - pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", - cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); - goto error; - } - - pb->events[i].events = EPOLLIN; - pb->events[i].data.ptr = cpu_buf; - if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, - &pb->events[i]) < 0) { - err = -errno; - pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", - cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); - goto error; - } - } - - return pb; - -error: - if (pb) - perf_buffer__free(pb); - return ERR_PTR(err); -} - -struct perf_sample_raw { - struct perf_event_header header; - uint32_t size; - char data[0]; -}; - -struct perf_sample_lost { - struct perf_event_header header; - uint64_t id; - uint64_t lost; - uint64_t sample_id; -}; - -static enum bpf_perf_event_ret -perf_buffer__process_record(struct perf_event_header *e, void *ctx) -{ - struct perf_cpu_buf *cpu_buf = ctx; - struct perf_buffer *pb = cpu_buf->pb; - void *data = e; - - /* user wants full control over parsing perf event */ - if (pb->event_cb) - return pb->event_cb(pb->ctx, cpu_buf->cpu, e); - - switch (e->type) { - case PERF_RECORD_SAMPLE: { - struct perf_sample_raw *s = data; - - if (pb->sample_cb) - pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size); - break; - } - case PERF_RECORD_LOST: { - struct perf_sample_lost *s = data; - - if (pb->lost_cb) - pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost); - break; - } - default: - pr_warn("unknown perf sample type %d\n", e->type); - return LIBBPF_PERF_EVENT_ERROR; - } - return LIBBPF_PERF_EVENT_CONT; -} - -static int perf_buffer__process_records(struct perf_buffer *pb, - struct perf_cpu_buf *cpu_buf) -{ - enum bpf_perf_event_ret ret; - - ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, - pb->page_size, &cpu_buf->buf, - &cpu_buf->buf_size, - perf_buffer__process_record, cpu_buf); - if (ret != LIBBPF_PERF_EVENT_CONT) - return ret; - return 0; -} - -int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) -{ - int i, cnt, err; - - cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); - for (i = 0; i < cnt; i++) { - struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; - - err = perf_buffer__process_records(pb, cpu_buf); - if (err) { - pr_warn("error while processing records: %d\n", err); - return err; - } - } - return cnt < 0 ? -errno : cnt; -} - -struct bpf_prog_info_array_desc { - int array_offset; /* e.g. offset of jited_prog_insns */ - int count_offset; /* e.g. 
offset of jited_prog_len */ - int size_offset; /* > 0: offset of rec size, - * < 0: fix size of -size_offset - */ -}; - -static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { - [BPF_PROG_INFO_JITED_INSNS] = { - offsetof(struct bpf_prog_info, jited_prog_insns), - offsetof(struct bpf_prog_info, jited_prog_len), - -1, - }, - [BPF_PROG_INFO_XLATED_INSNS] = { - offsetof(struct bpf_prog_info, xlated_prog_insns), - offsetof(struct bpf_prog_info, xlated_prog_len), - -1, - }, - [BPF_PROG_INFO_MAP_IDS] = { - offsetof(struct bpf_prog_info, map_ids), - offsetof(struct bpf_prog_info, nr_map_ids), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_JITED_KSYMS] = { - offsetof(struct bpf_prog_info, jited_ksyms), - offsetof(struct bpf_prog_info, nr_jited_ksyms), - -(int)sizeof(__u64), - }, - [BPF_PROG_INFO_JITED_FUNC_LENS] = { - offsetof(struct bpf_prog_info, jited_func_lens), - offsetof(struct bpf_prog_info, nr_jited_func_lens), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_FUNC_INFO] = { - offsetof(struct bpf_prog_info, func_info), - offsetof(struct bpf_prog_info, nr_func_info), - offsetof(struct bpf_prog_info, func_info_rec_size), - }, - [BPF_PROG_INFO_LINE_INFO] = { - offsetof(struct bpf_prog_info, line_info), - offsetof(struct bpf_prog_info, nr_line_info), - offsetof(struct bpf_prog_info, line_info_rec_size), - }, - [BPF_PROG_INFO_JITED_LINE_INFO] = { - offsetof(struct bpf_prog_info, jited_line_info), - offsetof(struct bpf_prog_info, nr_jited_line_info), - offsetof(struct bpf_prog_info, jited_line_info_rec_size), - }, - [BPF_PROG_INFO_PROG_TAGS] = { - offsetof(struct bpf_prog_info, prog_tags), - offsetof(struct bpf_prog_info, nr_prog_tags), - -(int)sizeof(__u8) * BPF_TAG_SIZE, - }, - -}; - -static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, - int offset) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u32)]; - return -(int)offset; -} - -static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, - int offset) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u64)]; - return -(int)offset; -} - -static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, - __u32 val) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - array[offset / sizeof(__u32)] = val; -} - -static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, - __u64 val) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - array[offset / sizeof(__u64)] = val; -} - -struct bpf_prog_info_linear * -bpf_program__get_prog_info_linear(int fd, __u64 arrays) -{ - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 data_len = 0; - int i, err; - void *ptr; - - if (arrays >> BPF_PROG_INFO_LAST_ARRAY) - return ERR_PTR(-EINVAL); - - /* step 1: get array dimensions */ - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - return ERR_PTR(-EFAULT); - } - - /* step 2: calculate total size of all arrays */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - bool include_array = (arrays & (1UL << i)) > 0; - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - desc = bpf_prog_info_array_desc + i; - - /* kernel is too old to support this field */ - if (info_len < desc->array_offset + sizeof(__u32) || - info_len < desc->count_offset + sizeof(__u32) || - (desc->size_offset > 0 && info_len < desc->size_offset)) - include_array = 
false; - - if (!include_array) { - arrays &= ~(1UL << i); /* clear the bit */ - continue; - } - - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - - data_len += count * size; - } - - /* step 3: allocate continuous memory */ - data_len = roundup(data_len, sizeof(__u64)); - info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); - if (!info_linear) - return ERR_PTR(-ENOMEM); - - /* step 4: fill data to info_linear->info */ - info_linear->arrays = arrays; - memset(&info_linear->info, 0, sizeof(info)); - ptr = info_linear->data; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->count_offset, count); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->size_offset, size); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, - ptr_to_u64(ptr)); - ptr += count * size; - } - - /* step 5: call syscall again to get required arrays */ - err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - free(info_linear); - return ERR_PTR(-EFAULT); - } - - /* step 6: verify the data */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 v1, v2; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->count_offset); - if (v1 != v2) - pr_warn("%s: mismatch in element count\n", __func__); - - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->size_offset); - if (v1 != v2) - pr_warn("%s: mismatch in rec size\n", __func__); - } - - /* step 7: update info_len and data_len */ - info_linear->info_len = sizeof(struct bpf_prog_info); - info_linear->data_len = data_len; - - return info_linear; -} - -void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - addr = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - offs = addr - ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, offs); - } -} - -void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - offs = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - addr = offs + ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, addr); - } -} - -int libbpf_num_possible_cpus(void) -{ - static const char *fcpu = 
"/sys/devices/system/cpu/possible"; - int len = 0, n = 0, il = 0, ir = 0; - unsigned int start = 0, end = 0; - int tmp_cpus = 0; - static int cpus; - char buf[128]; - int error = 0; - int fd = -1; - - tmp_cpus = READ_ONCE(cpus); - if (tmp_cpus > 0) - return tmp_cpus; - - fd = open(fcpu, O_RDONLY); - if (fd < 0) { - error = errno; - pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); - return -error; - } - len = read(fd, buf, sizeof(buf)); - close(fd); - if (len <= 0) { - error = len ? errno : EINVAL; - pr_warn("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); - return -error; - } - if (len == sizeof(buf)) { - pr_warn("File %s size overflow\n", fcpu); - return -EOVERFLOW; - } - buf[len] = '\0'; - - for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { - /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ - if (buf[ir] == ',' || buf[ir] == '\0') { - buf[ir] = '\0'; - n = sscanf(&buf[il], "%u-%u", &start, &end); - if (n <= 0) { - pr_warn("Failed to get # CPUs from %s\n", - &buf[il]); - return -EINVAL; - } else if (n == 1) { - end = start; - } - tmp_cpus += end - start + 1; - il = ir + 1; - } - } - if (tmp_cpus <= 0) { - pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); - return -EINVAL; - } - - WRITE_ONCE(cpus, tmp_cpus); - return tmp_cpus; -} diff --git a/src/contrib/libbpf/bpf/libbpf.h b/src/contrib/libbpf/bpf/libbpf.h deleted file mode 100644 index 0dbf4bfba..000000000 --- a/src/contrib/libbpf/bpf/libbpf.h +++ /dev/null @@ -1,637 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * Common eBPF ELF object loading operations. - * - * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. 
- */ -#ifndef __LIBBPF_LIBBPF_H -#define __LIBBPF_LIBBPF_H - -#include <stdarg.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <sys/types.h> // for size_t -#include <linux/bpf.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - -enum libbpf_errno { - __LIBBPF_ERRNO__START = 4000, - - /* Something wrong in libelf */ - LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START, - LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */ - LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */ - LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */ - LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */ - LIBBPF_ERRNO__RELOC, /* Relocation failed */ - LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */ - LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */ - LIBBPF_ERRNO__PROG2BIG, /* Program too big */ - LIBBPF_ERRNO__KVER, /* Incorrect kernel version */ - LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ - LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ - LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ - LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */ - __LIBBPF_ERRNO__END, -}; - -LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); - -enum libbpf_print_level { - LIBBPF_WARN, - LIBBPF_INFO, - LIBBPF_DEBUG, -}; - -typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, - const char *, va_list ap); - -LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn); - -/* Hide internal to user */ -struct bpf_object; - -struct bpf_object_open_attr { - const char *file; - enum bpf_prog_type prog_type; -}; - -/* Helper macro to declare and initialize libbpf options struct - * - * This dance with uninitialized declaration, followed by memset to zero, - * followed by assignment using compound literal syntax is done to preserve - * ability to use a nice struct field initialization syntax and **hopefully** - * have all the padding bytes initialized to zero. It's not guaranteed, though, - * that the compiler won't copy garbage into the literal's padding bytes when - * copying the literal, but this is the best way found so far and it seems to - * work in practice. - * - * The macro declares an opts struct of the given type and name, - * zero-initializes it (including any extra padding) with memset(), and then - * assigns the initial values provided by the user in struct-initializer - * syntax as varargs. - */ -#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ - struct TYPE NAME = ({ \ - memset(&NAME, 0, sizeof(struct TYPE)); \ - (struct TYPE) { \ - .sz = sizeof(struct TYPE), \ - __VA_ARGS__ \ - }; \ - }) - -struct bpf_object_open_opts { - /* size of this struct, for forward/backward compatibility */ - size_t sz; - /* object name override, if provided: - * - for object open from file, this will override setting object - * name from file path's base name; - * - for object open from memory buffer, this will specify an object - * name and will override default "<addr>-<buf-size>" name; - */ - const char *object_name; - /* parse map definitions non-strictly, allowing extra attributes/data */ - bool relaxed_maps; - /* process CO-RE relocations non-strictly, allowing them to fail */ - bool relaxed_core_relocs; - /* maps that set the 'pinning' attribute in their definition will have - * their pin_path attribute set to a file in this directory, and be - * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
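
(A hedged aside on the options macro and open entry points above: they were commonly combined as in the sketch below. The object path "prog.o" and the object name are hypothetical; <string.h> is needed for the memset() hidden inside the macro.)

#include <string.h>
#include <bpf/libbpf.h>

static int open_and_load(void)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
			    .object_name = "example_obj");
	struct bpf_object *obj = bpf_object__open_file("prog.o", &opts);

	if (libbpf_get_error(obj))
		return -1;
	if (bpf_object__load(obj)) {
		bpf_object__close(obj);
		return -1;
	}
	bpf_object__close(obj);
	return 0;
}
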
- */ - const char *pin_root_path; - __u32 attach_prog_fd; -}; -#define bpf_object_open_opts__last_field attach_prog_fd - -LIBBPF_API struct bpf_object *bpf_object__open(const char *path); -LIBBPF_API struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); -LIBBPF_API struct bpf_object * -bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts); - -/* deprecated bpf_object__open variants */ -LIBBPF_API struct bpf_object * -bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, - const char *name); -LIBBPF_API struct bpf_object * -bpf_object__open_xattr(struct bpf_object_open_attr *attr); - -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size); -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off); - -enum libbpf_pin_type { - LIBBPF_PIN_NONE, - /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ - LIBBPF_PIN_BY_NAME, -}; - -/* pin_maps and unpin_maps can both be called with a NULL path, in which case - * they will use the pin_path attribute of each map (and ignore all maps that - * don't have a pin_path set). - */ -LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); -LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, - const char *path); -LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, - const char *path); -LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, - const char *path); -LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); -LIBBPF_API void bpf_object__close(struct bpf_object *object); - -struct bpf_object_load_attr { - struct bpf_object *obj; - int log_level; - const char *target_btf_path; -}; - -/* Load/unload object into/from kernel */ -LIBBPF_API int bpf_object__load(struct bpf_object *obj); -LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); -LIBBPF_API int bpf_object__unload(struct bpf_object *obj); -LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); -LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); - -struct btf; -LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); -LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); - -LIBBPF_API struct bpf_program * -bpf_object__find_program_by_title(const struct bpf_object *obj, - const char *title); - -LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); -#define bpf_object__for_each_safe(pos, tmp) \ - for ((pos) = bpf_object__next(NULL), \ - (tmp) = bpf_object__next(pos); \ - (pos) != NULL; \ - (pos) = (tmp), (tmp) = bpf_object__next(tmp)) - -typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); -LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv); -LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog); - -LIBBPF_API int -libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, - enum bpf_attach_type *expected_attach_type); -LIBBPF_API int libbpf_attach_type_by_name(const char *name, - enum bpf_attach_type *attach_type); -LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, - enum bpf_attach_type attach_type); - -/* Accessors of bpf_program */ -struct bpf_program; -LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); - -#define bpf_object__for_each_program(pos, obj) \ - for ((pos) = 
bpf_program__next(NULL, (obj)); \ - (pos) != NULL; \ - (pos) = bpf_program__next((pos), (obj))) - -LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); - -typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); - -LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv); - -LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); -LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, - __u32 ifindex); - -LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, - bool needs_copy); - -/* returns program size in bytes */ -LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); - -LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, - __u32 kern_version); -LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); -LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, - const char *path, - int instance); -LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, - const char *path, - int instance); -LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); -LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); -LIBBPF_API void bpf_program__unload(struct bpf_program *prog); - -struct bpf_link; - -LIBBPF_API int bpf_link__destroy(struct bpf_link *link); - -LIBBPF_API struct bpf_link * -bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); -LIBBPF_API struct bpf_link * -bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, - const char *func_name); -LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, - pid_t pid, const char *binary_path, - size_t func_offset); -LIBBPF_API struct bpf_link * -bpf_program__attach_tracepoint(struct bpf_program *prog, - const char *tp_category, - const char *tp_name); -LIBBPF_API struct bpf_link * -bpf_program__attach_raw_tracepoint(struct bpf_program *prog, - const char *tp_name); - -LIBBPF_API struct bpf_link * -bpf_program__attach_trace(struct bpf_program *prog); -struct bpf_insn; - -/* - * Libbpf allows callers to adjust BPF programs before being loaded - * into kernel. One program in an object file can be transformed into - * multiple variants to be attached to different hooks. - * - * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd - * form an API for this purpose. - * - * - bpf_program_prep_t: - * Defines a 'preprocessor', which is a caller defined function - * passed to libbpf through bpf_program__set_prep(), and will be - * called before program is loaded. The processor should adjust - * the program one time for each instance according to the instance id - * passed to it. - * - * - bpf_program__set_prep: - * Attaches a preprocessor to a BPF program. The number of instances - * that should be created is also passed through this function. - * - * - bpf_program__nth_fd: - * After the program is loaded, get resulting FD of a given instance - * of the BPF program. - * - * If bpf_program__set_prep() is not used, the program would be loaded - * without adjustment during bpf_object__load(). The program has only - * one instance. In this case bpf_program__fd(prog) is equal to - * bpf_program__nth_fd(prog, 0). - */ - -struct bpf_prog_prep_result { - /* - * If not NULL, load new instruction array. - * If set to NULL, don't load this instance. 
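
(A sketch of the link-based attach API declared above; the kprobe target "do_sys_open" and the error handling are assumptions, not from this diff.)

#include <bpf/libbpf.h>

static struct bpf_link *attach_example(struct bpf_program *prog)
{
	/* attach as an entry kprobe; retprobe = false */
	struct bpf_link *link =
		bpf_program__attach_kprobe(prog, false, "do_sys_open");

	if (libbpf_get_error(link))
		return NULL;
	/* later: bpf_link__destroy(link) detaches and frees the link */
	return link;
}
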
- */ - struct bpf_insn *new_insn_ptr; - int new_insn_cnt; - - /* If not NULL, result FD is written to it. */ - int *pfd; -}; - -/* - * Parameters of bpf_program_prep_t: - * - prog: The bpf_program being loaded. - * - n: Index of instance being generated. - * - insns: BPF instructions array. - * - insns_cnt:Number of instructions in insns. - * - res: Output parameter, result of transformation. - * - * Return value: - * - Zero: pre-processing success. - * - Non-zero: pre-processing error, stop loading. - */ -typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, - struct bpf_insn *insns, int insns_cnt, - struct bpf_prog_prep_result *res); - -LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, - bpf_program_prep_t prep); - -LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); - -/* - * Adjust type of BPF program. Default is kprobe. - */ -LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); - -LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); -LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, - enum bpf_prog_type type); - -LIBBPF_API enum bpf_attach_type -bpf_program__get_expected_attach_type(struct bpf_program *prog); -LIBBPF_API void -bpf_program__set_expected_attach_type(struct bpf_program *prog, - enum bpf_attach_type type); - -LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); - -/* - * No need for __attribute__((packed)), all members of 'bpf_map_def' - * are all aligned. In addition, using __attribute__((packed)) - * would trigger a -Wpacked warning message, and lead to an error - * if -Werror is set. - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -}; - -/* - * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, - * so no need to worry about a name clash. - */ -struct bpf_map; -LIBBPF_API struct bpf_map * -bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); - -LIBBPF_API int -bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); - -/* - * Get bpf_map through the offset of corresponding struct bpf_map_def - * in the BPF object file. 
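
(A sketch of the by-name map accessors declared above, paired with the plain map syscall wrappers from bpf.h; the map name "counters" is hypothetical.)

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int read_counter(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "counters");
	__u32 key = 0;
	__u64 val = 0;

	if (!map)
		return -1;
	/* resolve the map's fd and fetch one value through the bpf syscall */
	return bpf_map_lookup_elem(bpf_map__fd(map), &key, &val);
}
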
- */ -LIBBPF_API struct bpf_map * -bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); - -LIBBPF_API struct bpf_map * -bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); -#define bpf_object__for_each_map(pos, obj) \ - for ((pos) = bpf_map__next(NULL, (obj)); \ - (pos) != NULL; \ - (pos) = bpf_map__next((pos), (obj))) -#define bpf_map__for_each bpf_object__for_each_map - -LIBBPF_API struct bpf_map * -bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); - -LIBBPF_API int bpf_map__fd(const struct bpf_map *map); -LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); -LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); -LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); -LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); - -typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); -LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv); -LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); -LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); -LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); -LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); -LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); -LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); -LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); -LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); -LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); -LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); -LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); - -LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); - -LIBBPF_API long libbpf_get_error(const void *ptr); - -struct bpf_prog_load_attr { - const char *file; - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - int ifindex; - int log_level; - int prog_flags; -}; - -LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); -LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); - -struct xdp_link_info { - __u32 prog_id; - __u32 drv_prog_id; - __u32 hw_prog_id; - __u32 skb_prog_id; - __u8 attach_mode; -}; - -LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); -LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); -LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags); - -struct perf_buffer; - -typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu, - void *data, __u32 size); -typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); - -/* common use perf buffer options */ -struct perf_buffer_opts { - /* if specified, sample_cb is called for each sample */ - perf_buffer_sample_fn sample_cb; - /* if specified, lost_cb is called for each batch of lost samples */ - perf_buffer_lost_fn lost_cb; - /* ctx is provided to sample_cb and lost_cb */ - void *ctx; -}; - -LIBBPF_API struct perf_buffer * -perf_buffer__new(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts); - -enum bpf_perf_event_ret { - LIBBPF_PERF_EVENT_DONE = 0, - LIBBPF_PERF_EVENT_ERROR = -1, - LIBBPF_PERF_EVENT_CONT = -2, -}; - -struct perf_event_header; - -typedef enum 
bpf_perf_event_ret -(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event); - -/* raw perf buffer options, giving most power and control */ -struct perf_buffer_raw_opts { - /* perf event attrs passed directly into perf_event_open() */ - struct perf_event_attr *attr; - /* raw event callback */ - perf_buffer_event_fn event_cb; - /* ctx is provided to event_cb */ - void *ctx; - /* if cpu_cnt == 0, open on all possible CPUs (up to the number of - * max_entries of given PERF_EVENT_ARRAY map) - */ - int cpu_cnt; - /* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */ - int *cpus; - /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */ - int *map_keys; -}; - -LIBBPF_API struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts); - -LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); -LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); - -typedef enum bpf_perf_event_ret - (*bpf_perf_event_print_t)(struct perf_event_header *hdr, - void *private_data); -LIBBPF_API enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data); - -struct nlattr; -typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); -int libbpf_netlink_open(unsigned int *nl_pid); -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); - -struct bpf_prog_linfo; -struct bpf_prog_info; - -LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); -LIBBPF_API struct bpf_prog_linfo * -bpf_prog_linfo__new(const struct bpf_prog_info *info); -LIBBPF_API const struct bpf_line_info * -bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, - __u64 addr, __u32 func_idx, __u32 nr_skip); -LIBBPF_API const struct bpf_line_info * -bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, - __u32 insn_off, __u32 nr_skip); - -/* - * Probe for supported system features - * - * Note that running many of these probes in a short amount of time can cause - * the kernel to reach the maximal size of lockable memory allowed for the - * user, causing subsequent probes to fail. In this case, the caller may want - * to adjust that limit with setrlimit(). - */ -LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, - __u32 ifindex); -LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); -LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, - enum bpf_prog_type prog_type, __u32 ifindex); - -/* - * Get bpf_prog_info in contiguous memory - * - * struct bpf_prog_info has multiple arrays. The user has the option to choose - * which arrays to fetch from the kernel. The following APIs provide a uniform - * way to fetch this data. All arrays in bpf_prog_info are stored in a single - * contiguous memory region. This makes it easy to store the info in a - * file. - * - * Before writing bpf_prog_info_linear to files, it is necessary to - * translate pointers in bpf_prog_info to offsets.
Helper functions - * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr() - * are introduced to switch between pointers and offsets. - * - * Examples: - * # To fetch map_ids and prog_tags: - * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) | - * (1UL << BPF_PROG_INFO_PROG_TAGS); - * struct bpf_prog_info_linear *info_linear = - * bpf_program__get_prog_info_linear(fd, arrays); - * - * # To save data in file - * bpf_program__bpil_addr_to_offs(info_linear); - * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); - * - * # To read data from file - * read(f, info_linear, <proper_size>); - * bpf_program__bpil_offs_to_addr(info_linear); - */ -enum bpf_prog_info_array { - BPF_PROG_INFO_FIRST_ARRAY = 0, - BPF_PROG_INFO_JITED_INSNS = 0, - BPF_PROG_INFO_XLATED_INSNS, - BPF_PROG_INFO_MAP_IDS, - BPF_PROG_INFO_JITED_KSYMS, - BPF_PROG_INFO_JITED_FUNC_LENS, - BPF_PROG_INFO_FUNC_INFO, - BPF_PROG_INFO_LINE_INFO, - BPF_PROG_INFO_JITED_LINE_INFO, - BPF_PROG_INFO_PROG_TAGS, - BPF_PROG_INFO_LAST_ARRAY, -}; - -struct bpf_prog_info_linear { - /* size of struct bpf_prog_info, when the tool is compiled */ - __u32 info_len; - /* total bytes allocated for data, round up to 8 bytes */ - __u32 data_len; - /* which arrays are included in data */ - __u64 arrays; - struct bpf_prog_info info; - __u8 data[]; -}; - -LIBBPF_API struct bpf_prog_info_linear * -bpf_program__get_prog_info_linear(int fd, __u64 arrays); - -LIBBPF_API void -bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); - -LIBBPF_API void -bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); - -/* - * A helper function to get the number of possible CPUs before looking up - * per-CPU maps. Negative errno is returned on failure. - * - * Example usage: - * - * int ncpus = libbpf_num_possible_cpus(); - * if (ncpus < 0) { - * // error handling - * } - * long values[ncpus]; - * bpf_map_lookup_elem(per_cpu_map_fd, key, values); - * - */ -LIBBPF_API int libbpf_num_possible_cpus(void); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* __LIBBPF_LIBBPF_H */ diff --git a/src/contrib/libbpf/bpf/libbpf_errno.c b/src/contrib/libbpf/bpf/libbpf_errno.c deleted file mode 100644 index 4343e4058..000000000 --- a/src/contrib/libbpf/bpf/libbpf_errno.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. - * Copyright (C) 2017 Nicira, Inc. 
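
(A sketch of the error-formatting entry point whose string table follows below; the chosen error code and output stream are illustrative.)

#include <stdio.h>
#include <bpf/libbpf.h>

static void print_libbpf_error(int err)
{
	char buf[128];

	/* handles both plain errno values and the LIBBPF_ERRNO__* range */
	libbpf_strerror(err, buf, sizeof(buf));
	fprintf(stderr, "libbpf: %s\n", buf);
}
/* e.g. print_libbpf_error(-LIBBPF_ERRNO__FORMAT)
 * prints "libbpf: BPF object format invalid" per the table below */
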
- */ - -#undef _GNU_SOURCE -#include <stdio.h> -#include <string.h> - -#include "libbpf.h" - -#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) -#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) -#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) - -static const char *libbpf_strerror_table[NR_ERRNO] = { - [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", - [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", - [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", - [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", - [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", - [ERRCODE_OFFSET(RELOC)] = "Relocation failed", - [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", - [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", - [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", - [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", - [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", - [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", - [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", -}; - -int libbpf_strerror(int err, char *buf, size_t size) -{ - if (!buf || !size) - return -1; - - err = err > 0 ? err : -err; - - if (err < __LIBBPF_ERRNO__START) { - int ret; - - ret = strerror_r(err, buf, size); - buf[size - 1] = '\0'; - return ret; - } - - if (err < __LIBBPF_ERRNO__END) { - const char *msg; - - msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; - snprintf(buf, size, "%s", msg); - buf[size - 1] = '\0'; - return 0; - } - - snprintf(buf, size, "Unknown libbpf error %d", err); - buf[size - 1] = '\0'; - return -1; -} diff --git a/src/contrib/libbpf/bpf/libbpf_internal.h b/src/contrib/libbpf/bpf/libbpf_internal.h deleted file mode 100644 index 97ac17a64..000000000 --- a/src/contrib/libbpf/bpf/libbpf_internal.h +++ /dev/null @@ -1,217 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * Internal libbpf helpers. - * - * Copyright (c) 2019 Facebook - */ - -#ifndef __LIBBPF_LIBBPF_INTERNAL_H -#define __LIBBPF_LIBBPF_INTERNAL_H - -#include "libbpf.h" - -#define BTF_INFO_ENC(kind, kind_flag, vlen) \ - ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) -#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) -#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ - ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) -#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ - BTF_INT_ENC(encoding, bits_offset, bits) -#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset) -#define BTF_PARAM_ENC(name, type) (name), (type) -#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) - -#ifndef min -# define min(x, y) ((x) < (y) ? (x) : (y)) -#endif -#ifndef max -# define max(x, y) ((x) < (y) ? (y) : (x)) -#endif -#ifndef offsetofend -# define offsetofend(TYPE, FIELD) \ - (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) -#endif - -/* Symbol versioning is different between static and shared library. - * Properly versioned symbols are needed for shared library, but - * only the symbol of the new version is needed for static library. 
- */ -#ifdef SHARED -# define COMPAT_VERSION(internal_name, api_name, version) \ - asm(".symver " #internal_name "," #api_name "@" #version); -# define DEFAULT_VERSION(internal_name, api_name, version) \ - asm(".symver " #internal_name "," #api_name "@@" #version); -#else -# define COMPAT_VERSION(internal_name, api_name, version) -# define DEFAULT_VERSION(internal_name, api_name, version) \ - extern typeof(internal_name) api_name \ - __attribute__((alias(#internal_name))); -#endif - -extern void libbpf_print(enum libbpf_print_level level, - const char *format, ...) - __attribute__((format(printf, 2, 3))); - -#define __pr(level, fmt, ...) \ -do { \ - libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_warn(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) - -static inline bool libbpf_validate_opts(const char *opts, - size_t opts_sz, size_t user_sz, - const char *type_name) -{ - if (user_sz < sizeof(size_t)) { - pr_warn("%s size (%zu) is too small\n", type_name, user_sz); - return false; - } - if (user_sz > opts_sz) { - size_t i; - - for (i = opts_sz; i < user_sz; i++) { - if (opts[i]) { - pr_warn("%s has non-zero extra bytes", - type_name); - return false; - } - } - } - return true; -} - -#define OPTS_VALID(opts, type) \ - (!(opts) || libbpf_validate_opts((const char *)opts, \ - offsetofend(struct type, \ - type##__last_field), \ - (opts)->sz, #type)) -#define OPTS_HAS(opts, field) \ - ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) -#define OPTS_GET(opts, field, fallback_value) \ - (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) - -int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); - -struct btf_ext_info { - /* - * info points to the individual info section (e.g. func_info and - * line_info) from the .BTF.ext. It does not include the __u32 rec_size. - */ - void *info; - __u32 rec_size; - __u32 len; -}; - -#define for_each_btf_ext_sec(seg, sec) \ - for (sec = (seg)->info; \ - (void *)sec < (seg)->info + (seg)->len; \ - sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \ - (seg)->rec_size * sec->num_info) - -#define for_each_btf_ext_rec(seg, sec, i, rec) \ - for (i = 0, rec = (void *)&(sec)->data; \ - i < (sec)->num_info; \ - i++, rec = (void *)rec + (seg)->rec_size) - -struct btf_ext { - union { - struct btf_ext_header *hdr; - void *data; - }; - struct btf_ext_info func_info; - struct btf_ext_info line_info; - struct btf_ext_info field_reloc_info; - __u32 data_size; -}; - -struct btf_ext_info_sec { - __u32 sec_name_off; - __u32 num_info; - /* Followed by num_info * record_size number of bytes */ - __u8 data[0]; -}; - -/* The minimum bpf_func_info checked by the loader */ -struct bpf_func_info_min { - __u32 insn_off; - __u32 type_id; -}; - -/* The minimum bpf_line_info checked by the loader */ -struct bpf_line_info_min { - __u32 insn_off; - __u32 file_name_off; - __u32 line_off; - __u32 line_col; -}; - -/* bpf_field_info_kind encodes which aspect of captured field has to be - * adjusted by relocations. 
Currently supported values are: - * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); - * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); - */ -enum bpf_field_info_kind { - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, - BPF_FIELD_LSHIFT_U64 = 4, - BPF_FIELD_RSHIFT_U64 = 5, -}; - -/* The minimum bpf_field_reloc checked by the loader - * - * Field relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * field; - * - access_str_off - offset into corresponding .BTF string section. The string - * itself encodes an accessed field using a sequence of field and array - * indices, separated by colon (:). It's conceptually very close to LLVM's - * getelementptr ([0]) instruction's arguments for identifying offset to - * a field. - * - * An example to provide a better feel. - * - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample *s = ...; - * int x = &s->a; // encoded as "0:0" (a is field #0) - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example will capture BTF type id of - * `struct sample`. - * - * Such a relocation is emitted when using the __builtin_preserve_access_index() - * Clang built-in, passing an expression that captures the field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - * __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit a field relocation recording the data necessary - * to find the offset of the embedded `a.b.c` field within the `src` struct. - * - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_field_reloc { - __u32 insn_off; - __u32 type_id; - __u32 access_str_off; - enum bpf_field_info_kind kind; -}; - -#endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/src/contrib/libbpf/bpf/libbpf_probes.c b/src/contrib/libbpf/bpf/libbpf_probes.c deleted file mode 100644 index a9eb8b322..000000000 --- a/src/contrib/libbpf/bpf/libbpf_probes.c +++ /dev/null @@ -1,323 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2019 Netronome Systems, Inc.
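
(A sketch of the probing helpers implemented below; ifindex 0 means "no offload device", and the report strings are illustrative.)

#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/libbpf.h>

static void report_features(void)
{
	if (bpf_probe_prog_type(BPF_PROG_TYPE_XDP, 0))
		puts("kernel accepts XDP programs");
	if (bpf_probe_map_type(BPF_MAP_TYPE_PERCPU_ARRAY, 0))
		puts("kernel accepts per-CPU array maps");
}
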
*/ - -#include <errno.h> -#include <fcntl.h> -#include <string.h> -#include <stdlib.h> -#include <unistd.h> -#include <net/if.h> -#include <sys/utsname.h> - -#include <linux/btf.h> -#include <linux/filter.h> -#include <linux/kernel.h> - -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" - -static bool grep(const char *buffer, const char *pattern) -{ - return !!strstr(buffer, pattern); -} - -static int get_vendor_id(int ifindex) -{ - char ifname[IF_NAMESIZE], path[64], buf[8]; - ssize_t len; - int fd; - - if (!if_indextoname(ifindex, ifname)) - return -1; - - snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); - - fd = open(path, O_RDONLY); - if (fd < 0) - return -1; - - len = read(fd, buf, sizeof(buf)); - close(fd); - if (len < 0) - return -1; - if (len >= (ssize_t)sizeof(buf)) - return -1; - buf[len] = '\0'; - - return strtol(buf, NULL, 0); -} - -static int get_kernel_version(void) -{ - int version, subversion, patchlevel; - struct utsname utsn; - - /* Return 0 on failure, and attempt to probe with empty kversion */ - if (uname(&utsn)) - return 0; - - if (sscanf(utsn.release, "%d.%d.%d", - &version, &subversion, &patchlevel) != 3) - return 0; - - return (version << 16) + (subversion << 8) + patchlevel; -} - -static void -probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, - size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) -{ - struct bpf_load_program_attr xattr = {}; - int fd; - - switch (prog_type) { - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; - break; - case BPF_PROG_TYPE_KPROBE: - xattr.kern_version = get_kernel_version(); - break; - case BPF_PROG_TYPE_UNSPEC: - case BPF_PROG_TYPE_SOCKET_FILTER: - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_SCHED_ACT: - case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_XDP: - case BPF_PROG_TYPE_PERF_EVENT: - case BPF_PROG_TYPE_CGROUP_SKB: - case BPF_PROG_TYPE_CGROUP_SOCK: - case BPF_PROG_TYPE_LWT_IN: - case BPF_PROG_TYPE_LWT_OUT: - case BPF_PROG_TYPE_LWT_XMIT: - case BPF_PROG_TYPE_SOCK_OPS: - case BPF_PROG_TYPE_SK_SKB: - case BPF_PROG_TYPE_CGROUP_DEVICE: - case BPF_PROG_TYPE_SK_MSG: - case BPF_PROG_TYPE_RAW_TRACEPOINT: - case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: - case BPF_PROG_TYPE_LWT_SEG6LOCAL: - case BPF_PROG_TYPE_LIRC_MODE2: - case BPF_PROG_TYPE_SK_REUSEPORT: - case BPF_PROG_TYPE_FLOW_DISSECTOR: - case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_CGROUP_SOCKOPT: - case BPF_PROG_TYPE_TRACING: - default: - break; - } - - xattr.prog_type = prog_type; - xattr.insns = insns; - xattr.insns_cnt = insns_cnt; - xattr.license = "GPL"; - xattr.prog_ifindex = ifindex; - - fd = bpf_load_program_xattr(&xattr, buf, buf_len); - if (fd >= 0) - close(fd); -} - -bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) -{ - struct bpf_insn insns[2] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN() - }; - - if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) - /* nfp returns -EINVAL on exit(0) with TC offload */ - insns[0].imm = 2; - - errno = 0; - probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); - - return errno != EINVAL && errno != EOPNOTSUPP; -} - -int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) -{ - struct btf_header hdr = { - .magic = BTF_MAGIC, - .version = BTF_VERSION, - .hdr_len = sizeof(struct btf_header), - .type_len = types_len, - .str_off = types_len, - .str_len = str_len, - }; - int btf_fd, btf_len; - __u8 *raw_btf; - - btf_len = 
hdr.hdr_len + hdr.type_len + hdr.str_len; - raw_btf = malloc(btf_len); - if (!raw_btf) - return -ENOMEM; - - memcpy(raw_btf, &hdr, sizeof(hdr)); - memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); - memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - - btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); - - free(raw_btf); - return btf_fd; -} - -static int load_sk_storage_btf(void) -{ - const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; - /* struct bpf_spin_lock { - * int val; - * }; - * struct val { - * int cnt; - * struct bpf_spin_lock l; - * }; - */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* struct bpf_spin_lock */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), - BTF_MEMBER_ENC(15, 1, 0), /* int val; */ - /* struct val */ /* [3] */ - BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), - BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ - BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ - }; - - return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); -} - -bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) -{ - int key_size, value_size, max_entries, map_flags; - __u32 btf_key_type_id = 0, btf_value_type_id = 0; - struct bpf_create_map_attr attr = {}; - int fd = -1, btf_fd = -1, fd_inner; - - key_size = sizeof(__u32); - value_size = sizeof(__u32); - max_entries = 1; - map_flags = 0; - - switch (map_type) { - case BPF_MAP_TYPE_STACK_TRACE: - value_size = sizeof(__u64); - break; - case BPF_MAP_TYPE_LPM_TRIE: - key_size = sizeof(__u64); - value_size = sizeof(__u64); - map_flags = BPF_F_NO_PREALLOC; - break; - case BPF_MAP_TYPE_CGROUP_STORAGE: - case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: - key_size = sizeof(struct bpf_cgroup_storage_key); - value_size = sizeof(__u64); - max_entries = 0; - break; - case BPF_MAP_TYPE_QUEUE: - case BPF_MAP_TYPE_STACK: - key_size = 0; - break; - case BPF_MAP_TYPE_SK_STORAGE: - btf_key_type_id = 1; - btf_value_type_id = 3; - value_size = 8; - max_entries = 0; - map_flags = BPF_F_NO_PREALLOC; - btf_fd = load_sk_storage_btf(); - if (btf_fd < 0) - return false; - break; - case BPF_MAP_TYPE_UNSPEC: - case BPF_MAP_TYPE_HASH: - case BPF_MAP_TYPE_ARRAY: - case BPF_MAP_TYPE_PROG_ARRAY: - case BPF_MAP_TYPE_PERF_EVENT_ARRAY: - case BPF_MAP_TYPE_PERCPU_HASH: - case BPF_MAP_TYPE_PERCPU_ARRAY: - case BPF_MAP_TYPE_CGROUP_ARRAY: - case BPF_MAP_TYPE_LRU_HASH: - case BPF_MAP_TYPE_LRU_PERCPU_HASH: - case BPF_MAP_TYPE_ARRAY_OF_MAPS: - case BPF_MAP_TYPE_HASH_OF_MAPS: - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - case BPF_MAP_TYPE_SOCKMAP: - case BPF_MAP_TYPE_CPUMAP: - case BPF_MAP_TYPE_XSKMAP: - case BPF_MAP_TYPE_SOCKHASH: - case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: - default: - break; - } - - if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || - map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { - /* TODO: probe for device, once libbpf has a function to create - * map-in-map for offload - */ - if (ifindex) - return false; - - fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, - sizeof(__u32), sizeof(__u32), 1, 0); - if (fd_inner < 0) - return false; - fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), - fd_inner, 1, 0); - close(fd_inner); - } else { - /* Note: No other restriction on map type probes for offload */ - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - attr.map_ifindex = ifindex; - if (btf_fd >= 0) { - attr.btf_fd = btf_fd; - 
attr.btf_key_type_id = btf_key_type_id; - attr.btf_value_type_id = btf_value_type_id; - } - - fd = bpf_create_map_xattr(&attr); - } - if (fd >= 0) - close(fd); - if (btf_fd >= 0) - close(btf_fd); - - return fd >= 0; -} - -bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, - __u32 ifindex) -{ - struct bpf_insn insns[2] = { - BPF_EMIT_CALL(id), - BPF_EXIT_INSN() - }; - char buf[4096] = {}; - bool res; - - probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), - ifindex); - res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); - - if (ifindex) { - switch (get_vendor_id(ifindex)) { - case 0x19ee: /* Netronome specific */ - res = res && !grep(buf, "not supported by FW") && - !grep(buf, "unsupported function id"); - break; - default: - break; - } - } - - return res; -} diff --git a/src/contrib/libbpf/bpf/libbpf_util.h b/src/contrib/libbpf/bpf/libbpf_util.h deleted file mode 100644 index 59c779c57..000000000 --- a/src/contrib/libbpf/bpf/libbpf_util.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -/* Copyright (c) 2019 Facebook */ - -#ifndef __LIBBPF_LIBBPF_UTIL_H -#define __LIBBPF_LIBBPF_UTIL_H - -#include <stdbool.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* Use these barrier functions instead of smp_[rw]mb() when they are - * used in a libbpf header file. That way they can be built into the - * application that uses libbpf. - */ -#if defined(__i386__) || defined(__x86_64__) -# define libbpf_smp_rmb() asm volatile("" : : : "memory") -# define libbpf_smp_wmb() asm volatile("" : : : "memory") -# define libbpf_smp_mb() \ - asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc") -/* Hinders stores to be observed before older loads. */ -# define libbpf_smp_rwmb() asm volatile("" : : : "memory") -#elif defined(__aarch64__) -# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory") -# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") -# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") -# define libbpf_smp_rwmb() libbpf_smp_mb() -#elif defined(__arm__) -/* These are only valid for armv7 and above */ -# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory") -# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") -# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") -# define libbpf_smp_rwmb() libbpf_smp_mb() -#else -/* Architecture missing native barrier functions. 
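
(A sketch of the single-producer/single-consumer pairing these barriers exist for, as in the AF_XDP ring code; the ring structure here is a simplified assumption, not the real xsk ring layout.)

#include "libbpf_util.h"

struct toy_ring { unsigned int prod, cons; int slot[64]; };

static void ring_push(struct toy_ring *r, int v)
{
	r->slot[r->prod % 64] = v; /* write the data first        */
	libbpf_smp_wmb();          /* then publish the new index  */
	r->prod++;
}

static int ring_pop(struct toy_ring *r)
{
	unsigned int p = r->prod;  /* read the index first        */
	libbpf_smp_rmb();          /* then the data is safe to read */
	if (p == r->cons)
		return -1;         /* empty */
	return r->slot[r->cons++ % 64];
}
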
*/ -# define libbpf_smp_rmb() __sync_synchronize() -# define libbpf_smp_wmb() __sync_synchronize() -# define libbpf_smp_mb() __sync_synchronize() -# define libbpf_smp_rwmb() __sync_synchronize() -#endif - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/contrib/libbpf/bpf/netlink.c b/src/contrib/libbpf/bpf/netlink.c deleted file mode 100644 index 5065c1aa1..000000000 --- a/src/contrib/libbpf/bpf/netlink.c +++ /dev/null @@ -1,451 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2018 Facebook */ - -#include <stdlib.h> -#include <memory.h> -#include <unistd.h> -#include <linux/bpf.h> -#include <linux/rtnetlink.h> -#include <sys/socket.h> -#include <errno.h> -#include <time.h> - -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" -#include "nlattr.h" - -#ifndef SOL_NETLINK -#define SOL_NETLINK 270 -#endif - -typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, - void *cookie); - -struct xdp_id_md { - int ifindex; - __u32 flags; - struct xdp_link_info info; -}; - -int libbpf_netlink_open(__u32 *nl_pid) -{ - struct sockaddr_nl sa; - socklen_t addrlen; - int one = 1, ret; - int sock; - - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - - sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (sock < 0) - return -errno; - - if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, - &one, sizeof(one)) < 0) { - pr_warn("Netlink error reporting not supported\n"); - } - - if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { - ret = -errno; - goto cleanup; - } - - addrlen = sizeof(sa); - if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { - ret = -errno; - goto cleanup; - } - - if (addrlen != sizeof(sa)) { - ret = -LIBBPF_ERRNO__INTERNAL; - goto cleanup; - } - - *nl_pid = sa.nl_pid; - return sock; - -cleanup: - close(sock); - return ret; -} - -static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, - __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, - void *cookie) -{ - bool multipart = true; - struct nlmsgerr *err; - struct nlmsghdr *nh; - char buf[4096]; - int len, ret; - - while (multipart) { - multipart = false; - len = recv(sock, buf, sizeof(buf), 0); - if (len < 0) { - ret = -errno; - goto done; - } - - if (len == 0) - break; - - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); - nh = NLMSG_NEXT(nh, len)) { - if (nh->nlmsg_pid != nl_pid) { - ret = -LIBBPF_ERRNO__WRNGPID; - goto done; - } - if (nh->nlmsg_seq != seq) { - ret = -LIBBPF_ERRNO__INVSEQ; - goto done; - } - if (nh->nlmsg_flags & NLM_F_MULTI) - multipart = true; - switch (nh->nlmsg_type) { - case NLMSG_ERROR: - err = (struct nlmsgerr *)NLMSG_DATA(nh); - if (!err->error) - continue; - ret = err->error; - libbpf_nla_dump_errormsg(nh); - goto done; - case NLMSG_DONE: - return 0; - default: - break; - } - if (_fn) { - ret = _fn(nh, fn, cookie); - if (ret) - return ret; - } - } - } - ret = 0; -done: - return ret; -} - -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) -{ - int sock, seq = 0, ret; - struct nlattr *nla, *nla_xdp; - struct { - struct nlmsghdr nh; - struct ifinfomsg ifinfo; - char attrbuf[64]; - } req; - __u32 nl_pid; - - sock = libbpf_netlink_open(&nl_pid); - if (sock < 0) - return sock; - - memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_type = RTM_SETLINK; - req.nh.nlmsg_pid = 0; - req.nh.nlmsg_seq = ++seq; - req.ifinfo.ifi_family = AF_UNSPEC; - req.ifinfo.ifi_index = ifindex; - - /* started 
nested attribute for XDP */ - nla = (struct nlattr *)(((char *)&req) - + NLMSG_ALIGN(req.nh.nlmsg_len)); - nla->nla_type = NLA_F_NESTED | IFLA_XDP; - nla->nla_len = NLA_HDRLEN; - - /* add XDP fd */ - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); - memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len += nla_xdp->nla_len; - - /* if user passed in any flags, add those too */ - if (flags) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FLAGS; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); - memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); - nla->nla_len += nla_xdp->nla_len; - } - - req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); - - if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { - ret = -errno; - goto cleanup; - } - ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); - -cleanup: - close(sock); - return ret; -} - -static int __dump_link_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) -{ - struct nlattr *tb[IFLA_MAX + 1], *attr; - struct ifinfomsg *ifi = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); - attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); - if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_link_nlmsg(cookie, ifi, tb); -} - -static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) -{ - struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; - struct xdp_id_md *xdp_id = cookie; - struct ifinfomsg *ifinfo = msg; - int ret; - - if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) - return 0; - - if (!tb[IFLA_XDP]) - return 0; - - ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL); - if (ret) - return ret; - - if (!xdp_tb[IFLA_XDP_ATTACHED]) - return 0; - - xdp_id->info.attach_mode = libbpf_nla_getattr_u8( - xdp_tb[IFLA_XDP_ATTACHED]); - - if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE) - return 0; - - if (xdp_tb[IFLA_XDP_PROG_ID]) - xdp_id->info.prog_id = libbpf_nla_getattr_u32( - xdp_tb[IFLA_XDP_PROG_ID]); - - if (xdp_tb[IFLA_XDP_SKB_PROG_ID]) - xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32( - xdp_tb[IFLA_XDP_SKB_PROG_ID]); - - if (xdp_tb[IFLA_XDP_DRV_PROG_ID]) - xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32( - xdp_tb[IFLA_XDP_DRV_PROG_ID]); - - if (xdp_tb[IFLA_XDP_HW_PROG_ID]) - xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32( - xdp_tb[IFLA_XDP_HW_PROG_ID]); - - return 0; -} - -int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags) -{ - struct xdp_id_md xdp_id = {}; - int sock, ret; - __u32 nl_pid; - __u32 mask; - - if (flags & ~XDP_FLAGS_MASK || !info_size) - return -EINVAL; - - /* Check whether the single {HW,DRV,SKB} mode is set */ - flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); - mask = flags - 1; - if (flags && flags & mask) - return -EINVAL; - - sock = libbpf_netlink_open(&nl_pid); - if (sock < 0) - return sock; - - xdp_id.ifindex = ifindex; - xdp_id.flags = flags; - - ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id); - if (!ret) { - size_t sz = min(info_size, sizeof(xdp_id.info)); - - memcpy(info, &xdp_id.info, sz); - memset((void *) info + sz, 0, info_size - sz); - } - - close(sock); - return ret; -} - -static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) -{ - if (info->attach_mode != XDP_ATTACHED_MULTI) - return info->prog_id; - 
if (flags & XDP_FLAGS_DRV_MODE) - return info->drv_prog_id; - if (flags & XDP_FLAGS_HW_MODE) - return info->hw_prog_id; - if (flags & XDP_FLAGS_SKB_MODE) - return info->skb_prog_id; - - return 0; -} - -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) -{ - struct xdp_link_info info; - int ret; - - ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); - if (!ret) - *prog_id = get_xdp_id(&info, flags); - - return ret; -} - -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct ifinfomsg ifm; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - .nlh.nlmsg_type = RTM_GETLINK, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .ifm.ifi_family = AF_PACKET, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, - dump_link_nlmsg, cookie); -} - -static int __dump_class_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_class_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_class_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETTCLASS, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, - dump_class_nlmsg, cookie); -} - -static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_qdisc_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETQDISC, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, - dump_qdisc_nlmsg, cookie); -} - -static int __dump_filter_nlmsg(struct nlmsghdr *nlh, - libbpf_dump_nlmsg_t dump_filter_nlmsg, - void *cookie) -{ - struct nlattr *tb[TCA_MAX + 1], *attr; - struct tcmsg *t = NLMSG_DATA(nlh); - int len; - - len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); - attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); - if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) 
!= 0) - return -LIBBPF_ERRNO__NLPARSE; - - return dump_filter_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie) -{ - struct { - struct nlmsghdr nlh; - struct tcmsg t; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - .nlh.nlmsg_type = RTM_GETTFILTER, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .t.tcm_family = AF_UNSPEC, - .t.tcm_ifindex = ifindex, - .t.tcm_parent = handle, - }; - int seq = time(NULL); - - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; - - return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, - dump_filter_nlmsg, cookie); -} diff --git a/src/contrib/libbpf/bpf/nlattr.c b/src/contrib/libbpf/bpf/nlattr.c deleted file mode 100644 index 8db44bbfc..000000000 --- a/src/contrib/libbpf/bpf/nlattr.c +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * NETLINK Netlink attributes - * - * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> - */ - -#include <errno.h> -#include "nlattr.h" -#include "libbpf_internal.h" -#include <linux/rtnetlink.h> -#include <string.h> -#include <stdio.h> - -static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { - [LIBBPF_NLA_U8] = sizeof(uint8_t), - [LIBBPF_NLA_U16] = sizeof(uint16_t), - [LIBBPF_NLA_U32] = sizeof(uint32_t), - [LIBBPF_NLA_U64] = sizeof(uint64_t), - [LIBBPF_NLA_STRING] = 1, - [LIBBPF_NLA_FLAG] = 0, -}; - -static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) -{ - int totlen = NLA_ALIGN(nla->nla_len); - - *remaining -= totlen; - return (struct nlattr *) ((char *) nla + totlen); -} - -static int nla_ok(const struct nlattr *nla, int remaining) -{ - return remaining >= sizeof(*nla) && - nla->nla_len >= sizeof(*nla) && - nla->nla_len <= remaining; -} - -static int nla_type(const struct nlattr *nla) -{ - return nla->nla_type & NLA_TYPE_MASK; -} - -static int validate_nla(struct nlattr *nla, int maxtype, - struct libbpf_nla_policy *policy) -{ - struct libbpf_nla_policy *pt; - unsigned int minlen = 0; - int type = nla_type(nla); - - if (type < 0 || type > maxtype) - return 0; - - pt = &policy[type]; - - if (pt->type > LIBBPF_NLA_TYPE_MAX) - return 0; - - if (pt->minlen) - minlen = pt->minlen; - else if (pt->type != LIBBPF_NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; - - if (libbpf_nla_len(nla) < minlen) - return -1; - - if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; - - if (pt->type == LIBBPF_NLA_STRING) { - char *data = libbpf_nla_data(nla); - - if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; - } - - return 0; -} - -static inline int nlmsg_len(const struct nlmsghdr *nlh) -{ - return nlh->nlmsg_len - NLMSG_HDRLEN; -} - -/** - * Create attribute index based on a stream of attributes. - * @arg tb Index array to be filled (maxtype+1 elements). - * @arg maxtype Maximum attribute type expected and accepted. - * @arg head Head of attribute stream. - * @arg len Length of attribute stream. - * @arg policy Attribute validation policy. - * - * Iterates over the stream of attributes and stores a pointer to each - * attribute in the index array using the attribute type as index to - * the array. Attribute with a type greater than the maximum type - * specified will be silently ignored in order to maintain backwards - * compatibility. If \a policy is not NULL, the attribute will be - * validated using the specified policy. 
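To make the parsing contract above concrete, here is a minimal sketch of a hypothetical caller; the attribute stream and length would come from a received rtnetlink message, and the single-entry policy is illustrative, not from the removed source:

#include <stdio.h>
#include <linux/if_link.h>
#include "nlattr.h"

/* Sketch: index the attributes of one link message and print
 * IFLA_IFNAME, validating it as a NUL-terminated string. */
static int print_ifname(struct nlattr *head, int len)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct libbpf_nla_policy policy[IFLA_MAX + 1] = {
		[IFLA_IFNAME] = { .type = LIBBPF_NLA_STRING },
	};

	if (libbpf_nla_parse(tb, IFLA_MAX, head, len, policy) != 0)
		return -1;

	if (tb[IFLA_IFNAME])
		printf("ifname: %s\n",
		       libbpf_nla_getattr_str(tb[IFLA_IFNAME]));
	return 0;
}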
- * - * @see nla_validate - * @return 0 on success or a negative error code. - */ -int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, - int len, struct libbpf_nla_policy *policy) -{ - struct nlattr *nla; - int rem, err; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - libbpf_nla_for_each_attr(nla, head, len, rem) { - int type = nla_type(nla); - - if (type > maxtype) - continue; - - if (policy) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; - } - - if (tb[type]) - pr_warn("Attribute of type %#x found multiple times in message, " - "previous attribute is being ignored.\n", type); - - tb[type] = nla; - } - - err = 0; -errout: - return err; -} - -/** - * Create attribute index based on nested attribute - * @arg tb Index array to be filled (maxtype+1 elements). - * @arg maxtype Maximum attribute type expected and accepted. - * @arg nla Nested Attribute. - * @arg policy Attribute validation policy. - * - * Feeds the stream of attributes nested into the specified attribute - * to libbpf_nla_parse(). - * - * @see libbpf_nla_parse - * @return 0 on success or a negative error code. - */ -int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, - struct nlattr *nla, - struct libbpf_nla_policy *policy) -{ - return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla), - libbpf_nla_len(nla), policy); -} - -/* dump netlink extended ack error message */ -int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh) -{ - struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { - [NLMSGERR_ATTR_MSG] = { .type = LIBBPF_NLA_STRING }, - [NLMSGERR_ATTR_OFFS] = { .type = LIBBPF_NLA_U32 }, - }; - struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr; - struct nlmsgerr *err; - char *errmsg = NULL; - int hlen, alen; - - /* no TLVs, nothing to do here */ - if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) - return 0; - - err = (struct nlmsgerr *)NLMSG_DATA(nlh); - hlen = sizeof(*err); - - /* if NLM_F_CAPPED is set then the inner err msg was capped */ - if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) - hlen += nlmsg_len(&err->msg); - - attr = (struct nlattr *) ((void *) err + hlen); - alen = nlh->nlmsg_len - hlen; - - if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, - extack_policy) != 0) { - pr_warn("Failed to parse extended error attributes\n"); - return 0; - } - - if (tb[NLMSGERR_ATTR_MSG]) - errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]); - - pr_warn("Kernel error message: %s\n", errmsg); - - return 0; -} diff --git a/src/contrib/libbpf/bpf/nlattr.h b/src/contrib/libbpf/bpf/nlattr.h deleted file mode 100644 index 6cc3ac916..000000000 --- a/src/contrib/libbpf/bpf/nlattr.h +++ /dev/null @@ -1,106 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * NETLINK Netlink attributes - * - * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> - */ - -#ifndef __LIBBPF_NLATTR_H -#define __LIBBPF_NLATTR_H - -#include <stdint.h> -#include <linux/netlink.h> -/* avoid multiple definition of netlink features */ -#define __LINUX_NETLINK_H - -/** - * Standard attribute types to specify validation policy - */ -enum { - LIBBPF_NLA_UNSPEC, /**< Unspecified type, binary data chunk */ - LIBBPF_NLA_U8, /**< 8 bit integer */ - LIBBPF_NLA_U16, /**< 16 bit integer */ - LIBBPF_NLA_U32, /**< 32 bit integer */ - LIBBPF_NLA_U64, /**< 64 bit integer */ - LIBBPF_NLA_STRING, /**< NUL terminated character string */ - LIBBPF_NLA_FLAG, /**< Flag */ - LIBBPF_NLA_MSECS, /**< Micro seconds (64bit) */ - LIBBPF_NLA_NESTED, /**< Nested attributes */ - 
__LIBBPF_NLA_TYPE_MAX, -}; - -#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1) - -/** - * @ingroup attr - * Attribute validation policy. - * - * See section @core_doc{core_attr_parse,Attribute Parsing} for more details. - */ -struct libbpf_nla_policy { - /** Type of attribute or LIBBPF_NLA_UNSPEC */ - uint16_t type; - - /** Minimal length of payload required */ - uint16_t minlen; - - /** Maximal length of payload allowed */ - uint16_t maxlen; -}; - -/** - * @ingroup attr - * Iterate over a stream of attributes - * @arg pos loop counter, set to current attribute - * @arg head head of attribute stream - * @arg len length of attribute stream - * @arg rem initialized to len, holds bytes currently remaining in stream - */ -#define libbpf_nla_for_each_attr(pos, head, len, rem) \ - for (pos = head, rem = len; \ - nla_ok(pos, rem); \ - pos = nla_next(pos, &(rem))) - -/** - * libbpf_nla_data - head of payload - * @nla: netlink attribute - */ -static inline void *libbpf_nla_data(const struct nlattr *nla) -{ - return (char *) nla + NLA_HDRLEN; -} - -static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla) -{ - return *(uint8_t *)libbpf_nla_data(nla); -} - -static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla) -{ - return *(uint32_t *)libbpf_nla_data(nla); -} - -static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla) -{ - return (const char *)libbpf_nla_data(nla); -} - -/** - * libbpf_nla_len - length of payload - * @nla: netlink attribute - */ -static inline int libbpf_nla_len(const struct nlattr *nla) -{ - return nla->nla_len - NLA_HDRLEN; -} - -int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, - int len, struct libbpf_nla_policy *policy); -int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, - struct nlattr *nla, - struct libbpf_nla_policy *policy); - -int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh); - -#endif /* __LIBBPF_NLATTR_H */ diff --git a/src/contrib/libbpf/bpf/str_error.c b/src/contrib/libbpf/bpf/str_error.c deleted file mode 100644 index b8064eedc..000000000 --- a/src/contrib/libbpf/bpf/str_error.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -#undef _GNU_SOURCE -#include <string.h> -#include <stdio.h> -#include "str_error.h" - -/* - * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl - * libc, while checking strerror_r() return to avoid having to check this in - * all places calling it. - */ -char *libbpf_strerror_r(int err, char *dst, int len) -{ - int ret = strerror_r(err < 0 ? -err : err, dst, len); - if (ret) - snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); - return dst; -} diff --git a/src/contrib/libbpf/bpf/str_error.h b/src/contrib/libbpf/bpf/str_error.h deleted file mode 100644 index a139334d5..000000000 --- a/src/contrib/libbpf/bpf/str_error.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __LIBBPF_STR_ERROR_H -#define __LIBBPF_STR_ERROR_H - -char *libbpf_strerror_r(int err, char *dst, int len); -#endif /* __LIBBPF_STR_ERROR_H */ diff --git a/src/contrib/libbpf/bpf/xsk.c b/src/contrib/libbpf/bpf/xsk.c deleted file mode 100644 index 8e0ffa800..000000000 --- a/src/contrib/libbpf/bpf/xsk.c +++ /dev/null @@ -1,797 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * AF_XDP user-space access library. - * - * Copyright(c) 2018 - 2019 Intel Corporation. 
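The libbpf_strerror_r() wrapper deleted a little further up deserves a note: the #undef _GNU_SOURCE in str_error.c forces the XSI strerror_r() (the variant returning an int) even on glibc, so the same code path also works on musl-based systems such as Alpine. A hypothetical caller:

#include <errno.h>
#include <stdio.h>
#include "str_error.h"

int main(void)
{
	char buf[128];

	/* The wrapper accepts negated errno values too. */
	printf("%s\n", libbpf_strerror_r(-ENOENT, buf, sizeof(buf)));
	return 0;
}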
- * - * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> - */ - -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <arpa/inet.h> -#include <asm/barrier.h> -#include <linux/compiler.h> -#include <linux/ethtool.h> -#include <linux/filter.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/if_xdp.h> -#include <linux/sockios.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/types.h> - -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" -#include "xsk.h" - -#ifndef SOL_XDP - #define SOL_XDP 283 -#endif - -#ifndef AF_XDP - #define AF_XDP 44 -#endif - -#ifndef PF_XDP - #define PF_XDP AF_XDP -#endif - -struct xsk_umem { - struct xsk_ring_prod *fill; - struct xsk_ring_cons *comp; - char *umem_area; - struct xsk_umem_config config; - int fd; - int refcount; -}; - -struct xsk_socket { - struct xsk_ring_cons *rx; - struct xsk_ring_prod *tx; - __u64 outstanding_tx; - struct xsk_umem *umem; - struct xsk_socket_config config; - int fd; - int ifindex; - int prog_fd; - int xsks_map_fd; - __u32 queue_id; - char ifname[IFNAMSIZ]; -}; - -struct xsk_nl_info { - bool xdp_prog_attached; - int ifindex; - int fd; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_ring_offset_v1 { - __u64 producer; - __u64 consumer; - __u64 desc; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_mmap_offsets_v1 { - struct xdp_ring_offset_v1 rx; - struct xdp_ring_offset_v1 tx; - struct xdp_ring_offset_v1 fr; - struct xdp_ring_offset_v1 cr; -}; - -int xsk_umem__fd(const struct xsk_umem *umem) -{ - return umem ? umem->fd : -EINVAL; -} - -int xsk_socket__fd(const struct xsk_socket *xsk) -{ - return xsk ? xsk->fd : -EINVAL; -} - -static bool xsk_page_aligned(void *buffer) -{ - unsigned long addr = (unsigned long)buffer; - - return !(addr & (getpagesize() - 1)); -} - -static void xsk_set_umem_config(struct xsk_umem_config *cfg, - const struct xsk_umem_config *usr_cfg) -{ - if (!usr_cfg) { - cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; - cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; - cfg->flags = XSK_UMEM__DEFAULT_FLAGS; - return; - } - - cfg->fill_size = usr_cfg->fill_size; - cfg->comp_size = usr_cfg->comp_size; - cfg->frame_size = usr_cfg->frame_size; - cfg->frame_headroom = usr_cfg->frame_headroom; - cfg->flags = usr_cfg->flags; -} - -static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, - const struct xsk_socket_config *usr_cfg) -{ - if (!usr_cfg) { - cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; - cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg->libbpf_flags = 0; - cfg->xdp_flags = 0; - cfg->bind_flags = 0; - return 0; - } - - if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) - return -EINVAL; - - cfg->rx_size = usr_cfg->rx_size; - cfg->tx_size = usr_cfg->tx_size; - cfg->libbpf_flags = usr_cfg->libbpf_flags; - cfg->xdp_flags = usr_cfg->xdp_flags; - cfg->bind_flags = usr_cfg->bind_flags; - - return 0; -} - -static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) -{ - struct xdp_mmap_offsets_v1 off_v1; - - /* getsockopt on a kernel <= 5.3 has no flags fields. - * Copy over the offsets to the correct places in the >=5.4 format - * and put the flags where they would have been on that kernel. 
- */ - memcpy(&off_v1, off, sizeof(off_v1)); - - off->rx.producer = off_v1.rx.producer; - off->rx.consumer = off_v1.rx.consumer; - off->rx.desc = off_v1.rx.desc; - off->rx.flags = off_v1.rx.consumer + sizeof(__u32); - - off->tx.producer = off_v1.tx.producer; - off->tx.consumer = off_v1.tx.consumer; - off->tx.desc = off_v1.tx.desc; - off->tx.flags = off_v1.tx.consumer + sizeof(__u32); - - off->fr.producer = off_v1.fr.producer; - off->fr.consumer = off_v1.fr.consumer; - off->fr.desc = off_v1.fr.desc; - off->fr.flags = off_v1.fr.consumer + sizeof(__u32); - - off->cr.producer = off_v1.cr.producer; - off->cr.consumer = off_v1.cr.consumer; - off->cr.desc = off_v1.cr.desc; - off->cr.flags = off_v1.cr.consumer + sizeof(__u32); -} - -static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) -{ - socklen_t optlen; - int err; - - optlen = sizeof(*off); - err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); - if (err) - return err; - - if (optlen == sizeof(*off)) - return 0; - - if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { - xsk_mmap_offsets_v1(off); - return 0; - } - - return -EINVAL; -} - -int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) -{ - struct xdp_mmap_offsets off; - struct xdp_umem_reg mr; - struct xsk_umem *umem; - void *map; - int err; - - if (!umem_area || !umem_ptr || !fill || !comp) - return -EFAULT; - if (!size && !xsk_page_aligned(umem_area)) - return -EINVAL; - - umem = calloc(1, sizeof(*umem)); - if (!umem) - return -ENOMEM; - - umem->fd = socket(AF_XDP, SOCK_RAW, 0); - if (umem->fd < 0) { - err = -errno; - goto out_umem_alloc; - } - - umem->umem_area = umem_area; - xsk_set_umem_config(&umem->config, usr_config); - - memset(&mr, 0, sizeof(mr)); - mr.addr = (uintptr_t)umem_area; - mr.len = size; - mr.chunk_size = umem->config.frame_size; - mr.headroom = umem->config.frame_headroom; - mr.flags = umem->config.flags; - - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); - if (err) { - err = -errno; - goto out_socket; - } - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, - &umem->config.fill_size, - sizeof(umem->config.fill_size)); - if (err) { - err = -errno; - goto out_socket; - } - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, - &umem->config.comp_size, - sizeof(umem->config.comp_size)); - if (err) { - err = -errno; - goto out_socket; - } - - err = xsk_get_mmap_offsets(umem->fd, &off); - if (err) { - err = -errno; - goto out_socket; - } - - map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, - XDP_UMEM_PGOFF_FILL_RING); - if (map == MAP_FAILED) { - err = -errno; - goto out_socket; - } - - umem->fill = fill; - fill->mask = umem->config.fill_size - 1; - fill->size = umem->config.fill_size; - fill->producer = map + off.fr.producer; - fill->consumer = map + off.fr.consumer; - fill->flags = map + off.fr.flags; - fill->ring = map + off.fr.desc; - fill->cached_cons = umem->config.fill_size; - - map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, - XDP_UMEM_PGOFF_COMPLETION_RING); - if (map == MAP_FAILED) { - err = -errno; - goto out_mmap; - } - - umem->comp = comp; - comp->mask = umem->config.comp_size - 1; - comp->size = umem->config.comp_size; - comp->producer = map + off.cr.producer; - comp->consumer = map + 
off.cr.consumer; - comp->flags = map + off.cr.flags; - comp->ring = map + off.cr.desc; - - *umem_ptr = umem; - return 0; - -out_mmap: - munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); -out_socket: - close(umem->fd); -out_umem_alloc: - free(umem); - return err; -} - -struct xsk_umem_config_v1 { - __u32 fill_size; - __u32 comp_size; - __u32 frame_size; - __u32 frame_headroom; -}; - -int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) -{ - struct xsk_umem_config config; - - memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1)); - config.flags = 0; - - return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, - &config); -} -COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) -DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) - -static int xsk_load_xdp_prog(struct xsk_socket *xsk) -{ - static const int log_buf_size = 16 * 1024; - char log_buf[log_buf_size]; - int err, prog_fd; - - /* This is the C-program: - * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) - * { - * int ret, index = ctx->rx_queue_index; - * - * // A set entry here means that the correspnding queue_id - * // has an active AF_XDP socket bound to it. - * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); - * if (ret > 0) - * return ret; - * - * // Fallback for pre-5.3 kernels, not supporting default - * // action in the flags parameter. - * if (bpf_map_lookup_elem(&xsks_map, &index)) - * return bpf_redirect_map(&xsks_map, index, 0); - * return XDP_PASS; - * } - */ - struct bpf_insn prog[] = { - /* r2 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r2 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), - /* r3 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_3, 2), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* if w0 != 0 goto pc+13 */ - BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), - /* r2 = r10 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - /* r2 += -4 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), - /* call bpf_map_lookup_elem */ - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - /* r1 = r0 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - /* r0 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto pc+5 */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), - /* r2 = *(u32 *)(r10 - 4) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), - /* r3 = 0 */ - BPF_MOV64_IMM(BPF_REG_3, 0), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* The jumps are to this instruction */ - BPF_EXIT_INSN(), - }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - - prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, - "LGPL-2.1 or BSD-2-Clause", 0, log_buf, - log_buf_size); - if (prog_fd < 0) { - pr_warn("BPF log buffer:\n%s", log_buf); - return prog_fd; - } - - err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); - if (err) { - close(prog_fd); - return err; - } - - xsk->prog_fd = prog_fd; - return 0; -} - -static int xsk_get_max_queues(struct xsk_socket *xsk) -{ - struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; - struct ifreq ifr = {}; - int fd, err, ret; - - fd = socket(AF_INET, SOCK_DGRAM, 0); - 
if (fd < 0) - return -errno; - - ifr.ifr_data = (void *)&channels; - memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; - err = ioctl(fd, SIOCETHTOOL, &ifr); - if (err && errno != EOPNOTSUPP) { - ret = -errno; - goto out; - } - - if (err) { - /* If the device says it has no channels, then all traffic - * is sent to a single stream, so max queues = 1. - */ - ret = 1; - } else { - /* Take the max of rx, tx, combined. Drivers return - * the number of channels in different ways. - */ - ret = max(channels.max_rx, channels.max_tx); - ret = max(ret, (int)channels.max_combined); - } - -out: - close(fd); - return ret; -} - -static int xsk_create_bpf_maps(struct xsk_socket *xsk) -{ - int max_queues; - int fd; - - max_queues = xsk_get_max_queues(xsk); - if (max_queues < 0) - return max_queues; - - fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", - sizeof(int), sizeof(int), max_queues, 0); - if (fd < 0) - return fd; - - xsk->xsks_map_fd = fd; - - return 0; -} - -static void xsk_delete_bpf_maps(struct xsk_socket *xsk) -{ - bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); - close(xsk->xsks_map_fd); -} - -static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) -{ - __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); - __u32 map_len = sizeof(struct bpf_map_info); - struct bpf_prog_info prog_info = {}; - struct bpf_map_info map_info; - int fd, err; - - err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); - if (err) - return err; - - num_maps = prog_info.nr_map_ids; - - map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); - if (!map_ids) - return -ENOMEM; - - memset(&prog_info, 0, prog_len); - prog_info.nr_map_ids = num_maps; - prog_info.map_ids = (__u64)(unsigned long)map_ids; - - err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); - if (err) - goto out_map_ids; - - xsk->xsks_map_fd = -1; - - for (i = 0; i < prog_info.nr_map_ids; i++) { - fd = bpf_map_get_fd_by_id(map_ids[i]); - if (fd < 0) - continue; - - err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); - if (err) { - close(fd); - continue; - } - - if (!strcmp(map_info.name, "xsks_map")) { - xsk->xsks_map_fd = fd; - continue; - } - - close(fd); - } - - err = 0; - if (xsk->xsks_map_fd == -1) - err = -ENOENT; - -out_map_ids: - free(map_ids); - return err; -} - -static int xsk_set_bpf_maps(struct xsk_socket *xsk) -{ - return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, - &xsk->fd, 0); -} - -static int xsk_setup_xdp_prog(struct xsk_socket *xsk) -{ - __u32 prog_id = 0; - int err; - - err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, - xsk->config.xdp_flags); - if (err) - return err; - - if (!prog_id) { - err = xsk_create_bpf_maps(xsk); - if (err) - return err; - - err = xsk_load_xdp_prog(xsk); - if (err) { - xsk_delete_bpf_maps(xsk); - return err; - } - } else { - xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (xsk->prog_fd < 0) - return -errno; - err = xsk_lookup_bpf_maps(xsk); - if (err) { - close(xsk->prog_fd); - return err; - } - } - - if (xsk->rx) - err = xsk_set_bpf_maps(xsk); - if (err) { - xsk_delete_bpf_maps(xsk); - close(xsk->prog_fd); - return err; - } - - return 0; -} - -int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, - const struct xsk_socket_config *usr_config) -{ - void *rx_map = NULL, *tx_map = NULL; - struct sockaddr_xdp sxdp = {}; - struct xdp_mmap_offsets off; - struct xsk_socket *xsk; - int err; - - if 
(!umem || !xsk_ptr || !(rx || tx)) - return -EFAULT; - - xsk = calloc(1, sizeof(*xsk)); - if (!xsk) - return -ENOMEM; - - err = xsk_set_xdp_socket_config(&xsk->config, usr_config); - if (err) - goto out_xsk_alloc; - - if (umem->refcount && - !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); - err = -EBUSY; - goto out_xsk_alloc; - } - - if (umem->refcount++ > 0) { - xsk->fd = socket(AF_XDP, SOCK_RAW, 0); - if (xsk->fd < 0) { - err = -errno; - goto out_xsk_alloc; - } - } else { - xsk->fd = umem->fd; - } - - xsk->outstanding_tx = 0; - xsk->queue_id = queue_id; - xsk->umem = umem; - xsk->ifindex = if_nametoindex(ifname); - if (!xsk->ifindex) { - err = -errno; - goto out_socket; - } - memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); - xsk->ifname[IFNAMSIZ - 1] = '\0'; - - if (rx) { - err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, - &xsk->config.rx_size, - sizeof(xsk->config.rx_size)); - if (err) { - err = -errno; - goto out_socket; - } - } - if (tx) { - err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, - &xsk->config.tx_size, - sizeof(xsk->config.tx_size)); - if (err) { - err = -errno; - goto out_socket; - } - } - - err = xsk_get_mmap_offsets(xsk->fd, &off); - if (err) { - err = -errno; - goto out_socket; - } - - if (rx) { - rx_map = mmap(NULL, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_RX_RING); - if (rx_map == MAP_FAILED) { - err = -errno; - goto out_socket; - } - - rx->mask = xsk->config.rx_size - 1; - rx->size = xsk->config.rx_size; - rx->producer = rx_map + off.rx.producer; - rx->consumer = rx_map + off.rx.consumer; - rx->flags = rx_map + off.rx.flags; - rx->ring = rx_map + off.rx.desc; - } - xsk->rx = rx; - - if (tx) { - tx_map = mmap(NULL, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_TX_RING); - if (tx_map == MAP_FAILED) { - err = -errno; - goto out_mmap_rx; - } - - tx->mask = xsk->config.tx_size - 1; - tx->size = xsk->config.tx_size; - tx->producer = tx_map + off.tx.producer; - tx->consumer = tx_map + off.tx.consumer; - tx->flags = tx_map + off.tx.flags; - tx->ring = tx_map + off.tx.desc; - tx->cached_cons = xsk->config.tx_size; - } - xsk->tx = tx; - - sxdp.sxdp_family = PF_XDP; - sxdp.sxdp_ifindex = xsk->ifindex; - sxdp.sxdp_queue_id = xsk->queue_id; - if (umem->refcount > 1) { - sxdp.sxdp_flags = XDP_SHARED_UMEM; - sxdp.sxdp_shared_umem_fd = umem->fd; - } else { - sxdp.sxdp_flags = xsk->config.bind_flags; - } - - err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); - if (err) { - err = -errno; - goto out_mmap_tx; - } - - xsk->prog_fd = -1; - - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - err = xsk_setup_xdp_prog(xsk); - if (err) - goto out_mmap_tx; - } - - *xsk_ptr = xsk; - return 0; - -out_mmap_tx: - if (tx) - munmap(tx_map, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc)); -out_mmap_rx: - if (rx) - munmap(rx_map, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc)); -out_socket: - if (--umem->refcount) - close(xsk->fd); -out_xsk_alloc: - free(xsk); - return err; -} - -int xsk_umem__delete(struct xsk_umem *umem) -{ - struct xdp_mmap_offsets off; - int err; - - if (!umem) - return 0; - - if (umem->refcount) - return -EBUSY; - - err = xsk_get_mmap_offsets(umem->fd, &off); - if (!err) { - munmap(umem->fill->ring - off.fr.desc, - off.fr.desc + 
umem->config.fill_size * sizeof(__u64)); - munmap(umem->comp->ring - off.cr.desc, - off.cr.desc + umem->config.comp_size * sizeof(__u64)); - } - - close(umem->fd); - free(umem); - - return 0; -} - -void xsk_socket__delete(struct xsk_socket *xsk) -{ - size_t desc_sz = sizeof(struct xdp_desc); - struct xdp_mmap_offsets off; - int err; - - if (!xsk) - return; - - if (xsk->prog_fd != -1) { - xsk_delete_bpf_maps(xsk); - close(xsk->prog_fd); - } - - err = xsk_get_mmap_offsets(xsk->fd, &off); - if (!err) { - if (xsk->rx) { - munmap(xsk->rx->ring - off.rx.desc, - off.rx.desc + xsk->config.rx_size * desc_sz); - } - if (xsk->tx) { - munmap(xsk->tx->ring - off.tx.desc, - off.tx.desc + xsk->config.tx_size * desc_sz); - } - - } - - xsk->umem->refcount--; - /* Do not close an fd that also has an associated umem connected - * to it. - */ - if (xsk->fd != xsk->umem->fd) - close(xsk->fd); - free(xsk); -} diff --git a/src/contrib/libbpf/bpf/xsk.h b/src/contrib/libbpf/bpf/xsk.h deleted file mode 100644 index 584f6820a..000000000 --- a/src/contrib/libbpf/bpf/xsk.h +++ /dev/null @@ -1,246 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * AF_XDP user-space access library. - * - * Copyright(c) 2018 - 2019 Intel Corporation. - * - * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> - */ - -#ifndef __LIBBPF_XSK_H -#define __LIBBPF_XSK_H - -#include <stdio.h> -#include <stdint.h> -#include <linux/if_xdp.h> - -#include "libbpf.h" -#include "libbpf_util.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Do not access these members directly. Use the functions below. */ -#define DEFINE_XSK_RING(name) \ -struct name { \ - __u32 cached_prod; \ - __u32 cached_cons; \ - __u32 mask; \ - __u32 size; \ - __u32 *producer; \ - __u32 *consumer; \ - void *ring; \ - __u32 *flags; \ -} - -DEFINE_XSK_RING(xsk_ring_prod); -DEFINE_XSK_RING(xsk_ring_cons); - -/* For a detailed explanation on the memory barriers associated with the - * ring, please take a look at net/xdp/xsk_queue.h. - */ - -struct xsk_umem; -struct xsk_socket; - -static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, - __u32 idx) -{ - __u64 *addrs = (__u64 *)fill->ring; - - return &addrs[idx & fill->mask]; -} - -static inline const __u64 * -xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) -{ - const __u64 *addrs = (const __u64 *)comp->ring; - - return &addrs[idx & comp->mask]; -} - -static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, - __u32 idx) -{ - struct xdp_desc *descs = (struct xdp_desc *)tx->ring; - - return &descs[idx & tx->mask]; -} - -static inline const struct xdp_desc * -xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) -{ - const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; - - return &descs[idx & rx->mask]; -} - -static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) -{ - return *r->flags & XDP_RING_NEED_WAKEUP; -} - -static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) -{ - __u32 free_entries = r->cached_cons - r->cached_prod; - - if (free_entries >= nb) - return free_entries; - - /* Refresh the local tail pointer. - * cached_cons is r->size bigger than the real consumer pointer so - * that this addition can be avoided in the more frequently - * executed code that computs free_entries in the beginning of - * this function. Without this optimization it whould have been - * free_entries = r->cached_prod - r->cached_cons + r->size. 
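A worked check of the trick described above, using hypothetical numbers: for a ring of size 2048, a real consumer pointer of 100 and cached_prod of 500, cached_cons is stored as 100 + 2048 = 2148, so the fast path at the top of the function computes free_entries = 2148 - 500 = 1648; without the built-in bias, every call would instead have to compute 100 - 500 + 2048 = 1648, paying the extra addition each time.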
- */ - r->cached_cons = *r->consumer + r->size; - - return r->cached_cons - r->cached_prod; -} - -static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) -{ - __u32 entries = r->cached_prod - r->cached_cons; - - if (entries == 0) { - r->cached_prod = *r->producer; - entries = r->cached_prod - r->cached_cons; - } - - return (entries > nb) ? nb : entries; -} - -static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, - size_t nb, __u32 *idx) -{ - if (xsk_prod_nb_free(prod, nb) < nb) - return 0; - - *idx = prod->cached_prod; - prod->cached_prod += nb; - - return nb; -} - -static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) -{ - /* Make sure everything has been written to the ring before indicating - * this to the kernel by writing the producer pointer. - */ - libbpf_smp_wmb(); - - *prod->producer += nb; -} - -static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, - size_t nb, __u32 *idx) -{ - size_t entries = xsk_cons_nb_avail(cons, nb); - - if (entries > 0) { - /* Make sure we do not speculatively read the data before - * we have received the packet buffers from the ring. - */ - libbpf_smp_rmb(); - - *idx = cons->cached_cons; - cons->cached_cons += entries; - } - - return entries; -} - -static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) -{ - /* Make sure data has been read before indicating we are done - * with the entries by updating the consumer pointer. - */ - libbpf_smp_rwmb(); - - *cons->consumer += nb; -} - -static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) -{ - return &((char *)umem_area)[addr]; -} - -static inline __u64 xsk_umem__extract_addr(__u64 addr) -{ - return addr & XSK_UNALIGNED_BUF_ADDR_MASK; -} - -static inline __u64 xsk_umem__extract_offset(__u64 addr) -{ - return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; -} - -static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) -{ - return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); -} - -LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); - -#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 -#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 -#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ -#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) -#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 -#define XSK_UMEM__DEFAULT_FLAGS 0 - -struct xsk_umem_config { - __u32 fill_size; - __u32 comp_size; - __u32 frame_size; - __u32 frame_headroom; - __u32 flags; -}; - -/* Flags for the libbpf_flags field. */ -#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) - -struct xsk_socket_config { - __u32 rx_size; - __u32 tx_size; - __u32 libbpf_flags; - __u32 xdp_flags; - __u16 bind_flags; -}; - -/* Set config to NULL to get the default configuration. 
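To tie the ring and configuration pieces of this header together, a condensed bring-up sketch under assumed defaults: one umem over anonymous memory, one socket on queue 0, then a short RX drain using the peek/release pair. Names and sizing are illustrative and error handling is abbreviated:

#include <sys/mman.h>
#include "xsk.h"

#define NUM_FRAMES 4096

static struct xsk_ring_prod fill, tx;
static struct xsk_ring_cons comp, rx;

static int bring_up(const char *ifname,
		    struct xsk_umem **umem, struct xsk_socket **xsk)
{
	size_t len = (size_t)NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
	void *bufs = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int err;

	if (bufs == MAP_FAILED)
		return -1;

	/* NULL configs select the defaults defined above. */
	err = xsk_umem__create(umem, bufs, len, &fill, &comp, NULL);
	if (err)
		return err;

	return xsk_socket__create(xsk, ifname, 0 /* queue */, *umem,
				  &rx, &tx, NULL);
}

static void drain_rx(void *umem_area)
{
	__u32 idx;
	size_t i, n = xsk_ring_cons__peek(&rx, 64, &idx);

	for (i = 0; i < n; i++) {
		const struct xdp_desc *d =
			xsk_ring_cons__rx_desc(&rx, idx + i);
		void *pkt = xsk_umem__get_data(umem_area, d->addr);

		(void)pkt; /* process d->len bytes of packet data here */
	}
	xsk_ring_cons__release(&rx, n);
}

In real use the fill ring must also be stocked with frame addresses via xsk_ring_prod__reserve() and xsk_ring_prod__submit() before any packet can be received; that step is omitted here for brevity.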
*/ -LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, - struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - const struct xsk_socket_config *config); - -/* Returns 0 for success and -EBUSY if the umem is still in use. */ -LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* __LIBBPF_XSK_H */ diff --git a/src/contrib/libbpf/include/asm/barrier.h b/src/contrib/libbpf/include/asm/barrier.h deleted file mode 100644 index 1fc6aee1f..000000000 --- a/src/contrib/libbpf/include/asm/barrier.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __ASM_BARRIER_H -#define __ASM_BARRIER_H - -#include <linux/compiler.h> - -#endif diff --git a/src/contrib/libbpf/include/linux/compiler.h b/src/contrib/libbpf/include/linux/compiler.h deleted file mode 100644 index 26336dc70..000000000 --- a/src/contrib/libbpf/include/linux/compiler.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -#ifndef __LINUX_COMPILER_H -#define __LINUX_COMPILER_H - -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - -#define READ_ONCE(x) (*(volatile typeof(x) *)&x) -#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) - -#define barrier() asm volatile("" ::: "memory") - -#if defined(__x86_64__) - -# define smp_rmb() barrier() -# define smp_wmb() barrier() -# define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") - -# define smp_store_release(p, v) \ -do { \ - barrier(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -# define smp_load_acquire(p) \ -({ \ - typeof(*p) ___p = READ_ONCE(*p); \ - barrier(); \ - ___p; \ -}) - -#elif defined(__aarch64__) - -# define smp_rmb() asm volatile("dmb ishld" ::: "memory") -# define smp_wmb() asm volatile("dmb ishst" ::: "memory") -# define smp_mb() asm volatile("dmb ish" ::: "memory") - -#endif - -#ifndef smp_mb -# define smp_mb() __sync_synchronize() -#endif - -#ifndef smp_rmb -# define smp_rmb() smp_mb() -#endif - -#ifndef smp_wmb -# define smp_wmb() smp_mb() -#endif - -#ifndef smp_store_release -# define smp_store_release(p, v) \ -do { \ - smp_mb(); \ - WRITE_ONCE(*p, v); \ -} while (0) -#endif - -#ifndef smp_load_acquire -# define smp_load_acquire(p) \ -({ \ - typeof(*p) ___p = READ_ONCE(*p); \ - smp_mb(); \ - ___p; \ -}) -#endif - -#endif /* __LINUX_COMPILER_H */ diff --git a/src/contrib/libbpf/include/linux/err.h b/src/contrib/libbpf/include/linux/err.h deleted file mode 100644 index 1b1dafbcb..000000000 --- a/src/contrib/libbpf/include/linux/err.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -#ifndef __LINUX_ERR_H -#define __LINUX_ERR_H - -#include <linux/types.h> -#include <asm/errno.h> - -#define MAX_ERRNO 4095 - -#define 
IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) - -static inline void * ERR_PTR(long error_) -{ - return (void *) error_; -} - -static inline long PTR_ERR(const void *ptr) -{ - return (long) ptr; -} - -static inline bool IS_ERR(const void *ptr) -{ - return IS_ERR_VALUE((unsigned long)ptr); -} - -static inline bool IS_ERR_OR_NULL(const void *ptr) -{ - return (!ptr) || IS_ERR_VALUE((unsigned long)ptr); -} - -static inline long PTR_ERR_OR_ZERO(const void *ptr) -{ - return IS_ERR(ptr) ? PTR_ERR(ptr) : 0; -} - -#endif diff --git a/src/contrib/libbpf/include/linux/filter.h b/src/contrib/libbpf/include/linux/filter.h deleted file mode 100644 index b0700e205..000000000 --- a/src/contrib/libbpf/include/linux/filter.h +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -#ifndef __LINUX_FILTER_H -#define __LINUX_FILTER_H - -#include <linux/bpf.h> - -#define BPF_ALU64_IMM(OP, DST, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -#define BPF_MOV64_IMM(DST, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ALU64 | BPF_MOV | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -#define BPF_EXIT_INSN() \ - ((struct bpf_insn) { \ - .code = BPF_JMP | BPF_EXIT, \ - .dst_reg = 0, \ - .src_reg = 0, \ - .off = 0, \ - .imm = 0 }) - -#define BPF_EMIT_CALL(FUNC) \ - ((struct bpf_insn) { \ - .code = BPF_JMP | BPF_CALL, \ - .dst_reg = 0, \ - .src_reg = 0, \ - .off = 0, \ - .imm = ((FUNC) - BPF_FUNC_unspec) }) - -#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF, \ - .imm = 0 }) - -#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF, \ - .imm = 0 }) - -#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = OFF, \ - .imm = IMM }) - -#define BPF_MOV64_REG(DST, SRC) \ - ((struct bpf_insn) { \ - .code = BPF_ALU64 | BPF_MOV | BPF_X, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = 0, \ - .imm = 0 }) - -#define BPF_MOV32_IMM(DST, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ALU | BPF_MOV | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \ - ((struct bpf_insn) { \ - .code = BPF_LD | BPF_DW | BPF_IMM, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF1, \ - .imm = IMM1 }), \ - ((struct bpf_insn) { \ - .code = 0, \ - .dst_reg = 0, \ - .src_reg = 0, \ - .off = OFF2, \ - .imm = IMM2 }) - -#define BPF_LD_MAP_FD(DST, MAP_FD) \ - BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \ - MAP_FD, 0) - -#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \ - BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \ - MAP_FD, VALUE_OFF) - -#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = OFF, \ - .imm = IMM }) - -#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = OFF, \ - .imm = IMM }) - -#endif diff --git a/src/contrib/libbpf/include/linux/kernel.h b/src/contrib/libbpf/include/linux/kernel.h deleted file mode 100644 index 
a4a7a9d6f..000000000 --- a/src/contrib/libbpf/include/linux/kernel.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -#ifndef __LINUX_KERNEL_H -#define __LINUX_KERNEL_H - -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif - -#ifndef container_of -#define container_of(ptr, type, member) ({ \ - const typeof(((type *)0)->member) * __mptr = (ptr); \ - (type *)((char *)__mptr - offsetof(type, member)); }) -#endif - -#ifndef max -#define max(x, y) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - (void) (&_max1 == &_max2); \ - _max1 > _max2 ? _max1 : _max2; }) -#endif - -#ifndef min -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) -#endif - -#ifndef roundup -#define roundup(x, y) ( \ -{ \ - const typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -#endif - -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) - -#endif diff --git a/src/contrib/libbpf/include/linux/list.h b/src/contrib/libbpf/include/linux/list.h deleted file mode 100644 index e3814f713..000000000 --- a/src/contrib/libbpf/include/linux/list.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -#ifndef __LINUX_LIST_H -#define __LINUX_LIST_H - -#define LIST_HEAD_INIT(name) { &(name), &(name) } -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -#define POISON_POINTER_DELTA 0 -#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) -#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) - - -static inline void INIT_LIST_HEAD(struct list_head *list) -{ - list->next = list; - list->prev = list; -} - -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty() on entry does not return true after this, the entry is - * in an undefined state. 
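As a quick illustration of how the container_of()-based helpers recover the enclosing object from an embedded list_head (hypothetical struct, and assuming the kernel.h and list.h definitions above are in scope):

struct item {
	int value;
	struct list_head node;	/* embedded linkage */
};

/* Return the payload of the first element on a non-empty list. */
static int first_value(struct list_head *head)
{
	struct item *it = list_first_entry(head, struct item, node);

	return it->value;
}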
- */ -static inline void __list_del_entry(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); -} - -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} - -#define list_entry(ptr, type, member) \ - container_of(ptr, type, member) -#define list_first_entry(ptr, type, member) \ - list_entry((ptr)->next, type, member) -#define list_next_entry(pos, member) \ - list_entry((pos)->member.next, typeof(*(pos)), member) - -#endif diff --git a/src/contrib/libbpf/include/linux/overflow.h b/src/contrib/libbpf/include/linux/overflow.h deleted file mode 100644 index 53d758036..000000000 --- a/src/contrib/libbpf/include/linux/overflow.h +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -#ifndef __LINUX_OVERFLOW_H -#define __LINUX_OVERFLOW_H - -#define is_signed_type(type) (((type)(-1)) < (type)1) -#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) -#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) -#define type_min(T) ((T)((T)-type_max(T)-(T)1)) - -#ifndef unlikely -#define unlikely(x) __builtin_expect(!!(x), 0) -#endif - -#ifdef __GNUC__ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif -#endif - -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW - -#define check_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_mul_overflow(__a, __b, __d); \ -}) - -#else - -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. 
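A usage sketch for check_mul_overflow(), which this header builds up to; it evaluates to nonzero when the product does not fit, on both the builtin and the fallback paths (hypothetical caller, assuming overflow.h is included):

#include <stdint.h>
#include <stdio.h>

static int checked_scale(uint32_t a, uint32_t b)
{
	uint32_t out;

	if (check_mul_overflow(a, b, &out)) {
		fprintf(stderr, "overflow: %u * %u\n", a, b);
		return -1;
	}
	printf("%u * %u = %u\n", a, b, out);
	return 0;
}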
- */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (__u64)__a * (__u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - -#define check_mul_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d)) - - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - -#endif diff --git a/src/contrib/libbpf/include/linux/ring_buffer.h b/src/contrib/libbpf/include/linux/ring_buffer.h deleted file mode 100644 index fc4677bc0..000000000 --- a/src/contrib/libbpf/include/linux/ring_buffer.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef _TOOLS_LINUX_RING_BUFFER_H_ -#define _TOOLS_LINUX_RING_BUFFER_H_ - -#include <linux/compiler.h> - -static inline __u64 ring_buffer_read_head(struct perf_event_mmap_page *base) -{ - return smp_load_acquire(&base->data_head); -} - -static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, - __u64 tail) -{ - smp_store_release(&base->data_tail, tail); -} - -#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ diff --git a/src/contrib/libbpf/include/linux/types.h b/src/contrib/libbpf/include/linux/types.h deleted file mode 100644 index bae1ed8f7..000000000 --- a/src/contrib/libbpf/include/linux/types.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -#ifndef __LINUX_TYPES_H -#define __LINUX_TYPES_H - -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> - -#include <asm/types.h> -#include <asm/posix_types.h> - -#define __bitwise__ -#define __bitwise __bitwise__ - -typedef __u16 __bitwise __le16; -typedef __u16 __bitwise __be16; -typedef __u32 __bitwise __le32; -typedef __u32 __bitwise __be32; -typedef __u64 __bitwise __le64; -typedef __u64 __bitwise __be64; - -#ifndef __aligned_u64 -# define __aligned_u64 __u64 __attribute__((aligned(8))) -#endif - -struct list_head { - struct list_head *next, *prev; -}; - -#endif diff --git a/src/contrib/libbpf/include/uapi/linux/bpf.h b/src/contrib/libbpf/include/uapi/linux/bpf.h deleted file mode 100644 index dbbcf0b02..000000000 --- a/src/contrib/libbpf/include/uapi/linux/bpf.h +++ /dev/null @@ -1,3692 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- */ -#ifndef _UAPI__LINUX_BPF_H__ -#define _UAPI__LINUX_BPF_H__ - -#include <linux/types.h> -#include <linux/bpf_common.h> - -/* Extended instruction set based on top of classic BPF */ - -/* instruction classes */ -#define BPF_JMP32 0x06 /* jmp mode in word width */ -#define BPF_ALU64 0x07 /* alu mode in double word width */ - -/* ld/ldx fields */ -#define BPF_DW 0x18 /* double word (64-bit) */ -#define BPF_XADD 0xc0 /* exclusive add */ - -/* alu/jmp fields */ -#define BPF_MOV 0xb0 /* mov reg to reg */ -#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ - -/* change endianness of a register */ -#define BPF_END 0xd0 /* flags for endianness conversion: */ -#define BPF_TO_LE 0x00 /* convert to little-endian */ -#define BPF_TO_BE 0x08 /* convert to big-endian */ -#define BPF_FROM_LE BPF_TO_LE -#define BPF_FROM_BE BPF_TO_BE - -/* jmp encodings */ -#define BPF_JNE 0x50 /* jump != */ -#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ -#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ -#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ -#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ -#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ -#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ -#define BPF_CALL 0x80 /* function call */ -#define BPF_EXIT 0x90 /* function return */ - -/* Register numbers */ -enum { - BPF_REG_0 = 0, - BPF_REG_1, - BPF_REG_2, - BPF_REG_3, - BPF_REG_4, - BPF_REG_5, - BPF_REG_6, - BPF_REG_7, - BPF_REG_8, - BPF_REG_9, - BPF_REG_10, - __MAX_BPF_REG, -}; - -/* BPF has 10 general purpose 64-bit registers and stack frame. */ -#define MAX_BPF_REG __MAX_BPF_REG - -struct bpf_insn { - __u8 code; /* opcode */ - __u8 dst_reg:4; /* dest register */ - __u8 src_reg:4; /* source register */ - __s16 off; /* signed offset */ - __s32 imm; /* signed immediate constant */ -}; - -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ -struct bpf_lpm_trie_key { - __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ -}; - -struct bpf_cgroup_storage_key { - __u64 cgroup_inode_id; /* cgroup inode id */ - __u32 attach_type; /* program attach type */ -}; - -/* BPF syscall commands, see bpf(2) man-page for details. 
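For orientation, every command enumerated just below is multiplexed through the single bpf(2) syscall with a union bpf_attr argument; a minimal sketch of creating an array map that way, something production code would normally leave to libbpf wrappers (illustrative, no error handling):

#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/bpf.h>

/* Create a 64-slot ARRAY map directly via the syscall;
 * returns the new map fd, or -1 with errno set. */
static int create_array_map(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = sizeof(uint32_t);
	attr.value_size  = sizeof(uint64_t);
	attr.max_entries = 64;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}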
*/ -enum bpf_cmd { - BPF_MAP_CREATE, - BPF_MAP_LOOKUP_ELEM, - BPF_MAP_UPDATE_ELEM, - BPF_MAP_DELETE_ELEM, - BPF_MAP_GET_NEXT_KEY, - BPF_PROG_LOAD, - BPF_OBJ_PIN, - BPF_OBJ_GET, - BPF_PROG_ATTACH, - BPF_PROG_DETACH, - BPF_PROG_TEST_RUN, - BPF_PROG_GET_NEXT_ID, - BPF_MAP_GET_NEXT_ID, - BPF_PROG_GET_FD_BY_ID, - BPF_MAP_GET_FD_BY_ID, - BPF_OBJ_GET_INFO_BY_FD, - BPF_PROG_QUERY, - BPF_RAW_TRACEPOINT_OPEN, - BPF_BTF_LOAD, - BPF_BTF_GET_FD_BY_ID, - BPF_TASK_FD_QUERY, - BPF_MAP_LOOKUP_AND_DELETE_ELEM, - BPF_MAP_FREEZE, - BPF_BTF_GET_NEXT_ID, -}; - -enum bpf_map_type { - BPF_MAP_TYPE_UNSPEC, - BPF_MAP_TYPE_HASH, - BPF_MAP_TYPE_ARRAY, - BPF_MAP_TYPE_PROG_ARRAY, - BPF_MAP_TYPE_PERF_EVENT_ARRAY, - BPF_MAP_TYPE_PERCPU_HASH, - BPF_MAP_TYPE_PERCPU_ARRAY, - BPF_MAP_TYPE_STACK_TRACE, - BPF_MAP_TYPE_CGROUP_ARRAY, - BPF_MAP_TYPE_LRU_HASH, - BPF_MAP_TYPE_LRU_PERCPU_HASH, - BPF_MAP_TYPE_LPM_TRIE, - BPF_MAP_TYPE_ARRAY_OF_MAPS, - BPF_MAP_TYPE_HASH_OF_MAPS, - BPF_MAP_TYPE_DEVMAP, - BPF_MAP_TYPE_SOCKMAP, - BPF_MAP_TYPE_CPUMAP, - BPF_MAP_TYPE_XSKMAP, - BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, - BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, - BPF_MAP_TYPE_QUEUE, - BPF_MAP_TYPE_STACK, - BPF_MAP_TYPE_SK_STORAGE, - BPF_MAP_TYPE_DEVMAP_HASH, -}; - -/* Note that tracing related programs such as - * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} - * are not subject to a stable API since kernel internal data - * structures can change from release to release and may - * therefore break existing tracing BPF programs. Tracing BPF - * programs correspond to /a/ specific kernel which is to be - * analyzed, and not /a/ specific kernel /and/ all future ones. - */ -enum bpf_prog_type { - BPF_PROG_TYPE_UNSPEC, - BPF_PROG_TYPE_SOCKET_FILTER, - BPF_PROG_TYPE_KPROBE, - BPF_PROG_TYPE_SCHED_CLS, - BPF_PROG_TYPE_SCHED_ACT, - BPF_PROG_TYPE_TRACEPOINT, - BPF_PROG_TYPE_XDP, - BPF_PROG_TYPE_PERF_EVENT, - BPF_PROG_TYPE_CGROUP_SKB, - BPF_PROG_TYPE_CGROUP_SOCK, - BPF_PROG_TYPE_LWT_IN, - BPF_PROG_TYPE_LWT_OUT, - BPF_PROG_TYPE_LWT_XMIT, - BPF_PROG_TYPE_SOCK_OPS, - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_CGROUP_DEVICE, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_RAW_TRACEPOINT, - BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_PROG_TYPE_LWT_SEG6LOCAL, - BPF_PROG_TYPE_LIRC_MODE2, - BPF_PROG_TYPE_SK_REUSEPORT, - BPF_PROG_TYPE_FLOW_DISSECTOR, - BPF_PROG_TYPE_CGROUP_SYSCTL, - BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, - BPF_PROG_TYPE_CGROUP_SOCKOPT, - BPF_PROG_TYPE_TRACING, -}; - -enum bpf_attach_type { - BPF_CGROUP_INET_INGRESS, - BPF_CGROUP_INET_EGRESS, - BPF_CGROUP_INET_SOCK_CREATE, - BPF_CGROUP_SOCK_OPS, - BPF_SK_SKB_STREAM_PARSER, - BPF_SK_SKB_STREAM_VERDICT, - BPF_CGROUP_DEVICE, - BPF_SK_MSG_VERDICT, - BPF_CGROUP_INET4_BIND, - BPF_CGROUP_INET6_BIND, - BPF_CGROUP_INET4_CONNECT, - BPF_CGROUP_INET6_CONNECT, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_UDP4_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - BPF_LIRC_MODE2, - BPF_FLOW_DISSECTOR, - BPF_CGROUP_SYSCTL, - BPF_CGROUP_UDP4_RECVMSG, - BPF_CGROUP_UDP6_RECVMSG, - BPF_CGROUP_GETSOCKOPT, - BPF_CGROUP_SETSOCKOPT, - BPF_TRACE_RAW_TP, - BPF_TRACE_FENTRY, - BPF_TRACE_FEXIT, - __MAX_BPF_ATTACH_TYPE -}; - -#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE - -/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command - * - * NONE(default): No further bpf programs allowed in the subtree. - * - * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, - * the program in this cgroup yields to sub-cgroup program. 
- * - * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, - * that cgroup program gets run in addition to the program in this cgroup. - * - * Only one program is allowed to be attached to a cgroup with - * NONE or BPF_F_ALLOW_OVERRIDE flag. - * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will - * release the old program and attach the new one. Attach flags have to match. - * - * Multiple programs are allowed to be attached to a cgroup with - * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order - * (those that were attached first run first). - * The programs of the sub-cgroup are executed first, then the programs of - * this cgroup, and then the programs of the parent cgroup. - * When a child program makes a decision (like picking a TCP CA or sock bind), - * the parent program has a chance to override it. - * - * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. - * A cgroup with NONE doesn't allow any programs in sub-cgroups. - * Ex1: - * cgrp1 (MULTI progs A, B) -> - * cgrp2 (OVERRIDE prog C) -> - * cgrp3 (MULTI prog D) -> - * cgrp4 (OVERRIDE prog E) -> - * cgrp5 (NONE prog F) - * the event in cgrp5 triggers execution of F,D,A,B in that order. - * if prog F is detached, the execution is E,D,A,B - * if prog F and D are detached, the execution is E,A,B - * if prog F, E and D are detached, the execution is C,A,B - * - * All eligible programs are executed regardless of return code from - * earlier programs. - */ -#define BPF_F_ALLOW_OVERRIDE (1U << 0) -#define BPF_F_ALLOW_MULTI (1U << 1) - -/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the - * verifier will perform strict alignment checking as if the kernel - * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, - * and NET_IP_ALIGN defined to 2. - */ -#define BPF_F_STRICT_ALIGNMENT (1U << 0) - -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the - * verifier will allow any alignment whatsoever. On platforms - * with strict alignment requirements for loads and stores (such - * as sparc and mips) the verifier validates that all loads and - * stores provably follow this requirement. This flag turns that - * checking and enforcement off. - * - * It is mostly used for testing when we want to validate the - * context and memory access aspects of the verifier, but because - * of an unaligned access the alignment check would trigger before - * the one we are interested in. - */ -#define BPF_F_ANY_ALIGNMENT (1U << 1) - -/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purposes. - * Verifier does sub-register def/use analysis and identifies instructions whose - * def only matters for low 32-bit, high 32-bit is never referenced later - * through implicit zero extension. Therefore verifier notifies JIT back-ends - * that it is safe to ignore clearing high 32-bit for these instructions. This - * saves some back-ends a lot of code-gen. However, such optimization is not - * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends - * hence haven't used the verifier's analysis result. But, we really want to have a - * way to be able to verify the correctness of the described optimization on - * x86_64 on which testsuites are frequently exercised. - * - * So, this flag is introduced. Once it is set, the verifier will randomize the high - * 32 bits for those instructions that have been identified as safe to ignore. - * Then, if the verifier is not doing correct analysis, such randomization will - * regress tests to expose bugs.
- */ -#define BPF_F_TEST_RND_HI32 (1U << 2) - -/* The verifier internal test flag. Behavior is undefined */ -#define BPF_F_TEST_STATE_FREQ (1U << 3) - -/* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE - */ -#define BPF_PSEUDO_MAP_FD 1 -#define BPF_PSEUDO_MAP_VALUE 2 - -/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative - * offset to another bpf function - */ -#define BPF_PSEUDO_CALL 1 - -/* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ - -/* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) -/* Instead of having one common LRU list in the - * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list - * which can scale and perform better. - * Note, the LRU nodes (including free nodes) cannot be moved - * across different LRU lists. - */ -#define BPF_F_NO_COMMON_LRU (1U << 1) -/* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U - -/* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) - -/* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) - -/* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) - -/* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) - -/* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) - -/* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) - -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - -enum bpf_stack_build_id_status { - /* user space need an empty entry to identify end of a trace */ - BPF_STACK_BUILD_ID_EMPTY = 0, - /* with valid build_id and offset */ - BPF_STACK_BUILD_ID_VALID = 1, - /* couldn't get build_id, fallback to ip */ - BPF_STACK_BUILD_ID_IP = 2, -}; - -#define BPF_BUILD_ID_SIZE 20 -struct bpf_stack_build_id { - __s32 status; - unsigned char build_id[BPF_BUILD_ID_SIZE]; - union { - __u64 offset; - __u64 ip; - }; -}; - -union bpf_attr { - struct { /* anonymous struct used by BPF_MAP_CREATE command */ - __u32 map_type; /* one of enum bpf_map_type */ - __u32 key_size; /* size of key in bytes */ - __u32 value_size; /* size of value in bytes */ - __u32 max_entries; /* max number of entries in a map */ - __u32 map_flags; /* BPF_MAP_CREATE related - * flags defined above. - */ - __u32 inner_map_fd; /* fd pointing to the inner map */ - __u32 numa_node; /* numa node (effective only if - * BPF_F_NUMA_NODE is set). 
- */ - char map_name[BPF_OBJ_NAME_LEN]; - __u32 map_ifindex; /* ifindex of netdev to create on */ - __u32 btf_fd; /* fd pointing to a BTF type data */ - __u32 btf_key_type_id; /* BTF type_id of the key */ - __u32 btf_value_type_id; /* BTF type_id of the value */ - }; - - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ - __u32 map_fd; - __aligned_u64 key; - union { - __aligned_u64 value; - __aligned_u64 next_key; - }; - __u64 flags; - }; - - struct { /* anonymous struct used by BPF_PROG_LOAD command */ - __u32 prog_type; /* one of enum bpf_prog_type */ - __u32 insn_cnt; - __aligned_u64 insns; - __aligned_u64 license; - __u32 log_level; /* verbosity level of verifier */ - __u32 log_size; /* size of user buffer */ - __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* not used */ - __u32 prog_flags; - char prog_name[BPF_OBJ_NAME_LEN]; - __u32 prog_ifindex; /* ifindex of netdev to prep for */ - /* For some prog types expected attach type must be known at - * load time to verify attach type specific parts of prog - * (context accesses, allowed helpers, etc). - */ - __u32 expected_attach_type; - __u32 prog_btf_fd; /* fd pointing to BTF type data */ - __u32 func_info_rec_size; /* userspace bpf_func_info size */ - __aligned_u64 func_info; /* func info */ - __u32 func_info_cnt; /* number of bpf_func_info records */ - __u32 line_info_rec_size; /* userspace bpf_line_info size */ - __aligned_u64 line_info; /* line info */ - __u32 line_info_cnt; /* number of bpf_line_info records */ - __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ - }; - - struct { /* anonymous struct used by BPF_OBJ_* commands */ - __aligned_u64 pathname; - __u32 bpf_fd; - __u32 file_flags; - }; - - struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ - __u32 target_fd; /* container object to attach to */ - __u32 attach_bpf_fd; /* eBPF program to attach */ - __u32 attach_type; - __u32 attach_flags; - }; - - struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ - __u32 prog_fd; - __u32 retval; - __u32 data_size_in; /* input: len of data_in */ - __u32 data_size_out; /* input/output: len of data_out - * returns ENOSPC if data_out - * is too small. - */ - __aligned_u64 data_in; - __aligned_u64 data_out; - __u32 repeat; - __u32 duration; - __u32 ctx_size_in; /* input: len of ctx_in */ - __u32 ctx_size_out; /* input/output: len of ctx_out - * returns ENOSPC if ctx_out - * is too small. 
- */ - __aligned_u64 ctx_in; - __aligned_u64 ctx_out; - } test; - - struct { /* anonymous struct used by BPF_*_GET_*_ID */ - union { - __u32 start_id; - __u32 prog_id; - __u32 map_id; - __u32 btf_id; - }; - __u32 next_id; - __u32 open_flags; - }; - - struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ - __u32 bpf_fd; - __u32 info_len; - __aligned_u64 info; - } info; - - struct { /* anonymous struct used by BPF_PROG_QUERY command */ - __u32 target_fd; /* container object to query */ - __u32 attach_type; - __u32 query_flags; - __u32 attach_flags; - __aligned_u64 prog_ids; - __u32 prog_cnt; - } query; - - struct { - __u64 name; - __u32 prog_fd; - } raw_tracepoint; - - struct { /* anonymous struct for BPF_BTF_LOAD */ - __aligned_u64 btf; - __aligned_u64 btf_log_buf; - __u32 btf_size; - __u32 btf_log_size; - __u32 btf_log_level; - }; - - struct { - __u32 pid; /* input: pid */ - __u32 fd; /* input: fd */ - __u32 flags; /* input: flags */ - __u32 buf_len; /* input/output: buf len */ - __aligned_u64 buf; /* input/output: - * tp_name for tracepoint - * symbol for kprobe - * filename for uprobe - */ - __u32 prog_id; /* output: prog_id */ - __u32 fd_type; /* output: BPF_FD_TYPE_* */ - __u64 probe_offset; /* output: probe_offset */ - __u64 probe_addr; /* output: probe_addr */ - } task_fd_query; -} __attribute__((aligned(8))); - -/* The description below is an attempt at providing documentation to eBPF - * developers about the multiple available eBPF helper functions. It can be - * parsed and used to produce a manual page. The workflow is the following, - * and requires the rst2man utility: - * - * $ ./scripts/bpf_helpers_doc.py \ - * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst - * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 - * $ man /tmp/bpf-helpers.7 - * - * Note that in order to produce this external documentation, some RST - * formatting is used in the descriptions to get "bold" and "italics" in - * manual pages. Also note that the few trailing white spaces are - * intentional; removing them would break paragraphs for rst2man. - * - * Start of BPF helper function descriptions: - * - * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) - * Description - * Perform a lookup in *map* for an entry associated to *key*. - * Return - * Map value associated to *key*, or **NULL** if no entry was - * found. - * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) - * Description - * Add or update the value of the entry associated to *key* in - * *map* with *value*. *flags* is one of: - * - * **BPF_NOEXIST** - * The entry for *key* must not exist in the map. - * **BPF_EXIST** - * The entry for *key* must already exist in the map. - * **BPF_ANY** - * No condition on the existence of the entry for *key*. - * - * Flag value **BPF_NOEXIST** cannot be used for maps of types - * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all - * elements always exist); the helper would return an error. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) - * Description - * Delete entry with *key* from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) - * Description - * For tracing programs, safely attempt to read *size* bytes from - * kernel space address *unsafe_ptr* and store the data in *dst*.
- * - * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() - * instead. - * Return - * 0 on success, or a negative error in case of failure. - * - * u64 bpf_ktime_get_ns(void) - * Description - * Return the time elapsed since system boot, in nanoseconds. - * Return - * Current *ktime*. - * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) - * Description - * This helper is a "printk()-like" facility for debugging. It - * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if - * available. It can take up to three additional **u64** - * arguments (as an eBPF helper, the total number of arguments is - * limited to five). - * - * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open; use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. - * The format of the trace is customizable, and the exact output - * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the - * *README* file under the same directory). However, it usually - * defaults to something like: - * - * :: - * - * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> - * - * In the above: - * - * * ``telnet`` is the name of the current task. - * * ``470`` is the PID of the current task. - * * ``001`` is the CPU number on which the task is - * running. - * * In ``.N..``, each character refers to a set of - * options (whether irqs are enabled, scheduling - * options, whether hard/softirqs are running, level of - * preempt_disabled respectively). **N** means that - * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** - * are set. - * * ``419421.045894`` is a timestamp. - * * ``0x00000001`` is a fake value used by BPF for the - * instruction pointer register. - * * ``<formatted msg>`` is the message formatted with - * *fmt*. - * - * The conversion specifiers supported by *fmt* are similar to, but - * more limited than, those of printk(). They are **%d**, **%i**, - * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, - * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size - * of field, padding with zeroes, etc.) is available, and the - * helper will return **-EINVAL** (but print nothing) if it - * encounters an unknown specifier. - * - * Also, note that **bpf_trace_printk**\ () is slow, and should - * only be used for debugging purposes. For this reason, a notice - * block (spanning several lines) is printed to kernel logs and - * states that the helper should not be used "for production use" - * the first time this helper is used (or more precisely, when - * **trace_printk**\ () buffers are allocated). For passing values - * to user space, perf events should be preferred. - * Return - * The number of bytes written to the buffer, or a negative error - * in case of failure. - * - * u32 bpf_get_prandom_u32(void) - * Description - * Get a pseudo-random number. - * - * From a security point of view, this helper uses its own - * pseudo-random internal state, and cannot be used to infer the - * seed of other random functions in the kernel. However, it is - * essential to note that the generator used by the helper is not - * cryptographically secure. - * Return - * A random 32-bit unsigned value. - * - * u32 bpf_get_smp_processor_id(void) - * Description - * Get the SMP (symmetric multiprocessing) processor id.
Note that - * all programs run with preemption disabled, which means that the - * SMP processor id is stable throughout the execution of the - * program. - * Return - * The SMP id of the processor running the program. - * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) - * Description - * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) - * Description - * Recompute the layer 3 (e.g. IP) checksum for the packet - * associated to *skb*. Computation is incremental, so the helper - * must know the former value of the header field that was - * modified (*from*), the new value of this field (*to*), and the - * number of bytes (2 or 4) for this field, stored in *size*. - * Alternatively, it is possible to store the difference between - * the previous and the new values of the header field in *to*, by - * setting *from* and *size* to 0. For both methods, *offset* - * indicates the location of the IP checksum within the packet. - * - * This helper works in combination with **bpf_csum_diff**\ (), - * which does not update the checksum in-place, but offers more - * flexibility and can handle sizes larger than 2 or 4 for the - * checksum to update. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) - * Description - * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the - * packet associated to *skb*. Computation is incremental, so the - * helper must know the former value of the header field that was - * modified (*from*), the new value of this field (*to*), and the - * number of bytes (2 or 4) for this field, stored in the lowest - * four bits of *flags*. Alternatively, it is possible to store - * the difference between the previous and the new values of the - * header field in *to*, by setting *from* and the four lowest - * bits of *flags* to 0. For both methods, *offset* indicates the - * location of the IP checksum within the packet. In addition to - * the size of the field, actual flags can be added to *flags* - * (with a bitwise OR). With **BPF_F_MARK_MANGLED_0**, a null checksum is left - * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and - * for updates resulting in a null checksum the value is set to - * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header.
- * - * This helper works in combination with **bpf_csum_diff**\ (), - * which does not update the checksum in-place, but offers more - * flexibility and can handle sizes larger than 2 or 4 for the - * checksum to update. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) - * Description - * This special helper is used to trigger a "tail call", or in - * other words, to jump into another eBPF program. The same stack - * frame is used (but values on stack and in registers for the - * caller are not accessible to the callee). This mechanism allows - * for program chaining, either to raise the maximum number of - * available eBPF instructions, or to execute given programs in - * conditional blocks. For security reasons, there is an upper - * limit to the number of successive tail calls that can be - * performed. - * - * Upon call of this helper, the program attempts to jump into a - * program referenced at index *index* in *prog_array_map*, a - * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes - * *ctx*, a pointer to the context. - * - * If the call succeeds, the kernel immediately runs the first - * instruction of the new program. This is not a function call, - * and it never returns to the previous program. If the call - * fails, then the helper has no effect, and the caller continues - * to run its subsequent instructions. A call can fail if the - * destination program for the jump does not exist (i.e. *index* - * is greater than or equal to the number of entries in *prog_array_map*), or - * if the maximum number of tail calls has been reached for this - * chain of programs. This limit is defined in the kernel by the - * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) - * Description - * Clone and redirect the packet associated to *skb* to another - * net device of index *ifindex*. Both ingress and egress - * interfaces can be used for redirection. The **BPF_F_INGRESS** - * value in *flags* is used to make the distinction (ingress path - * is selected if the flag is present, egress path otherwise). - * This is the only flag supported for now. - * - * In comparison with the **bpf_redirect**\ () helper, - * **bpf_clone_redirect**\ () has the associated cost of - * duplicating the packet buffer, but this can be executed out of - * the eBPF program. Conversely, **bpf_redirect**\ () is more - * efficient, but it is handled through an action code where the - * redirection happens only after the eBPF program has returned. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure.
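The tail-call contract described above is easier to see in code than in prose. A minimal dispatcher sketch, not part of the removed header: it assumes clang's BPF target and the bpf_helpers.h conventions of a BTF-capable libbpf (the jmp_table and dispatcher names are made up)::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
            __uint(key_size, sizeof(__u32));
            __uint(value_size, sizeof(__u32));
            __uint(max_entries, 4);
    } jmp_table SEC(".maps");

    SEC("xdp")
    int dispatcher(struct xdp_md *ctx)
    {
            /* On success this never returns to the caller... */
            bpf_tail_call(ctx, &jmp_table, 0);
            /* ...so this line runs only if the tail call failed. */
            return XDP_PASS;
    }

User space is expected to populate slot 0 of jmp_table with another program's fd before traffic arrives; otherwise the call fails and the fallthrough return takes effect.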
- * - * u64 bpf_get_current_pid_tgid(void) - * Return - * A 64-bit integer containing the current tgid and pid, and - * created as such: - * *current_task*\ **->tgid << 32 \|** - * *current_task*\ **->pid**. - * - * u64 bpf_get_current_uid_gid(void) - * Return - * A 64-bit integer containing the current GID and UID, and - * created as such: *current_gid* **<< 32 \|** *current_uid*. - * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) - * Description - * Copy the **comm** attribute of the current task into *buf* of - * *size_of_buf*. The **comm** attribute contains the name of - * the executable (excluding the path) for the current task. The - * *size_of_buf* must be strictly positive. On success, the - * helper makes sure that the *buf* is NUL-terminated. On failure, - * it is filled with zeroes. - * Return - * 0 on success, or a negative error in case of failure. - * - * u32 bpf_get_cgroup_classid(struct sk_buff *skb) - * Description - * Retrieve the classid for the current task, i.e. for the net_cls - * cgroup to which *skb* belongs. - * - * This helper can be used on the TC egress path, but not on ingress. - * - * The net_cls cgroup provides an interface to tag network packets - * based on a user-provided identifier for all traffic coming from - * the tasks belonging to the related cgroup. See also the related - * kernel documentation, available from the Linux sources in file - * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. - * - * The Linux kernel has two versions for cgroups: there are - * cgroups v1 and cgroups v2. Both are available to users, who can - * use a mixture of them, but note that the net_cls cgroup is for - * cgroup v1 only. This makes it incompatible with BPF programs - * run on cgroups, which is a cgroup-v2-only feature (a socket can - * only hold data for one version of cgroups at a time). - * - * This helper is only available if the kernel was compiled with - * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to - * "**y**" or to "**m**". - * Return - * The classid, or 0 for the default unconfigured classid. - * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) - * Description - * Push a *vlan_tci* (VLAN tag control information) of protocol - * *vlan_proto* to the packet associated to *skb*, then update - * the checksum. Note that if *vlan_proto* is different from - * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to - * be **ETH_P_8021Q**. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_vlan_pop(struct sk_buff *skb) - * Description - * Pop a VLAN header from the packet associated to *skb*. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) - * Description - * Get tunnel metadata.
This helper takes a pointer *key* to an - * empty **struct bpf_tunnel_key** of **size**, that will be - * filled with tunnel metadata for the packet associated to *skb*. - * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which - * indicates that the tunnel is based on IPv6 protocol instead of - * IPv4. - * - * The **struct bpf_tunnel_key** is an object that generalizes the - * principal parameters used by various tunneling protocols into a - * single struct. This way, it can be used to easily make a - * decision based on the contents of the encapsulation header, - * "summarized" in this struct. In particular, it holds the IP - * address of the remote end (IPv4 or IPv6, depending on the case) - * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, - * this struct exposes the *key*\ **->tunnel_id**, which is - * generally mapped to a VNI (Virtual Network Identifier), making - * it programmable together with the **bpf_skb_set_tunnel_key**\ - * () helper. - * - * Let's imagine that the following code is part of a program - * attached to the TC ingress interface, on one end of a GRE - * tunnel, and is supposed to filter out all messages coming from - * remote ends with IPv4 address other than 10.0.0.1: - * - * :: - * - * int ret; - * struct bpf_tunnel_key key = {}; - * - * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); - * if (ret < 0) - * return TC_ACT_SHOT; // drop packet - * - * if (key.remote_ipv4 != 0x0a000001) - * return TC_ACT_SHOT; // drop packet - * - * return TC_ACT_OK; // accept packet - * - * This interface can also be used with all encapsulation devices - * that can operate in "collect metadata" mode: instead of having - * one network device per specific configuration, the "collect - * metadata" mode only requires a single device where the - * configuration can be extracted from this helper. - * - * This can be used together with various tunnels such as VXLan, - * Geneve, GRE or IP in IP (IPIP). - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) - * Description - * Populate tunnel metadata for packet associated to *skb.* The - * tunnel metadata is set to the contents of *key*, of *size*. The - * *flags* can be set to a combination of the following values: - * - * **BPF_F_TUNINFO_IPV6** - * Indicate that the tunnel is based on IPv6 protocol - * instead of IPv4. - * **BPF_F_ZERO_CSUM_TX** - * For IPv4 packets, add a flag to tunnel metadata - * indicating that checksum computation should be skipped - * and checksum set to zeroes. - * **BPF_F_DONT_FRAGMENT** - * Add a flag to tunnel metadata indicating that the - * packet should not be fragmented. - * **BPF_F_SEQ_NUMBER** - * Add a flag to tunnel metadata indicating that a - * sequence number should be added to tunnel header before - * sending the packet. This flag was added for GRE - * encapsulation, but might be used with other protocols - * as well in the future. - * - * Here is a typical usage on the transmit path: - * - * :: - * - * struct bpf_tunnel_key key; - * populate key ... - * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); - * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); - * - * See also the description of the **bpf_skb_get_tunnel_key**\ () - * helper for additional information. - * Return - * 0 on success, or a negative error in case of failure. - * - * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) - * Description - * Read the value of a perf event counter. 
This helper relies on a - * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of - * the perf event counter is selected when *map* is updated with - * perf event file descriptors. The *map* is an array whose size - * is the number of available CPUs, and each cell contains a value - * relative to one CPU. The value to retrieve is indicated by - * *flags*, which contains the index of the CPU to look up, masked - * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to - * **BPF_F_CURRENT_CPU** to indicate that the value for the - * current CPU should be retrieved. - * - * Note that before Linux 4.13, only hardware perf events can be - * retrieved. - * - * Also, be aware that the newer helper - * **bpf_perf_event_read_value**\ () is recommended over - * **bpf_perf_event_read**\ () in general. The latter has some ABI - * quirks where error and counter value are used as a return code - * (which is wrong to do since ranges may overlap). This issue is - * fixed with **bpf_perf_event_read_value**\ (), which at the same - * time provides more features over the **bpf_perf_event_read**\ - * () interface. Please refer to the description of - * **bpf_perf_event_read_value**\ () for details. - * Return - * The value of the perf event counter read from the map, or a - * negative error code in case of failure. - * - * int bpf_redirect(u32 ifindex, u64 flags) - * Description - * Redirect the packet to another net device of index *ifindex*. - * This helper is somewhat similar to **bpf_clone_redirect**\ - * (), except that the packet is not cloned, which provides - * increased performance. - * - * Except for XDP, both ingress and egress interfaces can be used - * for redirection. The **BPF_F_INGRESS** value in *flags* is used - * to make the distinction (ingress path is selected if the flag - * is present, egress path otherwise). Currently, XDP only - * supports redirection to the egress interface, and accepts no - * flag at all. - * - * The same effect can be attained with the more generic - * **bpf_redirect_map**\ (), which requires specific maps to be - * used but offers better performance. - * Return - * For XDP, the helper returns **XDP_REDIRECT** on success or - * **XDP_ABORTED** on error. For other program types, the values - * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on - * error. - * - * u32 bpf_get_route_realm(struct sk_buff *skb) - * Description - * Retrieve the realm of the route, that is to say the - * **tclassid** field of the destination for the *skb*. The - * identifier retrieved is a user-provided tag, similar to the - * one used with the net_cls cgroup (see description for - * **bpf_get_cgroup_classid**\ () helper), but here this tag is - * held by a route (a destination entry), not by a task. - * - * Retrieving this identifier works with the clsact TC egress hook - * (see also **tc-bpf(8)**), or alternatively on conventional - * classful egress qdiscs, but not on TC ingress path. In case of - * clsact TC egress hook, this has the advantage that, internally, - * the destination entry has not been dropped yet in the transmit - * path. Therefore, the destination entry does not need to be - * artificially held via **netif_keep_dst**\ () for a classful - * qdisc until the *skb* is freed. - * - * This helper is available only if the kernel was compiled with - * the **CONFIG_IP_ROUTE_CLASSID** configuration option. - * Return - * The realm of the route for the packet associated to *skb*, or 0 - * if none was found.
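For XDP, the bpf_redirect() contract above amounts to returning the helper's result. A minimal sketch, not from the header, with 2 as an assumed target ifindex::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int xdp_to_port2(struct xdp_md *ctx)
    {
            /* Egress only under XDP; 0 is the only accepted flags value.
             * Yields XDP_REDIRECT on success, XDP_ABORTED on error.
             */
            return bpf_redirect(2, 0);
    }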
- * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) - * Description - * Write raw *data* blob into a special BPF perf event held by - * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf - * event must have the following attributes: **PERF_SAMPLE_RAW** - * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and - * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. - * - * The *flags* are used to indicate the index in *map* for which - * the value must be put, masked with **BPF_F_INDEX_MASK**. - * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** - * to indicate that the index of the current CPU core should be - * used. - * - * The value to write, of *size*, is passed through the eBPF stack and - * pointed to by *data*. - * - * The context of the program, *ctx*, also needs to be passed to the - * helper. - * - * In user space, a program willing to read the values needs to - * call **perf_event_open**\ () on the perf event (either for - * one or for all CPUs) and to store the file descriptor into the - * *map*. This must be done before the eBPF program can send data - * into it. An example is available in file - * *samples/bpf/trace_output_user.c* in the Linux kernel source - * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). - * - * **bpf_perf_event_output**\ () achieves better performance - * than **bpf_trace_printk**\ () for sharing data with user - * space, and is much better suited to streaming data from eBPF - * programs. - * - * Note that this helper is not restricted to tracing use cases - * and can be used with programs attached to TC or XDP as well, - * where it allows for passing data to user space listeners. Data - * can be: - * - * * Only custom structs, - * * Only the packet payload, or - * * A combination of both. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) - * Description - * This helper was provided as an easy way to load data from a - * packet. It can be used to load *len* bytes from *offset* from - * the packet associated to *skb*, into the buffer pointed by - * *to*. - * - * Since Linux 4.7, usage of this helper has mostly been replaced - * by "direct packet access", enabling packet data to be - * manipulated with *skb*\ **->data** and *skb*\ **->data_end** - * pointing respectively to the first byte of packet data and to - * the byte after the last byte of packet data. However, it - * remains useful if one wishes to read large quantities of data - * at once from a packet into the eBPF stack. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) - * Description - * Walk a user or a kernel stack and return its id. To achieve - * this, the helper needs *ctx*, which is a pointer to the context - * on which the tracing program is executed, and a pointer to a - * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. - * - * The last argument, *flags*, holds the number of stack frames to - * skip (from 0 to 255), masked with - * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set - * a combination of the following flags: - * - * **BPF_F_USER_STACK** - * Collect a user space stack instead of a kernel stack. - * **BPF_F_FAST_STACK_CMP** - * Compare stacks by hash only. - * **BPF_F_REUSE_STACKID** - * If two different stacks hash into the same *stackid*, - * discard the old one.
- * - * The stack id retrieved is a 32 bit long integer handle which - * can be further combined with other data (including other stack - * ids) and used as a key into maps. This can be useful for - * generating a variety of graphs (such as flame graphs or off-cpu - * graphs). - * - * For walking a stack, this helper is an improvement over - * **bpf_probe_read**\ (), which can be used with unrolled loops - * but is not efficient and consumes a lot of eBPF instructions. - * Instead, **bpf_get_stackid**\ () can collect up to - * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that - * this limit can be controlled with the **sysctl** program, and - * that it should be manually increased in order to profile long - * user stacks (such as stacks for Java programs). To do so, use: - * - * :: - * - * # sysctl kernel.perf_event_max_stack=<new value> - * Return - * The positive or null stack id on success, or a negative error - * in case of failure. - * - * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) - * Description - * Compute a checksum difference, from the raw buffer pointed by - * *from*, of length *from_size* (that must be a multiple of 4), - * towards the raw buffer pointed by *to*, of size *to_size* - * (same remark). An optional *seed* can be added to the value - * (this can be cascaded, the seed may come from a previous call - * to the helper). - * - * This is flexible enough to be used in several ways: - * - * * With *from_size* == 0, *to_size* > 0 and *seed* set to - * checksum, it can be used when pushing new data. - * * With *from_size* > 0, *to_size* == 0 and *seed* set to - * checksum, it can be used when removing data from a packet. - * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it - * can be used to compute a diff. Note that *from_size* and - * *to_size* do not need to be equal. - * - * This helper can be used in combination with - * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to - * which one can feed in the difference computed with - * **bpf_csum_diff**\ (). - * Return - * The checksum result, or a negative error code in case of - * failure. - * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) - * Description - * Retrieve tunnel options metadata for the packet associated to - * *skb*, and store the raw tunnel option data to the buffer *opt* - * of *size*. - * - * This helper can be used with encapsulation devices that can - * operate in "collect metadata" mode (please refer to the related - * note in the description of **bpf_skb_get_tunnel_key**\ () for - * more details). A particular example where this can be used is - * in combination with the Geneve encapsulation protocol, where it - * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) - * and retrieving arbitrary TLVs (Type-Length-Value headers) from - * the eBPF program. This allows for full customization of these - * headers. - * Return - * The size of the option data retrieved. - * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) - * Description - * Set tunnel options metadata for the packet associated to *skb* - * to the option data contained in the raw buffer *opt* of *size*. - * - * See also the description of the **bpf_skb_get_tunnel_opt**\ () - * helper for additional information. - * Return - * 0 on success, or a negative error in case of failure. 
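To make the bpf_get_stackid() flow described above concrete, here is a sketch (not part of the header) of a kprobe program feeding a BPF_MAP_TYPE_STACK_TRACE map; the probe target and the map sizing are assumptions::

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include <linux/perf_event.h>   /* PERF_MAX_STACK_DEPTH */
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_STACK_TRACE);
            __uint(key_size, sizeof(__u32));
            __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(__u64));
            __uint(max_entries, 1024);
    } stack_traces SEC(".maps");

    SEC("kprobe/do_sys_open")
    int collect_stack(struct pt_regs *ctx)
    {
            /* Walk the user-space stack; the returned id can then serve
             * as a key into other maps (flame graphs, off-cpu graphs).
             */
            long id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK);

            if (id < 0)
                    return 0;       /* stack walk failed; nothing to record */
            /* ...update a counting map keyed by id here... */
            return 0;
    }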
- * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) - * Description - * Change the protocol of the *skb* to *proto*. Currently - * supported are transition from IPv4 to IPv6, and from IPv6 to - * IPv4. The helper takes care of the groundwork for the - * transition, including resizing the socket buffer. The eBPF - * program is expected to fill the new headers, if any, via - * **skb_store_bytes**\ () and to recompute the checksums with - * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ - * (). The main case for this helper is to perform NAT64 - * operations out of an eBPF program. - * - * Internally, the GSO type is marked as dodgy so that headers are - * checked and segments are recalculated by the GSO/GRO engine. - * The size for GSO target is adapted as well. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) - * Description - * Change the packet type for the packet associated to *skb*. This - * comes down to setting *skb*\ **->pkt_type** to *type*, except - * the eBPF program does not have a write access to *skb*\ - * **->pkt_type** beside this helper. Using a helper here allows - * for graceful handling of errors. - * - * The major use case is to change incoming *skb*s to - * **PACKET_HOST** in a programmatic way instead of having to - * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for - * example. - * - * Note that *type* only allows certain values. At this time, they - * are: - * - * **PACKET_HOST** - * Packet is for us. - * **PACKET_BROADCAST** - * Send packet to all. - * **PACKET_MULTICAST** - * Send packet to group. - * **PACKET_OTHERHOST** - * Send packet to someone else. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) - * Description - * Check whether *skb* is a descendant of the cgroup2 held by - * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. - * Return - * The return value depends on the result of the test, and can be: - * - * * 0, if the *skb* failed the cgroup2 descendant test. - * * 1, if the *skb* succeeded the cgroup2 descendant test. - * * A negative error code, if an error occurred. - * - * u32 bpf_get_hash_recalc(struct sk_buff *skb) - * Description - * Retrieve the hash of the packet, *skb*\ **->hash**. If it is - * not set, in particular if the hash was cleared due to mangling, - * recompute this hash. Later accesses to the hash can be done - * directly with *skb*\ **->hash**. - * - * Calling **bpf_set_hash_invalid**\ (), changing a packet - * prototype with **bpf_skb_change_proto**\ (), or calling - * **bpf_skb_store_bytes**\ () with the - * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear - * the hash and to trigger a new computation for the next call to - * **bpf_get_hash_recalc**\ (). - * Return - * The 32-bit hash. - * - * u64 bpf_get_current_task(void) - * Return - * A pointer to the current task struct. 
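As a concrete use of the bpf_skb_change_type() helper described above, a TC classifier sketch (again not from the header; the section name follows common libbpf usage)::

    #include <linux/bpf.h>
    #include <linux/if_packet.h>    /* PACKET_HOST */
    #include <linux/pkt_cls.h>      /* TC_ACT_OK */
    #include <bpf/bpf_helpers.h>

    SEC("classifier")
    int mark_as_host(struct __sk_buff *skb)
    {
            /* Mark the frame as destined to the local host instead of
             * recirculating it via bpf_redirect(..., BPF_F_INGRESS).
             */
            bpf_skb_change_type(skb, PACKET_HOST);
            return TC_ACT_OK;
    }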
- * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) - * Description - * Attempt in a safe way to write *len* bytes from the buffer - * *src* to *dst* in memory. It only works for threads that are in - * user context, and *dst* must be a valid user space address. - * - * This helper should not be used to implement any kind of - * security mechanism because of TOC-TOU attacks, but rather to - * debug, divert, and manipulate execution of semi-cooperative - * processes. - * - * Keep in mind that this feature is meant for experiments, and it - * has a risk of crashing the system and running programs. - * Therefore, when an eBPF program using this helper is attached, - * a warning including PID and process name is printed to kernel - * logs. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) - * Description - * Check whether the probe is being run in the context of a given - * subset of the cgroup2 hierarchy. The cgroup2 to test is held by - * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. - * Return - * The return value depends on the result of the test, and can be: - * - * * 1, if the current task belongs to the cgroup2. - * * 0, if the current task does not belong to the cgroup2. - * * A negative error code, if an error occurred. - * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) - * Description - * Resize (trim or grow) the packet associated to *skb* to the - * new *len*. The *flags* are reserved for future usage, and must - * be left at zero. - * - * The basic idea is that the helper performs the needed work to - * change the size of the packet, then the eBPF program rewrites - * the rest via helpers like **bpf_skb_store_bytes**\ (), - * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ () - * and others. This helper is a slow path utility intended for - * replies with control messages. And because it is targeted for - * slow path, the helper itself can afford to be slow: it - * implicitly linearizes, unclones and drops offloads from the - * *skb*. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) - * Description - * Pull in non-linear data in case the *skb* is non-linear and not - * all of *len* are part of the linear section. Make *len* bytes - * from *skb* readable and writable. If a zero value is passed for - * *len*, then the whole length of the *skb* is pulled. - * - * This helper is only needed for reading and writing with direct - * packet access. - * - * For direct packet access, testing that offsets to access - * are within packet boundaries (test on *skb*\ **->data_end**) is - * susceptible to fail if offsets are invalid, or if the requested - * data is in non-linear parts of the *skb*. On failure the - * program can just bail out, or in the case of a non-linear - * buffer, use a helper to make the data available. The - * **bpf_skb_load_bytes**\ () helper is a first solution to access - * the data. Another one consists in using **bpf_skb_pull_data** - * to pull in the non-linear parts once, then retesting and - * eventually accessing the data.
- * - * At the same time, this also makes sure the *skb* is uncloned, - * which is a necessary condition for direct write. As this needs - * to be an invariant for the write part only, the verifier - * detects writes and adds a prologue that is calling - * **bpf_skb_pull_data()** to effectively unclone the *skb* from - * the very beginning in case it is indeed cloned. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) - * Description - * Add the checksum *csum* into *skb*\ **->csum** in case the - * driver has supplied a checksum for the entire packet into that - * field. Return an error otherwise. This helper is intended to be - * used in combination with **bpf_csum_diff**\ (), in particular - * when the checksum needs to be updated after data has been - * written into the packet through direct packet access. - * Return - * The checksum on success, or a negative error code in case of - * failure. - * - * void bpf_set_hash_invalid(struct sk_buff *skb) - * Description - * Invalidate the current *skb*\ **->hash**. It can be used after - * mangling on headers through direct packet access, in order to - * indicate that the hash is outdated and to trigger a - * recalculation the next time the kernel tries to access this - * hash or when the **bpf_get_hash_recalc**\ () helper is called. - * - * int bpf_get_numa_node_id(void) - * Description - * Return the id of the current NUMA node. The primary use case - * for this helper is the selection of sockets for the local NUMA - * node, when the program is attached to sockets using the - * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), - * but the helper is also available to other eBPF program types, - * similarly to **bpf_get_smp_processor_id**\ (). - * Return - * The id of current NUMA node. - * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) - * Description - * Grows headroom of packet associated to *skb* and adjusts the - * offset of the MAC header accordingly, adding *len* bytes of - * space. It automatically extends and reallocates memory as - * required. - * - * This helper can be used on a layer 3 *skb* to push a MAC header - * for redirection into a layer 2 device. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) - * Description - * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that - * it is possible to use a negative value for *delta*. This helper - * can be used to prepare the packet for pushing or popping - * headers. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. 
Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) - * Description - * Copy a NUL-terminated string from an unsafe kernel address - * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for - * more details. - * - * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str() - * instead. - * Return - * On success, the strictly positive length of the string, - * including the trailing NUL character. On error, a negative - * value. - * - * u64 bpf_get_socket_cookie(struct sk_buff *skb) - * Description - * If the **struct sk_buff** pointed by *skb* has a known socket, - * retrieve the cookie (generated by the kernel) of this socket. - * If no cookie has been set yet, generate a new cookie. Once - * generated, the socket cookie remains stable for the life of the - * socket. This helper can be useful for monitoring per-socket - * networking traffic statistics as it provides a global socket - * identifier that can be assumed unique. - * Return - * An 8-byte long non-decreasing number on success, or 0 if the - * socket field is missing inside *skb*. - * - * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) - * Description - * Equivalent to the bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** context. - * Return - * An 8-byte long non-decreasing number. - * - * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) - * Description - * Equivalent to the bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** context. - * Return - * An 8-byte long non-decreasing number. - * - * u32 bpf_get_socket_uid(struct sk_buff *skb) - * Return - * The owner UID of the socket associated to *skb*. If the socket - * is **NULL**, or if it is not a full socket (i.e. if it is a - * time-wait or a request socket instead), the **overflowuid** value - * is returned (note that **overflowuid** might also be the actual - * UID value for the socket). - * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) - * Description - * Set the full hash for *skb* (set the field *skb*\ **->hash**) - * to value *hash*. - * Return - * 0 - * - * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) - * Description - * Emulate a call to **setsockopt()** on the socket associated to - * *bpf_socket*, which must be a full socket. The *level* at - * which the option resides and the name *optname* of the option - * must be specified, see **setsockopt(2)** for more information. - * The option value of length *optlen* is pointed by *optval*. - * - * This helper actually implements a subset of **setsockopt()**. - * It supports the following *level*\ s: - * - * * **SOL_SOCKET**, which supports the following *optname*\ s: - * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, - * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. - * * **IPPROTO_TCP**, which supports the following *optname*\ s: - * **TCP_CONGESTION**, **TCP_BPF_IW**, - * **TCP_BPF_SNDCWND_CLAMP**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. - * Return - * 0 on success, or a negative error in case of failure.
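The bpf_setsockopt() subset above is typically exercised from a sock_ops program. A sketch, not from the header; "bbr" is an assumed congestion control module, and TCP_CONGESTION is defined locally because <netinet/tcp.h> may not be usable when targeting BPF::

    #include <linux/bpf.h>
    #include <linux/in.h>           /* IPPROTO_TCP */
    #include <bpf/bpf_helpers.h>

    #ifndef TCP_CONGESTION
    #define TCP_CONGESTION 13       /* value from <netinet/tcp.h> */
    #endif

    SEC("sockops")
    int set_cc(struct bpf_sock_ops *skops)
    {
            char cc[] = "bbr";      /* assumed to be loaded on the host */

            if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
                skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
                    bpf_setsockopt(skops, IPPROTO_TCP, TCP_CONGESTION,
                                   cc, sizeof(cc));
            return 1;
    }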
- * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) - * Description - * Grow or shrink the room for data in the packet associated to - * *skb* by *len_diff*, and according to the selected *mode*. - * - * There are two supported modes at this time: - * - * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). - * - * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). - * - * The following flags are supported at this time: - * - * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. - * Adjusting mss in this way is not allowed for datagrams. - * - * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, - * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: - * Any new space is reserved to hold a tunnel header. - * Configure skb offsets and other fields accordingly. - * - * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, - * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: - * Use with ENCAP_L3 flags to further specify the tunnel type. - * - * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): - * Use with ENCAP_L3/L4 flags to further specify the tunnel - * type; *len* is the length of the inner MAC header. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) - * Description - * Redirect the packet to the endpoint referenced by *map* at - * index *key*. Depending on its type, this *map* can contain - * references to net devices (for forwarding packets through other - * ports), or to CPUs (for redirecting XDP frames to another CPU; - * but this is only implemented for native XDP (with driver - * support) as of this writing). - * - * The lower two bits of *flags* are used as the return code if - * the map lookup fails. This is so that the return value can be - * one of the XDP program return codes up to XDP_TX, as chosen by - * the caller. Any higher bits in the *flags* argument must be - * unset. - * - * When used to redirect packets to net devices, this helper - * provides a high performance increase over **bpf_redirect**\ (). - * This is due to various implementation details of the underlying - * mechanisms, one of which is the fact that **bpf_redirect_map**\ - * () tries to send packet as a "bulk" to the device. - * Return - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. - * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) - * Description - * Redirect the packet to the socket referenced by *map* (of type - * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and - * egress interfaces can be used for redirection. The - * **BPF_F_INGRESS** value in *flags* is used to make the - * distinction (ingress path is selected if the flag is present, - * egress path otherwise). This is the only flag supported for now. - * Return - * **SK_PASS** on success, or **SK_DROP** on error. - * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) - * Description - * Add an entry to, or update a *map* referencing sockets. The - * *skops* is used as a new value for the entry associated to - * *key*. 
*flags* is one of: - * - * **BPF_NOEXIST** - * The entry for *key* must not exist in the map. - * **BPF_EXIST** - * The entry for *key* must already exist in the map. - * **BPF_ANY** - * No condition on the existence of the entry for *key*. - * - * If the *map* has eBPF programs (parser and verdict), those will - * be inherited by the socket being added. If the socket is - * already attached to eBPF programs, this results in an error. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) - * Description - * Adjust the address pointed by *xdp_md*\ **->data_meta** by - * *delta* (which can be positive or negative). Note that this - * operation modifies the address stored in *xdp_md*\ **->data**, - * so the latter must be loaded only after the helper has been - * called. - * - * The use of *xdp_md*\ **->data_meta** is optional and programs - * are not required to use it. The rationale is that when the - * packet is processed with XDP (e.g. as DoS filter), it is - * possible to push further meta data along with it before passing - * to the stack, and to give the guarantee that an ingress eBPF - * program attached as a TC classifier on the same device can pick - * this up for further post-processing. Since TC works with socket - * buffers, it remains possible to set from XDP the **mark** or - * **priority** pointers, or other pointers for the socket buffer. - * Having this scratch space generic and programmable allows for - * more flexibility as the user is free to store whatever meta - * data they need. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) - * Description - * Read the value of a perf event counter, and store it into *buf* - * of size *buf_size*. This helper relies on a *map* of type - * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event - * counter is selected when *map* is updated with perf event file - * descriptors. The *map* is an array whose size is the number of - * available CPUs, and each cell contains a value relative to one - * CPU. The value to retrieve is indicated by *flags*, that - * contains the index of the CPU to look up, masked with - * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to - * **BPF_F_CURRENT_CPU** to indicate that the value for the - * current CPU should be retrieved. - * - * This helper behaves in a way close to - * **bpf_perf_event_read**\ () helper, save that instead of - * just returning the value observed, it fills the *buf* - * structure. This allows for additional data to be retrieved: in - * particular, the enabled and running times (in *buf*\ - * **->enabled** and *buf*\ **->running**, respectively) are - * copied. In general, **bpf_perf_event_read_value**\ () is - * recommended over **bpf_perf_event_read**\ (), which has some - * ABI issues and provides fewer functionalities. - * - * These values are interesting, because hardware PMU (Performance - * Monitoring Unit) counters are limited resources. 
When there are - * more PMU based perf events opened than available counters, - * the kernel will multiplex these events so each event gets a certain - * percentage (but not all) of the PMU time. When such - * multiplexing happens, the number of samples or the counter value - * will not reflect the case where no multiplexing - * occurs. This makes comparison between different runs difficult. - * Typically, the counter value should be normalized before - * comparing to other experiments. The usual normalization is done - * as follows. - * - * :: - * - * normalized_counter = counter * t_enabled / t_running - * - * Where t_enabled is the time enabled for the event and t_running is - * the time running for the event since the last normalization. The - * enabled and running times are accumulated since the perf event - * open. To achieve the scaling factor between two invocations of an - * eBPF program, users can use the CPU id as the key (which is - * typical for the perf array usage model) to remember the previous - * value and do the calculation inside the eBPF program. - * Return - * 0 on success, or a negative error in case of failure.
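As a worked example of the normalization above, a tracing program can read the counter for the current CPU and scale it in place. A minimal sketch; the attach point, names and BTF-style map definition are illustrative and assume a libbpf-based build:

::

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include <bpf/bpf_helpers.h>

    struct {
        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
        __uint(key_size, sizeof(int));
        __uint(value_size, sizeof(int));
    } counters SEC(".maps");

    SEC("kprobe/sys_open")
    int read_counter(struct pt_regs *ctx)
    {
        struct bpf_perf_event_value v = {};
        __u64 normalized = 0;

        if (bpf_perf_event_read_value(&counters, BPF_F_CURRENT_CPU,
                                      &v, sizeof(v)) == 0 && v.running)
            /* normalized_counter = counter * t_enabled / t_running */
            normalized = v.counter * v.enabled / v.running;

        /* 'normalized' would then be stored or emitted, e.g. via
         * bpf_perf_event_output() */
        return 0;
    }

    char _license[] SEC("license") = "GPL";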
- * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) - * Description - * For an eBPF program attached to a perf event, retrieve the - * value of the event counter associated to *ctx* and store it in - * the structure pointed to by *buf* and of size *buf_size*. Enabled - * and running times are also stored in the structure (see - * description of helper **bpf_perf_event_read_value**\ () for - * more details). - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) - * Description - * Emulate a call to **getsockopt()** on the socket associated to - * *bpf_socket*, which must be a full socket. The *level* at - * which the option resides and the name *optname* of the option - * must be specified, see **getsockopt(2)** for more information. - * The retrieved value is stored in the structure pointed to by - * *optval* and of length *optlen*. - * - * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_override_return(struct pt_regs *regs, u64 rc) - * Description - * Used for error injection, this helper uses kprobes to override - * the return value of the probed function, and to set it to *rc*. - * The first argument is the context *regs* on which the kprobe - * works. - * - * This helper works by setting the PC (program counter) - * to an override function which is run in place of the original - * probed function. This means the probed function is not run at - * all. The replacement function just returns with the required - * value. - * - * This helper has security implications, and thus is subject to - * restrictions. It is only available if the kernel was compiled - * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration - * option, and in this case it only works on functions tagged with - * **ALLOW_ERROR_INJECTION** in the kernel code. - * - * Also, the helper is only available for the architectures having - * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, - * the x86 architecture is the only one to support this feature. - * Return - * 0 - * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) - * Description - * Attempt to set the value of the **bpf_sock_ops_cb_flags** field - * for the full TCP socket associated to *bpf_sock_ops* to - * *argval*. - * - * The primary use of this field is to determine if there should - * be calls to eBPF programs of type - * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP - * code. A program of the same type can change its value, per - * connection and as necessary, when the connection is - * established. This field is directly accessible for reading, but - * this helper must be used for updates in order to return an - * error if an eBPF program tries to set a callback that is not - * supported in the current kernel. - * - * *argval* is a flag array which can combine these flags: - * - * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) - * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) - * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) - * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) - * - * Therefore, this function can also be used to clear a callback flag by - * setting the appropriate bit to zero, e.g. to disable the RTO - * callback: - * - * **bpf_sock_ops_cb_flags_set(bpf_sock,** - * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** - * - * Here are some examples of where one could call such an eBPF - * program: - * - * * When RTO fires. - * * When a packet is retransmitted. - * * When the connection terminates. - * * When a packet is sent. - * * When a packet is received. - * Return - * Code **-EINVAL** if the socket is not a full TCP socket; - * otherwise, a positive number containing the bits that could not - * be set is returned (which comes down to 0 if all bits were set - * as required). - * - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) - * Description - * This helper is used in programs implementing policies at the - * socket level. If the message *msg* is allowed to pass (i.e. if - * the verdict eBPF program returns **SK_PASS**), redirect it to - * the socket referenced by *map* (of type - * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and - * egress interfaces can be used for redirection. The - * **BPF_F_INGRESS** value in *flags* is used to make the - * distinction (ingress path is selected if the flag is present, - * egress path otherwise). This is the only flag supported for now. - * Return - * **SK_PASS** on success, or **SK_DROP** on error. - * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) - * Description - * For socket policies, apply the verdict of the eBPF program to - * the next *bytes* (number of bytes) of message *msg*. - * - * For example, this helper can be used in the following cases: - * - * * A single **sendmsg**\ () or **sendfile**\ () system call - * contains multiple logical messages that the eBPF program is - * supposed to read and for which it should apply a verdict. - * * An eBPF program only cares to read the first *bytes* of a - * *msg*. If the message has a large payload, then setting up - * and calling the eBPF program repeatedly for all bytes, even - * though the verdict is already known, would create unnecessary - * overhead. - * - * When called from within an eBPF program, the helper sets a - * counter internal to the BPF infrastructure, that is used to - * apply the last verdict to the next *bytes*.
If *bytes* is - * smaller than the current data being processed from a - * **sendmsg**\ () or **sendfile**\ () system call, the first - * *bytes* will be sent and the eBPF program will be re-run with - * the pointer for start of data pointing to byte number *bytes* - * **+ 1**. If *bytes* is larger than the current data being - * processed, then the eBPF verdict will be applied to multiple - * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are - * consumed. - * - * Note that if a socket closes with the internal counter holding - * a non-zero value, this is not a problem because data is not - * being buffered for *bytes* and is sent as it is received. - * Return - * 0 - * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) - * Description - * For socket policies, prevent the execution of the verdict eBPF - * program for message *msg* until *bytes* (byte number) have been - * accumulated. - * - * This can be used when one needs a specific number of bytes - * before a verdict can be assigned, even if the data spans - * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme - * case would be a user calling **sendmsg**\ () repeatedly with - * 1-byte long message segments. Obviously, this is bad for - * performance, but it is still valid. If the eBPF program needs - * *bytes* bytes to validate a header, this helper can be used to - * prevent the eBPF program from being called again until *bytes* have - * been accumulated. - * Return - * 0 - * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) - * Description - * For socket policies, pull in non-linear data from user space - * for *msg* and set pointers *msg*\ **->data** and *msg*\ - * **->data_end** to *start* and *end* bytes offsets into *msg*, - * respectively. - * - * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it can only parse data that the (**data**, **data_end**) - * pointers have already consumed. For **sendmsg**\ () hooks this - * is likely the first scatterlist element. But for calls relying - * on the **sendpage** handler (e.g. **sendfile**\ ()) this will - * be the range (**0**, **0**) because the data is shared with - * user space and by default the objective is to avoid allowing - * user space to modify data while (or after) the eBPF verdict is - * being decided. This helper can be used to pull in data and to - * set the start and end pointers to given values. Data will be - * copied if necessary (i.e. if data was not linear and if start - * and end pointers do not point to the same chunk). - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) - * Description - * Bind the socket associated to *ctx* to the address pointed to by - * *addr*, of length *addr_len*. This allows for making outgoing - * connections from the desired IP address, which can be useful for - * example when all processes inside a cgroup should use one - * single IP address on a host that has multiple IPs configured. - * - * This helper works for IPv4 and IPv6, TCP and UDP sockets.
The - * domain (*addr*\ **->sa_family**) must be **AF_INET** (or - * **AF_INET6**). Looking for a free port to bind to can be - * expensive, therefore binding to a port is not permitted by the - * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) - * must be set to zero. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) - * Description - * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is - * only possible to shrink the packet as of this writing, - * therefore *delta* must be a negative integer. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) - * Description - * Retrieve the XFRM state (IP transform framework, see also - * **ip-xfrm(8)**) at *index* in the XFRM "security path" for *skb*. - * - * The retrieved value is stored in the **struct bpf_xfrm_state** - * pointed to by *xfrm_state* and of length *size*. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * This helper is available only if the kernel was compiled with - * the **CONFIG_XFRM** configuration option. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) - * Description - * Return a user or a kernel stack in a buffer provided by the bpf - * program. To achieve this, the helper needs *ctx*, which is a pointer - * to the context on which the tracing program is executed. - * To store the stacktrace, the bpf program provides *buf* with - * a nonnegative *size*. - * - * The last argument, *flags*, holds the number of stack frames to - * skip (from 0 to 255), masked with - * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set - * the following flags: - * - * **BPF_F_USER_STACK** - * Collect a user space stack instead of a kernel stack. - * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. - * - * **bpf_get_stack**\ () can collect up to - * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject - * to a sufficiently large buffer size. Note that - * this limit can be controlled with the **sysctl** program, and - * that it should be manually increased in order to profile long - * user stacks (such as stacks for Java programs). To do so, use: - * - * :: - * - * # sysctl kernel.perf_event_max_stack=<new value> - * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. - * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) - * Description - * This helper is similar to **bpf_skb_load_bytes**\ () in that - * it provides an easy way to load *len* bytes from *offset* - * from the packet associated to *skb*, into the buffer pointed to - * by *to*. The difference to **bpf_skb_load_bytes**\ () is that - * a fifth argument *start_header* exists in order to select a - * base offset to start from.
*start_header* can be one of: - * - * **BPF_HDR_START_MAC** - * Base offset to load data from is *skb*'s mac header. - * **BPF_HDR_START_NET** - * Base offset to load data from is *skb*'s network header. - * - * In general, "direct packet access" is the preferred method to - * access packet data, however, this helper is particularly useful - * in socket filters where *skb*\ **->data** does not always point - * to the start of the mac header and where "direct packet access" - * is not available. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) - * Description - * Do a FIB lookup in kernel tables using the parameters in *params*. - * If the lookup is successful and the result shows the packet is to be - * forwarded, the neighbor tables are searched for the nexthop. - * If successful (i.e., the FIB lookup shows forwarding and the nexthop - * is resolved), the nexthop address is returned in ipv4_dst - * or ipv6_dst based on family, smac is set to the mac address of the - * egress device, dmac is set to the nexthop mac address, rt_metric - * is set to the metric from the route (IPv4/IPv6 only), and ifindex - * is set to the device index of the nexthop from the FIB lookup. - * - * The *plen* argument is the size of the passed in struct. - * The *flags* argument can be a combination of one or more of the - * following values: - * - * **BPF_FIB_LOOKUP_DIRECT** - * Do a direct table lookup vs full lookup using FIB - * rules. - * **BPF_FIB_LOOKUP_OUTPUT** - * Perform lookup from an egress perspective (default is - * ingress). - * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** for tc cls_act programs. - * Return - * * < 0 if any input argument is invalid - * * 0 on success (packet is forwarded, nexthop neighbor exists) - * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the - * packet is not forwarded or needs assist from full stack - * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) - * Description - * Add an entry to, or update a sockhash *map* referencing sockets. - * The *skops* is used as a new value for the entry associated to - * *key*. *flags* is one of: - * - * **BPF_NOEXIST** - * The entry for *key* must not exist in the map. - * **BPF_EXIST** - * The entry for *key* must already exist in the map. - * **BPF_ANY** - * No condition on the existence of the entry for *key*. - * - * If the *map* has eBPF programs (parser and verdict), those will - * be inherited by the socket being added. If the socket is - * already attached to eBPF programs, this results in an error. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) - * Description - * This helper is used in programs implementing policies at the - * socket level. If the message *msg* is allowed to pass (i.e. if - * the verdict eBPF program returns **SK_PASS**), redirect it to - * the socket referenced by *map* (of type - * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and - * egress interfaces can be used for redirection. The - * **BPF_F_INGRESS** value in *flags* is used to make the - * distinction (ingress path is selected if the flag is present, - * egress path otherwise). This is the only flag supported for now. - * Return - * **SK_PASS** on success, or **SK_DROP** on error.
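For illustration, the typical **bpf_fib_lookup**\ () forwarding pattern in an XDP program looks roughly as follows; header parsing and the MAC rewrite are elided, and the names are illustrative:

::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int xdp_fwd(struct xdp_md *ctx)
    {
        struct bpf_fib_lookup fib = {};
        int rc;

        /* fib.family, fib.ipv4_dst/ipv6_dst, ports, etc. would be
         * filled in from the parsed packet headers here */
        fib.ifindex = ctx->ingress_ifindex;

        rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
        if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
            /* rewrite the Ethernet addresses from fib.smac/fib.dmac,
             * then forward out of the nexthop device */
            return bpf_redirect(fib.ifindex, 0);
        }
        return XDP_PASS;   /* let the full stack assist otherwise */
    }

    char _license[] SEC("license") = "GPL";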
- * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) - * Description - * This helper is used in programs implementing policies at the - * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdict eBPF program returns **SK_PASS**), redirect it - * to the socket referenced by *map* (of type - * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and - * egress interfaces can be used for redirection. The - * **BPF_F_INGRESS** value in *flags* is used to make the - * distinction (ingress path is selected if the flag is present, - * egress path otherwise). This is the only flag supported for now. - * Return - * **SK_PASS** on success, or **SK_DROP** on error. - * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) - * Description - * Encapsulate the packet associated to *skb* within a Layer 3 - * protocol header. This header is provided in the buffer at - * address *hdr*, with *len* its size in bytes. *type* indicates - * the protocol of the header and can be one of: - * - * **BPF_LWT_ENCAP_SEG6** - * IPv6 encapsulation with Segment Routing Header - * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, - * the IPv6 header is computed by the kernel. - * **BPF_LWT_ENCAP_SEG6_INLINE** - * Only works if *skb* contains an IPv6 packet. Insert a - * Segment Routing Header (**struct ipv6_sr_hdr**) inside - * the IPv6 header. - * **BPF_LWT_ENCAP_IP** - * IP encapsulation (GRE/GUE/IPIP/etc). The outer header - * must be IPv4 or IPv6, followed by zero or more - * additional headers, up to **LWT_BPF_MAX_HEADROOM** - * total bytes in all prepended headers. Please note that - * if **skb_is_gso**\ (*skb*) is true, no more than two - * headers can be prepended, and the inner header, if - * present, should be either GRE or UDP/GUE. - * - * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs - * of type **BPF_PROG_TYPE_LWT_IN**; the **BPF_LWT_ENCAP_IP** type can - * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and - * **BPF_PROG_TYPE_LWT_XMIT**. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) - * Description - * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. Only the flags, tag and TLVs - * inside the outermost IPv6 Segment Routing Header can be - * modified through this helper. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) - * Description - * Adjust the size allocated to TLVs in the outermost IPv6 - * Segment Routing Header contained in the packet associated to - * *skb*, at position *offset* by *delta* bytes. Only offsets - * after the segments are accepted. *delta* can be positive - * (growing) as well as negative (shrinking).
- * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) - * Description - * Apply an IPv6 Segment Routing action of type *action* to the - * packet associated to *skb*. Each action takes a parameter - * contained at address *param*, and of length *param_len* bytes. - * *action* can be one of: - * - * **SEG6_LOCAL_ACTION_END_X** - * End.X action: Endpoint with Layer-3 cross-connect. - * Type of *param*: **struct in6_addr**. - * **SEG6_LOCAL_ACTION_END_T** - * End.T action: Endpoint with specific IPv6 table lookup. - * Type of *param*: **int**. - * **SEG6_LOCAL_ACTION_END_B6** - * End.B6 action: Endpoint bound to an SRv6 policy. - * Type of *param*: **struct ipv6_sr_hdr**. - * **SEG6_LOCAL_ACTION_END_B6_ENCAP** - * End.B6.Encap action: Endpoint bound to an SRv6 - * encapsulation policy. - * Type of *param*: **struct ipv6_sr_hdr**. - * - * A call to this helper is susceptible to change the underlying - * packet buffer. Therefore, at load time, all checks on pointers - * previously done by the verifier are invalidated and must be - * performed again, if the helper is used in combination with - * direct packet access. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_rc_repeat(void *ctx) - * Description - * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for the previously generated - * key down event. - * - * Some IR protocols like NEC have a special IR message for - * repeating the last button, for when a button is held down. - * - * The *ctx* should point to the lirc sample as passed into - * the program. - * - * This helper is only available if the kernel was compiled with - * the **CONFIG_BPF_LIRC_MODE2** configuration option set to - * "**y**". - * Return - * 0 - * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) - * Description - * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode* and - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). - * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. - * - * The *ctx* should point to the lirc sample as passed into - * the program. - * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). - * - * This helper is only available if the kernel was compiled with - * the **CONFIG_BPF_LIRC_MODE2** configuration option set to - * "**y**". - * Return - * 0 - * - * u64 bpf_skb_cgroup_id(struct sk_buff *skb) - * Description - * Return the cgroup v2 id of the socket associated with the *skb*. - * This is roughly similar to the **bpf_get_cgroup_classid**\ () - * helper for cgroup v1 by providing a tag or
identifier that - * can be matched on or used for map lookups, e.g. to implement - * policy. The cgroup v2 id of a given path in the hierarchy is - * exposed in user space through the f_handle API in order to get - * to the same 64-bit id. - * - * This helper can be used on TC egress path, but not on ingress, - * and is available only if the kernel was compiled with the - * **CONFIG_SOCK_CGROUP_DATA** configuration option. - * Return - * The id is returned or 0 in case the id could not be retrieved. - * - * u64 bpf_get_current_cgroup_id(void) - * Return - * A 64-bit integer containing the current cgroup id based - * on the cgroup within which the current task is running. - * - * void *bpf_get_local_storage(void *map, u64 flags) - * Description - * Get the pointer to the local storage area. - * The type and the size of the local storage is defined - * by the *map* argument. - * The *flags* meaning is specific for each map type, - * and has to be 0 for cgroup local storage. - * - * Depending on the BPF program type, a local storage area - * can be shared between multiple instances of the BPF program, - * running simultaneously. - * - * Users should take care of the synchronization themselves. - * For example, by using the **BPF_STX_XADD** instruction to alter - * the shared data. - * Return - * A pointer to the local storage area. - * - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) - * Description - * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. - * It checks that the selected socket matches the incoming - * request in the socket buffer. - * Return - * 0 on success, or a negative error in case of failure. - * - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return the id of the cgroup v2 ancestor of the cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then the return value will be the same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are higher in the hierarchy than the immediate cgroup associated - * with *skb*. - * - * The format of the returned id and the helper limitations are the same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. - * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) - * Description - * Look for a TCP socket matching *tuple*, optionally in a child - * network namespace *netns*. The return value must be checked, - * and if non-**NULL**, released via **bpf_sk_release**\ (). - * - * The *ctx* should point to the context of the program, such as - * the skb or socket (depending on the hook in use). This is used - * to determine the base network namespace for the lookup. - * - * *tuple_size* must be one of: - * - * **sizeof**\ (*tuple*\ **->ipv4**) - * Look for an IPv4 socket. - * **sizeof**\ (*tuple*\ **->ipv6**) - * Look for an IPv6 socket. - * - * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will - * be used. For the TC hooks, this is the netns of the device - * in the skb. For socket hooks, this is the netns of the socket. - * If *netns* is any other signed 32-bit value greater than or - * equal to zero then it specifies the ID of the netns relative to - * the netns associated with the *ctx*. *netns* values beyond the - * range of 32-bit integers are reserved for future use. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * This helper is available only if the kernel was compiled with - * the **CONFIG_NET** configuration option. - * Return - * Pointer to **struct bpf_sock**, or **NULL** in case of failure. - * For sockets with reuseport option, the **struct bpf_sock** - * result is from *reuse*\ **->socks**\ [] using the hash of the - * tuple.
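The lookup/release contract above is easiest to see in code. A hedged sketch of a TC classifier that checks whether a matching local TCP socket exists; filling the tuple from the packet is elided and the names are illustrative:

::

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>

    SEC("classifier")
    int has_socket(struct __sk_buff *skb)
    {
        struct bpf_sock_tuple tuple = {};
        struct bpf_sock *sk;

        /* tuple.ipv4 (saddr/daddr/sport/dport) would be filled
         * from the parsed packet here */
        sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
                               BPF_F_CURRENT_NETNS, 0);
        if (sk)
            bpf_sk_release(sk);   /* mandatory for non-NULL results */
        return TC_ACT_OK;
    }

    char _license[] SEC("license") = "GPL";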
- * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) - * Description - * Look for a UDP socket matching *tuple*, optionally in a child - * network namespace *netns*. The return value must be checked, - * and if non-**NULL**, released via **bpf_sk_release**\ (). - * - * The *ctx* should point to the context of the program, such as - * the skb or socket (depending on the hook in use). This is used - * to determine the base network namespace for the lookup. - * - * *tuple_size* must be one of: - * - * **sizeof**\ (*tuple*\ **->ipv4**) - * Look for an IPv4 socket. - * **sizeof**\ (*tuple*\ **->ipv6**) - * Look for an IPv6 socket. - * - * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will - * be used. For the TC hooks, this is the netns of the device - * in the skb. For socket hooks, this is the netns of the socket. - * If *netns* is any other signed 32-bit value greater than or - * equal to zero then it specifies the ID of the netns relative to - * the netns associated with the *ctx*. *netns* values beyond the - * range of 32-bit integers are reserved for future use. - * - * All values for *flags* are reserved for future usage, and must - * be left at zero. - * - * This helper is available only if the kernel was compiled with - * the **CONFIG_NET** configuration option. - * Return - * Pointer to **struct bpf_sock**, or **NULL** in case of failure. - * For sockets with reuseport option, the **struct bpf_sock** - * result is from *reuse*\ **->socks**\ [] using the hash of the - * tuple. - * - * int bpf_sk_release(struct bpf_sock *sock) - * Description - * Release the reference held by *sock*. *sock* must be a - * non-**NULL** pointer that was returned from - * **bpf_sk_lookup_xxx**\ (). - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* into *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is - * removed to make room for it. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) - * Description - * For socket policies, insert *len* bytes into *msg* at offset - * *start*.
- * - * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the *msg*. - * This can later be read and used by any of the lower layer BPF - * hooks. - * - * This helper may fail under memory pressure (if a malloc - * fails); in these cases the BPF program will get an appropriate - * error and will need to handle it. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) - * Description - * Remove *len* bytes from a *msg* starting at byte *start*. - * This may result in **ENOMEM** errors under certain situations if - * an allocation and copy are required due to a full ring buffer. - * However, the helper will try to avoid doing the allocation - * if possible. Other errors can occur if input parameters are - * invalid either due to the *start* byte not being a valid part of the *msg* - * payload and/or the *pop* value being too large. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) - * Description - * This helper is used in programs implementing IR decoding, to - * report a successfully decoded pointer movement. - * - * The *ctx* should point to the lirc sample as passed into - * the program. - * - * This helper is only available if the kernel was compiled with - * the **CONFIG_BPF_LIRC_MODE2** configuration option set to - * "**y**". - * Return - * 0 - * - * int bpf_spin_lock(struct bpf_spin_lock *lock) - * Description - * Acquire a spinlock represented by the pointer *lock*, which is - * stored as part of a value of a map. Taking the lock makes it - * possible to safely update the rest of the fields in that value. The - * spinlock can (and must) later be released with a call to - * **bpf_spin_unlock**\ (\ *lock*\ ). - * - * Spinlocks in BPF programs come with a number of restrictions - * and constraints: - * - * * **bpf_spin_lock** objects are only allowed inside maps of - * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this - * list could be extended in the future). - * * BTF description of the map is mandatory. - * * The BPF program can take ONE lock at a time, since taking two - * or more could cause deadlocks. - * * Only one **struct bpf_spin_lock** is allowed per map element. - * * When the lock is taken, calls (either BPF to BPF or helpers) - * are not allowed. - * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not - * allowed inside a spinlock-ed region. - * * The BPF program MUST call **bpf_spin_unlock**\ () to release - * the lock, on all execution paths, before it returns. - * * The BPF program can access **struct bpf_spin_lock** only via - * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () - * helpers. Loading or storing data into the **struct - * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. - * * To use the **bpf_spin_lock**\ () helper, the BTF description - * of the map value must be a struct and have **struct - * bpf_spin_lock** *anyname*\ **;** field at the top level. - * Nested lock inside another struct is not allowed. - * * The **struct bpf_spin_lock** *lock* field in a map value must - * be aligned on a multiple of 4 bytes in that value. - * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy - * the **bpf_spin_lock** field to user space. - * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from - * a BPF program, do not update the **bpf_spin_lock** field.
- * * **bpf_spin_lock** cannot be on the stack or inside a - * networking packet (it can only be inside of a map value). - * * **bpf_spin_lock** is available to root only. - * * Tracing programs and socket filter programs cannot use - * **bpf_spin_lock**\ () due to insufficient preemption checks - * (but this may change in the future). - * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. - * Return - * 0 - * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) - * Description - * Release the *lock* previously locked by a call to - * **bpf_spin_lock**\ (\ *lock*\ ). - * Return - * 0 - * - * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) - * Description - * This helper gets a **struct bpf_sock** pointer such - * that all the fields in this **bpf_sock** can be accessed. - * Return - * A **struct bpf_sock** pointer on success, or **NULL** in - * case of failure. - * - * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) - * Description - * This helper gets a **struct bpf_tcp_sock** pointer from a - * **struct bpf_sock** pointer. - * Return - * A **struct bpf_tcp_sock** pointer on success, or **NULL** in - * case of failure. - * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) - * Description - * Set the ECN (Explicit Congestion Notification) field of the IP header - * to **CE** (Congestion Encountered) if the current value is **ECT** - * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 - * and IPv4. - * Return - * 1 if the **CE** flag is set (either by the current helper call - * or because it was already present), 0 if it is not set. - * - * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) - * Description - * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. - * **bpf_sk_release**\ () is unnecessary and not allowed. - * Return - * A **struct bpf_sock** pointer on success, or **NULL** in - * case of failure. - * - * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) - * Description - * Look for a TCP socket matching *tuple*, optionally in a child - * network namespace *netns*. The return value must be checked, - * and if non-**NULL**, released via **bpf_sk_release**\ (). - * - * This function is identical to **bpf_sk_lookup_tcp**\ (), except - * that it also returns timewait or request sockets. Use - * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the - * full structure. - * - * This helper is available only if the kernel was compiled with - * the **CONFIG_NET** configuration option. - * Return - * Pointer to **struct bpf_sock**, or **NULL** in case of failure. - * For sockets with reuseport option, the **struct bpf_sock** - * result is from *reuse*\ **->socks**\ [] using the hash of the - * tuple. - * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) - * Description - * Check whether *iph* and *th* contain a valid SYN cookie ACK for - * the listening socket in *sk*. - * - * *iph* points to the start of the IPv4 or IPv6 header, while - * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). - * - * *th* points to the start of the TCP header, while *th_len* - * contains **sizeof**\ (**struct tcphdr**). - * - * Return - * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative - * error otherwise.
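To illustrate the spinlock rules listed above: a conforming map value embeds a single top-level **struct bpf_spin_lock**, and the program takes the lock only around plain data updates, with no calls inside the critical section. A minimal sketch, assuming libbpf-style BTF map definitions (the BTF description is mandatory for **bpf_spin_lock**):

::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct counter {
        struct bpf_spin_lock lock;   /* top-level field, as required */
        __u64 packets;
    };

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, __u32);
        __type(value, struct counter);
    } stats SEC(".maps");

    SEC("xdp")
    int count_packets(struct xdp_md *ctx)
    {
        __u32 key = 0;
        struct counter *c = bpf_map_lookup_elem(&stats, &key);

        if (c) {
            bpf_spin_lock(&c->lock);
            c->packets++;            /* no calls allowed in here */
            bpf_spin_unlock(&c->lock);
        }
        return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";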
- * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) - * Description - * Get the name of the sysctl in /proc/sys/ and copy it into the - * buffer *buf* of size *buf_len* provided by the program. - * - * The buffer is always NUL terminated, unless it's zero-sized. - * - * If *flags* is zero, the full name (e.g. "net/ipv4/tcp_mem") is - * copied. Use the **BPF_F_SYSCTL_BASE_NAME** flag to copy the base name - * only (e.g. "tcp_mem"). - * Return - * Number of characters copied (not including the trailing NUL). - * - * **-E2BIG** if the buffer wasn't big enough (*buf* will contain - * the truncated name in this case). - * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) - * Description - * Get the current value of the sysctl as it is presented in /proc/sys - * (incl. newline, etc.), and copy it as a string into the - * buffer *buf* of size *buf_len* provided by the program. - * - * The whole value is copied, no matter what file position user - * space issued e.g. sys_read at. - * - * The buffer is always NUL terminated, unless it's zero-sized. - * Return - * Number of characters copied (not including the trailing NUL). - * - * **-E2BIG** if the buffer wasn't big enough (*buf* will contain - * the truncated name in this case). - * - * **-EINVAL** if the current value was unavailable, e.g. because - * the sysctl is uninitialized and reading it returns -EIO. - * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) - * Description - * Get the new value being written by user space to the sysctl (before - * the actual write happens) and copy it as a string into the - * buffer *buf* of size *buf_len* provided by the program. - * - * User space may write the new value at a file position > 0. - * - * The buffer is always NUL terminated, unless it's zero-sized. - * Return - * Number of characters copied (not including the trailing NUL). - * - * **-E2BIG** if the buffer wasn't big enough (*buf* will contain - * the truncated name in this case). - * - * **-EINVAL** if the sysctl is being read. - * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) - * Description - * Override the new value being written by user space to the sysctl with - * the value provided by the program in buffer *buf* of size *buf_len*. - * - * *buf* should contain a string in the same form as provided by user - * space on sysctl write. - * - * User space may write the new value at a file position > 0. To override - * the whole sysctl value, the file position should be set to zero. - * Return - * 0 on success. - * - * **-E2BIG** if the *buf_len* is too big. - * - * **-EINVAL** if the sysctl is being read. - * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) - * Description - * Convert the initial part of the string from buffer *buf* of - * size *buf_len* to a long integer according to the given base - * and save the result in *res*. - * - * The string may begin with an arbitrary amount of white space - * (as determined by **isspace**\ (3)) followed by a single - * optional '**-**' sign. - * - * The five least significant bits of *flags* encode the base, other bits - * are currently unused. - * - * The base must be either 8, 10, 16 or 0 to detect it automatically, - * similarly to user space **strtol**\ (3). - * Return - * Number of characters consumed on success. Must be positive but - * no more than *buf_len*. - * - * **-EINVAL** if no valid digits were found or an unsupported base - * was provided. - * - * **-ERANGE** if the resulting value was out of range.
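Putting the sysctl helpers together, a **BPF_PROG_TYPE_CGROUP_SYSCTL** program can, for example, leave a single key writable and make everything else read-only. A rough sketch; the allowed key and the names are illustrative, and the unrolled comparison is a conservative choice for verifiers without bounded-loop support (a write filter could additionally parse the proposed value with **bpf_strtol**\ () after **bpf_sysctl_get_new_value**\ ()):

::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("cgroup/sysctl")
    int sysctl_guard(struct bpf_sysctl *ctx)
    {
        char name[16] = {};
        const char allowed[] = "tcp_mem";
        int i, len;

        len = bpf_sysctl_get_name(ctx, name, sizeof(name),
                                  BPF_F_SYSCTL_BASE_NAME);
        if (len < 0)
            return 0;                      /* reject on error */

    #pragma unroll
        for (i = 0; i < sizeof(allowed); i++)
            if (name[i] != allowed[i])
                return ctx->write ? 0 : 1; /* read-only otherwise */

        return 1;                          /* writes to the key allowed */
    }

    char _license[] SEC("license") = "GPL";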
- * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) - * Description - * Convert the initial part of the string from buffer *buf* of - * size *buf_len* to an unsigned long integer according to the - * given base and save the result in *res*. - * - * The string may begin with an arbitrary amount of white space - * (as determined by **isspace**\ (3)). - * - * The five least significant bits of *flags* encode the base, other bits - * are currently unused. - * - * The base must be either 8, 10, 16 or 0 to detect it automatically, - * similarly to user space **strtoul**\ (3). - * Return - * Number of characters consumed on success. Must be positive but - * no more than *buf_len*. - * - * **-EINVAL** if no valid digits were found or an unsupported base - * was provided. - * - * **-ERANGE** if the resulting value was out of range. - * - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) - * Description - * Get a bpf-local-storage from a *sk*. - * - * Logically, it could be thought of as getting the value from - * a *map* with *sk* as the **key**. From this - * perspective, the usage is not much different from - * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except that this - * helper enforces that the key must be a full socket and the map must - * be of type **BPF_MAP_TYPE_SK_STORAGE**. - * - * Underneath, the value is stored locally at *sk* instead of - * the *map*. The *map* is used as the bpf-local-storage - * "type". The bpf-local-storage "type" (i.e. the *map*) is - * searched against all bpf-local-storages residing at *sk*. - * - * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be - * used such that a new bpf-local-storage will be - * created if one does not exist. *value* can be used - * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify - * the initial value of a bpf-local-storage. If *value* is - * **NULL**, the new bpf-local-storage will be zero initialized. - * Return - * A bpf-local-storage pointer is returned on success. - * - * **NULL** if not found or there was an error in adding - * a new bpf-local-storage. - * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) - * Description - * Delete a bpf-local-storage from a *sk*. - * Return - * 0 on success. - * - * **-ENOENT** if the bpf-local-storage cannot be found. - * - * int bpf_send_signal(u32 sig) - * Description - * Send signal *sig* to the current task. - * Return - * 0 on success or if successfully queued. - * - * **-EBUSY** if the work queue under nmi is full. - * - * **-EINVAL** if *sig* is invalid. - * - * **-EPERM** if no permission to send the *sig*. - * - * **-EAGAIN** if the bpf program can try again. - * - * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) - * Description - * Try to issue a SYN cookie for the packet with corresponding - * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. - * - * *iph* points to the start of the IPv4 or IPv6 header, while - * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). - * - * *th* points to the start of the TCP header, while *th_len* - * contains the length of the TCP header. - * - * Return - * On success, the lower 32 bits hold the generated SYN cookie, - * followed by 16 bits which hold the MSS value for that cookie, - * and the top 16 bits are unused.
- * - * On failure, the returned value is one of the following: - * - * **-EINVAL** SYN cookie cannot be issued due to error - * - * **-ENOENT** SYN cookie should not be issued (no SYN flood) - * - * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies - * - * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 - * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) - * Description - * Write raw *data* blob into a special BPF perf event held by - * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf - * event must have the following attributes: **PERF_SAMPLE_RAW** - * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and - * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. - * - * The *flags* are used to indicate the index in *map* for which - * the value must be put, masked with **BPF_F_INDEX_MASK**. - * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** - * to indicate that the index of the current CPU core should be - * used. - * - * The value to write, of *size*, is passed through the eBPF stack and - * pointed to by *data*. - * - * *ctx* is a pointer to an in-kernel struct sk_buff. - * - * This helper is similar to **bpf_perf_event_output**\ () but - * restricted to raw_tracepoint bpf programs. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) - * Description - * Safely attempt to read *size* bytes from user space address - * *unsafe_ptr* and store the data in *dst*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) - * Description - * Safely attempt to read *size* bytes from kernel space address - * *unsafe_ptr* and store the data in *dst*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) - * Description - * Copy a NUL terminated string from an unsafe user address - * *unsafe_ptr* to *dst*. The *size* should include the - * terminating NUL byte. In case the string length is smaller than - * *size*, the target is not padded with further NUL bytes. If the - * string length is larger than *size*, just *size*-1 bytes are - * copied and the last byte is set to NUL. - * - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly for getting its length at runtime. See - * the following snippet: - * - * :: - * - * SEC("kprobe/sys_open") - * void bpf_sys_open(struct pt_regs *ctx) - * { - * char buf[PATHLEN]; // PATHLEN is defined to 256 - * int res = bpf_probe_read_user_str(buf, sizeof(buf), - * ctx->di); - * - * // Consume buf, for example push it to - * // userspace via bpf_perf_event_output(); we - * // can use res (the string length) as event - * // size, after checking its boundaries. - * } - * - * In comparison, using the **bpf_probe_read_user**\ () helper here - * instead to read the string would require estimating the length - * at compile time, and would often result in copying more memory - * than necessary. - * - * Another useful use case is when parsing individual process - * arguments or individual environment variables by navigating - * *current*\ **->mm->arg_start** and *current*\ - * **->mm->env_start**: using this helper and the return value, - * one can quickly iterate at the right offset of the memory area.
- * Return - * On success, the strictly positive length of the string, - * including the trailing NUL character. On error, a negative - * value. - * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) - * Description - * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* - * to *dst*. Same semantics as with bpf_probe_read_user_str() apply. - * Return - * On success, the strictly positive length of the string, including - * the trailing NUL character. On error, a negative value. - */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), - -/* integer value in 'imm' field of BPF_CALL instruction selects which helper - * function eBPF program intends to call - 
*/ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x -enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) - __BPF_FUNC_MAX_ID, -}; -#undef __BPF_ENUM_FN - -/* All flags used by eBPF helper functions, placed here. */ - -/* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) - -/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. - * First 4 bits are for passing the header field size. - */ -#define BPF_F_HDR_FIELD_MASK 0xfULL - -/* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) - -/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) - -/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) - -/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) -/* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) -/* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) - -/* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) - -/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and - * BPF_FUNC_perf_event_read_value flags. - */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK -/* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) - -/* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) - -/* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) - -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 - -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) -#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ - BPF_ADJ_ROOM_ENCAP_L2_MASK) \ - << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) - -/* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) - -/* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) - -/* Mode for BPF_FUNC_skb_adjust_room helper. */ -enum bpf_adj_room_mode { - BPF_ADJ_ROOM_NET, - BPF_ADJ_ROOM_MAC, -}; - -/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ -enum bpf_hdr_start_off { - BPF_HDR_START_MAC, - BPF_HDR_START_NET, -}; - -/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ -enum bpf_lwt_encap_mode { - BPF_LWT_ENCAP_SEG6, - BPF_LWT_ENCAP_SEG6_INLINE, - BPF_LWT_ENCAP_IP, -}; - -#define __bpf_md_ptr(type, name) \ -union { \ - type name; \ - __u64 :64; \ -} __attribute__((aligned(8))) - -/* user accessible mirror of in-kernel sk_buff. - * new fields can only be added to the end of this structure - */ -struct __sk_buff { - __u32 len; - __u32 pkt_type; - __u32 mark; - __u32 queue_mapping; - __u32 protocol; - __u32 vlan_present; - __u32 vlan_tci; - __u32 vlan_proto; - __u32 priority; - __u32 ingress_ifindex; - __u32 ifindex; - __u32 tc_index; - __u32 cb[5]; - __u32 hash; - __u32 tc_classid; - __u32 data; - __u32 data_end; - __u32 napi_id; - - /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... 
*/ - __u32 family; - __u32 remote_ip4; /* Stored in network byte order */ - __u32 local_ip4; /* Stored in network byte order */ - __u32 remote_ip6[4]; /* Stored in network byte order */ - __u32 local_ip6[4]; /* Stored in network byte order */ - __u32 remote_port; /* Stored in network byte order */ - __u32 local_port; /* stored in host byte order */ - /* ... here. */ - - __u32 data_meta; - __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); - __u64 tstamp; - __u32 wire_len; - __u32 gso_segs; - __bpf_md_ptr(struct bpf_sock *, sk); -}; - -struct bpf_tunnel_key { - __u32 tunnel_id; - union { - __u32 remote_ipv4; - __u32 remote_ipv6[4]; - }; - __u8 tunnel_tos; - __u8 tunnel_ttl; - __u16 tunnel_ext; /* Padding, future use. */ - __u32 tunnel_label; -}; - -/* user accessible mirror of in-kernel xfrm_state. - * new fields can only be added to the end of this structure - */ -struct bpf_xfrm_state { - __u32 reqid; - __u32 spi; /* Stored in network byte order */ - __u16 family; - __u16 ext; /* Padding, future use. */ - union { - __u32 remote_ipv4; /* Stored in network byte order */ - __u32 remote_ipv6[4]; /* Stored in network byte order */ - }; -}; - -/* Generic BPF return codes which all BPF program types may support. - * The values are binary compatible with their TC_ACT_* counter-part to - * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT - * programs. - * - * XDP is handled seprately, see XDP_*. - */ -enum bpf_ret_code { - BPF_OK = 0, - /* 1 reserved */ - BPF_DROP = 2, - /* 3-6 reserved */ - BPF_REDIRECT = 7, - /* >127 are reserved for prog type specific return codes. - * - * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and - * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been - * changed and should be routed based on its new L3 header. - * (This is an L3 redirect, as opposed to L2 redirect - * represented by BPF_REDIRECT above). - */ - BPF_LWT_REROUTE = 128, -}; - -struct bpf_sock { - __u32 bound_dev_if; - __u32 family; - __u32 type; - __u32 protocol; - __u32 mark; - __u32 priority; - /* IP address also allows 1 and 2 bytes access */ - __u32 src_ip4; - __u32 src_ip6[4]; - __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ - __u32 dst_ip4; - __u32 dst_ip6[4]; - __u32 state; -}; - -struct bpf_tcp_sock { - __u32 snd_cwnd; /* Sending congestion window */ - __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ - __u32 rtt_min; - __u32 snd_ssthresh; /* Slow start size threshold */ - __u32 rcv_nxt; /* What we want to receive next */ - __u32 snd_nxt; /* Next sequence we send */ - __u32 snd_una; /* First byte we want an ack for */ - __u32 mss_cache; /* Cached effective mss, not including SACKS */ - __u32 ecn_flags; /* ECN status bits. */ - __u32 rate_delivered; /* saved rate sample: packets delivered */ - __u32 rate_interval_us; /* saved rate sample: time elapsed */ - __u32 packets_out; /* Packets which are "in flight" */ - __u32 retrans_out; /* Retransmitted packets out */ - __u32 total_retrans; /* Total retransmits for entire connection */ - __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn - * total number of segments in. - */ - __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn - * total number of data segments in. - */ - __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut - * The total number of segments sent. - */ - __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut - * total number of data segments sent. 
- */ - __u32 lost_out; /* Lost packets */ - __u32 sacked_out; /* SACK'd packets */ - __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived - * sum(delta(rcv_nxt)), or how many bytes - * were acked. - */ - __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked - * sum(delta(snd_una)), or how many bytes - * were acked. - */ - __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups - * total number of DSACK blocks received - */ - __u32 delivered; /* Total data packets delivered incl. rexmits */ - __u32 delivered_ce; /* Like the above but only ECE marked packets */ - __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ -}; - -struct bpf_sock_tuple { - union { - struct { - __be32 saddr; - __be32 daddr; - __be16 sport; - __be16 dport; - } ipv4; - struct { - __be32 saddr[4]; - __be32 daddr[4]; - __be16 sport; - __be16 dport; - } ipv6; - }; -}; - -struct bpf_xdp_sock { - __u32 queue_id; -}; - -#define XDP_PACKET_HEADROOM 256 - -/* User return codes for XDP prog type. - * A valid XDP program must return one of these defined values. All other - * return codes are reserved for future use. Unknown return codes will - * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). - */ -enum xdp_action { - XDP_ABORTED = 0, - XDP_DROP, - XDP_PASS, - XDP_TX, - XDP_REDIRECT, -}; - -/* user accessible metadata for XDP packet hook - * new fields must be added to the end of this structure - */ -struct xdp_md { - __u32 data; - __u32 data_end; - __u32 data_meta; - /* Below access go through struct xdp_rxq_info */ - __u32 ingress_ifindex; /* rxq->dev->ifindex */ - __u32 rx_queue_index; /* rxq->queue_index */ -}; - -enum sk_action { - SK_DROP = 0, - SK_PASS, -}; - -/* user accessible metadata for SK_MSG packet hook, new fields must - * be added to the end of this structure - */ -struct sk_msg_md { - __bpf_md_ptr(void *, data); - __bpf_md_ptr(void *, data_end); - - __u32 family; - __u32 remote_ip4; /* Stored in network byte order */ - __u32 local_ip4; /* Stored in network byte order */ - __u32 remote_ip6[4]; /* Stored in network byte order */ - __u32 local_ip6[4]; /* Stored in network byte order */ - __u32 remote_port; /* Stored in network byte order */ - __u32 local_port; /* stored in host byte order */ - __u32 size; /* Total size of sk_msg */ -}; - -struct sk_reuseport_md { - /* - * Start of directly accessible data. It begins from - * the tcp/udp header. - */ - __bpf_md_ptr(void *, data); - /* End of directly accessible data */ - __bpf_md_ptr(void *, data_end); - /* - * Total length of packet (starting from the tcp/udp header). - * Note that the directly accessible bytes (data_end - data) - * could be less than this "len". Those bytes could be - * indirectly read by a helper "bpf_skb_load_bytes()". - */ - __u32 len; - /* - * Eth protocol in the mac header (network byte order). e.g. - * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD) - */ - __u32 eth_protocol; - __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ - __u32 bind_inany; /* Is sock bound to an INANY address? 
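[Annotation: enum xdp_action above lists the verdicts an XDP program may return, and struct xdp_md is the context it receives; ctx->data and ctx->data_end bound the packet. A generic sketch, not part of Knot's sources, assuming clang's BPF target and the SEC() macro from bpf_helpers.h:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int xdp_pass_min(struct xdp_md *ctx)
    {
            void *data     = (void *)(long)ctx->data;
            void *data_end = (void *)(long)ctx->data_end;

            /* Drop anything too short to hold an Ethernet header (14 bytes);
             * the verifier requires such a bounds check before any access. */
            if ((char *)data + 14 > (char *)data_end)
                    return XDP_DROP;

            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";

Any return value outside enum xdp_action is treated as XDP_ABORTED-like misbehavior: the packet is dropped and bpf_warn_invalid_xdp_action() fires, per the comment above.]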
*/ - __u32 hash; /* A hash of the packet 4 tuples */ -}; - -#define BPF_TAG_SIZE 8 - -struct bpf_prog_info { - __u32 type; - __u32 id; - __u8 tag[BPF_TAG_SIZE]; - __u32 jited_prog_len; - __u32 xlated_prog_len; - __aligned_u64 jited_prog_insns; - __aligned_u64 xlated_prog_insns; - __u64 load_time; /* ns since boottime */ - __u32 created_by_uid; - __u32 nr_map_ids; - __aligned_u64 map_ids; - char name[BPF_OBJ_NAME_LEN]; - __u32 ifindex; - __u32 gpl_compatible:1; - __u32 :31; /* alignment pad */ - __u64 netns_dev; - __u64 netns_ino; - __u32 nr_jited_ksyms; - __u32 nr_jited_func_lens; - __aligned_u64 jited_ksyms; - __aligned_u64 jited_func_lens; - __u32 btf_id; - __u32 func_info_rec_size; - __aligned_u64 func_info; - __u32 nr_func_info; - __u32 nr_line_info; - __aligned_u64 line_info; - __aligned_u64 jited_line_info; - __u32 nr_jited_line_info; - __u32 line_info_rec_size; - __u32 jited_line_info_rec_size; - __u32 nr_prog_tags; - __aligned_u64 prog_tags; - __u64 run_time_ns; - __u64 run_cnt; -} __attribute__((aligned(8))); - -struct bpf_map_info { - __u32 type; - __u32 id; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 map_flags; - char name[BPF_OBJ_NAME_LEN]; - __u32 ifindex; - __u32 :32; - __u64 netns_dev; - __u64 netns_ino; - __u32 btf_id; - __u32 btf_key_type_id; - __u32 btf_value_type_id; -} __attribute__((aligned(8))); - -struct bpf_btf_info { - __aligned_u64 btf; - __u32 btf_size; - __u32 id; -} __attribute__((aligned(8))); - -/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed - * by user and intended to be used by socket (e.g. to bind to, depends on - * attach attach type). - */ -struct bpf_sock_addr { - __u32 user_family; /* Allows 4-byte read, but no write. */ - __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. - * Stored in network byte order. - */ - __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. - * Stored in network byte order. - */ - __u32 user_port; /* Allows 4-byte read and write. - * Stored in network byte order - */ - __u32 family; /* Allows 4-byte read, but no write */ - __u32 type; /* Allows 4-byte read, but no write */ - __u32 protocol; /* Allows 4-byte read, but no write */ - __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. - * Stored in network byte order. - */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. - * Stored in network byte order. - */ - __bpf_md_ptr(struct bpf_sock *, sk); -}; - -/* User bpf_sock_ops struct to access socket values and specify request ops - * and their replies. - * Some of this fields are in network (bigendian) byte order and may need - * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). - * New fields can only be added at the end of this structure - */ -struct bpf_sock_ops { - __u32 op; - union { - __u32 args[4]; /* Optionally passed to bpf program */ - __u32 reply; /* Returned by bpf program */ - __u32 replylong[4]; /* Optionally returned by bpf prog */ - }; - __u32 family; - __u32 remote_ip4; /* Stored in network byte order */ - __u32 local_ip4; /* Stored in network byte order */ - __u32 remote_ip6[4]; /* Stored in network byte order */ - __u32 local_ip6[4]; /* Stored in network byte order */ - __u32 remote_port; /* Stored in network byte order */ - __u32 local_port; /* stored in host byte order */ - __u32 is_fullsock; /* Some TCP fields are only valid if - * there is a full socket. If not, the - * fields read as zero. 
- */ - __u32 snd_cwnd; - __u32 srtt_us; /* Averaged RTT << 3 in usecs */ - __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ - __u32 state; - __u32 rtt_min; - __u32 snd_ssthresh; - __u32 rcv_nxt; - __u32 snd_nxt; - __u32 snd_una; - __u32 mss_cache; - __u32 ecn_flags; - __u32 rate_delivered; - __u32 rate_interval_us; - __u32 packets_out; - __u32 retrans_out; - __u32 total_retrans; - __u32 segs_in; - __u32 data_segs_in; - __u32 segs_out; - __u32 data_segs_out; - __u32 lost_out; - __u32 sacked_out; - __u32 sk_txhash; - __u64 bytes_received; - __u64 bytes_acked; - __bpf_md_ptr(struct bpf_sock *, sk); -}; - -/* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ - -/* List of known BPF sock_ops operators. - * New entries can only be added at the end - */ -enum { - BPF_SOCK_OPS_VOID, - BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or - * -1 if default value should be used - */ - BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized - * window (in packets) or -1 if default - * value should be used - */ - BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an - * active connection is initialized - */ - BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an - * active connection is - * established - */ - BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a - * passive connection is - * established - */ - BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control - * needs ECN - */ - BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is - * based on the path and may be - * dependent on the congestion control - * algorithm. In general it indicates - * a congestion threshold. RTTs above - * this indicate congestion - */ - BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. - * Arg1: value of icsk_retransmits - * Arg2: value of icsk_rto - * Arg3: whether RTO has expired - */ - BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. - * Arg1: sequence number of 1st byte - * Arg2: # segments - * Arg3: return value of - * tcp_transmit_skb (0 => success) - */ - BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. - * Arg1: old_state - * Arg2: new_state - */ - BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after - * socket transition to LISTEN state. - */ - BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. - */ -}; - -/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect - * changes between the TCP and BPF versions. Ideally this should never happen. - * If it does, we need to add code to convert them before calling - * the BPF sock_ops function. - */ -enum { - BPF_TCP_ESTABLISHED = 1, - BPF_TCP_SYN_SENT, - BPF_TCP_SYN_RECV, - BPF_TCP_FIN_WAIT1, - BPF_TCP_FIN_WAIT2, - BPF_TCP_TIME_WAIT, - BPF_TCP_CLOSE, - BPF_TCP_CLOSE_WAIT, - BPF_TCP_LAST_ACK, - BPF_TCP_LISTEN, - BPF_TCP_CLOSING, /* Now a valid state */ - BPF_TCP_NEW_SYN_RECV, - - BPF_TCP_MAX_STATES /* Leave at the end! 
*/ -}; - -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ - -struct bpf_perf_event_value { - __u64 counter; - __u64 enabled; - __u64 running; -}; - -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) - -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) - -struct bpf_cgroup_dev_ctx { - /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ - __u32 access_type; - __u32 major; - __u32 minor; -}; - -struct bpf_raw_tracepoint_args { - __u64 args[0]; -}; - -/* DIRECT: Skip the FIB rules and go to FIB table associated with device - * OUTPUT: Do lookup from egress perspective; default is ingress - */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) - -enum { - BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ - BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ - BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ - BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ - BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ - BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ - BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ - BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ - BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ -}; - -struct bpf_fib_lookup { - /* input: network family for lookup (AF_INET, AF_INET6) - * output: network family of egress nexthop - */ - __u8 family; - - /* set if lookup is to consider L4 data - e.g., FIB rules */ - __u8 l4_protocol; - __be16 sport; - __be16 dport; - - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; - - /* input: L3 device index for lookup - * output: device index from FIB lookup - */ - __u32 ifindex; - - union { - /* inputs to lookup */ - __u8 tos; /* AF_INET */ - __be32 flowinfo; /* AF_INET6, flow_label + priority */ - - /* output: metric of fib result (IPv4/IPv6 only) */ - __u32 rt_metric; - }; - - union { - __be32 ipv4_src; - __u32 ipv6_src[4]; /* in6_addr; network order */ - }; - - /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in - * network header. 
output: bpf_fib_lookup sets to gateway address - * if FIB lookup returns gateway route - */ - union { - __be32 ipv4_dst; - __u32 ipv6_dst[4]; /* in6_addr; network order */ - }; - - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - __u8 smac[6]; /* ETH_ALEN */ - __u8 dmac[6]; /* ETH_ALEN */ -}; - -enum bpf_task_fd_type { - BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ - BPF_FD_TYPE_TRACEPOINT, /* tp name */ - BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ - BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ - BPF_FD_TYPE_UPROBE, /* filename + offset */ - BPF_FD_TYPE_URETPROBE, /* filename + offset */ -}; - -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) - -struct bpf_flow_keys { - __u16 nhoff; - __u16 thoff; - __u16 addr_proto; /* ETH_P_* of valid addrs */ - __u8 is_frag; - __u8 is_first_frag; - __u8 is_encap; - __u8 ip_proto; - __be16 n_proto; - __be16 sport; - __be16 dport; - union { - struct { - __be32 ipv4_src; - __be32 ipv4_dst; - }; - struct { - __u32 ipv6_src[4]; /* in6_addr; network order */ - __u32 ipv6_dst[4]; /* in6_addr; network order */ - }; - }; - __u32 flags; - __be32 flow_label; -}; - -struct bpf_func_info { - __u32 insn_off; - __u32 type_id; -}; - -#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) -#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) - -struct bpf_line_info { - __u32 insn_off; - __u32 file_name_off; - __u32 line_off; - __u32 line_col; -}; - -struct bpf_spin_lock { - __u32 val; -}; - -struct bpf_sysctl { - __u32 write; /* Sysctl is being read (= 0) or written (= 1). - * Allows 1,2,4-byte read, but no write. - */ - __u32 file_pos; /* Sysctl file position to read from, write to. - * Allows 1,2,4-byte read an 4-byte write. 
- */ -}; - -struct bpf_sockopt { - __bpf_md_ptr(struct bpf_sock *, sk); - __bpf_md_ptr(void *, optval); - __bpf_md_ptr(void *, optval_end); - - __s32 level; - __s32 optname; - __s32 optlen; - __s32 retval; -}; - -#endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/src/contrib/libbpf/include/uapi/linux/bpf_common.h b/src/contrib/libbpf/include/uapi/linux/bpf_common.h deleted file mode 100644 index ee97668bd..000000000 --- a/src/contrib/libbpf/include/uapi/linux/bpf_common.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__LINUX_BPF_COMMON_H__ -#define _UAPI__LINUX_BPF_COMMON_H__ - -/* Instruction classes */ -#define BPF_CLASS(code) ((code) & 0x07) -#define BPF_LD 0x00 -#define BPF_LDX 0x01 -#define BPF_ST 0x02 -#define BPF_STX 0x03 -#define BPF_ALU 0x04 -#define BPF_JMP 0x05 -#define BPF_RET 0x06 -#define BPF_MISC 0x07 - -/* ld/ldx fields */ -#define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 /* 32-bit */ -#define BPF_H 0x08 /* 16-bit */ -#define BPF_B 0x10 /* 8-bit */ -/* eBPF BPF_DW 0x18 64-bit */ -#define BPF_MODE(code) ((code) & 0xe0) -#define BPF_IMM 0x00 -#define BPF_ABS 0x20 -#define BPF_IND 0x40 -#define BPF_MEM 0x60 -#define BPF_LEN 0x80 -#define BPF_MSH 0xa0 - -/* alu/jmp fields */ -#define BPF_OP(code) ((code) & 0xf0) -#define BPF_ADD 0x00 -#define BPF_SUB 0x10 -#define BPF_MUL 0x20 -#define BPF_DIV 0x30 -#define BPF_OR 0x40 -#define BPF_AND 0x50 -#define BPF_LSH 0x60 -#define BPF_RSH 0x70 -#define BPF_NEG 0x80 -#define BPF_MOD 0x90 -#define BPF_XOR 0xa0 - -#define BPF_JA 0x00 -#define BPF_JEQ 0x10 -#define BPF_JGT 0x20 -#define BPF_JGE 0x30 -#define BPF_JSET 0x40 -#define BPF_SRC(code) ((code) & 0x08) -#define BPF_K 0x00 -#define BPF_X 0x08 - -#ifndef BPF_MAXINSNS -#define BPF_MAXINSNS 4096 -#endif - -#endif /* _UAPI__LINUX_BPF_COMMON_H__ */ diff --git a/src/contrib/libbpf/include/uapi/linux/btf.h b/src/contrib/libbpf/include/uapi/linux/btf.h deleted file mode 100644 index 63ae4a39e..000000000 --- a/src/contrib/libbpf/include/uapi/linux/btf.h +++ /dev/null @@ -1,165 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* Copyright (c) 2018 Facebook */ -#ifndef _UAPI__LINUX_BTF_H__ -#define _UAPI__LINUX_BTF_H__ - -#include <linux/types.h> - -#define BTF_MAGIC 0xeB9F -#define BTF_VERSION 1 - -struct btf_header { - __u16 magic; - __u8 version; - __u8 flags; - __u32 hdr_len; - - /* All offsets are in bytes relative to the end of this header */ - __u32 type_off; /* offset of type section */ - __u32 type_len; /* length of type section */ - __u32 str_off; /* offset of string section */ - __u32 str_len; /* length of string section */ -}; - -/* Max # of type identifier */ -#define BTF_MAX_TYPE 0x0000ffff -/* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x0000ffff -/* Max # of struct/union/enum members or func args */ -#define BTF_MAX_VLEN 0xffff - -struct btf_type { - __u32 name_off; - /* "info" bits arrangement - * bits 0-15: vlen (e.g. # of struct's members) - * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused - * bit 31: kind_flag, currently used by - * struct, union and fwd - */ - __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. - * "size" tells the size of the type it is describing. - * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO and VAR. - * "type" is a type_id referring to another type. 
- */ - union { - __u32 size; - __u32 type; - }; -}; - -#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) -#define BTF_INFO_VLEN(info) ((info) & 0xffff) -#define BTF_INFO_KFLAG(info) ((info) >> 31) - -#define BTF_KIND_UNKN 0 /* Unknown */ -#define BTF_KIND_INT 1 /* Integer */ -#define BTF_KIND_PTR 2 /* Pointer */ -#define BTF_KIND_ARRAY 3 /* Array */ -#define BTF_KIND_STRUCT 4 /* Struct */ -#define BTF_KIND_UNION 5 /* Union */ -#define BTF_KIND_ENUM 6 /* Enumeration */ -#define BTF_KIND_FWD 7 /* Forward */ -#define BTF_KIND_TYPEDEF 8 /* Typedef */ -#define BTF_KIND_VOLATILE 9 /* Volatile */ -#define BTF_KIND_CONST 10 /* Const */ -#define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_FUNC 12 /* Function */ -#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ -#define BTF_KIND_VAR 14 /* Variable */ -#define BTF_KIND_DATASEC 15 /* Section */ -#define BTF_KIND_MAX BTF_KIND_DATASEC -#define NR_BTF_KINDS (BTF_KIND_MAX + 1) - -/* For some specific BTF_KIND, "struct btf_type" is immediately - * followed by extra data. - */ - -/* BTF_KIND_INT is followed by a u32 and the following - * is the 32 bits arrangement: - */ -#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) -#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) -#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) - -/* Attributes stored in the BTF_INT_ENCODING */ -#define BTF_INT_SIGNED (1 << 0) -#define BTF_INT_CHAR (1 << 1) -#define BTF_INT_BOOL (1 << 2) - -/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". - * The exact number of btf_enum is stored in the vlen (of the - * info in "struct btf_type"). - */ -struct btf_enum { - __u32 name_off; - __s32 val; -}; - -/* BTF_KIND_ARRAY is followed by one "struct btf_array" */ -struct btf_array { - __u32 type; - __u32 index_type; - __u32 nelems; -}; - -/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed - * by multiple "struct btf_member". The exact number - * of btf_member is stored in the vlen (of the info in - * "struct btf_type"). - */ -struct btf_member { - __u32 name_off; - __u32 type; - /* If the type info kind_flag is set, the btf_member offset - * contains both member bitfield size and bit offset. The - * bitfield size is set for bitfield members. If the type - * info kind_flag is not set, the offset contains only bit - * offset. - */ - __u32 offset; -}; - -/* If the struct/union type info kind_flag is set, the - * following two macros are used to access bitfield_size - * and bit_offset from btf_member.offset. - */ -#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) -#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) - -/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". - * The exact number of btf_param is stored in the vlen (of the - * info in "struct btf_type"). - */ -struct btf_param { - __u32 name_off; - __u32 type; -}; - -enum { - BTF_VAR_STATIC = 0, - BTF_VAR_GLOBAL_ALLOCATED, -}; - -/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe - * additional information related to the variable such as its linkage. - */ -struct btf_var { - __u32 linkage; -}; - -/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo" - * to describe all BTF_KIND_VAR types it contains along with it's - * in-section offset as well as size. 
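[Annotation: the BTF_INT_* macros above unpack the single __u32 that follows a BTF_KIND_INT btf_type, which holds the encoding flags, bit offset, and bit width of the integer type. A small self-contained decoding sketch; the macros are copied from the deleted header, and the sample value is hypothetical:

    #include <stdio.h>

    typedef unsigned int __u32;

    /* Copied from the deleted include/uapi/linux/btf.h */
    #define BTF_INT_ENCODING(VAL)   (((VAL) & 0x0f000000) >> 24)
    #define BTF_INT_OFFSET(VAL)     (((VAL) & 0x00ff0000) >> 16)
    #define BTF_INT_BITS(VAL)       ((VAL) & 0x000000ff)

    #define BTF_INT_SIGNED  (1 << 0)
    #define BTF_INT_CHAR    (1 << 1)
    #define BTF_INT_BOOL    (1 << 2)

    int main(void)
    {
            /* Hypothetical descriptor: signed, bit offset 0, 32 bits wide,
             * i.e. a plain "int". */
            __u32 val = (BTF_INT_SIGNED << 24) | (0 << 16) | 32;

            printf("encoding=%#x offset=%u bits=%u signed=%s\n",
                   BTF_INT_ENCODING(val), BTF_INT_OFFSET(val),
                   BTF_INT_BITS(val),
                   (BTF_INT_ENCODING(val) & BTF_INT_SIGNED) ? "yes" : "no");
            return 0;
    }
]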
- */ -struct btf_var_secinfo { - __u32 type; - __u32 offset; - __u32 size; -}; - -#endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/src/contrib/libbpf/include/uapi/linux/if_link.h b/src/contrib/libbpf/include/uapi/linux/if_link.h deleted file mode 100644 index 8aec8769d..000000000 --- a/src/contrib/libbpf/include/uapi/linux/if_link.h +++ /dev/null @@ -1,1033 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_IF_LINK_H -#define _UAPI_LINUX_IF_LINK_H - -#include <linux/types.h> -#include <linux/netlink.h> - -/* This struct should be in sync with struct rtnl_link_stats64 */ -struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ - __u32 collisions; - - /* detailed rx_errors: */ - __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - __u32 tx_aborted_errors; - __u32 tx_carrier_errors; - __u32 tx_fifo_errors; - __u32 tx_heartbeat_errors; - __u32 tx_window_errors; - - /* for cslip etc */ - __u32 rx_compressed; - __u32 tx_compressed; - - __u32 rx_nohandler; /* dropped, no handler found */ -}; - -/* The main device statistics structure */ -struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ - __u64 collisions; - - /* detailed rx_errors: */ - __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - __u64 tx_aborted_errors; - __u64 tx_carrier_errors; - __u64 tx_fifo_errors; - __u64 tx_heartbeat_errors; - __u64 tx_window_errors; - - /* for cslip etc */ - __u64 rx_compressed; - __u64 tx_compressed; - - __u64 rx_nohandler; /* dropped, no handler found */ -}; - -/* The struct should be in sync with struct ifmap */ -struct rtnl_link_ifmap { - __u64 mem_start; - __u64 mem_end; - __u64 base_addr; - __u16 irq; - __u8 dma; - __u8 port; -}; - -/* - * IFLA_AF_SPEC - * Contains nested attributes for address family specific attributes. - * Each address family may create a attribute with the address family - * number as type and create its own attribute structure in it. 
- * - * Example: - * [IFLA_AF_SPEC] = { - * [AF_INET] = { - * [IFLA_INET_CONF] = ..., - * }, - * [AF_INET6] = { - * [IFLA_INET6_FLAGS] = ..., - * [IFLA_INET6_CONF] = ..., - * } - * } - */ - -enum { - IFLA_UNSPEC, - IFLA_ADDRESS, - IFLA_BROADCAST, - IFLA_IFNAME, - IFLA_MTU, - IFLA_LINK, - IFLA_QDISC, - IFLA_STATS, - IFLA_COST, -#define IFLA_COST IFLA_COST - IFLA_PRIORITY, -#define IFLA_PRIORITY IFLA_PRIORITY - IFLA_MASTER, -#define IFLA_MASTER IFLA_MASTER - IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ -#define IFLA_WIRELESS IFLA_WIRELESS - IFLA_PROTINFO, /* Protocol specific information for a link */ -#define IFLA_PROTINFO IFLA_PROTINFO - IFLA_TXQLEN, -#define IFLA_TXQLEN IFLA_TXQLEN - IFLA_MAP, -#define IFLA_MAP IFLA_MAP - IFLA_WEIGHT, -#define IFLA_WEIGHT IFLA_WEIGHT - IFLA_OPERSTATE, - IFLA_LINKMODE, - IFLA_LINKINFO, -#define IFLA_LINKINFO IFLA_LINKINFO - IFLA_NET_NS_PID, - IFLA_IFALIAS, - IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ - IFLA_VFINFO_LIST, - IFLA_STATS64, - IFLA_VF_PORTS, - IFLA_PORT_SELF, - IFLA_AF_SPEC, - IFLA_GROUP, /* Group the device belongs to */ - IFLA_NET_NS_FD, - IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ - IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */ -#define IFLA_PROMISCUITY IFLA_PROMISCUITY - IFLA_NUM_TX_QUEUES, - IFLA_NUM_RX_QUEUES, - IFLA_CARRIER, - IFLA_PHYS_PORT_ID, - IFLA_CARRIER_CHANGES, - IFLA_PHYS_SWITCH_ID, - IFLA_LINK_NETNSID, - IFLA_PHYS_PORT_NAME, - IFLA_PROTO_DOWN, - IFLA_GSO_MAX_SEGS, - IFLA_GSO_MAX_SIZE, - IFLA_PAD, - IFLA_XDP, - IFLA_EVENT, - IFLA_NEW_NETNSID, - IFLA_IF_NETNSID, - IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */ - IFLA_CARRIER_UP_COUNT, - IFLA_CARRIER_DOWN_COUNT, - IFLA_NEW_IFINDEX, - IFLA_MIN_MTU, - IFLA_MAX_MTU, - IFLA_PROP_LIST, - IFLA_ALT_IFNAME, /* Alternative ifname */ - __IFLA_MAX -}; - - -#define IFLA_MAX (__IFLA_MAX - 1) - -/* backwards compatibility for userspace */ -#ifndef __KERNEL__ -#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) -#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) -#endif - -enum { - IFLA_INET_UNSPEC, - IFLA_INET_CONF, - __IFLA_INET_MAX, -}; - -#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) - -/* ifi_flags. - - IFF_* flags. - - The only change is: - IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are - more not changeable by user. They describe link media - characteristics and set by device driver. - - Comments: - - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid - - If neither of these three flags are set; - the interface is NBMA. - - - IFF_MULTICAST does not mean anything special: - multicasts can be used on all not-NBMA links. - IFF_MULTICAST means that this media uses special encapsulation - for multicast frames. Apparently, all IFF_POINTOPOINT and - IFF_BROADCAST devices are able to use multicasts too. - */ - -/* IFLA_LINK. - For usual devices it is equal ifi_index. - If it is a "virtual interface" (f.e. tunnel), ifi_link - can point to real physical interface (f.e. for bandwidth calculations), - or maybe 0, what means, that real media is unknown (usual - for IPIP tunnels, when route to endpoint is allowed to change) - */ - -/* Subtype attributes for IFLA_PROTINFO */ -enum { - IFLA_INET6_UNSPEC, - IFLA_INET6_FLAGS, /* link flags */ - IFLA_INET6_CONF, /* sysctl parameters */ - IFLA_INET6_STATS, /* statistics */ - IFLA_INET6_MCAST, /* MC things. What of them? 
*/ - IFLA_INET6_CACHEINFO, /* time values and max reasm size */ - IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ - IFLA_INET6_TOKEN, /* device token */ - IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ - __IFLA_INET6_MAX -}; - -#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) - -enum in6_addr_gen_mode { - IN6_ADDR_GEN_MODE_EUI64, - IN6_ADDR_GEN_MODE_NONE, - IN6_ADDR_GEN_MODE_STABLE_PRIVACY, - IN6_ADDR_GEN_MODE_RANDOM, -}; - -/* Bridge section */ - -enum { - IFLA_BR_UNSPEC, - IFLA_BR_FORWARD_DELAY, - IFLA_BR_HELLO_TIME, - IFLA_BR_MAX_AGE, - IFLA_BR_AGEING_TIME, - IFLA_BR_STP_STATE, - IFLA_BR_PRIORITY, - IFLA_BR_VLAN_FILTERING, - IFLA_BR_VLAN_PROTOCOL, - IFLA_BR_GROUP_FWD_MASK, - IFLA_BR_ROOT_ID, - IFLA_BR_BRIDGE_ID, - IFLA_BR_ROOT_PORT, - IFLA_BR_ROOT_PATH_COST, - IFLA_BR_TOPOLOGY_CHANGE, - IFLA_BR_TOPOLOGY_CHANGE_DETECTED, - IFLA_BR_HELLO_TIMER, - IFLA_BR_TCN_TIMER, - IFLA_BR_TOPOLOGY_CHANGE_TIMER, - IFLA_BR_GC_TIMER, - IFLA_BR_GROUP_ADDR, - IFLA_BR_FDB_FLUSH, - IFLA_BR_MCAST_ROUTER, - IFLA_BR_MCAST_SNOOPING, - IFLA_BR_MCAST_QUERY_USE_IFADDR, - IFLA_BR_MCAST_QUERIER, - IFLA_BR_MCAST_HASH_ELASTICITY, - IFLA_BR_MCAST_HASH_MAX, - IFLA_BR_MCAST_LAST_MEMBER_CNT, - IFLA_BR_MCAST_STARTUP_QUERY_CNT, - IFLA_BR_MCAST_LAST_MEMBER_INTVL, - IFLA_BR_MCAST_MEMBERSHIP_INTVL, - IFLA_BR_MCAST_QUERIER_INTVL, - IFLA_BR_MCAST_QUERY_INTVL, - IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, - IFLA_BR_MCAST_STARTUP_QUERY_INTVL, - IFLA_BR_NF_CALL_IPTABLES, - IFLA_BR_NF_CALL_IP6TABLES, - IFLA_BR_NF_CALL_ARPTABLES, - IFLA_BR_VLAN_DEFAULT_PVID, - IFLA_BR_PAD, - IFLA_BR_VLAN_STATS_ENABLED, - IFLA_BR_MCAST_STATS_ENABLED, - IFLA_BR_MCAST_IGMP_VERSION, - IFLA_BR_MCAST_MLD_VERSION, - IFLA_BR_VLAN_STATS_PER_PORT, - IFLA_BR_MULTI_BOOLOPT, - __IFLA_BR_MAX, -}; - -#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) - -struct ifla_bridge_id { - __u8 prio[2]; - __u8 addr[6]; /* ETH_ALEN */ -}; - -enum { - BRIDGE_MODE_UNSPEC, - BRIDGE_MODE_HAIRPIN, -}; - -enum { - IFLA_BRPORT_UNSPEC, - IFLA_BRPORT_STATE, /* Spanning tree state */ - IFLA_BRPORT_PRIORITY, /* " priority */ - IFLA_BRPORT_COST, /* " cost */ - IFLA_BRPORT_MODE, /* mode (hairpin) */ - IFLA_BRPORT_GUARD, /* bpdu guard */ - IFLA_BRPORT_PROTECT, /* root port protection */ - IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ - IFLA_BRPORT_LEARNING, /* mac learning */ - IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ - IFLA_BRPORT_PROXYARP, /* proxy ARP */ - IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ - IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ - IFLA_BRPORT_ROOT_ID, /* designated root */ - IFLA_BRPORT_BRIDGE_ID, /* designated bridge */ - IFLA_BRPORT_DESIGNATED_PORT, - IFLA_BRPORT_DESIGNATED_COST, - IFLA_BRPORT_ID, - IFLA_BRPORT_NO, - IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, - IFLA_BRPORT_CONFIG_PENDING, - IFLA_BRPORT_MESSAGE_AGE_TIMER, - IFLA_BRPORT_FORWARD_DELAY_TIMER, - IFLA_BRPORT_HOLD_TIMER, - IFLA_BRPORT_FLUSH, - IFLA_BRPORT_MULTICAST_ROUTER, - IFLA_BRPORT_PAD, - IFLA_BRPORT_MCAST_FLOOD, - IFLA_BRPORT_MCAST_TO_UCAST, - IFLA_BRPORT_VLAN_TUNNEL, - IFLA_BRPORT_BCAST_FLOOD, - IFLA_BRPORT_GROUP_FWD_MASK, - IFLA_BRPORT_NEIGH_SUPPRESS, - IFLA_BRPORT_ISOLATED, - IFLA_BRPORT_BACKUP_PORT, - __IFLA_BRPORT_MAX -}; -#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) - -struct ifla_cacheinfo { - __u32 max_reasm_len; - __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ - __u32 reachable_time; - __u32 retrans_time; -}; - -enum { - IFLA_INFO_UNSPEC, - IFLA_INFO_KIND, - IFLA_INFO_DATA, - IFLA_INFO_XSTATS, - IFLA_INFO_SLAVE_KIND, - IFLA_INFO_SLAVE_DATA, - 
__IFLA_INFO_MAX, -}; - -#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) - -/* VLAN section */ - -enum { - IFLA_VLAN_UNSPEC, - IFLA_VLAN_ID, - IFLA_VLAN_FLAGS, - IFLA_VLAN_EGRESS_QOS, - IFLA_VLAN_INGRESS_QOS, - IFLA_VLAN_PROTOCOL, - __IFLA_VLAN_MAX, -}; - -#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) - -struct ifla_vlan_flags { - __u32 flags; - __u32 mask; -}; - -enum { - IFLA_VLAN_QOS_UNSPEC, - IFLA_VLAN_QOS_MAPPING, - __IFLA_VLAN_QOS_MAX -}; - -#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) - -struct ifla_vlan_qos_mapping { - __u32 from; - __u32 to; -}; - -/* MACVLAN section */ -enum { - IFLA_MACVLAN_UNSPEC, - IFLA_MACVLAN_MODE, - IFLA_MACVLAN_FLAGS, - IFLA_MACVLAN_MACADDR_MODE, - IFLA_MACVLAN_MACADDR, - IFLA_MACVLAN_MACADDR_DATA, - IFLA_MACVLAN_MACADDR_COUNT, - __IFLA_MACVLAN_MAX, -}; - -#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1) - -enum macvlan_mode { - MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ - MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ - MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ - MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ - MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */ -}; - -enum macvlan_macaddr_mode { - MACVLAN_MACADDR_ADD, - MACVLAN_MACADDR_DEL, - MACVLAN_MACADDR_FLUSH, - MACVLAN_MACADDR_SET, -}; - -#define MACVLAN_FLAG_NOPROMISC 1 - -/* VRF section */ -enum { - IFLA_VRF_UNSPEC, - IFLA_VRF_TABLE, - __IFLA_VRF_MAX -}; - -#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) - -enum { - IFLA_VRF_PORT_UNSPEC, - IFLA_VRF_PORT_TABLE, - __IFLA_VRF_PORT_MAX -}; - -#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) - -/* MACSEC section */ -enum { - IFLA_MACSEC_UNSPEC, - IFLA_MACSEC_SCI, - IFLA_MACSEC_PORT, - IFLA_MACSEC_ICV_LEN, - IFLA_MACSEC_CIPHER_SUITE, - IFLA_MACSEC_WINDOW, - IFLA_MACSEC_ENCODING_SA, - IFLA_MACSEC_ENCRYPT, - IFLA_MACSEC_PROTECT, - IFLA_MACSEC_INC_SCI, - IFLA_MACSEC_ES, - IFLA_MACSEC_SCB, - IFLA_MACSEC_REPLAY_PROTECT, - IFLA_MACSEC_VALIDATION, - IFLA_MACSEC_PAD, - __IFLA_MACSEC_MAX, -}; - -#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) - -/* XFRM section */ -enum { - IFLA_XFRM_UNSPEC, - IFLA_XFRM_LINK, - IFLA_XFRM_IF_ID, - __IFLA_XFRM_MAX -}; - -#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) - -enum macsec_validation_type { - MACSEC_VALIDATE_DISABLED = 0, - MACSEC_VALIDATE_CHECK = 1, - MACSEC_VALIDATE_STRICT = 2, - __MACSEC_VALIDATE_END, - MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, -}; - -/* IPVLAN section */ -enum { - IFLA_IPVLAN_UNSPEC, - IFLA_IPVLAN_MODE, - IFLA_IPVLAN_FLAGS, - __IFLA_IPVLAN_MAX -}; - -#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) - -enum ipvlan_mode { - IPVLAN_MODE_L2 = 0, - IPVLAN_MODE_L3, - IPVLAN_MODE_L3S, - IPVLAN_MODE_MAX -}; - -#define IPVLAN_F_PRIVATE 0x01 -#define IPVLAN_F_VEPA 0x02 - -/* VXLAN section */ -enum { - IFLA_VXLAN_UNSPEC, - IFLA_VXLAN_ID, - IFLA_VXLAN_GROUP, /* group or remote address */ - IFLA_VXLAN_LINK, - IFLA_VXLAN_LOCAL, - IFLA_VXLAN_TTL, - IFLA_VXLAN_TOS, - IFLA_VXLAN_LEARNING, - IFLA_VXLAN_AGEING, - IFLA_VXLAN_LIMIT, - IFLA_VXLAN_PORT_RANGE, /* source port */ - IFLA_VXLAN_PROXY, - IFLA_VXLAN_RSC, - IFLA_VXLAN_L2MISS, - IFLA_VXLAN_L3MISS, - IFLA_VXLAN_PORT, /* destination port */ - IFLA_VXLAN_GROUP6, - IFLA_VXLAN_LOCAL6, - IFLA_VXLAN_UDP_CSUM, - IFLA_VXLAN_UDP_ZERO_CSUM6_TX, - IFLA_VXLAN_UDP_ZERO_CSUM6_RX, - IFLA_VXLAN_REMCSUM_TX, - IFLA_VXLAN_REMCSUM_RX, - IFLA_VXLAN_GBP, - IFLA_VXLAN_REMCSUM_NOPARTIAL, - IFLA_VXLAN_COLLECT_METADATA, - IFLA_VXLAN_LABEL, - IFLA_VXLAN_GPE, - IFLA_VXLAN_TTL_INHERIT, - IFLA_VXLAN_DF, 
- __IFLA_VXLAN_MAX -}; -#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) - -struct ifla_vxlan_port_range { - __be16 low; - __be16 high; -}; - -enum ifla_vxlan_df { - VXLAN_DF_UNSET = 0, - VXLAN_DF_SET, - VXLAN_DF_INHERIT, - __VXLAN_DF_END, - VXLAN_DF_MAX = __VXLAN_DF_END - 1, -}; - -/* GENEVE section */ -enum { - IFLA_GENEVE_UNSPEC, - IFLA_GENEVE_ID, - IFLA_GENEVE_REMOTE, - IFLA_GENEVE_TTL, - IFLA_GENEVE_TOS, - IFLA_GENEVE_PORT, /* destination port */ - IFLA_GENEVE_COLLECT_METADATA, - IFLA_GENEVE_REMOTE6, - IFLA_GENEVE_UDP_CSUM, - IFLA_GENEVE_UDP_ZERO_CSUM6_TX, - IFLA_GENEVE_UDP_ZERO_CSUM6_RX, - IFLA_GENEVE_LABEL, - IFLA_GENEVE_TTL_INHERIT, - IFLA_GENEVE_DF, - __IFLA_GENEVE_MAX -}; -#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) - -enum ifla_geneve_df { - GENEVE_DF_UNSET = 0, - GENEVE_DF_SET, - GENEVE_DF_INHERIT, - __GENEVE_DF_END, - GENEVE_DF_MAX = __GENEVE_DF_END - 1, -}; - -/* PPP section */ -enum { - IFLA_PPP_UNSPEC, - IFLA_PPP_DEV_FD, - __IFLA_PPP_MAX -}; -#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) - -/* GTP section */ - -enum ifla_gtp_role { - GTP_ROLE_GGSN = 0, - GTP_ROLE_SGSN, -}; - -enum { - IFLA_GTP_UNSPEC, - IFLA_GTP_FD0, - IFLA_GTP_FD1, - IFLA_GTP_PDP_HASHSIZE, - IFLA_GTP_ROLE, - __IFLA_GTP_MAX, -}; -#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) - -/* Bonding section */ - -enum { - IFLA_BOND_UNSPEC, - IFLA_BOND_MODE, - IFLA_BOND_ACTIVE_SLAVE, - IFLA_BOND_MIIMON, - IFLA_BOND_UPDELAY, - IFLA_BOND_DOWNDELAY, - IFLA_BOND_USE_CARRIER, - IFLA_BOND_ARP_INTERVAL, - IFLA_BOND_ARP_IP_TARGET, - IFLA_BOND_ARP_VALIDATE, - IFLA_BOND_ARP_ALL_TARGETS, - IFLA_BOND_PRIMARY, - IFLA_BOND_PRIMARY_RESELECT, - IFLA_BOND_FAIL_OVER_MAC, - IFLA_BOND_XMIT_HASH_POLICY, - IFLA_BOND_RESEND_IGMP, - IFLA_BOND_NUM_PEER_NOTIF, - IFLA_BOND_ALL_SLAVES_ACTIVE, - IFLA_BOND_MIN_LINKS, - IFLA_BOND_LP_INTERVAL, - IFLA_BOND_PACKETS_PER_SLAVE, - IFLA_BOND_AD_LACP_RATE, - IFLA_BOND_AD_SELECT, - IFLA_BOND_AD_INFO, - IFLA_BOND_AD_ACTOR_SYS_PRIO, - IFLA_BOND_AD_USER_PORT_KEY, - IFLA_BOND_AD_ACTOR_SYSTEM, - IFLA_BOND_TLB_DYNAMIC_LB, - IFLA_BOND_PEER_NOTIF_DELAY, - __IFLA_BOND_MAX, -}; - -#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) - -enum { - IFLA_BOND_AD_INFO_UNSPEC, - IFLA_BOND_AD_INFO_AGGREGATOR, - IFLA_BOND_AD_INFO_NUM_PORTS, - IFLA_BOND_AD_INFO_ACTOR_KEY, - IFLA_BOND_AD_INFO_PARTNER_KEY, - IFLA_BOND_AD_INFO_PARTNER_MAC, - __IFLA_BOND_AD_INFO_MAX, -}; - -#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) - -enum { - IFLA_BOND_SLAVE_UNSPEC, - IFLA_BOND_SLAVE_STATE, - IFLA_BOND_SLAVE_MII_STATUS, - IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, - IFLA_BOND_SLAVE_PERM_HWADDR, - IFLA_BOND_SLAVE_QUEUE_ID, - IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, - IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, - IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, - __IFLA_BOND_SLAVE_MAX, -}; - -#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1) - -/* SR-IOV virtual function management section */ - -enum { - IFLA_VF_INFO_UNSPEC, - IFLA_VF_INFO, - __IFLA_VF_INFO_MAX, -}; - -#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) - -enum { - IFLA_VF_UNSPEC, - IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, /* VLAN ID and QoS */ - IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ - IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ - IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ - IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ - IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query - * on/off switch - */ - IFLA_VF_STATS, /* network device statistics */ - IFLA_VF_TRUST, /* Trust VF */ - IFLA_VF_IB_NODE_GUID, /* 
VF Infiniband node GUID */ - IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ - IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ - IFLA_VF_BROADCAST, /* VF broadcast */ - __IFLA_VF_MAX, -}; - -#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) - -struct ifla_vf_mac { - __u32 vf; - __u8 mac[32]; /* MAX_ADDR_LEN */ -}; - -struct ifla_vf_broadcast { - __u8 broadcast[32]; -}; - -struct ifla_vf_vlan { - __u32 vf; - __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ - __u32 qos; -}; - -enum { - IFLA_VF_VLAN_INFO_UNSPEC, - IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ - __IFLA_VF_VLAN_INFO_MAX, -}; - -#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) -#define MAX_VLAN_LIST_LEN 1 - -struct ifla_vf_vlan_info { - __u32 vf; - __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ - __u32 qos; - __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ -}; - -struct ifla_vf_tx_rate { - __u32 vf; - __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ -}; - -struct ifla_vf_rate { - __u32 vf; - __u32 min_tx_rate; /* Min Bandwidth in Mbps */ - __u32 max_tx_rate; /* Max Bandwidth in Mbps */ -}; - -struct ifla_vf_spoofchk { - __u32 vf; - __u32 setting; -}; - -struct ifla_vf_guid { - __u32 vf; - __u64 guid; -}; - -enum { - IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ - IFLA_VF_LINK_STATE_ENABLE, /* link always up */ - IFLA_VF_LINK_STATE_DISABLE, /* link always down */ - __IFLA_VF_LINK_STATE_MAX, -}; - -struct ifla_vf_link_state { - __u32 vf; - __u32 link_state; -}; - -struct ifla_vf_rss_query_en { - __u32 vf; - __u32 setting; -}; - -enum { - IFLA_VF_STATS_RX_PACKETS, - IFLA_VF_STATS_TX_PACKETS, - IFLA_VF_STATS_RX_BYTES, - IFLA_VF_STATS_TX_BYTES, - IFLA_VF_STATS_BROADCAST, - IFLA_VF_STATS_MULTICAST, - IFLA_VF_STATS_PAD, - IFLA_VF_STATS_RX_DROPPED, - IFLA_VF_STATS_TX_DROPPED, - __IFLA_VF_STATS_MAX, -}; - -#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) - -struct ifla_vf_trust { - __u32 vf; - __u32 setting; -}; - -/* VF ports management section - * - * Nested layout of set/get msg is: - * - * [IFLA_NUM_VF] - * [IFLA_VF_PORTS] - * [IFLA_VF_PORT] - * [IFLA_PORT_*], ... - * [IFLA_VF_PORT] - * [IFLA_PORT_*], ... - * ... - * [IFLA_PORT_SELF] - * [IFLA_PORT_*], ... 
- */ - -enum { - IFLA_VF_PORT_UNSPEC, - IFLA_VF_PORT, /* nest */ - __IFLA_VF_PORT_MAX, -}; - -#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1) - -enum { - IFLA_PORT_UNSPEC, - IFLA_PORT_VF, /* __u32 */ - IFLA_PORT_PROFILE, /* string */ - IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */ - IFLA_PORT_INSTANCE_UUID, /* binary UUID */ - IFLA_PORT_HOST_UUID, /* binary UUID */ - IFLA_PORT_REQUEST, /* __u8 */ - IFLA_PORT_RESPONSE, /* __u16, output only */ - __IFLA_PORT_MAX, -}; - -#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1) - -#define PORT_PROFILE_MAX 40 -#define PORT_UUID_MAX 16 -#define PORT_SELF_VF -1 - -enum { - PORT_REQUEST_PREASSOCIATE = 0, - PORT_REQUEST_PREASSOCIATE_RR, - PORT_REQUEST_ASSOCIATE, - PORT_REQUEST_DISASSOCIATE, -}; - -enum { - PORT_VDP_RESPONSE_SUCCESS = 0, - PORT_VDP_RESPONSE_INVALID_FORMAT, - PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES, - PORT_VDP_RESPONSE_UNUSED_VTID, - PORT_VDP_RESPONSE_VTID_VIOLATION, - PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION, - PORT_VDP_RESPONSE_OUT_OF_SYNC, - /* 0x08-0xFF reserved for future VDP use */ - PORT_PROFILE_RESPONSE_SUCCESS = 0x100, - PORT_PROFILE_RESPONSE_INPROGRESS, - PORT_PROFILE_RESPONSE_INVALID, - PORT_PROFILE_RESPONSE_BADSTATE, - PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES, - PORT_PROFILE_RESPONSE_ERROR, -}; - -struct ifla_port_vsi { - __u8 vsi_mgr_id; - __u8 vsi_type_id[3]; - __u8 vsi_type_version; - __u8 pad[3]; -}; - - -/* IPoIB section */ - -enum { - IFLA_IPOIB_UNSPEC, - IFLA_IPOIB_PKEY, - IFLA_IPOIB_MODE, - IFLA_IPOIB_UMCAST, - __IFLA_IPOIB_MAX -}; - -enum { - IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */ - IPOIB_MODE_CONNECTED = 1, /* using connected QPs */ -}; - -#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) - - -/* HSR section */ - -enum { - IFLA_HSR_UNSPEC, - IFLA_HSR_SLAVE1, - IFLA_HSR_SLAVE2, - IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ - IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ - IFLA_HSR_SEQ_NR, - IFLA_HSR_VERSION, /* HSR version */ - __IFLA_HSR_MAX, -}; - -#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) - -/* STATS section */ - -struct if_stats_msg { - __u8 family; - __u8 pad1; - __u16 pad2; - __u32 ifindex; - __u32 filter_mask; -}; - -/* A stats attribute can be netdev specific or a global stat. 
- * For netdev stats, lets use the prefix IFLA_STATS_LINK_* - */ -enum { - IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ - IFLA_STATS_LINK_64, - IFLA_STATS_LINK_XSTATS, - IFLA_STATS_LINK_XSTATS_SLAVE, - IFLA_STATS_LINK_OFFLOAD_XSTATS, - IFLA_STATS_AF_SPEC, - __IFLA_STATS_MAX, -}; - -#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1) - -#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) - -/* These are embedded into IFLA_STATS_LINK_XSTATS: - * [IFLA_STATS_LINK_XSTATS] - * -> [LINK_XSTATS_TYPE_xxx] - * -> [rtnl link type specific attributes] - */ -enum { - LINK_XSTATS_TYPE_UNSPEC, - LINK_XSTATS_TYPE_BRIDGE, - LINK_XSTATS_TYPE_BOND, - __LINK_XSTATS_TYPE_MAX -}; -#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) - -/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ -enum { - IFLA_OFFLOAD_XSTATS_UNSPEC, - IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ - __IFLA_OFFLOAD_XSTATS_MAX -}; -#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) - -/* XDP section */ - -#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -#define XDP_FLAGS_SKB_MODE (1U << 1) -#define XDP_FLAGS_DRV_MODE (1U << 2) -#define XDP_FLAGS_HW_MODE (1U << 3) -#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ - XDP_FLAGS_DRV_MODE | \ - XDP_FLAGS_HW_MODE) -#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_MODES) - -/* These are stored into IFLA_XDP_ATTACHED on dump. */ -enum { - XDP_ATTACHED_NONE = 0, - XDP_ATTACHED_DRV, - XDP_ATTACHED_SKB, - XDP_ATTACHED_HW, - XDP_ATTACHED_MULTI, -}; - -enum { - IFLA_XDP_UNSPEC, - IFLA_XDP_FD, - IFLA_XDP_ATTACHED, - IFLA_XDP_FLAGS, - IFLA_XDP_PROG_ID, - IFLA_XDP_DRV_PROG_ID, - IFLA_XDP_SKB_PROG_ID, - IFLA_XDP_HW_PROG_ID, - __IFLA_XDP_MAX, -}; - -#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) - -enum { - IFLA_EVENT_NONE, - IFLA_EVENT_REBOOT, /* internal reset / reboot */ - IFLA_EVENT_FEATURES, /* change in offload features */ - IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */ - IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */ - IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */ - IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ -}; - -/* tun section */ - -enum { - IFLA_TUN_UNSPEC, - IFLA_TUN_OWNER, - IFLA_TUN_GROUP, - IFLA_TUN_TYPE, - IFLA_TUN_PI, - IFLA_TUN_VNET_HDR, - IFLA_TUN_PERSIST, - IFLA_TUN_MULTI_QUEUE, - IFLA_TUN_NUM_QUEUES, - IFLA_TUN_NUM_DISABLED_QUEUES, - __IFLA_TUN_MAX, -}; - -#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) - -/* rmnet section */ - -#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) -#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) -#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) -#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) - -enum { - IFLA_RMNET_UNSPEC, - IFLA_RMNET_MUX_ID, - IFLA_RMNET_FLAGS, - __IFLA_RMNET_MAX, -}; - -#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) - -struct ifla_rmnet_flags { - __u32 flags; - __u32 mask; -}; - -#endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/src/contrib/libbpf/include/uapi/linux/if_xdp.h b/src/contrib/libbpf/include/uapi/linux/if_xdp.h deleted file mode 100644 index be328c593..000000000 --- a/src/contrib/libbpf/include/uapi/linux/if_xdp.h +++ /dev/null @@ -1,108 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * if_xdp: XDP socket user-space interface - * Copyright(c) 2018 Intel Corporation. 
- * - * Author(s): Björn Töpel <bjorn.topel@intel.com> - * Magnus Karlsson <magnus.karlsson@intel.com> - */ - -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H - -#include <linux/types.h> - -/* Options for the sxdp_flags field */ -#define XDP_SHARED_UMEM (1 << 0) -#define XDP_COPY (1 << 1) /* Force copy-mode */ -#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */ -/* If this option is set, the driver might go sleep and in that case - * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be - * set. If it is set, the application need to explicitly wake up the - * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are - * running the driver and the application on the same core, you should - * use this option so that the kernel will yield to the user space - * application. - */ -#define XDP_USE_NEED_WAKEUP (1 << 3) - -/* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) - -struct sockaddr_xdp { - __u16 sxdp_family; - __u16 sxdp_flags; - __u32 sxdp_ifindex; - __u32 sxdp_queue_id; - __u32 sxdp_shared_umem_fd; -}; - -/* XDP_RING flags */ -#define XDP_RING_NEED_WAKEUP (1 << 0) - -struct xdp_ring_offset { - __u64 producer; - __u64 consumer; - __u64 desc; - __u64 flags; -}; - -struct xdp_mmap_offsets { - struct xdp_ring_offset rx; - struct xdp_ring_offset tx; - struct xdp_ring_offset fr; /* Fill */ - struct xdp_ring_offset cr; /* Completion */ -}; - -/* XDP socket options */ -#define XDP_MMAP_OFFSETS 1 -#define XDP_RX_RING 2 -#define XDP_TX_RING 3 -#define XDP_UMEM_REG 4 -#define XDP_UMEM_FILL_RING 5 -#define XDP_UMEM_COMPLETION_RING 6 -#define XDP_STATISTICS 7 -#define XDP_OPTIONS 8 - -struct xdp_umem_reg { - __u64 addr; /* Start of packet data area */ - __u64 len; /* Length of packet data area */ - __u32 chunk_size; - __u32 headroom; - __u32 flags; -}; - -struct xdp_statistics { - __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ - __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ - __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ -}; - -struct xdp_options { - __u32 flags; -}; - -/* Flags for the flags field of struct xdp_options */ -#define XDP_OPTIONS_ZEROCOPY (1 << 0) - -/* Pgoff for mmaping the rings */ -#define XDP_PGOFF_RX_RING 0 -#define XDP_PGOFF_TX_RING 0x80000000 -#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL -#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL - -/* Masks for unaligned chunks mode */ -#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48 -#define XSK_UNALIGNED_BUF_ADDR_MASK \ - ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) - -/* Rx/Tx descriptor */ -struct xdp_desc { - __u64 addr; - __u32 len; - __u32 options; -}; - -/* UMEM descriptor is __u64 */ - -#endif /* _LINUX_IF_XDP_H */ diff --git a/src/contrib/libbpf/include/uapi/linux/netlink.h b/src/contrib/libbpf/include/uapi/linux/netlink.h deleted file mode 100644 index 0a4d73317..000000000 --- a/src/contrib/libbpf/include/uapi/linux/netlink.h +++ /dev/null @@ -1,252 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__LINUX_NETLINK_H -#define _UAPI__LINUX_NETLINK_H - -#include <linux/kernel.h> -#include <linux/socket.h> /* for __kernel_sa_family_t */ -#include <linux/types.h> - -#define NETLINK_ROUTE 0 /* Routing/device hook */ -#define NETLINK_UNUSED 1 /* Unused number */ -#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ -#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ -#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ -#define NETLINK_NFLOG 
5 /* netfilter/iptables ULOG */ -#define NETLINK_XFRM 6 /* ipsec */ -#define NETLINK_SELINUX 7 /* SELinux event notifications */ -#define NETLINK_ISCSI 8 /* Open-iSCSI */ -#define NETLINK_AUDIT 9 /* auditing */ -#define NETLINK_FIB_LOOKUP 10 -#define NETLINK_CONNECTOR 11 -#define NETLINK_NETFILTER 12 /* netfilter subsystem */ -#define NETLINK_IP6_FW 13 -#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ -#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ -#define NETLINK_GENERIC 16 -/* leave room for NETLINK_DM (DM Events) */ -#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ -#define NETLINK_ECRYPTFS 19 -#define NETLINK_RDMA 20 -#define NETLINK_CRYPTO 21 /* Crypto layer */ -#define NETLINK_SMC 22 /* SMC monitoring */ - -#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG - -#define MAX_LINKS 32 - -struct sockaddr_nl { - __kernel_sa_family_t nl_family; /* AF_NETLINK */ - unsigned short nl_pad; /* zero */ - __u32 nl_pid; /* port ID */ - __u32 nl_groups; /* multicast groups mask */ -}; - -struct nlmsghdr { - __u32 nlmsg_len; /* Length of message including header */ - __u16 nlmsg_type; /* Message content */ - __u16 nlmsg_flags; /* Additional flags */ - __u32 nlmsg_seq; /* Sequence number */ - __u32 nlmsg_pid; /* Sending process port ID */ -}; - -/* Flags values */ - -#define NLM_F_REQUEST 0x01 /* It is request message. */ -#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ -#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ -#define NLM_F_ECHO 0x08 /* Echo this request */ -#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ -#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ - -/* Modifiers to GET request */ -#define NLM_F_ROOT 0x100 /* specify tree root */ -#define NLM_F_MATCH 0x200 /* return all matching */ -#define NLM_F_ATOMIC 0x400 /* atomic GET */ -#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) - -/* Modifiers to NEW request */ -#define NLM_F_REPLACE 0x100 /* Override existing */ -#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ -#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ -#define NLM_F_APPEND 0x800 /* Add to end of list */ - -/* Modifiers to DELETE request */ -#define NLM_F_NONREC 0x100 /* Do not delete recursively */ - -/* Flags for ACK message */ -#define NLM_F_CAPPED 0x100 /* request was capped */ -#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ - -/* - 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL - 4.4BSD CHANGE NLM_F_REPLACE - - True CHANGE NLM_F_CREATE|NLM_F_REPLACE - Append NLM_F_CREATE - Check NLM_F_EXCL - */ - -#define NLMSG_ALIGNTO 4U -#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) -#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) -#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) -#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) -#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) -#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ - (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) -#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ - (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ - (nlh)->nlmsg_len <= (len)) -#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) - -#define NLMSG_NOOP 0x1 /* Nothing. 
*/ -#define NLMSG_ERROR 0x2 /* Error */ -#define NLMSG_DONE 0x3 /* End of a dump */ -#define NLMSG_OVERRUN 0x4 /* Data lost */ - -#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ - -struct nlmsgerr { - int error; - struct nlmsghdr msg; - /* - * followed by the message contents unless NETLINK_CAP_ACK was set - * or the ACK indicates success (error == 0) - * message length is aligned with NLMSG_ALIGN() - */ - /* - * followed by TLVs defined in enum nlmsgerr_attrs - * if NETLINK_EXT_ACK was set - */ -}; - -/** - * enum nlmsgerr_attrs - nlmsgerr attributes - * @NLMSGERR_ATTR_UNUSED: unused - * @NLMSGERR_ATTR_MSG: error message string (string) - * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original - * message, counting from the beginning of the header (u32) - * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to - * be used - in the success case - to identify a created - * object or operation or similar (binary) - * @__NLMSGERR_ATTR_MAX: number of attributes - * @NLMSGERR_ATTR_MAX: highest attribute number - */ -enum nlmsgerr_attrs { - NLMSGERR_ATTR_UNUSED, - NLMSGERR_ATTR_MSG, - NLMSGERR_ATTR_OFFS, - NLMSGERR_ATTR_COOKIE, - - __NLMSGERR_ATTR_MAX, - NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 -}; - -#define NETLINK_ADD_MEMBERSHIP 1 -#define NETLINK_DROP_MEMBERSHIP 2 -#define NETLINK_PKTINFO 3 -#define NETLINK_BROADCAST_ERROR 4 -#define NETLINK_NO_ENOBUFS 5 -#ifndef __KERNEL__ -#define NETLINK_RX_RING 6 -#define NETLINK_TX_RING 7 -#endif -#define NETLINK_LISTEN_ALL_NSID 8 -#define NETLINK_LIST_MEMBERSHIPS 9 -#define NETLINK_CAP_ACK 10 -#define NETLINK_EXT_ACK 11 -#define NETLINK_GET_STRICT_CHK 12 - -struct nl_pktinfo { - __u32 group; -}; - -struct nl_mmap_req { - unsigned int nm_block_size; - unsigned int nm_block_nr; - unsigned int nm_frame_size; - unsigned int nm_frame_nr; -}; - -struct nl_mmap_hdr { - unsigned int nm_status; - unsigned int nm_len; - __u32 nm_group; - /* credentials */ - __u32 nm_pid; - __u32 nm_uid; - __u32 nm_gid; -}; - -#ifndef __KERNEL__ -enum nl_mmap_status { - NL_MMAP_STATUS_UNUSED, - NL_MMAP_STATUS_RESERVED, - NL_MMAP_STATUS_VALID, - NL_MMAP_STATUS_COPY, - NL_MMAP_STATUS_SKIP, -}; - -#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO -#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) -#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) -#endif - -#define NET_MAJOR 36 /* Major 36 is reserved for networking */ - -enum { - NETLINK_UNCONNECTED = 0, - NETLINK_CONNECTED, -}; - -/* - * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> - * +---------------------+- - -+- - - - - - - - - -+- - -+ - * | Header | Pad | Payload | Pad | - * | (struct nlattr) | ing | | ing | - * +---------------------+- - -+- - - - - - - - - -+- - -+ - * <-------------- nlattr->nla_len --------------> - */ - -struct nlattr { - __u16 nla_len; - __u16 nla_type; -}; - -/* - * nla_type (16 bits) - * +---+---+-------------------------------+ - * | N | O | Attribute Type | - * +---+---+-------------------------------+ - * N := Carries nested attributes - * O := Payload stored in network byte order - * - * Note: The N and O flag are mutually exclusive. - */ -#define NLA_F_NESTED (1 << 15) -#define NLA_F_NET_BYTEORDER (1 << 14) -#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) - -#define NLA_ALIGNTO 4 -#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) -#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) - -/* Generic 32 bitflags attribute content sent to the kernel. 
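[Annotation: the NLMSG_* macros above (NLMSG_OK, NLMSG_NEXT, NLMSG_DATA, NLMSG_PAYLOAD) implement the canonical walk over a buffer of variable-length netlink messages. A sketch of the usual receive loop, assuming a Linux system where <linux/netlink.h> provides the same definitions; the socket setup and recv() call are left out:

    #include <stdio.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    /* Walk one receive buffer; 'len' is the byte count returned by recv(). */
    static void handle_messages(char *buf, int len)
    {
            struct nlmsghdr *nlh;

            for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
                 nlh = NLMSG_NEXT(nlh, len)) {
                    if (nlh->nlmsg_type == NLMSG_DONE)    /* end of a dump */
                            break;
                    if (nlh->nlmsg_type == NLMSG_ERROR) { /* carries an errno */
                            struct nlmsgerr *err = NLMSG_DATA(nlh);
                            fprintf(stderr, "netlink error: %d\n", err->error);
                            break;
                    }
                    /* Payload starts at NLMSG_DATA(nlh) and is
                     * NLMSG_PAYLOAD(nlh, 0) bytes long. */
                    printf("type=%u len=%u\n", nlh->nlmsg_type, nlh->nlmsg_len);
            }
    }

Note that NLMSG_NEXT decrements 'len' as a side effect, so the length argument must be a modifiable lvalue, as in the loop above.]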
- * - * The value is a bitmap that defines the values being set - * The selector is a bitmask that defines which value is legit - * - * Examples: - * value = 0x0, and selector = 0x1 - * implies we are selecting bit 1 and we want to set its value to 0. - * - * value = 0x2, and selector = 0x2 - * implies we are selecting bit 2 and we want to set its value to 1. - * - */ -struct nla_bitfield32 { - __u32 value; - __u32 selector; -}; - -#endif /* _UAPI__LINUX_NETLINK_H */ diff --git a/src/contrib/licenses/LGPL-2.1 b/src/contrib/licenses/LGPL-2.1 deleted file mode 100644 index 27bb4342a..000000000 --- a/src/contrib/licenses/LGPL-2.1 +++ /dev/null @@ -1,503 +0,0 @@ -Valid-License-Identifier: LGPL-2.1 -Valid-License-Identifier: LGPL-2.1+ -SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html -Usage-Guide: - To use this license in source code, put one of the following SPDX - tag/value pairs into a comment according to the placement - guidelines in the licensing rules documentation. - For 'GNU Lesser General Public License (LGPL) version 2.1 only' use: - SPDX-License-Identifier: LGPL-2.1 - For 'GNU Lesser General Public License (LGPL) version 2.1 or any later - version' use: - SPDX-License-Identifier: LGPL-2.1+ -License-Text: - -GNU LESSER GENERAL PUBLIC LICENSE -Version 2.1, February 1999 - -Copyright (C) 1991, 1999 Free Software Foundation, Inc. -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -Everyone is permitted to copy and distribute verbatim copies of this -license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts as -the successor of the GNU Library Public License, version 2, hence the -version number 2.1.] - -Preamble - -The licenses for most software are designed to take away your freedom to -share and change it. By contrast, the GNU General Public Licenses are -intended to guarantee your freedom to share and change free software--to -make sure the software is free for all its users. - -This license, the Lesser General Public License, applies to some specially -designated software packages--typically libraries--of the Free Software -Foundation and other authors who decide to use it. You can use it too, but -we suggest you first think carefully about whether this license or the -ordinary General Public License is the better strategy to use in any -particular case, based on the explanations below. - -When we speak of free software, we are referring to freedom of use, not -price. Our General Public Licenses are designed to make sure that you have -the freedom to distribute copies of free software (and charge for this -service if you wish); that you receive source code or can get it if you -want it; that you can change the software and use pieces of it in new free -programs; and that you are informed that you can do these things. - -To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. These restrictions translate to certain responsibilities for you if -you distribute copies of the library or if you modify it. - -For example, if you distribute copies of the library, whether gratis or for -a fee, you must give the recipients all the rights that we gave you. You -must make sure that they, too, receive or can get the source code. If you -link other code with the library, you must provide complete object files to -the recipients, so that they can relink them with the library after making -changes to the library and recompiling it. 
And you must show them these -terms so they know their rights. - -We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - -To protect each distributor, we want to make it very clear that there is no -warranty for the free library. Also, if the library is modified by someone -else and passed on, the recipients should know that what they have is not -the original version, so that the original author's reputation will not be -affected by problems that might be introduced by others. - -Finally, software patents pose a constant threat to the existence of any -free program. We wish to make sure that a company cannot effectively -restrict the users of a free program by obtaining a restrictive license -from a patent holder. Therefore, we insist that any patent license obtained -for a version of the library must be consistent with the full freedom of -use specified in this license. - -Most GNU software, including some libraries, is covered by the ordinary GNU -General Public License. This license, the GNU Lesser General Public -License, applies to certain designated libraries, and is quite different -from the ordinary General Public License. We use this license for certain -libraries in order to permit linking those libraries into non-free -programs. - -When a program is linked with a library, whether statically or using a -shared library, the combination of the two is legally speaking a combined -work, a derivative of the original library. The ordinary General Public -License therefore permits such linking only if the entire combination fits -its criteria of freedom. The Lesser General Public License permits more lax -criteria for linking other code with the library. - -We call this license the "Lesser" General Public License because it does -Less to protect the user's freedom than the ordinary General Public -License. It also provides other free software developers Less of an -advantage over competing non-free programs. These disadvantages are the -reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - -For example, on rare occasions, there may be a special need to encourage -the widest possible use of a certain library, so that it becomes a de-facto -standard. To achieve this, non-free programs must be allowed to use the -library. A more frequent case is that a free library does the same job as -widely used non-free libraries. In this case, there is little to gain by -limiting the free library to free software only, so we use the Lesser -General Public License. - -In other cases, permission to use a particular library in non-free programs -enables a greater number of people to use a large body of free -software. For example, permission to use the GNU C Library in non-free -programs enables many more people to use the whole GNU operating system, as -well as its variant, the GNU/Linux operating system. - -Although the Lesser General Public License is Less protective of the users' -freedom, it does ensure that the user of a program that is linked with the -Library has the freedom and the wherewithal to run that program using a -modified version of the Library. - -The precise terms and conditions for copying, distribution and modification -follow. Pay close attention to the difference between a "work based on the -library" and a "work that uses the library". 
The former contains code -derived from the library, whereas the latter must be combined with the -library in order to run. - -TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - -0. This License Agreement applies to any software library or other program - which contains a notice placed by the copyright holder or other - authorized party saying it may be distributed under the terms of this - Lesser General Public License (also called "this License"). Each - licensee is addressed as "you". - - A "library" means a collection of software functions and/or data - prepared so as to be conveniently linked with application programs - (which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work which - has been distributed under these terms. A "work based on the Library" - means either the Library or any derivative work under copyright law: - that is to say, a work containing the Library or a portion of it, either - verbatim or with modifications and/or translated straightforwardly into - another language. (Hereinafter, translation is included without - limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for making - modifications to it. For a library, complete source code means all the - source code for all modules it contains, plus any associated interface - definition files, plus the scripts used to control compilation and - installation of the library. - - Activities other than copying, distribution and modification are not - covered by this License; they are outside its scope. The act of running - a program using the Library is not restricted, and output from such a - program is covered only if its contents constitute a work based on the - Library (independent of the use of the Library in a tool for writing - it). Whether that is true depends on what the Library does and what the - program that uses the Library does. - -1. You may copy and distribute verbatim copies of the Library's complete - source code as you receive it, in any medium, provided that you - conspicuously and appropriately publish on each copy an appropriate - copyright notice and disclaimer of warranty; keep intact all the notices - that refer to this License and to the absence of any warranty; and - distribute a copy of this License along with the Library. - - You may charge a fee for the physical act of transferring a copy, and - you may at your option offer warranty protection in exchange for a fee. - -2. You may modify your copy or copies of the Library or any portion of it, - thus forming a work based on the Library, and copy and distribute such - modifications or work under the terms of Section 1 above, provided that - you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices stating - that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no charge to - all third parties under the terms of this License. 
- - d) If a facility in the modified Library refers to a function or a table - of data to be supplied by an application program that uses the - facility, other than as an argument passed when the facility is - invoked, then you must make a good faith effort to ensure that, in - the event an application does not supply such function or table, the - facility still operates, and performs whatever part of its purpose - remains meaningful. - - (For example, a function in a library to compute square roots has a - purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must be - optional: if the application does not supply it, the square root - function must still compute square roots.) - - These requirements apply to the modified work as a whole. If - identifiable sections of that work are not derived from the Library, and - can be reasonably considered independent and separate works in - themselves, then this License, and its terms, do not apply to those - sections when you distribute them as separate works. But when you - distribute the same sections as part of a whole which is a work based on - the Library, the distribution of the whole must be on the terms of this - License, whose permissions for other licensees extend to the entire - whole, and thus to each and every part regardless of who wrote it. - - Thus, it is not the intent of this section to claim rights or contest - your rights to work written entirely by you; rather, the intent is to - exercise the right to control the distribution of derivative or - collective works based on the Library. - - In addition, mere aggregation of another work not based on the Library - with the Library (or with a work based on the Library) on a volume of a - storage or distribution medium does not bring the other work under the - scope of this License. - -3. You may opt to apply the terms of the ordinary GNU General Public - License instead of this License to a given copy of the Library. To do - this, you must alter all the notices that refer to this License, so that - they refer to the ordinary GNU General Public License, version 2, - instead of to this License. (If a newer version than version 2 of the - ordinary GNU General Public License has appeared, then you can specify - that version instead if you wish.) Do not make any other change in these - notices. - - Once this change is made in a given copy, it is irreversible for that - copy, so the ordinary GNU General Public License applies to all - subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of the - Library into a program that is not a library. - -4. You may copy and distribute the Library (or a portion or derivative of - it, under Section 2) in object code or executable form under the terms - of Sections 1 and 2 above provided that you accompany it with the - complete corresponding machine-readable source code, which must be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange. - - If distribution of object code is made by offering access to copy from a - designated place, then offering equivalent access to copy the source - code from the same place satisfies the requirement to distribute the - source code, even though third parties are not compelled to copy the - source along with the object code. - -5. 
A program that contains no derivative of any portion of the Library, but - is designed to work with the Library by being compiled or linked with - it, is called a "work that uses the Library". Such a work, in isolation, - is not a derivative work of the Library, and therefore falls outside the - scope of this License. - - However, linking a "work that uses the Library" with the Library creates - an executable that is a derivative of the Library (because it contains - portions of the Library), rather than a "work that uses the - library". The executable is therefore covered by this License. Section 6 - states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file - that is part of the Library, the object code for the work may be a - derivative work of the Library even though the source code is - not. Whether this is true is especially significant if the work can be - linked without the Library, or if the work is itself a library. The - threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data structure - layouts and accessors, and small macros and small inline functions (ten - lines or less in length), then the use of the object file is - unrestricted, regardless of whether it is legally a derivative - work. (Executables containing this object code plus portions of the - Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may - distribute the object code for the work under the terms of Section - 6. Any executables containing that work also fall under Section 6, - whether or not they are linked directly with the Library itself. - -6. As an exception to the Sections above, you may also combine or link a - "work that uses the Library" with the Library to produce a work - containing portions of the Library, and distribute that work under terms - of your choice, provided that the terms permit modification of the work - for the customer's own use and reverse engineering for debugging such - modifications. - - You must give prominent notice with each copy of the work that the - Library is used in it and that the Library and its use are covered by - this License. You must supply a copy of this License. If the work during - execution displays copyright notices, you must include the copyright - notice for the Library among them, as well as a reference directing the - user to the copy of this License. Also, you must do one of these things: - - a) Accompany the work with the complete corresponding machine-readable - source code for the Library including whatever changes were used in - the work (which must be distributed under Sections 1 and 2 above); - and, if the work is an executable linked with the Library, with the - complete machine-readable "work that uses the Library", as object - code and/or source code, so that the user can modify the Library and - then relink to produce a modified executable containing the modified - Library. (It is understood that the user who changes the contents of - definitions files in the Library will not necessarily be able to - recompile the application to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. 
A suitable mechanism is one that (1) uses at run time a copy - of the library already present on the user's computer system, rather - than copying library functions into the executable, and (2) will - operate properly with a modified version of the library, if the user - installs one, as long as the modified version is interface-compatible - with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at least three - years, to give the same user the materials specified in Subsection - 6a, above, for a charge no more than the cost of performing this - distribution. - - d) If distribution of the work is made by offering access to copy from a - designated place, offer equivalent access to copy the above specified - materials from the same place. - - e) Verify that the user has already received a copy of these materials - or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the Library" - must include any data and utility programs needed for reproducing the - executable from it. However, as a special exception, the materials to be - distributed need not include anything that is normally distributed (in - either source or binary form) with the major components (compiler, - kernel, and so on) of the operating system on which the executable runs, - unless that component itself accompanies the executable. - - It may happen that this requirement contradicts the license restrictions - of other proprietary libraries that do not normally accompany the - operating system. Such a contradiction means you cannot use both them - and the Library together in an executable that you distribute. - -7. You may place library facilities that are a work based on the Library - side-by-side in a single library together with other library facilities - not covered by this License, and distribute such a combined library, - provided that the separate distribution of the work based on the Library - and of the other library facilities is otherwise permitted, and provided - that you do these two things: - - a) Accompany the combined library with a copy of the same work based on - the Library, uncombined with any other library facilities. This must - be distributed under the terms of the Sections above. - - b) Give prominent notice with the combined library of the fact that part - of it is a work based on the Library, and explaining where to find - the accompanying uncombined form of the same work. - -8. You may not copy, modify, sublicense, link with, or distribute the - Library except as expressly provided under this License. Any attempt - otherwise to copy, modify, sublicense, link with, or distribute the - Library is void, and will automatically terminate your rights under this - License. However, parties who have received copies, or rights, from you - under this License will not have their licenses terminated so long as - such parties remain in full compliance. - -9. You are not required to accept this License, since you have not signed - it. However, nothing else grants you permission to modify or distribute - the Library or its derivative works. These actions are prohibited by law - if you do not accept this License. Therefore, by modifying or - distributing the Library (or any work based on the Library), you - indicate your acceptance of this License to do so, and all its terms and - conditions for copying, distributing or modifying the Library or works - based on it. - -10. 
Each time you redistribute the Library (or any work based on the - Library), the recipient automatically receives a license from the - original licensor to copy, distribute, link with or modify the Library - subject to these terms and conditions. You may not impose any further - restrictions on the recipients' exercise of the rights granted - herein. You are not responsible for enforcing compliance by third - parties with this License. - -11. If, as a consequence of a court judgment or allegation of patent - infringement or for any other reason (not limited to patent issues), - conditions are imposed on you (whether by court order, agreement or - otherwise) that contradict the conditions of this License, they do not - excuse you from the conditions of this License. If you cannot - distribute so as to satisfy simultaneously your obligations under this - License and any other pertinent obligations, then as a consequence you - may not distribute the Library at all. For example, if a patent license - would not permit royalty-free redistribution of the Library by all - those who receive copies directly or indirectly through you, then the - only way you could satisfy both it and this License would be to refrain - entirely from distribution of the Library. - - If any portion of this section is held invalid or unenforceable under - any particular circumstance, the balance of the section is intended to - apply, and the section as a whole is intended to apply in other - circumstances. - - It is not the purpose of this section to induce you to infringe any - patents or other property right claims or to contest validity of any - such claims; this section has the sole purpose of protecting the - integrity of the free software distribution system which is implemented - by public license practices. Many people have made generous - contributions to the wide range of software distributed through that - system in reliance on consistent application of that system; it is up - to the author/donor to decide if he or she is willing to distribute - software through any other system and a licensee cannot impose that - choice. - - This section is intended to make thoroughly clear what is believed to - be a consequence of the rest of this License. - -12. If the distribution and/or use of the Library is restricted in certain - countries either by patents or by copyrighted interfaces, the original - copyright holder who places the Library under this License may add an - explicit geographical distribution limitation excluding those - countries, so that distribution is permitted only in or among countries - not thus excluded. In such case, this License incorporates the - limitation as if written in the body of this License. - -13. The Free Software Foundation may publish revised and/or new versions of - the Lesser General Public License from time to time. Such new versions - will be similar in spirit to the present version, but may differ in - detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the Library - specifies a version number of this License which applies to it and "any - later version", you have the option of following the terms and - conditions either of that version or of any later version published by - the Free Software Foundation. If the Library does not specify a license - version number, you may choose any version ever published by the Free - Software Foundation. - -14. 
If you wish to incorporate parts of the Library into other free - programs whose distribution conditions are incompatible with these, - write to the author to ask for permission. For software which is - copyrighted by the Free Software Foundation, write to the Free Software - Foundation; we sometimes make exceptions for this. Our decision will be - guided by the two goals of preserving the free status of all - derivatives of our free software and of promoting the sharing and reuse - of software generally. - -NO WARRANTY - -15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY - FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN - OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES - PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER - EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE - ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH - YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL - NECESSARY SERVICING, REPAIR OR CORRECTION. - -16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING - WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR - REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR - DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL - DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY - (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED - INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF - THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR - OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -END OF TERMS AND CONDITIONS - -How to Apply These Terms to Your New Libraries - -If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - -To apply these terms, attach the following notices to the library. It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - -one line to give the library's name and an idea of what it does. -Copyright (C) year name of author - -This library is free software; you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at -your option) any later version. - -This library is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License -for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this library; if not, write to the Free Software Foundation, -Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add -information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. 
Here is a sample; alter the names: - -Yoyodyne, Inc., hereby disclaims all copyright interest in -the library `Frob' (a library for tweaking knobs) written -by James Random Hacker. - -signature of Ty Coon, 1 April 1990 -Ty Coon, President of Vice -That's all there is to it! diff --git a/src/libknot/Makefile.inc b/src/libknot/Makefile.inc index 73a4d6e74..32ea26bce 100755 --- a/src/libknot/Makefile.inc +++ b/src/libknot/Makefile.inc @@ -92,9 +92,6 @@ libknot_la_SOURCES = \ libknot/yparser/yptrafo.c \ libknot/xdp/tcp_iobuf.c -if EMBEDDED_LIBBPF -libknot_la_LIBADD += $(libembbpf_LIBS) -endif EMBEDDED_LIBBPF if ENABLE_XDP libknot_la_CPPFLAGS += $(libbpf_CFLAGS) libknot_la_LIBADD += $(libbpf_LIBS) diff --git a/src/libknot/xdp/bpf-kernel.c b/src/libknot/xdp/bpf-kernel.c index 229449ab2..1576ac295 100644 --- a/src/libknot/xdp/bpf-kernel.c +++ b/src/libknot/xdp/bpf-kernel.c @@ -14,17 +14,17 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ +#include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> #include "bpf-consts.h" -#include "../../contrib/libbpf/include/uapi/linux/bpf.h" -#include "../../contrib/libbpf/bpf/bpf_endian.h" -#include "../../contrib/libbpf/bpf/bpf_helpers.h" /* Don't fragment flag. */ #define IP_DF 0x4000 |
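A note on the removed include/uapi/linux/netlink.h above: it was a verbatim copy of the kernel UAPI header, so consumers lose nothing by the removal; the identical NLMSG_* macros come from the system <linux/netlink.h>. As a rough sketch of how those macros are meant to be used (illustrative only, assuming a Linux system; walk_netlink_replies is a hypothetical helper, not taken from the Knot sources):

    /* Walk a buffer of netlink replies with the NLMSG_* macros shown
     * above, now provided by the system <linux/netlink.h>. */
    #include <linux/netlink.h>
    #include <stdio.h>

    static void walk_netlink_replies(struct nlmsghdr *nlh, int len)
    {
        for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
            if (nlh->nlmsg_type == NLMSG_DONE) {
                break; /* end of a dump */
            }
            if (nlh->nlmsg_type == NLMSG_ERROR) {
                const struct nlmsgerr *err = NLMSG_DATA(nlh);
                fprintf(stderr, "netlink error: %d\n", err->error);
                break;
            }
            /* The payload starts at NLMSG_DATA(nlh); note that
             * NLMSG_NEXT() also decrements len as it advances. */
        }
    }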
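For the final hunk: bpf-kernel.c now takes <linux/bpf.h> from the kernel UAPI headers and the endian/helper headers from the system libbpf instead of the removed embedded tree. A minimal sketch of an XDP program built against the same system headers (an assumed stand-alone example; xdp_inspect_ipv4 and its logic are not Knot's actual filter):

    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <bpf/bpf_endian.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int xdp_inspect_ipv4(struct xdp_md *ctx)
    {
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;

        /* The verifier requires a bounds check before the header
         * may be dereferenced. */
        if ((void *)(eth + 1) > data_end) {
            return XDP_DROP; /* truncated frame */
        }

        /* bpf_htons() comes from <bpf/bpf_endian.h>; SEC() and the
         * license variable below from <bpf/bpf_helpers.h>. */
        if (eth->h_proto != bpf_htons(ETH_P_IP)) {
            return XDP_PASS; /* not IPv4, leave it to the stack */
        }

        return XDP_PASS; /* IPv4 frames would be processed here */
    }

    char _license[] SEC("license") = "GPL";

Compiled the usual way (e.g. clang -O2 -target bpf -c), this pulls in nothing from the deleted embedded copy, which is the point of the patch.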