summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Synacek <jan.synacek@gmail.com>2017-09-26 16:07:34 +0200
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2017-09-26 16:07:34 +0200
commit0cde65e263643cf00a6b29c3178c9a73724da812 (patch)
treedd636232aeaba10d01545c145594f3df54a82a49
parentMerge pull request #6917 from keszybz/restore-some-tests (diff)
parentbasic/log: fix return value from log_struct_iovec_internal() (diff)
downloadsystemd-0cde65e263643cf00a6b29c3178c9a73724da812.tar.xz
systemd-0cde65e263643cf00a6b29c3178c9a73724da812.zip
test-cpu-set-util.c: fix typo in comment (#6916)
-rw-r--r--ENVIRONMENT.md14
-rw-r--r--TODO9
-rw-r--r--man/systemd-system.conf.xml15
-rw-r--r--man/systemd.resource-control.xml117
-rw-r--r--man/systemd.slice.xml25
-rw-r--r--man/systemd.special.xml37
-rw-r--r--meson.build2
-rwxr-xr-xmkosi.build2
-rw-r--r--src/basic/bpf-program.c183
-rw-r--r--src/basic/bpf-program.h55
-rw-r--r--src/basic/cgroup-util.c9
-rw-r--r--src/basic/in-addr-util.c132
-rw-r--r--src/basic/in-addr-util.h15
-rw-r--r--src/basic/io-util.h13
-rw-r--r--src/basic/journal-importer.c5
-rw-r--r--src/basic/log.c113
-rw-r--r--src/basic/log.h14
-rw-r--r--src/basic/meson.build20
-rw-r--r--src/basic/missing_syscall.h32
-rw-r--r--src/basic/rlimit-util.c3
-rw-r--r--src/basic/socket-label.c2
-rw-r--r--src/core/bpf-firewall.c680
-rw-r--r--src/core/bpf-firewall.h32
-rw-r--r--src/core/cgroup.c362
-rw-r--r--src/core/cgroup.h25
-rw-r--r--src/core/dbus-cgroup.c181
-rw-r--r--src/core/dbus-unit.c38
-rw-r--r--src/core/dynamic-user.c17
-rw-r--r--src/core/execute.c11
-rw-r--r--src/core/ip-address-access.c217
-rw-r--r--src/core/ip-address-access.h38
-rw-r--r--src/core/job.c11
-rw-r--r--src/core/load-fragment-gperf.gperf.m43
-rw-r--r--src/core/main.c31
-rw-r--r--src/core/manager.c7
-rw-r--r--src/core/manager.h2
-rw-r--r--src/core/meson.build178
-rw-r--r--src/core/mount.c10
-rw-r--r--src/core/mount.h2
-rw-r--r--src/core/scope.c3
-rw-r--r--src/core/service.c13
-rw-r--r--src/core/service.h2
-rw-r--r--src/core/show-status.c14
-rw-r--r--src/core/slice.c3
-rw-r--r--src/core/socket.c268
-rw-r--r--src/core/socket.h2
-rw-r--r--src/core/swap.c10
-rw-r--r--src/core/swap.h2
-rw-r--r--src/core/system.conf3
-rw-r--r--src/core/unit.c284
-rw-r--r--src/core/unit.h27
-rw-r--r--src/coredump/coredump.c30
-rw-r--r--src/journal/journal-send.c70
-rw-r--r--src/journal/journald-audit.c14
-rw-r--r--src/journal/journald-console.c22
-rw-r--r--src/journal/journald-kmsg.c56
-rw-r--r--src/journal/journald-native.c2
-rw-r--r--src/journal/journald-server.c30
-rw-r--r--src/journal/journald-stream.c15
-rw-r--r--src/journal/journald-syslog.c32
-rw-r--r--src/libsystemd-network/sd-dhcp-lease.c4
-rw-r--r--src/libsystemd-network/sd-dhcp-server.c2
-rw-r--r--src/network/networkd-address.c2
-rw-r--r--src/network/networkd-dhcp4.c6
-rw-r--r--src/nspawn/nspawn.c25
-rw-r--r--src/shared/bus-unit-util.c133
-rw-r--r--src/shared/firewall-util.c4
-rw-r--r--src/shared/linux/bpf.h673
-rw-r--r--src/shared/linux/bpf_common.h55
-rw-r--r--src/shared/linux/libbpf.h198
-rw-r--r--src/systemctl/systemctl.c17
-rw-r--r--src/systemd/sd-messages.h3
-rw-r--r--src/test/meson.build15
-rw-r--r--src/test/test-bpf.c162
-rw-r--r--src/test/test-fileio.c6
-rw-r--r--src/test/test-in-addr-util.c75
76 files changed, 4381 insertions, 563 deletions
diff --git a/ENVIRONMENT.md b/ENVIRONMENT.md
index e542d4ec6f..4ae561a892 100644
--- a/ENVIRONMENT.md
+++ b/ENVIRONMENT.md
@@ -64,3 +64,17 @@ installed systemd tests:
* `$SYSTEMD_TEST_DATA` — override the location of test data. This is useful if
a test executable is moved to an arbitrary location.
+
+nss-systemd:
+
+* `$SYSTEMD_NSS_BYPASS_SYNTHETIC=1` — if set, `nss-systemd` won't synthesize
+ user/group records for the `root` and `nobody` users if they are missing from
+ `/etc/passwd`.
+
+* `$SYSTEMD_NSS_DYNAMIC_BYPASS=1` — if set, `nss-systemd` won't return
+ user/group records for dynamically registered service users (i.e. users
+ registered through `DynamicUser=1`).
+
+* `$SYSTEMD_NSS_BYPASS_BUS=1` — if set, `nss-systemd` won't use D-Bus to do
+ dynamic user lookups. This is primarily useful to make `nss-systemd` work
+ safely from within `dbus-daemon`.
diff --git a/TODO b/TODO
index 2de99823a8..b638ab95c8 100644
--- a/TODO
+++ b/TODO
@@ -26,6 +26,15 @@ Features:
* replace all uses of fgets() + LINE_MAX by read_line()
+* fix logging in execute.c: extend log.c to have an optional mode where
+ log_open() is implicitly done before each log line and log_close() right
+ after. This way we don't have open fds around but logs will still
+ work. Because it is slow this mode should be used exclusively in the execute.c
+ case.
+
+* set IPAddressDeny=any on all services that shouldn't do networking (possibly
+ combined with IPAddressAllow=localhost).
+
* dissect: when we discover squashfs, don't claim we had a "writable" partition
in systemd-dissect
diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml
index 336c7a5fd1..81f1b1ef8d 100644
--- a/man/systemd-system.conf.xml
+++ b/man/systemd-system.conf.xml
@@ -319,17 +319,14 @@
<term><varname>DefaultBlockIOAccounting=</varname></term>
<term><varname>DefaultMemoryAccounting=</varname></term>
<term><varname>DefaultTasksAccounting=</varname></term>
+ <term><varname>DefaultIPAccounting=</varname></term>
- <listitem><para>Configure the default resource accounting
- settings, as configured per-unit by
- <varname>CPUAccounting=</varname>,
- <varname>BlockIOAccounting=</varname>,
- <varname>MemoryAccounting=</varname> and
- <varname>TasksAccounting=</varname>. See
+ <listitem><para>Configure the default resource accounting settings, as configured per-unit by
+ <varname>CPUAccounting=</varname>, <varname>BlockIOAccounting=</varname>, <varname>MemoryAccounting=</varname>,
+ <varname>TasksAccounting=</varname> and <varname>IPAccounting=</varname>. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
- for details on the per-unit
- settings. <varname>DefaultTasksAccounting=</varname> defaults
- to on, the other three settings to off.</para></listitem>
+ for details on the per-unit settings. <varname>DefaultTasksAccounting=</varname> defaults to on, the other
+ four settings to off.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index bb69599f99..0c0c91608a 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -481,6 +481,123 @@
</varlistentry>
<varlistentry>
+ <term><varname>IPAccounting=</varname></term>
+
+ <listitem>
+ <para>Takes a boolean argument. If true, turns on IPv4 and IPv6 network traffic accounting for packets sent
+ or received by the unit. When this option is turned on, all IPv4 and IPv6 sockets created by any process of
+ the unit are accounted for. When this option is used in socket units, it applies to all IPv4 and IPv6 sockets
+ associated with it (including both listening and connection sockets where this applies). Note that for
+ socket-activated services, this configuration setting and the accounting data of the service unit and the
+ socket unit are kept separate, and displayed separately. No propagation of the setting and the collected
+ statistics is done, in either direction. Moreover, any traffic sent or received on any of the socket unit's
+ sockets is accounted to the socket unit — and never to the service unit it might have activated, even if the
+ socket is used by it. Note that IP accounting is currently not supported for slice units, and enabling this
+ option for them has no effect. The system default for this setting may be controlled with
+ <varname>DefaultIPAccounting=</varname> in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>IPAddressAllow=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
+ <term><varname>IPAddressDeny=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
+
+ <listitem>
+ <para>Turn on address range network traffic filtering for packets sent and received over AF_INET and AF_INET6
+ sockets. Both directives take a space separated list of IPv4 or IPv6 addresses, each optionally suffixed
+ with an address prefix length (separated by a <literal>/</literal> character). If the latter is omitted, the
+ address is considered a host address, i.e. the prefix covers the whole address (32 for IPv4, 128 for IPv6).
+ </para>
+
+ <para>The access lists configured with this option are applied to all sockets created by processes of this
+ unit (or in the case of socket units, associated with it). The lists are implicitly combined with any lists
+ configured for any of the parent slice units this unit might be a member of. By default all access lists are
+ empty. When configured the lists are enforced as follows:</para>
+
+ <itemizedlist>
+ <listitem><para>Access will be granted in case an IP packet's destination/source address matches any entry in the
+ <varname>IPAddressAllow=</varname> setting.</para></listitem>
+
+ <listitem><para>Otherwise, access will be denied in case its destination/source address matches any entry
+ in the <varname>IPAddressDeny=</varname> setting.</para></listitem>
+
+ <listitem><para>Otherwise, access will be granted.</para></listitem>
+ </itemizedlist>
+
+ <para>In order to implement a whitelisting IP firewall, it is recommended to use a
+ <varname>IPAddressDeny=</varname><constant>any</constant> setting on an upper-level slice unit (such as the
+ root slice <filename>-.slice</filename> or the slice containing all system services
+ <filename>system.slice</filename> – see
+ <citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+ details on these slice units), plus individual per-service <varname>IPAddressAllow=</varname> lines
+ permitting network access to relevant services, and only them.</para>
+
+ <para>Note that for socket-activated services, the IP access list configured on the socket unit applies to
+ all sockets associated with it directly, but not to any sockets created by the ultimately activated services
+ for it. Conversely, the IP access list configured for the service is not applied to any sockets passed into
+ the service via socket activation. Thus, it is usually a good idea to replicate the IP access lists on both
+ the socket and the service unit, however it often makes sense to maintain one list more open and the other
+ one more restricted, depending on the use case.</para>
+
+ <para>If these settings are used multiple times in the same unit the specified lists are combined. If an
+ empty string is assigned to these settings the specific access list is reset and all previous settings undone.</para>
+
+ <para>In place of explicit IPv4 or IPv6 address and prefix length specifications a small set of symbolic
+ names may be used. The following names are defined:</para>
+
+ <table>
+ <title>Special address/network names</title>
+
+ <tgroup cols='3'>
+ <colspec colname='name'/>
+ <colspec colname='definition'/>
+ <colspec colname='meaning'/>
+
+ <thead>
+ <row>
+ <entry>Symbolic Name</entry>
+ <entry>Definition</entry>
+ <entry>Meaning</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry><constant>any</constant></entry>
+ <entry>0.0.0.0/0 ::/0</entry>
+ <entry>Any host</entry>
+ </row>
+
+ <row>
+ <entry><constant>localhost</constant></entry>
+ <entry>127.0.0.0/8 ::1/128</entry>
+ <entry>All addresses on the local loopback</entry>
+ </row>
+
+ <row>
+ <entry><constant>link-local</constant></entry>
+ <entry>169.254.0.0/16 fe80::/64</entry>
+ <entry>All link-local IP addresses</entry>
+ </row>
+
+ <row>
+ <entry><constant>multicast</constant></entry>
+ <entry>224.0.0.0/4 ff00::/8</entry>
+ <entry>All IP multicasting addresses</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>Note that these settings might not be supported on some systems (for example if eBPF control group
+ support is not enabled in the underlying kernel or container manager). These settings will have no effect in
+ that case. If compatibility with such systems is desired it is hence recommended to not exclusively rely on
+ them for IP security.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>DeviceAllow=</varname></term>
<listitem>
diff --git a/man/systemd.slice.xml b/man/systemd.slice.xml
index 67f7a93448..c46ba7a2e1 100644
--- a/man/systemd.slice.xml
+++ b/man/systemd.slice.xml
@@ -53,22 +53,15 @@
<refsect1>
<title>Description</title>
- <para>A unit configuration file whose name ends in
- <literal>.slice</literal> encodes information about a slice which
- is a concept for hierarchically managing resources of a group of
- processes. This management is performed by creating a node in the
- Linux Control Group (cgroup) tree. Units that manage processes
- (primarily scope and service units) may be assigned to a specific
- slice. For each slice, certain resource limits may be set that
- apply to all processes of all units contained in that
- slice. Slices are organized hierarchically in a tree. The name of
- the slice encodes the location in the tree. The name consists of a
- dash-separated series of names, which describes the path to the
- slice from the root slice. The root slice is named,
- <filename>-.slice</filename>. Example:
- <filename>foo-bar.slice</filename> is a slice that is located
- within <filename>foo.slice</filename>, which in turn is located in
- the root slice <filename>-.slice</filename>.
+ <para>A unit configuration file whose name ends in <literal>.slice</literal> encodes information about a slice
+ unit. A slice unit is a concept for hierarchically managing resources of a group of processes. This management is
+ performed by creating a node in the Linux Control Group (cgroup) tree. Units that manage processes (primarily scope
+ and service units) may be assigned to a specific slice. For each slice, certain resource limits may be set that
+ apply to all processes of all units contained in that slice. Slices are organized hierarchically in a tree. The
+ name of the slice encodes the location in the tree. The name consists of a dash-separated series of names, which
+ describes the path to the slice from the root slice. The root slice is named <filename>-.slice</filename>. Example:
+ <filename>foo-bar.slice</filename> is a slice that is located within <filename>foo.slice</filename>, which in turn
+ is located in the root slice <filename>-.slice</filename>.
</para>
<para>Note that slice units cannot be templated, nor is possible to add multiple names to a slice unit by creating
diff --git a/man/systemd.special.xml b/man/systemd.special.xml
index 73e1e720e9..dc0b313b0c 100644
--- a/man/systemd.special.xml
+++ b/man/systemd.special.xml
@@ -48,8 +48,7 @@
</refnamediv>
<refsynopsisdiv><para>
- <!-- sort alphabetically, targets first -->
- <filename>basic.target</filename>,
+ <!-- sort alphabetically, targets first --><filename>basic.target</filename>,
<filename>bluetooth.target</filename>,
<filename>cryptsetup-pre.target</filename>,
<filename>cryptsetup.target</filename>,
@@ -107,15 +106,15 @@
<filename>time-sync.target</filename>,
<filename>timers.target</filename>,
<filename>umount.target</filename>,
- <!-- slices -->
- <filename>-.slice</filename>,
+ <!-- slices --><filename>-.slice</filename>,
<filename>system.slice</filename>,
<filename>user.slice</filename>,
<filename>machine.slice</filename>,
- <!-- the rest -->
+ <!-- the rest --><filename>-.mount</filename>,
<filename>dbus.service</filename>,
<filename>dbus.socket</filename>,
<filename>display-manager.service</filename>,
+ <filename>init.scope</filename>,
<filename>system-update-cleanup.service</filename>
</para></refsynopsisdiv>
@@ -132,6 +131,15 @@
<variablelist>
<varlistentry>
+ <term><filename>-.mount</filename></term>
+ <listitem>
+ <para>The root mount point, i.e. the mount unit for the <filename>/</filename> path. This unit is
+ unconditionally active, during the entire time the system is up, as this mount point is where the basic
+ userspace is running from.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><filename>basic.target</filename></term>
<listitem>
<para>A special target unit covering basic boot-up.</para>
@@ -327,6 +335,13 @@
</listitem>
</varlistentry>
<varlistentry>
+ <term><filename>init.scope</filename></term>
+ <listitem>
+ <para>This scope unit is where the system and service manager (PID 1) itself resides. It is active as long as
+ the system is running.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term><filename>initrd-fs.target</filename></term>
<listitem>
<para><citerefentry><refentrytitle>systemd-fstab-generator</refentrytitle><manvolnum>3</manvolnum></citerefentry>
@@ -1009,17 +1024,17 @@ PartOf=graphical-session.target
<refsect1>
<title>Special Slice Units</title>
- <para>There are four <literal>.slice</literal> units which form
- the basis of the hierarchy for assignment of resources for
- services, users, and virtual machines or containers.</para>
+ <para>There are four <literal>.slice</literal> units which form the basis of the hierarchy for assignment of
+ resources for services, users, and virtual machines or containers. See
+ <citerefentry><refentrytitle>systemd.slice</refentrytitle><manvolnum>5</manvolnum></citerefentry> for details about slice
+ units.</para>
<variablelist>
<varlistentry>
<term><filename>-.slice</filename></term>
<listitem>
- <para>The root slice is the root of the hierarchy. It
- usually does not contain units directly, but may be used to
- set defaults for the whole tree.</para>
+ <para>The root slice is the root of the slice hierarchy. It usually does not contain units directly, but may
+ be used to set defaults for the whole tree.</para>
</listitem>
</varlistentry>
diff --git a/meson.build b/meson.build
index 3e85442a6f..d72fc6f148 100644
--- a/meson.build
+++ b/meson.build
@@ -443,6 +443,8 @@ foreach ident : [
#include <keyutils.h>'''],
['copy_file_range', '''#include <sys/syscall.h>
#include <unistd.h>'''],
+ ['bpf', '''#include <sys/syscall.h>
+ #include <unistd.h>'''],
['explicit_bzero' , '''#include <string.h>'''],
]
diff --git a/mkosi.build b/mkosi.build
index 12e88b909c..92eb55b130 100755
--- a/mkosi.build
+++ b/mkosi.build
@@ -28,7 +28,7 @@ export LC_CTYPE=C.UTF-8
[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR"
ninja -C "$BUILDDIR" all
-[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test
+[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
ninja -C "$BUILDDIR" install
mkdir -p "$DESTDIR"/etc
diff --git a/src/basic/bpf-program.c b/src/basic/bpf-program.c
new file mode 100644
index 0000000000..ce6f9e4409
--- /dev/null
+++ b/src/basic/bpf-program.c
@@ -0,0 +1,183 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Daniel Mack
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "log.h"
+#include "missing.h"
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+
+ p = new0(BPFProgram, 1);
+ if (!p)
+ return log_oom();
+
+ p->prog_type = prog_type;
+ p->kernel_fd = -1;
+
+ *ret = p;
+ p = NULL;
+ return 0;
+}
+
+BPFProgram *bpf_program_unref(BPFProgram *p) {
+ if (!p)
+ return NULL;
+
+ safe_close(p->kernel_fd);
+ free(p->instructions);
+
+ return mfree(p);
+}
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
+
+ assert(p);
+
+ if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
+ return -ENOMEM;
+
+ memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
+ p->n_instructions += count;
+
+ return 0;
+}
+
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
+ union bpf_attr attr;
+
+ assert(p);
+
+ if (p->kernel_fd >= 0)
+ return -EBUSY;
+
+ attr = (union bpf_attr) {
+ .prog_type = p->prog_type,
+ .insns = PTR_TO_UINT64(p->instructions),
+ .insn_cnt = p->n_instructions,
+ .license = PTR_TO_UINT64("GPL"),
+ .log_buf = PTR_TO_UINT64(log_buf),
+ .log_level = !!log_buf,
+ .log_size = log_size,
+ };
+
+ p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (p->kernel_fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
+ _cleanup_close_ int fd = -1;
+ union bpf_attr attr;
+
+ assert(p);
+ assert(type >= 0);
+ assert(path);
+
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ attr = (union bpf_attr) {
+ .attach_type = type,
+ .target_fd = fd,
+ .attach_bpf_fd = p->kernel_fd,
+ .attach_flags = flags,
+ };
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_program_cgroup_detach(int type, const char *path) {
+ _cleanup_close_ int fd = -1;
+ union bpf_attr attr;
+
+ assert(path);
+
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ attr = (union bpf_attr) {
+ .attach_type = type,
+ .target_fd = fd,
+ };
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
+ union bpf_attr attr = {
+ .map_type = type,
+ .key_size = key_size,
+ .value_size = value_size,
+ .max_entries = max_entries,
+ .map_flags = flags,
+ };
+ int fd;
+
+ fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int bpf_map_update_element(int fd, const void *key, void *value) {
+
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = PTR_TO_UINT64(key),
+ .value = PTR_TO_UINT64(value),
+ };
+
+ if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_map_lookup_element(int fd, const void *key, void *value) {
+
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = PTR_TO_UINT64(key),
+ .value = PTR_TO_UINT64(value),
+ };
+
+ if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/basic/bpf-program.h b/src/basic/bpf-program.h
new file mode 100644
index 0000000000..35a41ffc44
--- /dev/null
+++ b/src/basic/bpf-program.h
@@ -0,0 +1,55 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Daniel Mack
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+
+ [Except for the stuff copy/pasted from the kernel sources, see below]
+***/
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct BPFProgram BPFProgram;
+
+struct BPFProgram {
+ int kernel_fd;
+ uint32_t prog_type;
+
+ size_t n_instructions;
+ size_t allocated;
+ struct bpf_insn *instructions;
+};
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
+BPFProgram *bpf_program_unref(BPFProgram *p);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
+int bpf_program_cgroup_detach(int type, const char *path);
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
+int bpf_map_update_element(int fd, const void *key, void *value);
+int bpf_map_lookup_element(int fd, const void *key, void *value);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 7cd92005e3..f3f6a21576 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -103,9 +103,12 @@ int cg_read_pid(FILE *f, pid_t *_pid) {
return 1;
}
-int cg_read_event(const char *controller, const char *path, const char *event,
- char **val)
-{
+int cg_read_event(
+ const char *controller,
+ const char *path,
+ const char *event,
+ char **val) {
+
_cleanup_free_ char *events = NULL, *content = NULL;
char *p, *line;
int r;
diff --git a/src/basic/in-addr-util.c b/src/basic/in-addr-util.c
index d52fdad3ac..e27faba75f 100644
--- a/src/basic/in-addr-util.c
+++ b/src/basic/in-addr-util.c
@@ -308,22 +308,22 @@ int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
return 0;
}
-int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret) {
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
int r;
assert(s);
r = in_addr_from_string(AF_INET, s, ret);
if (r >= 0) {
- if (family)
- *family = AF_INET;
+ if (ret_family)
+ *ret_family = AF_INET;
return 0;
}
r = in_addr_from_string(AF_INET6, s, ret);
if (r >= 0) {
- if (family)
- *family = AF_INET6;
+ if (ret_family)
+ *ret_family = AF_INET6;
return 0;
}
@@ -371,13 +371,13 @@ int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_u
return r;
}
-unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr) {
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
assert(addr);
return 32 - u32ctz(be32toh(addr->s_addr));
}
-struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
assert(addr);
assert(prefixlen <= 32);
@@ -390,7 +390,7 @@ struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char
return addr;
}
-int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
uint8_t msb_octet = *(uint8_t*) addr;
/* addr may not be aligned, so make sure we only access it byte-wise */
@@ -414,18 +414,18 @@ int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixl
return 0;
}
-int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
unsigned char prefixlen;
int r;
assert(addr);
assert(mask);
- r = in_addr_default_prefixlen(addr, &prefixlen);
+ r = in4_addr_default_prefixlen(addr, &prefixlen);
if (r < 0)
return r;
- in_addr_prefixlen_to_netmask(mask, prefixlen);
+ in4_addr_prefixlen_to_netmask(mask, prefixlen);
return 0;
}
@@ -435,7 +435,7 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
if (family == AF_INET) {
struct in_addr mask;
- if (!in_addr_prefixlen_to_netmask(&mask, prefixlen))
+ if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen))
return -EINVAL;
addr->in.s_addr &= mask.s_addr;
@@ -465,10 +465,57 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
return -EAFNOSUPPORT;
}
-int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen) {
+int in_addr_prefix_covers(int family,
+ const union in_addr_union *prefix,
+ unsigned char prefixlen,
+ const union in_addr_union *address) {
+
+ union in_addr_union masked_prefix, masked_address;
+ int r;
+
+ assert(prefix);
+ assert(address);
+
+ masked_prefix = *prefix;
+ r = in_addr_mask(family, &masked_prefix, prefixlen);
+ if (r < 0)
+ return r;
+
+ masked_address = *address;
+ r = in_addr_mask(family, &masked_address, prefixlen);
+ if (r < 0)
+ return r;
+
+ return in_addr_equal(family, &masked_prefix, &masked_address);
+}
+
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
+ uint8_t u;
+ int r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ r = safe_atou8(p, &u);
+ if (r < 0)
+ return r;
+
+ if (u > FAMILY_ADDRESS_SIZE(family) * 8)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int in_addr_prefix_from_string(
+ const char *p,
+ int family,
+ union in_addr_union *ret_prefix,
+ unsigned char *ret_prefixlen) {
+
union in_addr_union buffer;
const char *e, *l;
- uint8_t k;
+ unsigned char k;
int r;
assert(p);
@@ -486,23 +533,58 @@ int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *r
if (r < 0)
return r;
- k = FAMILY_ADDRESS_SIZE(family) * 8;
-
if (e) {
- uint8_t n;
-
- r = safe_atou8(e + 1, &n);
+ r = in_addr_parse_prefixlen(family, e+1, &k);
if (r < 0)
return r;
+ } else
+ k = FAMILY_ADDRESS_SIZE(family) * 8;
- if (n > k)
- return -ERANGE;
+ if (ret_prefix)
+ *ret_prefix = buffer;
+ if (ret_prefixlen)
+ *ret_prefixlen = k;
- k = n;
- }
+ return 0;
+}
+
+int in_addr_prefix_from_string_auto(
+ const char *p,
+ int *ret_family,
+ union in_addr_union *ret_prefix,
+ unsigned char *ret_prefixlen) {
+
+ union in_addr_union buffer;
+ const char *e, *l;
+ unsigned char k;
+ int family, r;
+
+ assert(p);
+
+ e = strchr(p, '/');
+ if (e)
+ l = strndupa(p, e - p);
+ else
+ l = p;
- *ret_prefix = buffer;
- *ret_prefixlen = k;
+ r = in_addr_from_string_auto(l, &family, &buffer);
+ if (r < 0)
+ return r;
+
+ if (e) {
+ r = in_addr_parse_prefixlen(family, e+1, &k);
+ if (r < 0)
+ return r;
+ } else
+ k = FAMILY_ADDRESS_SIZE(family) * 8;
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_prefix)
+ *ret_prefix = buffer;
+ if (ret_prefixlen)
+ *ret_prefixlen = k;
return 0;
+
}
diff --git a/src/basic/in-addr-util.h b/src/basic/in-addr-util.h
index 14e27246b5..d129bf5585 100644
--- a/src/basic/in-addr-util.h
+++ b/src/basic/in-addr-util.h
@@ -53,14 +53,17 @@ int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret);
int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
-int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret);
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex);
-unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr);
-struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
-int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
-int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
-int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen);
+int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
+int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
static inline size_t FAMILY_ADDRESS_SIZE(int family) {
assert(family == AF_INET || family == AF_INET6);
diff --git a/src/basic/io-util.h b/src/basic/io-util.h
index 4684ed3bfc..d9b69adde9 100644
--- a/src/basic/io-util.h
+++ b/src/basic/io-util.h
@@ -40,14 +40,6 @@ int fd_wait_for_event(int fd, int event, usec_t timeout);
ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);
-#define IOVEC_SET_STRING(i, s) \
- do { \
- struct iovec *_i = &(i); \
- char *_s = (char *)(s); \
- _i->iov_base = _s; \
- _i->iov_len = strlen(_s); \
- } while (false)
-
static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, unsigned n) {
unsigned j;
size_t r = 0;
@@ -93,3 +85,8 @@ static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
return FILE_SIZE_VALID(l);
}
+
+/* Initializer and compound-literal helpers for struct iovec.
+ *
+ * IOVEC_INIT()/IOVEC_INIT_STRING() expand to a brace initializer, IOVEC_MAKE()/IOVEC_MAKE_STRING() to a
+ * compound literal that can be assigned. The *_STRING variants take a NUL-terminated string and evaluate
+ * their argument twice (once for the pointer, once for strlen()), hence only pass expressions without side
+ * effects. The argument is parenthesized before the cast so that compound expressions are cast as a whole. */
+#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) }
+#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len)
+#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) (string), strlen(string))
+#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string)
diff --git a/src/basic/journal-importer.c b/src/basic/journal-importer.c
index 7d72effdea..38ac8deaf3 100644
--- a/src/basic/journal-importer.c
+++ b/src/basic/journal-importer.c
@@ -20,8 +20,9 @@
#include <unistd.h>
#include "alloc-util.h"
-#include "journal-importer.h"
#include "fd-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
#include "parse-util.h"
#include "string-util.h"
#include "unaligned.h"
@@ -38,7 +39,7 @@ static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) {
if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
return log_oom();
- iovw->iovec[iovw->count++] = (struct iovec) {data, len};
+ iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
return 0;
}
diff --git a/src/basic/log.c b/src/basic/log.c
index 421ae52dc5..168c6c37ce 100644
--- a/src/basic/log.c
+++ b/src/basic/log.c
@@ -351,22 +351,22 @@ static int write_to_console(
if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
xsprintf(prefix, "<%i>", level);
- IOVEC_SET_STRING(iovec[n++], prefix);
+ iovec[n++] = IOVEC_MAKE_STRING(prefix);
}
highlight = LOG_PRI(level) <= LOG_ERR && show_color;
if (show_location) {
snprintf(location, sizeof(location), "(%s:%i) ", file, line);
- IOVEC_SET_STRING(iovec[n++], location);
+ iovec[n++] = IOVEC_MAKE_STRING(location);
}
if (highlight)
- IOVEC_SET_STRING(iovec[n++], ANSI_HIGHLIGHT_RED);
- IOVEC_SET_STRING(iovec[n++], buffer);
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED);
+ iovec[n++] = IOVEC_MAKE_STRING(buffer);
if (highlight)
- IOVEC_SET_STRING(iovec[n++], ANSI_NORMAL);
- IOVEC_SET_STRING(iovec[n++], "\n");
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
if (writev(console_fd, iovec, n) < 0) {
@@ -425,11 +425,11 @@ static int write_to_syslog(
xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
- IOVEC_SET_STRING(iovec[0], header_priority);
- IOVEC_SET_STRING(iovec[1], header_time);
- IOVEC_SET_STRING(iovec[2], program_invocation_short_name);
- IOVEC_SET_STRING(iovec[3], header_pid);
- IOVEC_SET_STRING(iovec[4], buffer);
+ iovec[0] = IOVEC_MAKE_STRING(header_priority);
+ iovec[1] = IOVEC_MAKE_STRING(header_time);
+ iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ iovec[3] = IOVEC_MAKE_STRING(header_pid);
+ iovec[4] = IOVEC_MAKE_STRING(buffer);
/* When using syslog via SOCK_STREAM separate the messages by NUL chars */
if (syslog_is_stream)
@@ -470,11 +470,11 @@ static int write_to_kmsg(
xsprintf(header_priority, "<%i>", level);
xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
- IOVEC_SET_STRING(iovec[0], header_priority);
- IOVEC_SET_STRING(iovec[1], program_invocation_short_name);
- IOVEC_SET_STRING(iovec[2], header_pid);
- IOVEC_SET_STRING(iovec[3], buffer);
- IOVEC_SET_STRING(iovec[4], "\n");
+ iovec[0] = IOVEC_MAKE_STRING(header_priority);
+ iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ iovec[2] = IOVEC_MAKE_STRING(header_pid);
+ iovec[3] = IOVEC_MAKE_STRING(buffer);
+ iovec[4] = IOVEC_MAKE_STRING("\n");
if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0)
return -errno;
@@ -547,10 +547,10 @@ static int write_to_journal(
log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
- IOVEC_SET_STRING(iovec[0], header);
- IOVEC_SET_STRING(iovec[1], "MESSAGE=");
- IOVEC_SET_STRING(iovec[2], buffer);
- IOVEC_SET_STRING(iovec[3], "\n");
+ iovec[0] = IOVEC_MAKE_STRING(header);
+ iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
+ iovec[2] = IOVEC_MAKE_STRING(buffer);
+ iovec[3] = IOVEC_MAKE_STRING("\n");
mh.msg_iov = iovec;
mh.msg_iovlen = ELEMENTSOF(iovec);
@@ -872,7 +872,7 @@ int log_format_iovec(
* the next format string */
VA_FORMAT_ADVANCE(format, ap);
- IOVEC_SET_STRING(iovec[(*n)++], m);
+ iovec[(*n)++] = IOVEC_MAKE_STRING(m);
if (newline_separator) {
iovec[*n].iov_base = (char*) &nl;
@@ -893,9 +893,9 @@ int log_struct_internal(
const char *func,
const char *format, ...) {
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
char buf[LINE_MAX];
bool found = false;
- LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
PROTECT_ERRNO;
va_list ap;
@@ -926,7 +926,7 @@ int log_struct_internal(
/* If the journal is available do structured logging */
log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
- IOVEC_SET_STRING(iovec[n++], header);
+ iovec[n++] = IOVEC_MAKE_STRING(header);
va_start(ap, format);
r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
@@ -975,6 +975,73 @@ int log_struct_internal(
return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
}
+/* Logs a structured entry whose fields are passed in as ready-made iovecs.
+ *
+ * If one of the journal targets is selected and the journal socket is open, a header followed by every input
+ * iovec (each terminated by a newline) is sent with a single sendmsg(). Otherwise, or if sendmsg() fails, the
+ * first input iovec starting with "MESSAGE=" is looked up and its payload is passed on to
+ * log_dispatch_internal(). On all paths that do not end in log_dispatch_internal() the function returns
+ * -error, with error normalized to a non-negative value first. */
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec input_iovec[],
+ size_t n_input_iovec) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ PROTECT_ERRNO;
+ size_t i;
+ char *m;
+
+ /* Accept both positive and negative error codes from the caller */
+ if (error < 0)
+ error = -error;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return -error;
+
+ if (log_target == LOG_TARGET_NULL)
+ return -error;
+
+ /* No facility specified by the caller? Then use the configured default facility */
+ if ((level & LOG_FACMASK) == 0)
+ level = log_facility | LOG_PRI(level);
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL) &&
+ journal_fd >= 0) {
+
+ /* One slot for the header, plus two per input field: the field itself and a trailing newline */
+ struct iovec iovec[1 + n_input_iovec*2];
+ char header[LINE_MAX];
+ struct msghdr mh = {
+ .msg_iov = iovec,
+ .msg_iovlen = 1 + n_input_iovec*2,
+ };
+
+ log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+ iovec[0] = IOVEC_MAKE_STRING(header);
+
+ for (i = 0; i < n_input_iovec; i++) {
+ iovec[1+i*2] = input_iovec[i];
+ iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
+ }
+
+ /* If sending fails we fall through to the non-structured fallback below */
+ if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0)
+ return -error;
+ }
+
+ /* Fallback: find the first field that carries the human readable message */
+ for (i = 0; i < n_input_iovec; i++) {
+ if (input_iovec[i].iov_len < strlen("MESSAGE="))
+ continue;
+
+ if (memcmp(input_iovec[i].iov_base, "MESSAGE=", strlen("MESSAGE=")) == 0)
+ break;
+ }
+
+ if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */
+ return -error;
+
+ /* The iovec payload is not necessarily NUL terminated, hence make a terminated copy on the stack */
+ m = strndupa(input_iovec[i].iov_base + strlen("MESSAGE="),
+ input_iovec[i].iov_len - strlen("MESSAGE="));
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
+}
+
int log_set_target_from_string(const char *e) {
LogTarget t;
diff --git a/src/basic/log.h b/src/basic/log.h
index 186747ff8e..e3fd3203d0 100644
--- a/src/basic/log.h
+++ b/src/basic/log.h
@@ -187,6 +187,15 @@ int log_format_iovec(
const char *format,
va_list ap) _printf_(6, 0);
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec input_iovec[],
+ size_t n_input_iovec);
+
/* This modifies the buffer passed! */
int log_dump_internal(
int level,
@@ -270,6 +279,11 @@ void log_assert_failed_return_realm(
error, __FILE__, __LINE__, __func__, __VA_ARGS__)
#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)
+#define log_struct_iovec_errno(level, error, iovec, n_iovec) \
+ log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ error, __FILE__, __LINE__, __func__, iovec, n_iovec)
+#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec)
+
/* This modifies the buffer passed! */
#define log_dump(level, buffer) \
log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
diff --git a/src/basic/meson.build b/src/basic/meson.build
index 67cc27274d..994336fde2 100644
--- a/src/basic/meson.build
+++ b/src/basic/meson.build
@@ -1,4 +1,6 @@
basic_sources_plain = files('''
+ MurmurHash2.c
+ MurmurHash2.h
af-list.c
af-list.h
alloc-util.c
@@ -16,6 +18,8 @@ basic_sources_plain = files('''
bitmap.c
bitmap.h
blkid-util.h
+ bpf-program.c
+ bpf-program.h
btrfs-ctree.h
btrfs-util.c
btrfs-util.h
@@ -24,10 +28,10 @@ basic_sources_plain = files('''
bus-label.h
calendarspec.c
calendarspec.h
- capability-util.c
- capability-util.h
cap-list.c
cap-list.h
+ capability-util.c
+ capability-util.h
cgroup-util.c
cgroup-util.h
chattr-util.c
@@ -61,10 +65,10 @@ basic_sources_plain = files('''
extract-word.h
fd-util.c
fd-util.h
- fileio.c
- fileio.h
fileio-label.c
fileio-label.h
+ fileio.c
+ fileio.h
format-util.h
fs-util.c
fs-util.h
@@ -82,9 +86,9 @@ basic_sources_plain = files('''
hostname-util.h
in-addr-util.c
in-addr-util.h
- ioprio.h
io-util.c
io-util.h
+ ioprio.h
journal-importer.c
journal-importer.h
khash.c
@@ -106,13 +110,11 @@ basic_sources_plain = files('''
mempool.c
mempool.h
missing_syscall.h
+ mkdir-label.c
mkdir.c
mkdir.h
- mkdir-label.c
mount-util.c
mount-util.h
- MurmurHash2.c
- MurmurHash2.h
nss-util.h
ordered-set.c
ordered-set.h
@@ -138,9 +140,9 @@ basic_sources_plain = files('''
rlimit-util.h
rm-rf.c
rm-rf.h
- securebits.h
securebits-util.c
securebits-util.h
+ securebits.h
selinux-util.c
selinux-util.h
set.h
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
index 898116c7b3..17cde5e74f 100644
--- a/src/basic/missing_syscall.h
+++ b/src/basic/missing_syscall.h
@@ -22,6 +22,8 @@
/* Missing glibc definitions to access certain kernel APIs */
+#include <sys/types.h>
+
#if !HAVE_DECL_PIVOT_ROOT
static inline int pivot_root(const char *new_root, const char *put_old) {
return syscall(SYS_pivot_root, new_root, put_old);
@@ -316,3 +318,33 @@ static inline ssize_t copy_file_range(int fd_in, loff_t *off_in,
# endif
}
#endif
+
+#if !HAVE_DECL_BPF
+# ifndef __NR_bpf
+# if defined __i386__
+# define __NR_bpf 357
+# elif defined __x86_64__
+# define __NR_bpf 321
+# elif defined __aarch64__
+# define __NR_bpf 280
+# elif defined __sparc__
+# define __NR_bpf 349
+# elif defined __s390__
+# define __NR_bpf 351
+# else
+# warning "__NR_bpf not defined for your architecture"
+# endif
+# endif
+
+union bpf_attr;
+
+/* Fallback wrapper for the bpf() system call, only compiled when HAVE_DECL_BPF is unset. If no syscall
+ * number is known for the architecture (__NR_bpf undefined) the call fails with errno set to ENOSYS. */
+static inline int bpf(int cmd, union bpf_attr *attr, size_t size) {
+#ifdef __NR_bpf
+ return (int) syscall(__NR_bpf, cmd, attr, size);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+#endif
diff --git a/src/basic/rlimit-util.c b/src/basic/rlimit-util.c
index ca834df621..5c41429f01 100644
--- a/src/basic/rlimit-util.c
+++ b/src/basic/rlimit-util.c
@@ -42,7 +42,8 @@ int setrlimit_closest(int resource, const struct rlimit *rlim) {
/* So we failed to set the desired setrlimit, then let's try
* to get as close as we can */
- assert_se(getrlimit(resource, &highest) == 0);
+ if (getrlimit(resource, &highest) < 0)
+ return -errno;
fixed.rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max);
fixed.rlim_max = MIN(rlim->rlim_max, highest.rlim_max);
diff --git a/src/basic/socket-label.c b/src/basic/socket-label.c
index 6d1dc83874..6e7cdaac63 100644
--- a/src/basic/socket-label.c
+++ b/src/basic/socket-label.c
@@ -83,7 +83,7 @@ int socket_address_listen(
return -errno;
}
- if (socket_address_family(a) == AF_INET || socket_address_family(a) == AF_INET6) {
+ if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
if (bind_to_device)
if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0)
return -errno;
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
new file mode 100644
index 0000000000..909c1c8253
--- /dev/null
+++ b/src/core/bpf-firewall.c
@@ -0,0 +1,680 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Daniel Mack
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/libbpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "ip-address-access.h"
+#include "unit.h"
+
+/* Keys of the two counters kept in each accounting map (one map per direction) */
+enum {
+ MAP_KEY_PACKETS,
+ MAP_KEY_BYTES,
+};
+
+/* Verdict bits the generated lookup code ORs into R8 when an address matches an allow or deny map */
+enum {
+ ACCESS_ALLOWED = 1,
+ ACCESS_DENIED = 2,
+};
+
+/* Compile instructions for one list of addresses, one direction and one specific verdict on matches.
+ *
+ * Appends a block to @p that is skipped unless R7 equals htobe16(@protocol). The block loads the source
+ * (ingress) or destination (egress) address of the packet onto the stack, looks it up in the map @map_fd
+ * and, if an entry is found, ORs @verdict into R8.
+ *
+ * Returns 0 on success, -EAFNOSUPPORT if @protocol is neither ETH_P_IP nor ETH_P_IPV6, or the error returned
+ * by bpf_program_add_instructions(). */
+
+static int add_lookup_instructions(
+ BPFProgram *p,
+ int map_fd,
+ int protocol,
+ bool is_ingress,
+ int verdict) {
+
+ int r, addr_offset, addr_size;
+
+ assert(p);
+ assert(map_fd >= 0);
+
+ /* Figure out size of the address and its offset in the IP header: source address for ingress,
+ * destination address for egress */
+ switch (protocol) {
+
+ case ETH_P_IP:
+ addr_size = sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct iphdr, saddr) :
+ offsetof(struct iphdr, daddr);
+ break;
+
+ case ETH_P_IPV6:
+ addr_size = 4 * sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct ip6_hdr, ip6_src.s6_addr) :
+ offsetof(struct ip6_hdr, ip6_dst.s6_addr);
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ do {
+ /* The same instruction sequence is used for IPv4 and IPv6, only address size and offset differ */
+ struct bpf_insn insn[] = {
+ /* If the protocol in R7 doesn't match @protocol, skip this whole block. The offset will be set later. */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
+
+ /*
+ * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
+ *
+ * R1: Pointer to the skb
+ * R2: Data offset
+ * R3: Destination buffer on the stack (r10 - addr_size)
+ * R4: Number of bytes to read (addr_size)
+ */
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV32_IMM(BPF_REG_2, addr_offset),
+
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
+
+ BPF_MOV32_IMM(BPF_REG_4, addr_size),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+
+ /*
+ * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
+ * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
+ * has to be set to the maximum possible value.
+ *
+ * The key is built on the stack: a 32bit prefix length (addr_size * 8) stored directly
+ * below the address loaded above.
+ *
+ * On success, the looked up value is stored in R0. For this application, the actual
+ * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
+ * matching value.
+ */
+
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
+
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+ };
+
+ /* Jump label fixup: let the first instruction jump over all remaining instructions of this block */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+
+ } while (false);
+
+ return 0;
+}
+
+/* Assembles the eBPF program for one direction (ingress if @is_ingress, egress otherwise) of unit @u.
+ *
+ * The program consists of a prologue (pre_insn), one lookup block per existing allow/deny map, a block that
+ * turns the collected verdict bits into the return value (post_insn), optional accounting code and the final
+ * exit instruction.
+ *
+ * On success 0 is returned and the new program is stored in *ret; *ret is set to NULL if the unit has neither
+ * an accounting map for this direction nor any access map. On failure a negative error is returned. */
+static int bpf_firewall_compile_bpf(
+ Unit *u,
+ bool is_ingress,
+ BPFProgram **ret) {
+
+ struct bpf_insn pre_insn[] = {
+ /*
+ * When the eBPF program is entered, R1 contains the address of the skb.
+ * However, R1-R5 are scratch registers that are not preserved when calling
+ * into kernel functions, so we need to save anything that's supposed to
+ * stay around to R6-R9. Save the skb to R6.
+ */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /*
+ * Although we cannot access the skb data directly from eBPF programs used in this
+ * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
+ * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
+ * for later use.
+ */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
+
+ /*
+ * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
+ * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
+ */
+ BPF_MOV32_IMM(BPF_REG_8, 0),
+ };
+
+ /*
+ * The access checkers compiled for the configured allowance and denial lists
+ * write to R8 at runtime. The following code prepares for an early exit that
+ * skips the accounting if the packet is denied.
+ *
+ * R0 = 1
+ * if (R8 == ACCESS_DENIED)
+ * R0 = 0
+ *
+ * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
+ * is allowed to pass.
+ */
+ struct bpf_insn post_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+ int accounting_map_fd, r;
+ bool access_enabled;
+
+ assert(u);
+ assert(ret);
+
+ accounting_map_fd = is_ingress ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+
+ access_enabled =
+ u->ipv4_allow_map_fd >= 0 ||
+ u->ipv6_allow_map_fd >= 0 ||
+ u->ipv4_deny_map_fd >= 0 ||
+ u->ipv6_deny_map_fd >= 0;
+
+ /* Nothing to account and nothing to filter? Then no program is needed at all */
+ if (accounting_map_fd < 0 && !access_enabled) {
+ *ret = NULL;
+ return 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+ if (r < 0)
+ return r;
+
+ r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return r;
+
+ if (access_enabled) {
+ /*
+ * The simple rule this function translates into eBPF instructions is:
+ *
+ * - Access will be granted when an address matches an entry in @list_allow
+ * - Otherwise, access will be denied when an address matches an entry in @list_deny
+ * - Otherwise, access will be granted
+ */
+
+ if (u->ipv4_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv4_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return r;
+
+ if (accounting_map_fd >= 0) {
+ struct bpf_insn insn[] = {
+ /*
+ * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
+ * The jump label will be fixed up later.
+ */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+
+ /* Count packets */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Count bytes */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Allow the packet to pass */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ };
+
+ /* Jump label fixup: a denied packet jumps over all remaining accounting instructions */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ }
+
+ do {
+ /*
+ * Exit from the eBPF program, R0 contains the verdict.
+ * 0 means the packet is denied, 1 means the packet may pass.
+ */
+ struct bpf_insn insn[] = {
+ BPF_EXIT_INSN()
+ };
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ } while (false);
+
+ /* Hand the program over to the caller and disarm the cleanup handler */
+ *ret = p;
+ p = NULL;
+
+ return 0;
+}
+
+/* Tallies the entries of @list per address family by incrementing *n_ipv4 and *n_ipv6. The counters are not
+ * reset here, so that the totals of several lists can be accumulated. Stops and returns -EAFNOSUPPORT at the
+ * first entry that is neither AF_INET nor AF_INET6, otherwise returns 0. */
+static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
+        IPAddressAccessItem *item;
+
+        assert(n_ipv4);
+        assert(n_ipv6);
+
+        LIST_FOREACH(items, item, list) {
+                if (item->family == AF_INET)
+                        (*n_ipv4)++;
+                else if (item->family == AF_INET6)
+                        (*n_ipv6)++;
+                else
+                        return -EAFNOSUPPORT;
+        }
+
+        return 0;
+}
+
+/* Inserts every entry of @list into the LPM trie map matching its address family (@ipv4_map_fd or
+ * @ipv6_map_fd), using the entry's prefix length and address as key and @verdict as value. Returns 0 on
+ * success, the error of bpf_map_update_element() if an insertion fails, or -EAFNOSUPPORT on an entry that is
+ * neither AF_INET nor AF_INET6. */
+static int bpf_firewall_add_access_items(
+                IPAddressAccessItem *list,
+                int ipv4_map_fd,
+                int ipv6_map_fd,
+                int verdict) {
+
+        const size_t ipv4_size = sizeof(uint32_t), ipv6_size = 4 * sizeof(uint32_t);
+        struct bpf_lpm_trie_key *key4, *key6;
+        uint64_t map_value = verdict;
+        IPAddressAccessItem *item;
+
+        /* One zero-initialized key buffer per family, reused for all entries */
+        key4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + ipv4_size);
+        key6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + ipv6_size);
+
+        LIST_FOREACH(items, item, list) {
+                int r;
+
+                if (item->family == AF_INET) {
+                        key4->prefixlen = item->prefixlen;
+                        memcpy(key4->data, &item->address, ipv4_size);
+
+                        r = bpf_map_update_element(ipv4_map_fd, key4, &map_value);
+                } else if (item->family == AF_INET6) {
+                        key6->prefixlen = item->prefixlen;
+                        memcpy(key6->data, &item->address, ipv6_size);
+
+                        r = bpf_map_update_element(ipv6_map_fd, key6, &map_value);
+                } else
+                        return -EAFNOSUPPORT;
+
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Creates and populates the LPM trie maps for one verdict (ACCESS_ALLOWED or ACCESS_DENIED).
+ *
+ * The address lists of @u and of all slices it is (transitively) contained in are combined: first the
+ * entries are counted per address family to size the maps, then one map per family that has at least one
+ * entry is created and filled.
+ *
+ * On success 0 is returned and the map fds are stored in *ret_ipv4_map_fd and *ret_ipv6_map_fd (-1 for a
+ * family without entries); ownership of the fds passes to the caller. On failure a negative error is
+ * returned, any map created so far is closed again and the return parameters are left untouched. */
+static int bpf_firewall_prepare_access_maps(
+                Unit *u,
+                int verdict,
+                int *ret_ipv4_map_fd,
+                int *ret_ipv6_map_fd) {
+
+        _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
+        size_t n_ipv4 = 0, n_ipv6 = 0;
+        Unit *p;
+        int r;
+
+        assert(ret_ipv4_map_fd);
+        assert(ret_ipv6_map_fd);
+
+        for (p = u; p; p = UNIT_DEREF(p->slice)) {
+                CGroupContext *cc;
+
+                cc = unit_get_cgroup_context(p);
+                if (!cc)
+                        continue;
+
+                /* Don't ignore counting failures: an entry of an unsupported address family makes the
+                 * insertion pass below fail anyway, so bail out before allocating any kernel maps. */
+                r = bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
+                if (r < 0)
+                        return r;
+        }
+
+        if (n_ipv4 > 0) {
+                ipv4_map_fd = bpf_map_new(
+                                BPF_MAP_TYPE_LPM_TRIE,
+                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
+                                sizeof(uint64_t),
+                                n_ipv4,
+                                BPF_F_NO_PREALLOC);
+                if (ipv4_map_fd < 0)
+                        return ipv4_map_fd;
+        }
+
+        if (n_ipv6 > 0) {
+                ipv6_map_fd = bpf_map_new(
+                                BPF_MAP_TYPE_LPM_TRIE,
+                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
+                                sizeof(uint64_t),
+                                n_ipv6,
+                                BPF_F_NO_PREALLOC);
+                if (ipv6_map_fd < 0)
+                        return ipv6_map_fd;
+        }
+
+        for (p = u; p; p = UNIT_DEREF(p->slice)) {
+                CGroupContext *cc;
+
+                cc = unit_get_cgroup_context(p);
+                if (!cc)
+                        continue;
+
+                r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
+                                                  ipv4_map_fd, ipv6_map_fd, verdict);
+                if (r < 0)
+                        return r;
+        }
+
+        *ret_ipv4_map_fd = ipv4_map_fd;
+        *ret_ipv6_map_fd = ipv6_map_fd;
+
+        /* The fds belong to the caller now, disarm the cleanup handlers */
+        ipv4_map_fd = ipv6_map_fd = -1;
+        return 0;
+}
+
+/* Brings the two accounting maps in line with the accounting setting.
+ *
+ * If accounting is disabled both fds are closed via safe_close() and its return value is stored back. If it
+ * is enabled, a two-element array map is created for each direction whose fd is still negative; fds that are
+ * already set are kept as they are. Returns 0 on success or the error returned by bpf_map_new(). */
+static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
+        int fd;
+
+        assert(fd_ingress);
+        assert(fd_egress);
+
+        if (!enabled) {
+                *fd_ingress = safe_close(*fd_ingress);
+                *fd_egress = safe_close(*fd_egress);
+                return 0;
+        }
+
+        if (*fd_ingress < 0) {
+                fd = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+                if (fd < 0)
+                        return fd;
+
+                *fd_ingress = fd;
+        }
+
+        if (*fd_egress < 0) {
+                fd = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+                if (fd < 0)
+                        return fd;
+
+                *fd_egress = fd;
+        }
+
+        return 0;
+}
+
+/* (Re-)builds the BPF firewall state of unit @u: drops the existing programs and access maps, creates new
+ * allow/deny maps, creates or closes the accounting maps according to the unit's ip_accounting setting, and
+ * compiles the ingress and egress programs. Nothing is attached to the cgroup here.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if BPF firewalling is not supported, -EINVAL if the unit has no cgroup
+ * context, or the (logged) error of the step that failed. */
+int bpf_firewall_compile(Unit *u) {
+ CGroupContext *cc;
+ int r;
+
+ assert(u);
+
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_debug("BPF firewalling not supported on this systemd, proceeding without.");
+ return -EOPNOTSUPP;
+ }
+
+ /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
+ * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
+ * configuration, but we don't flush out the accounting unnecessarily */
+
+ u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
+ u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
+
+ u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+ u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+
+ u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+ u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
+ if (r < 0)
+ return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
+ if (r < 0)
+ return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");
+
+ r = bpf_firewall_prepare_accounting_maps(cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
+ if (r < 0)
+ return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
+ if (r < 0)
+ return log_error_errno(r, "Compilation for ingress BPF program failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
+ if (r < 0)
+ return log_error_errno(r, "Compilation for egress BPF program failed: %m");
+
+ return 0;
+}
+
+/* Applies the compiled BPF programs of unit @u to its cgroup. For each direction (egress first, then
+ * ingress): if a program exists it is loaded into the kernel and attached to the cgroup, otherwise
+ * bpf_program_cgroup_detach() is called for that direction.
+ *
+ * Returns 0 on success, -EINVAL if the unit has no cgroup path or no cgroup context, -EOPNOTSUPP if BPF
+ * firewalling is not supported, or the result of the logging call for the step that failed. */
+int bpf_firewall_install(Unit *u) {
+ _cleanup_free_ char *path = NULL;
+ CGroupContext *cc;
+ int r;
+
+ assert(u);
+
+ if (!u->cgroup_path)
+ return -EINVAL;
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_debug("BPF firewalling not supported on this systemd, proceeding without.");
+ return -EOPNOTSUPP;
+ }
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+ if (u->ip_bpf_egress) {
+ r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");
+
+ /* For units with delegation turned on, attach with BPF_F_ALLOW_OVERRIDE */
+ r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+ if (r < 0)
+ return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
+ } else {
+ r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, path);
+ if (r < 0)
+ /* -ENOENT is only logged at debug level, but we still return here */
+ return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
+ "Detaching egress BPF program from cgroup failed: %m");
+ }
+
+ if (u->ip_bpf_ingress) {
+ r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");
+
+ r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+ if (r < 0)
+ return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
+ } else {
+ r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, path);
+ if (r < 0)
+ return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
+ "Detaching ingress BPF program from cgroup failed: %m");
+ }
+
+ return 0;
+}
+
+/* Reads the counters of one accounting map.
+ *
+ * @ret_bytes and @ret_packets are both optional (may be NULL); only the requested counters are looked up.
+ * *ret_packets is written only after all requested lookups succeeded. Returns 0 on success, -EBADF if
+ * @map_fd is negative, or the error returned by bpf_map_lookup_element(). */
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
+        uint64_t packets;
+        int key, r;
+
+        /* The key must be exactly as wide as the key size the accounting maps are created with
+         * (sizeof(int)): the kernel reads only that many bytes from the key buffer, so a 64bit key
+         * variable would select the wrong slot on big-endian architectures. */
+
+        if (map_fd < 0)
+                return -EBADF;
+
+        if (ret_packets) {
+                key = MAP_KEY_PACKETS;
+                r = bpf_map_lookup_element(map_fd, &key, &packets);
+                if (r < 0)
+                        return r;
+        }
+
+        if (ret_bytes) {
+                key = MAP_KEY_BYTES;
+                r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
+                if (r < 0)
+                        return r;
+        }
+
+        if (ret_packets)
+                *ret_packets = packets;
+
+        return 0;
+}
+
+/* Resets both counters (packets and bytes) of one accounting map to zero. Returns -EBADF if @map_fd is
+ * negative, otherwise the result of bpf_map_update_element(); if resetting the packet counter fails the byte
+ * counter is left untouched. */
+int bpf_firewall_reset_accounting(int map_fd) {
+        uint64_t value = 0;
+        int key, r;
+
+        /* The key must be exactly as wide as the key size the accounting maps are created with
+         * (sizeof(int)): the kernel reads only that many bytes from the key buffer, so a 64bit key
+         * variable would select the wrong slot on big-endian architectures. */
+
+        if (map_fd < 0)
+                return -EBADF;
+
+        key = MAP_KEY_PACKETS;
+        r = bpf_map_update_element(map_fd, &key, &value);
+        if (r < 0)
+                return r;
+
+        key = MAP_KEY_BYTES;
+        return bpf_map_update_element(map_fd, &key, &value);
+}
+
+
+int bpf_firewall_supported(void) {
+ static int supported = -1;
+ int fd, r;
+
+ /* Checks whether BPF firewalling is supported. For this, we check three things:
+ *
+ * a) whether we are privileged
+ * b) whether the unified hierarchy is being used
+ * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
+ *
+ */
+
+ if (supported >= 0)
+ return supported;
+
+ if (geteuid() != 0)
+ return supported = false;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0)
+ return supported = false;
+
+ fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
+ sizeof(uint64_t),
+ 1,
+ BPF_F_NO_PREALLOC);
+ if (fd < 0) {
+ log_debug_errno(r, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
+ return supported = false;
+ }
+
+ safe_close(fd);
+
+ return supported = true;
+}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
new file mode 100644
index 0000000000..870e314e0e
--- /dev/null
+++ b/src/core/bpf-firewall.h
@@ -0,0 +1,32 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Daniel Mack
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+int bpf_firewall_supported(void);
+
+int bpf_firewall_compile(Unit *u);
+int bpf_firewall_install(Unit *u);
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
+int bpf_firewall_reset_accounting(int map_fd);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index c806d6b7cb..9a0d374aa8 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -21,6 +21,7 @@
#include <fnmatch.h>
#include "alloc-util.h"
+#include "bpf-firewall.h"
#include "cgroup-util.h"
#include "cgroup.h"
#include "fd-util.h"
@@ -30,9 +31,9 @@
#include "path-util.h"
#include "process-util.h"
#include "special.h"
+#include "stdio-util.h"
#include "string-table.h"
#include "string-util.h"
-#include "stdio-util.h"
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
@@ -141,6 +142,9 @@ void cgroup_context_done(CGroupContext *c) {
while (c->device_allow)
cgroup_context_free_device_allow(c, c->device_allow);
+
+ c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
+ c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
}
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@@ -149,6 +153,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupBlockIODeviceBandwidth *b;
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
+ IPAddressAccessItem *iaai;
char u[FORMAT_TIMESPAN_MAX];
assert(c);
@@ -162,6 +167,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sTasksAccounting=%s\n"
+ "%sIPAccounting=%s\n"
"%sCPUWeight=%" PRIu64 "\n"
"%sStartupCPUWeight=%" PRIu64 "\n"
"%sCPUShares=%" PRIu64 "\n"
@@ -184,6 +190,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
+ prefix, yes_no(c->ip_accounting),
prefix, c->cpu_weight,
prefix, c->startup_cpu_weight,
prefix, c->cpu_shares,
@@ -253,6 +260,20 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
b->path,
format_bytes(buf, sizeof(buf), b->wbps));
}
+
+ LIST_FOREACH(items, iaai, c->ip_address_allow) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+ fprintf(f, "%sIPAddressAllow=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+ }
+
+ LIST_FOREACH(items, iaai, c->ip_address_deny) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+ fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+ }
}
static int lookup_block_device(const char *p, dev_t *dev) {
@@ -645,7 +666,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
"Failed to set %s: %m", file);
}
-static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
+static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {
+        /* Runs bpf_firewall_compile() for the unit and, only if that succeeded, bpf_firewall_install(). Nothing is
+         * reported back to the caller: a failed compile simply skips the install, and the result of the install is
+         * deliberately ignored. The context parameter is not used here. */
+
+        /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is not recursive we don't
+         * ever touch the bpf on them */
+        if (u->type == UNIT_SLICE)
+                return;
+
+        if (bpf_firewall_compile(u) < 0)
+                return;
+
+        (void) bpf_firewall_install(u);
+}
+
+static void cgroup_context_apply(
+ Unit *u,
+ CGroupMask apply_mask,
+ bool apply_bpf,
+ ManagerState state) {
+
const char *path;
CGroupContext *c;
bool is_root;
@@ -659,7 +700,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
assert(c);
assert(path);
- if (mask == 0)
+ /* Nothing to do? Exit early! */
+ if (apply_mask == 0 && !apply_bpf)
return;
/* Some cgroup attributes are not supported on the root cgroup,
@@ -673,9 +715,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* cgroup trees (assuming we are running in a container then),
* and missing cgroups, i.e. EROFS and ENOENT. */
- if ((mask & CGROUP_MASK_CPU) && !is_root) {
- bool has_weight = cgroup_context_has_cpu_weight(c);
- bool has_shares = cgroup_context_has_cpu_shares(c);
+ if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
+ bool has_weight, has_shares;
+
+ has_weight = cgroup_context_has_cpu_weight(c);
+ has_shares = cgroup_context_has_cpu_shares(c);
if (cg_all_unified() > 0) {
uint64_t weight;
@@ -712,7 +756,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if (mask & CGROUP_MASK_IO) {
+ if (apply_mask & CGROUP_MASK_IO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@@ -789,7 +833,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if (mask & CGROUP_MASK_BLKIO) {
+ if (apply_mask & CGROUP_MASK_BLKIO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@@ -856,7 +900,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
if (cg_all_unified() > 0) {
uint64_t max, swap_max = CGROUP_LIMIT_MAX;
@@ -896,7 +940,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
CGroupDeviceAllow *a;
/* Changing the devices list of a populated cgroup
@@ -960,7 +1004,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
- if ((mask & CGROUP_MASK_PIDS) && !is_root) {
+ if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) {
if (c->tasks_max != CGROUP_LIMIT_MAX) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
@@ -974,6 +1018,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set pids.max: %m");
}
+
+ if (apply_bpf)
+ cgroup_apply_firewall(u, c);
}
CGroupMask cgroup_context_get_mask(CGroupContext *c) {
@@ -1120,6 +1167,39 @@ CGroupMask unit_get_enable_mask(Unit *u) {
return mask;
}
+bool unit_get_needs_bpf(Unit *u) {
+        CGroupContext *ctx;
+        Unit *parent;
+
+        assert(u);
+
+        /* Tells the caller whether a BPF program is wanted for this unit: true if the unit itself enables IP
+         * accounting or carries an IP allow/deny list, or if any slice above it carries an IP allow/deny list. */
+
+        /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at the
+         * moment. */
+        if (u->type == UNIT_SLICE)
+                return false;
+
+        ctx = unit_get_cgroup_context(u);
+        if (!ctx)
+                return false;
+
+        if (ctx->ip_accounting || ctx->ip_address_allow || ctx->ip_address_deny)
+                return true;
+
+        /* If any parent slice has an IP access list defined, it applies too. Note that accounting on a parent does
+         * not count, only its access lists do. */
+        parent = UNIT_DEREF(u->slice);
+        while (parent) {
+                ctx = unit_get_cgroup_context(parent);
+                if (!ctx)
+                        return false;
+
+                if (ctx->ip_address_allow || ctx->ip_address_deny)
+                        return true;
+
+                parent = UNIT_DEREF(parent->slice);
+        }
+
+        return false;
+}
+
/* Recurse from a unit up through its containing slices, propagating
* mask bits upward. A unit is also member of itself. */
void unit_update_cgroup_members_masks(Unit *u) {
@@ -1295,7 +1375,8 @@ int unit_watch_cgroup(Unit *u) {
static int unit_create_cgroup(
Unit *u,
CGroupMask target_mask,
- CGroupMask enable_mask) {
+ CGroupMask enable_mask,
+ bool needs_bpf) {
CGroupContext *c;
int r;
@@ -1337,6 +1418,7 @@ static int unit_create_cgroup(
u->cgroup_realized = true;
u->cgroup_realized_mask = target_mask;
u->cgroup_enabled_mask = enable_mask;
+ u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
if (u->type != UNIT_SLICE && !c->delegate) {
@@ -1386,10 +1468,19 @@ static void cgroup_xattr_apply(Unit *u) {
log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
}
-static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
+static bool unit_has_mask_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask,
+ bool needs_bpf) {
+
assert(u);
- return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
+ return u->cgroup_realized &&
+ u->cgroup_realized_mask == target_mask &&
+ u->cgroup_enabled_mask == enable_mask &&
+ ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
+ (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
}
/* Check if necessary controllers and attributes for a unit are in place.
@@ -1400,6 +1491,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e
* Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
CGroupMask target_mask, enable_mask;
+ bool needs_bpf, apply_bpf;
int r;
assert(u);
@@ -1411,10 +1503,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
target_mask = unit_get_target_mask(u);
enable_mask = unit_get_enable_mask(u);
+ needs_bpf = unit_get_needs_bpf(u);
- if (unit_has_mask_realized(u, target_mask, enable_mask))
+ if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
return 0;
+ /* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
+ * the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
+ * this will trickle down properly to cgroupfs. */
+ apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
+
/* First, realize parents */
if (UNIT_ISSET(u->slice)) {
r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@@ -1423,18 +1521,19 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
}
/* And then do the real work */
- r = unit_create_cgroup(u, target_mask, enable_mask);
+ r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
if (r < 0)
return r;
/* Finally, apply the necessary attributes. */
- cgroup_context_apply(u, target_mask, state);
+ cgroup_context_apply(u, target_mask, apply_bpf, state);
cgroup_xattr_apply(u);
return 0;
}
static void unit_add_to_cgroup_queue(Unit *u) {
+ assert(u);
if (u->in_cgroup_queue)
return;
@@ -1492,7 +1591,10 @@ static void unit_queue_siblings(Unit *u) {
/* If the unit doesn't need any new controllers
* and has current ones realized, it doesn't need
* any changes. */
- if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
+ if (unit_has_mask_realized(m,
+ unit_get_target_mask(m),
+ unit_get_enable_mask(m),
+ unit_get_needs_bpf(m)))
continue;
unit_add_to_cgroup_queue(m);
@@ -1756,6 +1858,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *path = NULL;
+ const char *scope_path;
CGroupController c;
int r, all_unified;
char *e;
@@ -1813,73 +1916,66 @@ int manager_setup_cgroup(Manager *m) {
log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
}
- if (!m->test_run_flags) {
- const char *scope_path;
-
- /* 3. Install agent */
- if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+ /* 3. Install agent */
+ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
- /* In the unified hierarchy we can get
- * cgroup empty notifications via inotify. */
+ /* In the unified hierarchy we can get
+ * cgroup empty notifications via inotify. */
- m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
- safe_close(m->cgroup_inotify_fd);
+ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
+ safe_close(m->cgroup_inotify_fd);
- m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
- if (m->cgroup_inotify_fd < 0)
- return log_error_errno(errno, "Failed to create control group inotify object: %m");
+ m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (m->cgroup_inotify_fd < 0)
+ return log_error_errno(errno, "Failed to create control group inotify object: %m");
- r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
- if (r < 0)
- return log_error_errno(r, "Failed to watch control group inotify object: %m");
-
- /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
- * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
- r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
- if (r < 0)
- return log_error_errno(r, "Failed to set priority of inotify event source: %m");
+ r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch control group inotify object: %m");
- (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
+ /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
+ * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
+ r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of inotify event source: %m");
- } else if (MANAGER_IS_SYSTEM(m)) {
+ (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
- /* On the legacy hierarchy we only get
- * notifications via cgroup agents. (Which
- * isn't really reliable, since it does not
- * generate events when control groups with
- * children run empty. */
+ } else if (MANAGER_IS_SYSTEM(m) && m->test_run_flags == 0) {
- r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
- if (r < 0)
- log_warning_errno(r, "Failed to install release agent, ignoring: %m");
- else if (r > 0)
- log_debug("Installed release agent.");
- else if (r == 0)
- log_debug("Release agent already installed.");
- }
+ /* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
+ * since it does not generate events when control groups with children run empty.) */
- /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
- scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
- r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+ r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
if (r < 0)
- return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
+ log_warning_errno(r, "Failed to install release agent, ignoring: %m");
+ else if (r > 0)
+ log_debug("Installed release agent.");
+ else if (r == 0)
+ log_debug("Release agent already installed.");
+ }
- /* also, move all other userspace processes remaining
- * in the root cgroup into that scope. */
- r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
- if (r < 0)
- log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
+ /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
+ scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
- /* 5. And pin it, so that it cannot be unmounted */
- safe_close(m->pin_cgroupfs_fd);
- m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
- if (m->pin_cgroupfs_fd < 0)
- return log_error_errno(errno, "Failed to open pin file: %m");
+ /* also, move all other userspace processes remaining
+ * in the root cgroup into that scope. */
+ r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+ if (r < 0)
+ log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
- /* 6. Always enable hierarchical support if it exists... */
- if (!all_unified)
- (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
- }
+ /* 5. And pin it, so that it cannot be unmounted */
+ safe_close(m->pin_cgroupfs_fd);
+ m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
+ if (m->pin_cgroupfs_fd < 0)
+ return log_error_errno(errno, "Failed to open pin file: %m");
+
+ /* 6. Always enable hierarchical support if it exists... */
+ if (!all_unified && m->test_run_flags == 0)
+ (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
/* 7. Figure out which controllers are supported */
r = cg_mask_supported(&m->cgroup_supported);
@@ -1992,11 +2088,18 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
int unit_get_memory_current(Unit *u, uint64_t *ret) {
_cleanup_free_ char *v = NULL;
+ CGroupContext *cc;
int r;
assert(u);
assert(ret);
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -ENODATA;
+ if (!cc->memory_accounting)
+ return -ENODATA;
+
if (!u->cgroup_path)
return -ENODATA;
@@ -2020,11 +2123,18 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
int unit_get_tasks_current(Unit *u, uint64_t *ret) {
_cleanup_free_ char *v = NULL;
+ CGroupContext *cc;
int r;
assert(u);
assert(ret);
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -ENODATA;
+ if (!cc->tasks_accounting)
+ return -ENODATA;
+
if (!u->cgroup_path)
return -ENODATA;
@@ -2091,6 +2201,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
}
int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
+ CGroupContext *cc;
nsec_t ns;
int r;
@@ -2100,6 +2211,12 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
* started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
* call this function with a NULL return value. */
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -ENODATA;
+ if (!cc->cpu_accounting)
+ return -ENODATA;
+
r = unit_get_cpu_usage_raw(u, &ns);
if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
/* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
@@ -2124,7 +2241,57 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
return 0;
}
-int unit_reset_cpu_usage(Unit *u) {
+int unit_get_ip_accounting(
+                Unit *u,
+                CGroupIPAccountingMetric metric,
+                uint64_t *ret) {
+
+        bool want_ingress, want_bytes;
+        CGroupContext *cc;
+        uint64_t counter;
+        int map_fd, r;
+
+        assert(u);
+        assert(metric >= 0);
+        assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
+        assert(ret);
+
+        /* Reads one IP accounting counter of the unit into *ret. Returns -ENODATA if the unit is a slice, has no
+         * cgroup context, has IP accounting turned off, or has no accounting map for the requested direction;
+         * otherwise returns whatever bpf_firewall_read_accounting() returned. */
+
+        /* IP accounting is currently not recursive, and hence we refuse to return any data for slice nodes. Slices
+         * are inner cgroup nodes and hence have no processes directly attached, hence their counters would be zero
+         * anyway. And if we block this now we can later open this up, if the kernel learns recursive BPF cgroup
+         * filters. */
+        if (u->type == UNIT_SLICE)
+                return -ENODATA;
+
+        cc = unit_get_cgroup_context(u);
+        if (!cc || !cc->ip_accounting)
+                return -ENODATA;
+
+        /* The metric encodes two independent choices: which map to look at, and which counter of it to read. */
+        want_ingress = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS);
+        want_bytes = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES);
+
+        map_fd = want_ingress ? u->ip_accounting_ingress_map_fd : u->ip_accounting_egress_map_fd;
+        if (map_fd < 0)
+                return -ENODATA;
+
+        r = bpf_firewall_read_accounting(map_fd,
+                                         want_bytes ? &counter : NULL,
+                                         want_bytes ? NULL : &counter);
+        if (r < 0)
+                return r;
+
+        /* Add in additional metrics from a previous runtime. Note that when reexecing/reloading the daemon we
+         * compile all BPF programs and maps anew, but serialize the old counters. When deserializing we store them
+         * in the ip_accounting_extra[] field, and add them in here transparently. */
+        *ret = counter + u->ip_accounting_extra[metric];
+
+        return r;
+}
+
+int unit_reset_cpu_accounting(Unit *u) {
nsec_t ns;
int r;
@@ -2142,6 +2309,22 @@ int unit_reset_cpu_usage(Unit *u) {
return 0;
}
+int unit_reset_ip_accounting(Unit *u) {
+        int ingress_result = 0, egress_result = 0;
+
+        assert(u);
+
+        /* Resets both accounting maps (where present) and clears the counters carried over in
+         * ip_accounting_extra[]. Both maps are always attempted, even if the first reset fails; the ingress error
+         * takes precedence over the egress one in the return value. */
+
+        if (u->ip_accounting_ingress_map_fd >= 0)
+                ingress_result = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+
+        if (u->ip_accounting_egress_map_fd >= 0)
+                egress_result = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
+
+        zero(u->ip_accounting_extra);
+
+        if (ingress_result < 0)
+                return ingress_result;
+
+        return egress_result;
+}
+
bool unit_cgroup_delegate(Unit *u) {
CGroupContext *c;
@@ -2167,6 +2350,9 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
+ if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
+ m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
+
if ((u->cgroup_realized_mask & m) == 0)
return;
@@ -2174,6 +2360,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
unit_add_to_cgroup_queue(u);
}
+void unit_invalidate_cgroup_bpf(Unit *u) {
+        Unit *member;
+        Iterator i;
+
+        assert(u);
+
+        /* Marks the unit's BPF state as invalidated and queues the unit for cgroup realization. Units without a
+         * cgroup context, and units that are already marked, are left alone. */
+
+        if (!UNIT_HAS_CGROUP_CONTEXT(u))
+                return;
+
+        if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
+                return;
+
+        u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+        unit_add_to_cgroup_queue(u);
+
+        if (u->type != UNIT_SLICE)
+                return;
+
+        /* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access
+         * list of our children includes our own. Only units that name us as their slice are visited. */
+        SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
+                if (member != u && UNIT_DEREF(member->slice) == u)
+                        unit_invalidate_cgroup_bpf(member);
+        }
+}
+
void manager_invalidate_startup_units(Manager *m) {
Iterator i;
Unit *u;
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 4cd168f63e..fcbf8d01ca 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -21,9 +21,10 @@
#include <stdbool.h>
+#include "cgroup-util.h"
+#include "ip-address-access.h"
#include "list.h"
#include "time-util.h"
-#include "cgroup-util.h"
typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
@@ -87,6 +88,7 @@ struct CGroupContext {
bool blockio_accounting;
bool memory_accounting;
bool tasks_accounting;
+ bool ip_accounting;
/* For unified hierarchy */
uint64_t cpu_weight;
@@ -103,6 +105,9 @@ struct CGroupContext {
uint64_t memory_max;
uint64_t memory_swap_max;
+ LIST_HEAD(IPAddressAccessItem, ip_address_allow);
+ LIST_HEAD(IPAddressAccessItem, ip_address_deny);
+
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
@@ -123,6 +128,16 @@ struct CGroupContext {
bool delegate;
};
+/* Used when querying IP accounting data. Each value names one counter; unit_get_ip_accounting() uses it both to pick
+ * the ingress or egress map and to index the unit's ip_accounting_extra[] array. */
+typedef enum CGroupIPAccountingMetric {
+ CGROUP_IP_INGRESS_BYTES,
+ CGROUP_IP_INGRESS_PACKETS,
+ CGROUP_IP_EGRESS_BYTES,
+ CGROUP_IP_EGRESS_PACKETS,
+ _CGROUP_IP_ACCOUNTING_METRIC_MAX, /* one past the last real metric; exclusive upper bound when validating */
+ _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1, /* sentinel below the valid range */
+} CGroupIPAccountingMetric;
+
#include "unit.h"
void cgroup_context_init(CGroupContext *c);
@@ -145,6 +160,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
CGroupMask unit_get_target_mask(Unit *u);
CGroupMask unit_get_enable_mask(Unit *u);
+bool unit_get_needs_bpf(Unit *u);
+
void unit_update_cgroup_members_masks(Unit *u);
char *unit_default_cgroup_path(Unit *u);
@@ -172,7 +189,10 @@ int unit_watch_all_pids(Unit *u);
int unit_get_memory_current(Unit *u, uint64_t *ret);
int unit_get_tasks_current(Unit *u, uint64_t *ret);
int unit_get_cpu_usage(Unit *u, nsec_t *ret);
-int unit_reset_cpu_usage(Unit *u);
+int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
+
+int unit_reset_cpu_accounting(Unit *u);
+int unit_reset_ip_accounting(Unit *u);
bool unit_cgroup_delegate(Unit *u);
@@ -180,6 +200,7 @@ int unit_notify_cgroup_empty(Unit *u);
int manager_notify_cgroup_empty(Manager *m, const char *group);
void unit_invalidate_cgroup(Unit *u, CGroupMask m);
+void unit_invalidate_cgroup_bpf(Unit *u);
void manager_invalidate_startup_units(Manager *m);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index c1026e3f5b..f61ca08fcb 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -17,7 +17,11 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
+#include <arpa/inet.h>
+
+#include "af-list.h"
#include "alloc-util.h"
+#include "bpf-firewall.h"
#include "bus-util.h"
#include "cgroup-util.h"
#include "cgroup.h"
@@ -206,6 +210,48 @@ static int property_get_device_allow(
return sd_bus_message_close_container(reply);
}
+static int property_get_ip_address_access(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        /* userdata points at the list head stored in the context, not at the first item */
+        IPAddressAccessItem **list = userdata, *item;
+        int r;
+
+        /* Serializes an IP access list as an array of (family, raw address bytes, prefix length) structs, i.e. with
+         * the signature "a(iayu)". Any sd-bus error aborts the serialization and is returned as is. */
+
+        r = sd_bus_message_open_container(reply, 'a', "(iayu)");
+        if (r < 0)
+                return r;
+
+        LIST_FOREACH(items, item, *list) {
+
+                r = sd_bus_message_open_container(reply, 'r', "iayu");
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_append(reply, "i", item->family);
+                if (r < 0)
+                        return r;
+
+                /* Only as many address bytes as the family actually uses are sent */
+                r = sd_bus_message_append_array(reply, 'y', &item->address, FAMILY_ADDRESS_SIZE(item->family));
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_append(reply, "u", (uint32_t) item->prefixlen);
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_close_container(reply);
+                if (r < 0)
+                        return r;
+        }
+
+        return sd_bus_message_close_container(reply);
+}
+
const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@@ -239,6 +285,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
+ SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
+ SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
+ SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
SD_BUS_VTABLE_END
};
@@ -1133,6 +1182,7 @@ int bus_cgroup_set_property(
}
return 1;
+
} else if (streq(name, "TasksMaxScale")) {
uint64_t limit;
uint32_t raw;
@@ -1153,6 +1203,137 @@ int bus_cgroup_set_property(
}
return 1;
+
+ } else if (streq(name, "IPAccounting")) {
+ int b;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK) {
+ c->ip_accounting = b;
+
+ unit_invalidate_cgroup_bpf(u);
+ unit_write_drop_in_private(u, mode, name, b ? "IPAccounting=yes" : "IPAccounting=no");
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
+ IPAddressAccessItem **list;
+ size_t n = 0;
+
+ list = streq(name, "IPAddressAllow") ? &c->ip_address_allow : &c->ip_address_deny;
+
+ r = sd_bus_message_enter_container(message, 'a', "(iayu)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const void *ap;
+ int32_t family;
+ uint32_t prefixlen;
+ size_t an;
+
+ r = sd_bus_message_enter_container(message, 'r', "iayu");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_set_errnof(error, EINVAL, "IPAddressAllow= expects IPv4 or IPv6 addresses only.");
+
+ r = sd_bus_message_read_array(message, 'y', &ap, &an);
+ if (r < 0)
+ return r;
+
+ if (an != FAMILY_ADDRESS_SIZE(family))
+ return sd_bus_error_set_errnof(error, EINVAL, "IP address has wrong size for family (%s, expected %zu, got %zu)",
+ af_to_name(family), FAMILY_ADDRESS_SIZE(family), an);
+
+ r = sd_bus_message_read(message, "u", &prefixlen);
+ if (r < 0)
+ return r;
+
+ if (prefixlen > FAMILY_ADDRESS_SIZE(family)*8)
+ return sd_bus_error_set_errnof(error, EINVAL, "Prefix length too large for family.");
+
+ if (mode != UNIT_CHECK) {
+ IPAddressAccessItem *item;
+
+ item = new0(IPAddressAccessItem, 1);
+ if (!item)
+ return -ENOMEM;
+
+ item->family = family;
+ item->prefixlen = prefixlen;
+ memcpy(&item->address, ap, an);
+
+ LIST_PREPEND(items, *list, item);
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ *list = ip_address_access_reduce(*list);
+
+ if (mode != UNIT_CHECK) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ IPAddressAccessItem *item;
+ size_t size = 0;
+
+ if (n == 0)
+ *list = ip_address_access_free_all(*list);
+
+ unit_invalidate_cgroup_bpf(u);
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs_unlocked(name, f);
+ fputs_unlocked("=\n", f);
+
+ LIST_FOREACH(items, item, *list) {
+ char buffer[CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+
+ errno = 0;
+ if (!inet_ntop(item->family, &item->address, buffer, sizeof(buffer)))
+ return errno > 0 ? -errno : -EINVAL;
+
+ fprintf(f, "%s=%s/%u\n", name, buffer, item->prefixlen);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private(u, mode, name, buf);
+
+ if (*list) {
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == 0)
+ log_warning("Transient unit %s configures an IP firewall, but the local system does not support BPF/cgroup firewalling.\n"
+ "Proceeding WITHOUT firewalling in effect!", u->id);
+ }
+ }
+
+ return 1;
}
if (u->transient && u->load_state == UNIT_STUB) {
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index b0645ce294..8d2ae964d8 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -20,6 +20,7 @@
#include "sd-bus.h"
#include "alloc-util.h"
+#include "bpf-firewall.h"
#include "bus-common-errors.h"
#include "cgroup-util.h"
#include "dbus-job.h"
@@ -1051,6 +1052,39 @@ int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bu
return sd_bus_send(NULL, reply, NULL);
}
+static int property_get_ip_counter(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        /* Start out with the last metric; it is what remains if none of the other three names match below */
+        CGroupIPAccountingMetric metric = CGROUP_IP_EGRESS_PACKETS;
+        uint64_t counter = (uint64_t) -1;
+        Unit *u = userdata;
+
+        assert(bus);
+        assert(reply);
+        assert(property);
+        assert(u);
+
+        /* One getter serves all four IP counter properties; the property name selects the metric. */
+        if (streq(property, "IPIngressBytes"))
+                metric = CGROUP_IP_INGRESS_BYTES;
+        else if (streq(property, "IPIngressPackets"))
+                metric = CGROUP_IP_INGRESS_PACKETS;
+        else if (streq(property, "IPEgressBytes"))
+                metric = CGROUP_IP_EGRESS_BYTES;
+        else
+                assert(streq(property, "IPEgressPackets"));
+
+        /* If the counter cannot be read the initial all-ones value is sent unchanged */
+        (void) unit_get_ip_accounting(u, metric, &counter);
+
+        return sd_bus_message_append(reply, "t", counter);
+}
+
const sd_bus_vtable bus_unit_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
@@ -1058,6 +1092,10 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
+ SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0),
SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_VTABLE_END
};
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index e1846e1adb..9b0dbaf248 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -23,13 +23,14 @@
#include "dynamic-user.h"
#include "fd-util.h"
+#include "fileio.h"
#include "fs-util.h"
+#include "io-util.h"
#include "parse-util.h"
#include "random-util.h"
#include "stdio-util.h"
#include "string-util.h"
#include "user-util.h"
-#include "fileio.h"
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
@@ -245,8 +246,8 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
/* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
l = pwritev(lock_fd,
(struct iovec[2]) {
- { .iov_base = (char*) name, .iov_len = strlen(name) },
- { .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
+ IOVEC_INIT_STRING(name),
+ IOVEC_INIT((char[1]) { '\n' }, 1),
}, 2, 0);
if (l < 0) {
(void) unlink(lock_path);
@@ -271,10 +272,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
uid_t uid = UID_INVALID;
- struct iovec iov = {
- .iov_base = &uid,
- .iov_len = sizeof(uid),
- };
+ struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
union {
struct cmsghdr cmsghdr;
uint8_t buf[CMSG_SPACE(sizeof(int))];
@@ -314,10 +312,7 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
}
static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
- struct iovec iov = {
- .iov_base = &uid,
- .iov_len = sizeof(uid),
- };
+ struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
union {
struct cmsghdr cmsghdr;
uint8_t buf[CMSG_SPACE(sizeof(int))];
diff --git a/src/core/execute.c b/src/core/execute.c
index 28c6b2fc38..0b49be2000 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2351,9 +2351,9 @@ static int send_user_lookup(
if (writev(user_lookup_fd,
(struct iovec[]) {
- { .iov_base = &uid, .iov_len = sizeof(uid) },
- { .iov_base = &gid, .iov_len = sizeof(gid) },
- { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+ IOVEC_INIT(&uid, sizeof(uid)),
+ IOVEC_INIT(&gid, sizeof(gid)),
+ IOVEC_INIT_STRING(unit->id) }, 3) < 0)
return -errno;
return 0;
@@ -3150,6 +3150,7 @@ static int exec_child(
"EXECUTABLE=%s", command->path,
LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
NULL);
log_close();
}
@@ -3223,6 +3224,7 @@ int exec_spawn(Unit *unit,
LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
"EXECUTABLE=%s", command->path,
LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
NULL);
pid = fork();
if (pid < 0)
@@ -3254,6 +3256,7 @@ int exec_spawn(Unit *unit,
log_struct_errno(LOG_ERR, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
LOG_UNIT_MESSAGE(unit, "%s: %m",
error_message),
"EXECUTABLE=%s", command->path,
@@ -3262,6 +3265,7 @@ int exec_spawn(Unit *unit,
log_struct_errno(LOG_INFO, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
command->path),
"EXECUTABLE=%s", command->path,
@@ -3270,6 +3274,7 @@ int exec_spawn(Unit *unit,
log_struct_errno(LOG_ERR, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
command->path),
diff --git a/src/core/ip-address-access.c b/src/core/ip-address-access.c
new file mode 100644
index 0000000000..cfb7d51c4f
--- /dev/null
+++ b/src/core/ip-address-access.c
@@ -0,0 +1,217 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Daniel Mack
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "extract-word.h"
+#include "hostname-util.h"
+#include "ip-address-access.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+/* Configuration parser callback used for the IPAddressAllow= and IPAddressDeny= settings.
+ *
+ * rvalue is split into words with extract_first_word(); every word is turned into one or two
+ * IPAddressAccessItem entries that are appended to the list whose head pointer is passed in data:
+ *
+ *   - "any", names accepted by is_localhost(), "link-local" and "multicast" each expand to one IPv4 and
+ *     one IPv6 entry;
+ *   - anything else is parsed with in_addr_prefix_from_string_auto().
+ *
+ * An empty rvalue frees the whole list. After parsing, redundant entries are dropped with
+ * ip_address_access_reduce(), and a warning is logged if the resulting list is non-empty but
+ * bpf_firewall_supported() reports no support.
+ *
+ * Returns 0 on success and also when a syntax error was logged and ignored; returns the result of
+ * log_oom() on allocation failure, or the negative result of bpf_firewall_supported(). */
+int config_parse_ip_address_access(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        IPAddressAccessItem **list = data;
+        const char *p;
+        int r;
+
+        assert(list);
+
+        if (isempty(rvalue)) {
+                *list = ip_address_access_free_all(*list);
+                return 0;
+        }
+
+        p = rvalue;
+
+        for (;;) {
+                /* "a" is owned by this scope only until it is linked into the list; after each LIST_APPEND()
+                 * it is either replaced by a fresh allocation or reset to NULL, so that the cleanup handler
+                 * never frees an item the list already references. */
+                _cleanup_free_ IPAddressAccessItem *a = NULL;
+                _cleanup_free_ char *word = NULL;
+
+                r = extract_first_word(&p, &word, NULL, 0);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+                        break;
+                }
+
+                a = new0(IPAddressAccessItem, 1);
+                if (!a)
+                        return log_oom();
+
+                if (streq(word, "any")) {
+                        /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+                        a->family = AF_INET;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+
+                } else if (is_localhost(word)) {
+                        /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32(0x7f000000);
+                        a->prefixlen = 8;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        a->address.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+                        a->prefixlen = 128;
+
+                } else if (streq(word, "link-local")) {
+
+                        /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+                        a->prefixlen = 16;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        /* Use the public s6_addr32 accessor rather than libc-internal member names */
+                        a->address.in6 = (struct in6_addr) {
+                                .s6_addr32[0] = htobe32(0xfe800000)
+                        };
+                        a->prefixlen = 64;
+
+                } else if (streq(word, "multicast")) {
+
+                        /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32((UINT32_C(224) << 24));
+                        a->prefixlen = 4;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        a->address.in6 = (struct in6_addr) {
+                                .s6_addr32[0] = htobe32(0xff000000)
+                        };
+                        a->prefixlen = 8;
+
+                } else {
+                        r = in_addr_prefix_from_string_auto(word, &a->family, &a->address, &a->prefixlen);
+                        if (r < 0) {
+                                log_syntax(unit, LOG_WARNING, filename, line, r, "Address prefix is invalid, ignoring assignment: %s", word);
+                                return 0;
+                        }
+                }
+
+                /* Hand the (last) item over to the list and disarm the cleanup handler */
+                LIST_APPEND(items, *list, a);
+                a = NULL;
+        }
+
+        *list = ip_address_access_reduce(*list);
+
+        if (*list) {
+                r = bpf_firewall_supported();
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        log_warning("File %s:%u configures an IP firewall (%s=%s), but the local system does not support BPF/cgroup based firewalling.\n"
+                                    "Proceeding WITHOUT firewalling in effect!", filename, line, lvalue, rvalue);
+        }
+
+        return 0;
+}
+
+/* Frees every item of the list that starts at first. Always returns NULL, so that callers can reset their
+ * list head in the same statement. */
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first) {
+        IPAddressAccessItem *cur, *following;
+
+        for (cur = first; cur; cur = following) {
+                /* Remember the successor before the current item is released */
+                following = cur->items_next;
+                free(cur);
+        }
+
+        return NULL;
+}
+
+/* Drops all entries from the list that are covered in full by another entry of the same address family, thus
+ * removing all redundant entries. As a side effect the address of every visited entry is masked down to its
+ * prefix length. Returns the head of the list, which changes if the old head was dropped. */
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first) {
+        IPAddressAccessItem *a, *b, *tmp;
+        int r;
+
+        /* The _SAFE variant is needed for the outer loop, since "a" may be freed inside the body */
+        LIST_FOREACH_SAFE(items, a, tmp, first) {
+
+                /* Drop irrelevant bits */
+                (void) in_addr_mask(a->family, &a->address, a->prefixlen);
+
+                LIST_FOREACH(items, b, first) {
+
+                        if (a == b)
+                                continue;
+
+                        if (a->family != b->family)
+                                continue;
+
+                        /* A longer prefix can never cover a shorter one */
+                        if (b->prefixlen > a->prefixlen)
+                                continue;
+
+                        r = in_addr_prefix_covers(b->family,
+                                                  &b->address,
+                                                  b->prefixlen,
+                                                  &a->address);
+                        if (r <= 0)
+                                continue;
+
+                        /* b covers a fully, then let's drop a */
+
+                        LIST_REMOVE(items, first, a);
+                        free(a);
+
+                        /* "a" is gone now: stop comparing it against the remaining entries, otherwise we would
+                         * access (and possibly free again) released memory. */
+                        break;
+                }
+        }
+
+        return first;
+}
diff --git a/src/core/ip-address-access.h b/src/core/ip-address-access.h
new file mode 100644
index 0000000000..9aeab1f4f8
--- /dev/null
+++ b/src/core/ip-address-access.h
@@ -0,0 +1,38 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Daniel Mack
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct IPAddressAccessItem IPAddressAccessItem;
+
+struct IPAddressAccessItem { /* One address prefix of an IP access list */
+ int family; /* Address family of the entry; the parser stores AF_INET or AF_INET6 for its built-in names */
+ unsigned char prefixlen; /* Prefix length in bits, e.g. 8 for 127.0.0.0/8 */
+ union in_addr_union address; /* Network address; accessed via .in or .in6 */
+ LIST_FIELDS(IPAddressAccessItem, items); /* Linkage into the list (items_next etc.) */
+};
+
+int config_parse_ip_address_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first);
+
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first);
diff --git a/src/core/job.c b/src/core/job.c
index 8e2039d321..f04c8a2168 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -806,21 +806,26 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
default:
log_struct(job_result_log_level[result],
LOG_MESSAGE("%s", buf),
- "RESULT=%s", job_result_to_string(result),
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
NULL);
return;
}
log_struct(job_result_log_level[result],
LOG_MESSAGE("%s", buf),
- "RESULT=%s", job_result_to_string(result),
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
mid,
NULL);
}
static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
+ assert(u);
/* No message if the job did not actually do anything due to failed condition. */
if (t == JOB_START && result == JOB_DONE && !u->condition_result)
@@ -903,7 +908,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
* the unit itself. We don't treat JOB_CANCELED as failure in
* this context. And JOB_FAILURE is already handled by the
* unit itself. */
- if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
+ if (IN_SET(result, JOB_TIMEOUT, JOB_DEPENDENCY)) {
log_struct(LOG_NOTICE,
"JOB_TYPE=%s", job_type_to_string(t),
"JOB_RESULT=%s", job_result_to_string(result),
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index f7d5f24861..cc8aad05a0 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -174,6 +174,9 @@ $1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0,
$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting)
$1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max)
$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate)
+$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
+$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
+$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
)m4_dnl
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
diff --git a/src/core/main.c b/src/core/main.c
index fbf8876a2d..2dfd48005b 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -128,6 +128,7 @@ static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL;
static bool arg_default_cpu_accounting = false;
static bool arg_default_io_accounting = false;
+static bool arg_default_ip_accounting = false;
static bool arg_default_blockio_accounting = false;
static bool arg_default_memory_accounting = false;
static bool arg_default_tasks_accounting = true;
@@ -748,6 +749,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
{ "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
{ "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
+ { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
{ "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
@@ -792,6 +794,7 @@ static void manager_set_defaults(Manager *m) {
m->default_start_limit_burst = arg_default_start_limit_burst;
m->default_cpu_accounting = arg_default_cpu_accounting;
m->default_io_accounting = arg_default_io_accounting;
+ m->default_ip_accounting = arg_default_ip_accounting;
m->default_blockio_accounting = arg_default_blockio_accounting;
m->default_memory_accounting = arg_default_memory_accounting;
m->default_tasks_accounting = arg_default_tasks_accounting;
@@ -1202,6 +1205,26 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
return 0;
}
+/* Raises RLIMIT_MEMLOCK of the calling process to at least 16M. The limit that was in effect before is stored
+ * in *saved_rlimit (only if it could be read), so that the caller can restore it later on.
+ *
+ * Returns 0 on success, or the (negative) result of log_warning_errno() if reading or setting the limit
+ * failed. Callers are expected to treat failures as non-fatal. */
+static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
+        const rlim_t wanted = 1024ULL*1024ULL*16ULL;
+        struct rlimit new_rlimit;
+        int r;
+
+        assert(saved_rlimit);
+        assert(getuid() == 0);
+
+        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
+         * should normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence
+         * let's bump the value high enough for the root user. */
+
+        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
+                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
+
+        /* Only ever raise the limit: if the current values are already higher than what we want (possibly even
+         * RLIM_INFINITY) keep them as they are. This relies on rlim_t being unsigned and RLIM_INFINITY comparing
+         * greater than any finite limit, as is the case on Linux. */
+        new_rlimit.rlim_cur = saved_rlimit->rlim_cur > wanted ? saved_rlimit->rlim_cur : wanted;
+        new_rlimit.rlim_max = saved_rlimit->rlim_max > wanted ? saved_rlimit->rlim_max : wanted;
+
+        r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
+        if (r < 0)
+                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
+
+        return 0;
+}
+
static void test_usr(void) {
/* Check that /usr is not a separate fs */
@@ -1385,7 +1408,7 @@ int main(int argc, char *argv[]) {
bool queue_default_job = false;
bool empty_etc = false;
char *switch_root_dir = NULL, *switch_root_init = NULL;
- struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0);
+ struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
const char *error_message = NULL;
#ifdef HAVE_SYSV_COMPAT
@@ -1812,9 +1835,11 @@ int main(int argc, char *argv[]) {
if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
log_warning_errno(errno, "Failed to make us a subreaper: %m");
- if (arg_system)
+ if (arg_system) {
/* Bump up RLIMIT_NOFILE for systemd itself */
(void) bump_rlimit_nofile(&saved_rlimit_nofile);
+ (void) bump_rlimit_memlock(&saved_rlimit_memlock);
+ }
}
r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
@@ -2048,6 +2073,8 @@ finish:
* its child processes */
if (saved_rlimit_nofile.rlim_cur > 0)
(void) setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
+ if (saved_rlimit_memlock.rlim_cur != (rlim_t) -1)
+ (void) setrlimit(RLIMIT_MEMLOCK, &saved_rlimit_memlock);
if (switch_root_dir) {
/* Kill all remaining processes from the
diff --git a/src/core/manager.c b/src/core/manager.c
index 46036aa50c..5cf4bc4ee6 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -616,6 +616,9 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
m->default_tasks_accounting = true;
m->default_tasks_max = UINT64_MAX;
+ m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
+ m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
+ m->default_restart_usec = DEFAULT_RESTART_USEC;
#ifdef ENABLE_EFI
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
@@ -628,13 +631,13 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
m->unit_log_format_string = "UNIT=%s";
m->invocation_log_field = "INVOCATION_ID=";
- m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
+ m->invocation_log_format_string = "INVOCATION_ID=%s";
} else {
m->unit_log_field = "USER_UNIT=";
m->unit_log_format_string = "USER_UNIT=%s";
m->invocation_log_field = "USER_INVOCATION_ID=";
- m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
+ m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
}
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
diff --git a/src/core/manager.h b/src/core/manager.h
index 713d2db70c..e8a6267471 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -29,6 +29,7 @@
#include "cgroup-util.h"
#include "fdset.h"
#include "hashmap.h"
+#include "ip-address-access.h"
#include "list.h"
#include "ratelimit.h"
@@ -271,6 +272,7 @@ struct Manager {
bool default_io_accounting;
bool default_blockio_accounting;
bool default_tasks_accounting;
+ bool default_ip_accounting;
uint64_t default_tasks_max;
usec_t default_timer_accuracy_usec;
diff --git a/src/core/meson.build b/src/core/meson.build
index 569eed9cad..ac600be117 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -1,114 +1,118 @@
libcore_la_sources = '''
- unit.c
- unit.h
- unit-printf.c
- unit-printf.h
- job.c
- job.h
- manager.c
- manager.h
- transaction.c
- transaction.h
- load-fragment.c
- load-fragment.h
- service.c
- service.h
- socket.c
- socket.h
- target.c
- target.h
- device.c
- device.h
- mount.c
- mount.h
+ audit-fd.c
+ audit-fd.h
automount.c
automount.h
- swap.c
- swap.h
- timer.c
- timer.h
- path.c
- path.h
- slice.c
- slice.h
- scope.c
- scope.h
- load-dropin.c
- load-dropin.h
- execute.c
- execute.h
- dynamic-user.c
- dynamic-user.h
- kill.c
- kill.h
- dbus.c
- dbus.h
- dbus-manager.c
- dbus-manager.h
- dbus-unit.c
- dbus-unit.h
+ bpf-firewall.c
+ bpf-firewall.h
+ cgroup.c
+ cgroup.h
+ dbus-automount.c
+ dbus-automount.h
+ dbus-cgroup.c
+ dbus-cgroup.h
+ dbus-device.c
+ dbus-device.h
+ dbus-execute.c
+ dbus-execute.h
dbus-job.c
dbus-job.h
+ dbus-kill.c
+ dbus-kill.h
+ dbus-manager.c
+ dbus-manager.h
+ dbus-mount.c
+ dbus-mount.h
+ dbus-path.c
+ dbus-path.h
+ dbus-scope.c
+ dbus-scope.h
dbus-service.c
dbus-service.h
+ dbus-slice.c
+ dbus-slice.h
dbus-socket.c
dbus-socket.h
- dbus-target.c
- dbus-target.h
- dbus-device.c
- dbus-device.h
- dbus-mount.c
- dbus-mount.h
- dbus-automount.c
- dbus-automount.h
dbus-swap.c
dbus-swap.h
+ dbus-target.c
+ dbus-target.h
dbus-timer.c
dbus-timer.h
- dbus-path.c
- dbus-path.h
- dbus-slice.c
- dbus-slice.h
- dbus-scope.c
- dbus-scope.h
- dbus-execute.c
- dbus-execute.h
- dbus-kill.c
- dbus-kill.h
- dbus-cgroup.c
- dbus-cgroup.h
- cgroup.c
- cgroup.h
- selinux-access.c
- selinux-access.h
- selinux-setup.c
- selinux-setup.h
- smack-setup.c
- smack-setup.h
+ dbus-unit.c
+ dbus-unit.h
+ dbus.c
+ dbus.h
+ device.c
+ device.h
+ dynamic-user.c
+ dynamic-user.h
+ emergency-action.c
+ emergency-action.h
+ execute.c
+ execute.h
+ hostname-setup.c
+ hostname-setup.h
ima-setup.c
ima-setup.h
- locale-setup.h
+ ip-address-access.c
+ ip-address-access.h
+ job.c
+ job.h
+ kill.c
+ kill.h
+ killall.c
+ killall.h
+ kmod-setup.c
+ kmod-setup.h
+ load-dropin.c
+ load-dropin.h
+ load-fragment.c
+ load-fragment.h
locale-setup.c
- hostname-setup.c
- hostname-setup.h
+ locale-setup.h
+ loopback-setup.c
+ loopback-setup.h
machine-id-setup.c
machine-id-setup.h
+ manager.c
+ manager.h
mount-setup.c
mount-setup.h
- kmod-setup.c
- kmod-setup.h
- loopback-setup.h
- loopback-setup.c
+ mount.c
+ mount.h
namespace.c
namespace.h
- killall.h
- killall.c
- audit-fd.c
- audit-fd.h
+ path.c
+ path.h
+ scope.c
+ scope.h
+ selinux-access.c
+ selinux-access.h
+ selinux-setup.c
+ selinux-setup.h
+ service.c
+ service.h
show-status.c
show-status.h
- emergency-action.c
- emergency-action.h
+ slice.c
+ slice.h
+ smack-setup.c
+ smack-setup.h
+ socket.c
+ socket.h
+ swap.c
+ swap.h
+ target.c
+ target.h
+ timer.c
+ timer.h
+ transaction.c
+ transaction.h
+ unit-printf.c
+ unit-printf.h
+ unit.c
+ unit.h
'''.split()
load_fragment_gperf_gperf = custom_target(
diff --git a/src/core/mount.c b/src/core/mount.c
index c3805ee055..46bcf37ae0 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -736,6 +736,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
exec_context_dump(&m->exec_context, f, prefix);
kill_context_dump(&m->kill_context, f, prefix);
+ cgroup_context_dump(&m->cgroup_context, f, prefix);
}
static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
@@ -753,9 +754,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
assert(_pid);
(void) unit_realize_cgroup(UNIT(m));
- if (m->reset_cpu_usage) {
- (void) unit_reset_cpu_usage(UNIT(m));
- m->reset_cpu_usage = false;
+ if (m->reset_accounting) {
+ (void) unit_reset_cpu_accounting(UNIT(m));
+ (void) unit_reset_ip_accounting(UNIT(m));
+ m->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(m));
@@ -1043,7 +1045,7 @@ static int mount_start(Unit *u) {
m->result = MOUNT_SUCCESS;
m->reload_result = MOUNT_SUCCESS;
- m->reset_cpu_usage = true;
+ m->reset_accounting = true;
mount_enter_mounting(m);
return 1;
diff --git a/src/core/mount.h b/src/core/mount.h
index 9f7326ba6a..f81e4217df 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -67,7 +67,7 @@ struct Mount {
bool just_mounted:1;
bool just_changed:1;
- bool reset_cpu_usage:1;
+ bool reset_accounting:1;
bool sloppy_options;
diff --git a/src/core/scope.c b/src/core/scope.c
index a1d5c1cfd5..8f9df3b9b7 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -333,7 +333,8 @@ static int scope_start(Unit *u) {
return r;
(void) unit_realize_cgroup(u);
- (void) unit_reset_cpu_usage(u);
+ (void) unit_reset_cpu_accounting(u);
+ (void) unit_reset_ip_accounting(u);
r = unit_attach_pids_to_cgroup(u);
if (r < 0) {
diff --git a/src/core/service.c b/src/core/service.c
index c9a7222cc6..21fc4e2abe 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -803,6 +803,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
"%sFile Descriptor Store Current: %u\n",
prefix, s->n_fd_store_max,
prefix, s->n_fd_store);
+
+ cgroup_context_dump(&s->cgroup_context, f, prefix);
}
static int service_load_pid_file(Service *s, bool may_warn) {
@@ -1242,9 +1244,10 @@ static int service_spawn(
}
(void) unit_realize_cgroup(UNIT(s));
- if (s->reset_cpu_usage) {
- (void) unit_reset_cpu_usage(UNIT(s));
- s->reset_cpu_usage = false;
+ if (s->reset_accounting) {
+ (void) unit_reset_cpu_accounting(UNIT(s));
+ (void) unit_reset_ip_accounting(UNIT(s));
+ s->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(s));
@@ -1953,6 +1956,7 @@ static void service_enter_restart(Service *s) {
log_struct(LOG_INFO,
"MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
LOG_UNIT_ID(UNIT(s)),
+ LOG_UNIT_INVOCATION_ID(UNIT(s)),
LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
"N_RESTARTS=%u", s->n_restarts,
NULL);
@@ -2136,7 +2140,7 @@ static int service_start(Unit *u) {
s->main_pid_known = false;
s->main_pid_alien = false;
s->forbid_restart = false;
- s->reset_cpu_usage = true;
+ s->reset_accounting = true;
s->status_text = mfree(s->status_text);
s->status_errno = 0;
@@ -2948,6 +2952,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"EXIT_CODE=%s", sigchld_code_to_string(code),
"EXIT_STATUS=%i", status,
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
NULL);
if (s->result == SERVICE_SUCCESS)
diff --git a/src/core/service.h b/src/core/service.h
index 0ac8bc9a67..16b700637c 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -165,7 +165,7 @@ struct Service {
bool forbid_restart:1;
bool start_timeout_defined:1;
- bool reset_cpu_usage:1;
+ bool reset_accounting:1;
char *bus_name;
char *bus_name_owner; /* unique name of the current owner */
diff --git a/src/core/show-status.c b/src/core/show-status.c
index 65f9cb888a..8c94573844 100644
--- a/src/core/show-status.c
+++ b/src/core/show-status.c
@@ -93,21 +93,21 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
}
if (prev_ephemeral)
- IOVEC_SET_STRING(iovec[n++], "\r" ANSI_ERASE_TO_END_OF_LINE);
+ iovec[n++] = IOVEC_MAKE_STRING("\r" ANSI_ERASE_TO_END_OF_LINE);
prev_ephemeral = ephemeral;
if (status) {
if (!isempty(status)) {
- IOVEC_SET_STRING(iovec[n++], "[");
- IOVEC_SET_STRING(iovec[n++], status);
- IOVEC_SET_STRING(iovec[n++], "] ");
+ iovec[n++] = IOVEC_MAKE_STRING("[");
+ iovec[n++] = IOVEC_MAKE_STRING(status);
+ iovec[n++] = IOVEC_MAKE_STRING("] ");
} else
- IOVEC_SET_STRING(iovec[n++], status_indent);
+ iovec[n++] = IOVEC_MAKE_STRING(status_indent);
}
- IOVEC_SET_STRING(iovec[n++], s);
+ iovec[n++] = IOVEC_MAKE_STRING(s);
if (!ephemeral)
- IOVEC_SET_STRING(iovec[n++], "\n");
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
if (writev(fd, iovec, n) < 0)
return -errno;
diff --git a/src/core/slice.c b/src/core/slice.c
index ed5d3fd701..b15f751c82 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -222,7 +222,8 @@ static int slice_start(Unit *u) {
return r;
(void) unit_realize_cgroup(u);
- (void) unit_reset_cpu_usage(u);
+ (void) unit_reset_cpu_accounting(u);
+ (void) unit_reset_ip_accounting(u);
slice_set_state(t, SLICE_ACTIVE);
return 1;
diff --git a/src/core/socket.c b/src/core/socket.c
index 5993ce0d00..3b84ffa2a7 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -29,6 +29,7 @@
#include <linux/sctp.h>
#include "alloc-util.h"
+#include "bpf-firewall.h"
#include "bus-error.h"
#include "bus-util.h"
#include "copy.h"
@@ -37,6 +38,7 @@
#include "exit-status.h"
#include "fd-util.h"
#include "format-util.h"
+#include "in-addr-util.h"
#include "io-util.h"
#include "label.h"
#include "log.h"
@@ -56,7 +58,6 @@
#include "unit-name.h"
#include "unit.h"
#include "user-util.h"
-#include "in-addr-util.h"
struct SocketPeer {
unsigned n_ref;
@@ -852,6 +853,8 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
exec_command_dump_list(s->exec_command[c], f, prefix2);
}
+
+ cgroup_context_dump(&s->cgroup_context, f, prefix);
}
static int instance_from_socket(int fd, unsigned nr, char **instance) {
@@ -1435,6 +1438,102 @@ no_label:
return 0;
}
+/* Calls socket_address_listen() for the given address, taking all remaining parameters from the settings
+ * stored in the Socket object. The result of socket_address_listen() is passed through unmodified. */
+static int socket_address_listen_do(
+                Socket *s,
+                const SocketAddress *address,
+                const char *label) {
+
+        int fd;
+
+        assert(s);
+        assert(address);
+
+        fd = socket_address_listen(
+                        address,
+                        SOCK_CLOEXEC|SOCK_NONBLOCK,
+                        s->backlog,
+                        s->bind_ipv6_only,
+                        s->bind_to_device,
+                        s->reuse_port,
+                        s->free_bind,
+                        s->transparent,
+                        s->directory_mode,
+                        s->socket_mode,
+                        label);
+
+        return fd;
+}
+
+/* Creates the listening socket for the given address and returns its file descriptor, or a negative
+ * errno-style value on failure (all failures other than a bpf_firewall_supported() error are logged here).
+ *
+ * For AF_INET/AF_INET6 addresses, and only if bpf_firewall_supported() reports support, the socket is created
+ * by a helper process forked off with unit_fork_helper_process(), which passes the fd back over a socketpair.
+ * In all other cases the socket is created directly in the calling process. */
+static int socket_address_listen_in_cgroup(
+                Socket *s,
+                const SocketAddress *address,
+                const char *label) {
+
+        _cleanup_close_pair_ int pair[2] = { -1, -1 };
+        int fd, r;
+        pid_t pid;
+
+        assert(s);
+        assert(address);
+
+        /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
+         * in which the socket is actually created. This way we ensure the socket is actually properly attached to the
+         * unit's cgroup for the purpose of BPF filtering and such. */
+
+        if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
+                goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
+                goto shortcut;
+
+        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+                return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+        r = unit_fork_helper_process(UNIT(s), &pid);
+        if (r < 0)
+                return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m");
+        if (r == 0) {
+                /* Child */
+
+                pair[0] = safe_close(pair[0]);
+
+                fd = socket_address_listen_do(s, address, label);
+                if (fd < 0) {
+                        log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = send_one_fd(pair[1], fd, 0);
+                if (r < 0) {
+                        log_unit_error_errno(UNIT(s), r, "Failed to send listening socket to parent: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Parent: close our copy of the child's end, then pick up the fd the child sends us */
+        pair[1] = safe_close(pair[1]);
+        fd = receive_one_fd(pair[0], 0);
+
+        /* We synchronously wait for the helper, as it shouldn't be slow */
+        r = wait_for_terminate_and_warn("listen-cgroup-helper", pid, false);
+        if (r < 0) {
+                /* Don't leak an fd we might have received before waiting failed */
+                safe_close(fd);
+                return r;
+        }
+
+        if (fd < 0)
+                return log_unit_error_errno(UNIT(s), fd, "Failed to receive listening socket: %m");
+
+        return fd;
+
+shortcut:
+        fd = socket_address_listen_do(s, address, label);
+        if (fd < 0)
+                /* Log in the unit's context, like all other failures of this function */
+                return log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
+
+        return fd;
+}
+
static int socket_open_fds(Socket *s) {
_cleanup_(mac_selinux_freep) char *label = NULL;
bool know_label = false;
@@ -1478,18 +1577,7 @@ static int socket_open_fds(Socket *s) {
break;
}
- r = socket_address_listen(
- &p->address,
- SOCK_CLOEXEC|SOCK_NONBLOCK,
- s->backlog,
- s->bind_ipv6_only,
- s->bind_to_device,
- s->reuse_port,
- s->free_bind,
- s->transparent,
- s->directory_mode,
- s->socket_mode,
- label);
+ r = socket_address_listen_in_cgroup(s, &p->address, label);
if (r < 0)
goto rollback;
@@ -1773,9 +1861,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
assert(_pid);
(void) unit_realize_cgroup(UNIT(s));
- if (s->reset_cpu_usage) {
- (void) unit_reset_cpu_usage(UNIT(s));
- s->reset_cpu_usage = false;
+ if (s->reset_accounting) {
+ (void) unit_reset_cpu_accounting(UNIT(s));
+ (void) unit_reset_ip_accounting(UNIT(s));
+ s->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(s));
@@ -1826,27 +1915,23 @@ static int socket_chown(Socket *s, pid_t *_pid) {
/* We have to resolve the user names out-of-process, hence
* let's fork here. It's messy, but well, what can we do? */
- pid = fork();
- if (pid < 0)
- return -errno;
-
- if (pid == 0) {
- SocketPort *p;
+ r = unit_fork_helper_process(UNIT(s), &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
- int ret;
+ SocketPort *p;
- (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
- (void) ignore_signals(SIGPIPE, -1);
- log_forget_fds();
+ /* Child */
if (!isempty(s->user)) {
const char *user = s->user;
r = get_user_creds(&user, &uid, &gid, NULL, NULL);
if (r < 0) {
- ret = EXIT_USER;
- goto fail_child;
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
+ _exit(EXIT_USER);
}
}
@@ -1855,8 +1940,8 @@ static int socket_chown(Socket *s, pid_t *_pid) {
r = get_group_creds(&group, &gid);
if (r < 0) {
- ret = EXIT_GROUP;
- goto fail_child;
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
+ _exit(EXIT_GROUP);
}
}
@@ -1872,19 +1957,12 @@ static int socket_chown(Socket *s, pid_t *_pid) {
continue;
if (chown(path, uid, gid) < 0) {
- r = -errno;
- ret = EXIT_CHOWN;
- goto fail_child;
+ log_unit_error_errno(UNIT(s), errno, "Failed to chown(): %m");
+ _exit(EXIT_CHOWN);
}
}
- _exit(0);
-
- fail_child:
- log_open();
- log_error_errno(r, "Failed to chown socket at step %s: %m", exit_status_to_string(ret, EXIT_STATUS_SYSTEMD));
-
- _exit(ret);
+ _exit(EXIT_SUCCESS);
}
r = unit_watch_pid(UNIT(s), pid);
@@ -2371,7 +2449,7 @@ static int socket_start(Unit *u) {
return r;
s->result = SOCKET_SUCCESS;
- s->reset_cpu_usage = true;
+ s->reset_accounting = true;
socket_enter_start_pre(s);
return 1;
@@ -2696,6 +2774,97 @@ _pure_ static bool socket_check_gc(Unit *u) {
return s->n_connections > 0;
}
+static int socket_accept_do(Socket *s, int fd) {
+ int cfd;
+
+ assert(s);
+ assert(fd >= 0);
+
+ for (;;) {
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
+ if (cfd < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return -errno;
+ }
+
+ break;
+ }
+
+ return cfd;
+}
+
+static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int cfd, r;
+ pid_t pid;
+
+ assert(s);
+ assert(p);
+ assert(fd >= 0);
+
+ /* Similar to socket_address_listen_in_cgroup(), but for accept() rather than socket(): make sure that any
+ * connection socket is also properly associated with the cgroup. */
+
+ if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
+ goto shortcut;
+
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto shortcut;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+ r = unit_fork_helper_process(UNIT(s), &pid);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to fork off accept stub process: %m");
+ if (r == 0) {
+ /* Child */
+
+ pair[0] = safe_close(pair[0]);
+
+ cfd = socket_accept_do(s, fd);
+ if (cfd < 0) {
+ log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = send_one_fd(pair[1], cfd, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to send connection socket to parent: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+ cfd = receive_one_fd(pair[0], 0);
+
+ /* We synchronously wait for the helper, as it shouldn't be slow */
+ r = wait_for_terminate_and_warn("accept-cgroup-helper", pid, false);
+ if (r < 0) {
+ safe_close(cfd);
+ return r;
+ }
+
+ if (cfd < 0)
+ return log_unit_error_errno(UNIT(s), cfd, "Failed to receive connection socket: %m");
+
+ return cfd;
+
+shortcut:
+ cfd = socket_accept_do(s, fd);
+ if (cfd < 0)
+ return log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+
+ return cfd;
+}
+
static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
SocketPort *p = userdata;
int cfd = -1;
@@ -2721,20 +2890,9 @@ static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
p->type == SOCKET_SOCKET &&
socket_address_can_accept(&p->address)) {
- for (;;) {
-
- cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
- if (cfd < 0) {
-
- if (errno == EINTR)
- continue;
-
- log_unit_error_errno(UNIT(p->socket), errno, "Failed to accept socket: %m");
- goto fail;
- }
-
- break;
- }
+ cfd = socket_accept_in_cgroup(p->socket, p, fd);
+ if (cfd < 0)
+ goto fail;
socket_apply_socket_options(p->socket, cfd);
}
diff --git a/src/core/socket.h b/src/core/socket.h
index 89f4664510..8c263963c4 100644
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -161,7 +161,7 @@ struct Socket {
char *user, *group;
- bool reset_cpu_usage:1;
+ bool reset_accounting:1;
char *fdname;
diff --git a/src/core/swap.c b/src/core/swap.c
index 9553ee16a8..d58f68458b 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -602,6 +602,7 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
exec_context_dump(&s->exec_context, f, prefix);
kill_context_dump(&s->kill_context, f, prefix);
+ cgroup_context_dump(&s->cgroup_context, f, prefix);
}
static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
@@ -619,9 +620,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
assert(_pid);
(void) unit_realize_cgroup(UNIT(s));
- if (s->reset_cpu_usage) {
- (void) unit_reset_cpu_usage(UNIT(s));
- s->reset_cpu_usage = false;
+ if (s->reset_accounting) {
+ (void) unit_reset_cpu_accounting(UNIT(s));
+ (void) unit_reset_ip_accounting(UNIT(s));
+ s->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(s));
@@ -860,7 +862,7 @@ static int swap_start(Unit *u) {
return r;
s->result = SWAP_SUCCESS;
- s->reset_cpu_usage = true;
+ s->reset_accounting = true;
swap_enter_activating(s);
return 1;
diff --git a/src/core/swap.h b/src/core/swap.h
index b0ef50f1e8..45da63c5e2 100644
--- a/src/core/swap.h
+++ b/src/core/swap.h
@@ -70,7 +70,7 @@ struct Swap {
bool is_active:1;
bool just_activated:1;
- bool reset_cpu_usage:1;
+ bool reset_accounting:1;
SwapResult result;
diff --git a/src/core/system.conf b/src/core/system.conf
index 746572b7ff..6b86eac33d 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -40,6 +40,7 @@
#DefaultEnvironment=
#DefaultCPUAccounting=no
#DefaultIOAccounting=no
+#DefaultIPAccounting=no
#DefaultBlockIOAccounting=no
#DefaultMemoryAccounting=no
#DefaultTasksAccounting=yes
@@ -60,3 +61,5 @@
#DefaultLimitNICE=
#DefaultLimitRTPRIO=
#DefaultLimitRTTIME=
+#IPAddressAllow=
+#IPAddressDeny=
diff --git a/src/core/unit.c b/src/core/unit.c
index f1936bdf0b..0fe881436e 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -35,9 +35,11 @@
#include "dropin.h"
#include "escape.h"
#include "execute.h"
+#include "fd-util.h"
#include "fileio-label.h"
#include "format-util.h"
#include "id128-util.h"
+#include "io-util.h"
#include "load-dropin.h"
#include "load-fragment.h"
#include "log.h"
@@ -103,6 +105,13 @@ Unit *unit_new(Manager *m, size_t size) {
u->ref_gid = GID_INVALID;
u->cpu_usage_last = NSEC_INFINITY;
+ u->ip_accounting_ingress_map_fd = -1;
+ u->ip_accounting_egress_map_fd = -1;
+ u->ipv4_allow_map_fd = -1;
+ u->ipv6_allow_map_fd = -1;
+ u->ipv4_deny_map_fd = -1;
+ u->ipv6_deny_map_fd = -1;
+
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -153,9 +162,11 @@ static void unit_init(Unit *u) {
cc->cpu_accounting = u->manager->default_cpu_accounting;
cc->io_accounting = u->manager->default_io_accounting;
+ cc->ip_accounting = u->manager->default_ip_accounting;
cc->blockio_accounting = u->manager->default_blockio_accounting;
cc->memory_accounting = u->manager->default_memory_accounting;
cc->tasks_accounting = u->manager->default_tasks_accounting;
+ cc->ip_accounting = u->manager->default_ip_accounting;
if (u->type != UNIT_SLICE)
cc->tasks_max = u->manager->default_tasks_max;
@@ -610,6 +621,17 @@ void unit_free(Unit *u) {
while (u->refs)
unit_ref_unset(u->refs);
+ safe_close(u->ip_accounting_ingress_map_fd);
+ safe_close(u->ip_accounting_egress_map_fd);
+
+ safe_close(u->ipv4_allow_map_fd);
+ safe_close(u->ipv6_allow_map_fd);
+ safe_close(u->ipv4_deny_map_fd);
+ safe_close(u->ipv6_deny_map_fd);
+
+ bpf_program_unref(u->ip_bpf_ingress);
+ bpf_program_unref(u->ip_bpf_egress);
+
free(u);
}
@@ -1523,6 +1545,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
log_struct(LOG_INFO,
LOG_MESSAGE("%s", buf),
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
mid,
NULL);
}
@@ -1979,6 +2002,134 @@ void unit_trigger_notify(Unit *u) {
UNIT_VTABLE(other)->trigger_notify(other, u);
}
+static int unit_log_resources(Unit *u) {
+
+ struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
+ size_t n_message_parts = 0, n_iovec = 0;
+ char* message_parts[3 + 1], *t;
+ nsec_t nsec = NSEC_INFINITY;
+ CGroupIPAccountingMetric m;
+ size_t i;
+ int r;
+ const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "IP_METRIC_INGRESS_BYTES",
+ [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
+ [CGROUP_IP_EGRESS_BYTES] = "IP_METRIC_EGRESS_BYTES",
+ [CGROUP_IP_EGRESS_PACKETS] = "IP_METRIC_EGRESS_PACKETS",
+ };
+
+ assert(u);
+
+ /* Invoked whenever a unit enters failed or dead state. Logs information about consumed resources if resource
+ * accounting was enabled for a unit. It does this in two ways: a friendly human readable string with reduced
+ * information and the complete data in structured fields. */
+
+ (void) unit_get_cpu_usage(u, &nsec);
+ if (nsec != NSEC_INFINITY) {
+ char buf[FORMAT_TIMESPAN_MAX] = "";
+
+ /* Format the CPU time for inclusion in the structured log message */
+ if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the CPU time for inclusion in the human language message string */
+ format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
+ t = strjoin(n_message_parts > 0 ? "consumed " : "Consumed ", buf, " CPU time");
+ if (!t) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = t;
+ }
+
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ char buf[FORMAT_BYTES_MAX] = "";
+ uint64_t value = UINT64_MAX;
+
+ assert(ip_fields[m]);
+
+ (void) unit_get_ip_accounting(u, m, &value);
+ if (value == UINT64_MAX)
+ continue;
+
+ /* Format IP accounting data for inclusion in the structured log message */
+ if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the IP accounting data for inclusion in the human language message string, but only for the
+ * bytes counters (and not for the packets counters) */
+ if (m == CGROUP_IP_INGRESS_BYTES)
+ t = strjoin(n_message_parts > 0 ? "received " : "Received ",
+ format_bytes(buf, sizeof(buf), value),
+ " IP traffic");
+ else if (m == CGROUP_IP_EGRESS_BYTES)
+ t = strjoin(n_message_parts > 0 ? "sent " : "Sent ",
+ format_bytes(buf, sizeof(buf), value),
+ " IP traffic");
+ else
+ continue;
+ if (!t) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = t;
+ }
+
+ /* Is there any accounting data available at all? */
+ if (n_iovec == 0) {
+ r = 0;
+ goto finish;
+ }
+
+ if (n_message_parts == 0)
+ t = strjoina("MESSAGE=", u->id, ": Completed");
+ else {
+ _cleanup_free_ char *joined;
+
+ message_parts[n_message_parts] = NULL;
+
+ joined = strv_join(message_parts, ", ");
+ if (!joined) {
+ r = log_oom();
+ goto finish;
+ }
+
+ t = strjoina("MESSAGE=", u->id, ": ", joined);
+ }
+
+ /* The following four fields are either allocated on the stack or are static strings. We must not free them,
+ * and hence we don't increase n_iovec for them. */
+ iovec[n_iovec] = IOVEC_MAKE_STRING(t);
+ iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
+
+ t = strjoina(u->manager->unit_log_field, u->id);
+ iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
+
+ t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
+ iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
+
+ log_struct_iovec(LOG_INFO, iovec, n_iovec + 4);
+ r = 0;
+
+finish:
+ for (i = 0; i < n_message_parts; i++)
+ free(message_parts[i]);
+
+ for (i = 0; i < n_iovec; i++)
+ free(iovec[i].iov_base);
+
+ return r;
+
+}
+
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) {
Manager *m;
bool unexpected;
@@ -2150,28 +2301,33 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
manager_send_unit_plymouth(m, u);
} else {
+ /* We don't care about D-Bus going down here, since we'll get an asynchronous notification for it
+ * anyway. */
- /* We don't care about D-Bus here, since we'll get an
- * asynchronous notification for it anyway. */
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns) &&
+ !UNIT_IS_INACTIVE_OR_FAILED(os)
+ && !MANAGER_IS_RELOADING(m)) {
- if (u->type == UNIT_SERVICE &&
- UNIT_IS_INACTIVE_OR_FAILED(ns) &&
- !UNIT_IS_INACTIVE_OR_FAILED(os) &&
- !MANAGER_IS_RELOADING(m)) {
+ /* This unit just stopped/failed. */
+ if (u->type == UNIT_SERVICE) {
- /* Hmm, if there was no start record written
- * write it now, so that we always have a nice
- * pair */
- if (!u->in_audit) {
- manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
+ /* Hmm, if there was no start record written
+ * write it now, so that we always have a nice
+ * pair */
+ if (!u->in_audit) {
+ manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
- if (ns == UNIT_INACTIVE)
- manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
- } else
- /* Write audit record if we have just finished shutting down */
- manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
+ if (ns == UNIT_INACTIVE)
+ manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
+ } else
+ /* Write audit record if we have just finished shutting down */
+ manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
+
+ u->in_audit = false;
+ }
- u->in_audit = false;
+ /* Write a log message about consumed resources */
+ unit_log_resources(u);
}
}
@@ -2749,7 +2905,15 @@ static int unit_serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask)
return r;
}
+static const char *ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "ip-accounting-ingress-bytes",
+ [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+ [CGROUP_IP_EGRESS_BYTES] = "ip-accounting-egress-bytes",
+ [CGROUP_IP_EGRESS_PACKETS] = "ip-accounting-egress-packets",
+};
+
int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
+ CGroupIPAccountingMetric m;
int r;
assert(u);
@@ -2798,6 +2962,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
(void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
(void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
+ unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
if (uid_is_valid(u->ref_uid))
unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
@@ -2809,6 +2974,14 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
bus_track_serialize(u->bus_track, f, "ref");
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ uint64_t v;
+
+ r = unit_get_ip_accounting(u, m, &v);
+ if (r >= 0)
+ unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v);
+ }
+
if (serialize_jobs) {
if (u->job) {
fprintf(f, "job\n");
@@ -2915,6 +3088,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
for (;;) {
char line[LINE_MAX], *l, *v;
+ CGroupIPAccountingMetric m;
size_t k;
if (!fgets(line, sizeof(line), f)) {
@@ -3069,6 +3243,20 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
continue;
+ } else if (streq(l, "cgroup-bpf-realized")) {
+ int i;
+
+ r = safe_atoi(v, &i);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
+ else
+ u->cgroup_bpf_state =
+ i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
+ i > 0 ? UNIT_CGROUP_BPF_ON :
+ UNIT_CGROUP_BPF_OFF;
+
+ continue;
+
} else if (streq(l, "ref-uid")) {
uid_t uid;
@@ -3111,6 +3299,21 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
}
+ /* Check if this is an IP accounting metric serialization field */
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++)
+ if (streq(l, ip_accounting_metric_field[m]))
+ break;
+ if (m < _CGROUP_IP_ACCOUNTING_METRIC_MAX) {
+ uint64_t c;
+
+ r = safe_atou64(v, &c);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
+ else
+ u->ip_accounting_extra[m] = c;
+ continue;
+ }
+
if (unit_can_serialize(u)) {
if (rt) {
r = exec_runtime_deserialize_item(u, rt, l, v, fds);
@@ -3137,6 +3340,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
if (!dual_timestamp_is_set(&u->state_change_timestamp))
dual_timestamp_get(&u->state_change_timestamp);
+ /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
+ * after we are done. For that we invalidate anything already realized, so that we can realize it again. */
+ unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
+ unit_invalidate_cgroup_bpf(u);
+
return 0;
}
@@ -4169,6 +4377,7 @@ void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
log_struct(LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
"WHERE=%s", where,
NULL);
@@ -4191,6 +4400,7 @@ int unit_fail_if_symlink(Unit *u, const char* where) {
log_struct(LOG_ERR,
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "Mount on symlink %s not allowed.", where),
"WHERE=%s", where,
NULL);
@@ -4436,3 +4646,43 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
p->cgroup_path = u->cgroup_path;
SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
}
+
+int unit_fork_helper_process(Unit *u, pid_t *ret) {
+ pid_t pid;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ /* Forks off a helper process and makes sure it is a member of the unit's cgroup. Returns == 0 in the child,
+ * > 0 in the parent, and a negative errno if fork() fails. On success *ret is set to the child's PID in both. */
+
+ (void) unit_realize_cgroup(u);
+
+ pid = fork();
+ if (pid < 0)
+ return -errno;
+
+ if (pid == 0) {
+
+ (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
+ (void) ignore_signals(SIGPIPE, -1);
+
+ log_close();
+ log_open();
+
+ if (u->cgroup_path) {
+ r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", u->cgroup_path);
+ _exit(EXIT_CGROUP);
+ }
+ }
+
+ *ret = getpid_cached();
+ return 0;
+ }
+
+ *ret = pid;
+ return 1;
+}
diff --git a/src/core/unit.h b/src/core/unit.h
index 4d9751a406..9aa00b056f 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -28,11 +28,13 @@ typedef struct UnitVTable UnitVTable;
typedef struct UnitRef UnitRef;
typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
+#include "bpf-program.h"
#include "condition.h"
#include "emergency-action.h"
#include "install.h"
#include "list.h"
#include "unit-name.h"
+#include "cgroup.h"
typedef enum KillOperation {
KILL_TERMINATE,
@@ -70,6 +72,12 @@ struct UnitRef {
LIST_FIELDS(UnitRef, refs);
};
+typedef enum UnitCGroupBPFState {
+ UNIT_CGROUP_BPF_OFF = 0,
+ UNIT_CGROUP_BPF_ON = 1,
+ UNIT_CGROUP_BPF_INVALIDATED = -1,
+} UnitCGroupBPFState;
+
struct Unit {
Manager *manager;
@@ -205,6 +213,20 @@ struct Unit {
CGroupMask cgroup_members_mask;
int cgroup_inotify_wd;
+ /* IP BPF Firewalling/accounting */
+ int ip_accounting_ingress_map_fd;
+ int ip_accounting_egress_map_fd;
+
+ int ipv4_allow_map_fd;
+ int ipv6_allow_map_fd;
+ int ipv4_deny_map_fd;
+ int ipv6_deny_map_fd;
+
+ BPFProgram *ip_bpf_ingress;
+ BPFProgram *ip_bpf_egress;
+
+ uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
/* How to start OnFailure units */
JobMode on_failure_job_mode;
@@ -254,6 +276,8 @@ struct Unit {
bool cgroup_members_mask_valid:1;
bool cgroup_subtree_mask_valid:1;
+ UnitCGroupBPFState cgroup_bpf_state:2;
+
bool start_limit_hit:1;
/* Did we already invoke unit_coldplug() for this unit? */
@@ -661,6 +685,8 @@ bool unit_shall_confirm_spawn(Unit *u);
void unit_set_exec_params(Unit *s, ExecParameters *p);
+int unit_fork_helper_process(Unit *u, pid_t *ret);
+
/* Macros which append UNIT= or USER_UNIT= to the message */
#define log_unit_full(unit, level, error, ...) \
@@ -684,3 +710,4 @@ void unit_set_exec_params(Unit *s, ExecParameters *p);
#define LOG_UNIT_MESSAGE(unit, fmt, ...) "MESSAGE=%s: " fmt, (unit)->id, ##__VA_ARGS__
#define LOG_UNIT_ID(unit) (unit)->manager->unit_log_format_string, (unit)->id
+#define LOG_UNIT_INVOCATION_ID(unit) (unit)->manager->invocation_log_format_string, (unit)->invocation_id_string
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
index 57d1af454a..96a4d400f8 100644
--- a/src/coredump/coredump.c
+++ b/src/coredump/coredump.c
@@ -749,7 +749,7 @@ static int submit_coredump(
const char *coredump_filename;
coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
- IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(coredump_filename);
} else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
coredump_size, arg_external_size_max);
@@ -804,10 +804,10 @@ log:
return 0;
}
- IOVEC_SET_STRING(iovec[n_iovec++], core_message);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(core_message);
if (truncated)
- IOVEC_SET_STRING(iovec[n_iovec++], "COREDUMP_TRUNCATED=1");
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");
/* Optionally store the entire coredump in the journal */
if (arg_storage == COREDUMP_STORAGE_JOURNAL) {
@@ -817,11 +817,9 @@ log:
/* Store the coredump itself in the journal */
r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
- if (r >= 0) {
- iovec[n_iovec].iov_base = coredump_data;
- iovec[n_iovec].iov_len = sz;
- n_iovec++;
- } else
+ if (r >= 0)
+ iovec[n_iovec++] = IOVEC_MAKE(coredump_data, sz);
+ else
log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
} else
log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
@@ -1070,7 +1068,7 @@ static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char
x = strappend(field, value);
if (x)
- IOVEC_SET_STRING(iovec[(*n_iovec)++], x);
+ iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
return x;
}
@@ -1162,7 +1160,7 @@ static int gather_pid_metadata(
if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
r = asprintf(&t, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
if (r > 0)
- IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
+ iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
}
if (sd_pid_get_slice(pid, &t) >= 0)
@@ -1218,7 +1216,7 @@ static int gather_pid_metadata(
t = strjoin("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000", NULL);
if (t)
- IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
+ iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo))
set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo));
@@ -1253,10 +1251,10 @@ static int process_kernel(int argc, char* argv[]) {
n_iovec = n_to_free;
- IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
assert_cc(2 == LOG_CRIT);
- IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
assert(n_iovec <= ELEMENTSOF(iovec));
@@ -1344,15 +1342,15 @@ static int process_backtrace(int argc, char *argv[]) {
r = log_oom();
goto finish;
}
- IOVEC_SET_STRING(iovec[n_iovec++], message);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(message);
} else {
for (i = 0; i < importer.iovw.count; i++)
iovec[n_iovec++] = importer.iovw.iovec[i];
}
- IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
assert_cc(2 == LOG_CRIT);
- IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
assert(n_iovec <= n_allocated);
diff --git a/src/journal/journal-send.c b/src/journal/journal-send.c
index 440fba67ca..5d8b394752 100644
--- a/src/journal/journal-send.c
+++ b/src/journal/journal-send.c
@@ -114,9 +114,8 @@ _public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
if (isempty(buffer+8))
return 0;
- zero(iov);
- IOVEC_SET_STRING(iov[0], buffer);
- IOVEC_SET_STRING(iov[1], p);
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
return sd_journal_sendv(iov, 2);
}
@@ -167,7 +166,7 @@ _printf_(1, 0) static int fill_iovec_sprintf(const char *format, va_list ap, int
(void) strstrip(buffer); /* strip trailing whitespace, keep prefixing whitespace */
- IOVEC_SET_STRING(iov[i++], buffer);
+ iov[i++] = IOVEC_MAKE_STRING(buffer);
format = va_arg(ap, char *);
}
@@ -259,27 +258,19 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
* newline, then the size (64bit LE), followed
* by the data and a final newline */
- w[j].iov_base = iov[i].iov_base;
- w[j].iov_len = c - (char*) iov[i].iov_base;
- j++;
-
- IOVEC_SET_STRING(w[j++], "\n");
+ w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
+ w[j++] = IOVEC_MAKE_STRING("\n");
l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
- w[j].iov_base = &l[i];
- w[j].iov_len = sizeof(uint64_t);
- j++;
-
- w[j].iov_base = c + 1;
- w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
- j++;
+ w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));
+ w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
} else
/* Nothing special? Then just add the line and
* append a newline */
w[j++] = iov[i];
- IOVEC_SET_STRING(w[j++], "\n");
+ w[j++] = IOVEC_MAKE_STRING("\n");
}
if (!have_syslog_identifier &&
@@ -291,9 +282,9 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
* since everything else is much nicer to retrieve
* from the outside. */
- IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
- IOVEC_SET_STRING(w[j++], program_invocation_short_name);
- IOVEC_SET_STRING(w[j++], "\n");
+ w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
+ w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ w[j++] = IOVEC_MAKE_STRING("\n");
}
fd = journal_fd();
@@ -380,9 +371,9 @@ static int fill_iovec_perror_and_send(const char *message, int skip, struct iove
xsprintf(error, "ERRNO=%i", _saved_errno_);
assert_cc(3 == LOG_ERR);
- IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
- IOVEC_SET_STRING(iov[skip+1], buffer);
- IOVEC_SET_STRING(iov[skip+2], error);
+ iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
+ iov[skip+1] = IOVEC_MAKE_STRING(buffer);
+ iov[skip+2] = IOVEC_MAKE_STRING(error);
return sd_journal_sendv(iov, skip + 3);
}
@@ -492,20 +483,19 @@ _public_ int sd_journal_printv_with_location(int priority, const char *file, con
* CODE_FUNC=, hence let's do it manually here. */
ALLOCA_CODE_FUNC(f, func);
- zero(iov);
- IOVEC_SET_STRING(iov[0], buffer);
- IOVEC_SET_STRING(iov[1], p);
- IOVEC_SET_STRING(iov[2], file);
- IOVEC_SET_STRING(iov[3], line);
- IOVEC_SET_STRING(iov[4], f);
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
+ iov[2] = IOVEC_MAKE_STRING(file);
+ iov[3] = IOVEC_MAKE_STRING(line);
+ iov[4] = IOVEC_MAKE_STRING(f);
return sd_journal_sendv(iov, ELEMENTSOF(iov));
}
_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
+ _cleanup_free_ struct iovec *iov = NULL;
int r, i, j;
va_list ap;
- struct iovec *iov = NULL;
char *f;
va_start(ap, format);
@@ -519,9 +509,9 @@ _public_ int sd_journal_send_with_location(const char *file, const char *line, c
ALLOCA_CODE_FUNC(f, func);
- IOVEC_SET_STRING(iov[0], file);
- IOVEC_SET_STRING(iov[1], line);
- IOVEC_SET_STRING(iov[2], f);
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
r = sd_journal_sendv(iov, i);
@@ -529,8 +519,6 @@ finish:
for (j = 3; j < i; j++)
free(iov[j].iov_base);
- free(iov);
-
return r;
}
@@ -550,9 +538,9 @@ _public_ int sd_journal_sendv_with_location(
ALLOCA_CODE_FUNC(f, func);
- IOVEC_SET_STRING(niov[n++], file);
- IOVEC_SET_STRING(niov[n++], line);
- IOVEC_SET_STRING(niov[n++], f);
+ niov[n++] = IOVEC_MAKE_STRING(file);
+ niov[n++] = IOVEC_MAKE_STRING(line);
+ niov[n++] = IOVEC_MAKE_STRING(f);
return sd_journal_sendv(niov, n);
}
@@ -567,9 +555,9 @@ _public_ int sd_journal_perror_with_location(
ALLOCA_CODE_FUNC(f, func);
- IOVEC_SET_STRING(iov[0], file);
- IOVEC_SET_STRING(iov[1], line);
- IOVEC_SET_STRING(iov[2], f);
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
return fill_iovec_perror_and_send(message, 3, iov);
}
diff --git a/src/journal/journald-audit.c b/src/journal/journald-audit.c
index 38ac3befdd..869c996aef 100644
--- a/src/journal/journald-audit.c
+++ b/src/journal/journald-audit.c
@@ -383,26 +383,26 @@ static void process_audit_string(Server *s, int type, const char *data, size_t s
return;
}
- IOVEC_SET_STRING(iov[n_iov++], "_TRANSPORT=audit");
+ iov[n_iov++] = IOVEC_MAKE_STRING("_TRANSPORT=audit");
sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
(usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
- IOVEC_SET_STRING(iov[n_iov++], source_time_field);
+ iov[n_iov++] = IOVEC_MAKE_STRING(source_time_field);
sprintf(type_field, "_AUDIT_TYPE=%i", type);
- IOVEC_SET_STRING(iov[n_iov++], type_field);
+ iov[n_iov++] = IOVEC_MAKE_STRING(type_field);
sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
- IOVEC_SET_STRING(iov[n_iov++], id_field);
+ iov[n_iov++] = IOVEC_MAKE_STRING(id_field);
assert_cc(4 == LOG_FAC(LOG_AUTH));
- IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_FACILITY=4");
- IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_IDENTIFIER=audit");
+ iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4");
+ iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit");
type_name = audit_type_name_alloca(type);
m = strjoina("MESSAGE=", type_name, " ", p);
- IOVEC_SET_STRING(iov[n_iov++], m);
+ iov[n_iov++] = IOVEC_MAKE_STRING(m);
z = n_iov;
diff --git a/src/journal/journald-console.c b/src/journal/journald-console.c
index 5fbcdb43c2..039f1a68ce 100644
--- a/src/journal/journald-console.c
+++ b/src/journal/journald-console.c
@@ -59,9 +59,10 @@ void server_forward_console(
struct timespec ts;
char tbuf[sizeof("[] ")-1 + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
char header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t)];
- int n = 0, fd;
_cleanup_free_ char *ident_buf = NULL;
+ _cleanup_close_ int fd = -1;
const char *tty;
+ int n = 0;
assert(s);
assert(message);
@@ -75,7 +76,8 @@ void server_forward_console(
xsprintf(tbuf, "[%5"PRI_TIME".%06"PRI_NSEC"] ",
ts.tv_sec,
(nsec_t)ts.tv_nsec / 1000);
- IOVEC_SET_STRING(iovec[n++], tbuf);
+
+ iovec[n++] = IOVEC_MAKE_STRING(tbuf);
}
/* Second: identifier and PID */
@@ -88,19 +90,19 @@ void server_forward_console(
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
if (identifier)
- IOVEC_SET_STRING(iovec[n++], identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
- IOVEC_SET_STRING(iovec[n++], header_pid);
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
} else if (identifier) {
- IOVEC_SET_STRING(iovec[n++], identifier);
- IOVEC_SET_STRING(iovec[n++], ": ");
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
}
/* Fourth: message */
- IOVEC_SET_STRING(iovec[n++], message);
- IOVEC_SET_STRING(iovec[n++], "\n");
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
- tty = s->tty_path ? s->tty_path : "/dev/console";
+ tty = s->tty_path ?: "/dev/console";
/* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
* good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
@@ -115,6 +117,4 @@ void server_forward_console(
if (writev(fd, iovec, n) < 0)
log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
-
- safe_close(fd);
}
diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c
index 2be82be5f6..1bad7cb2ee 100644
--- a/src/journal/journald-kmsg.c
+++ b/src/journal/journald-kmsg.c
@@ -26,6 +26,7 @@
#include "libudev.h"
#include "sd-messages.h"
+#include "alloc-util.h"
#include "escape.h"
#include "fd-util.h"
#include "format-util.h"
@@ -45,11 +46,11 @@ void server_forward_kmsg(
const char *message,
const struct ucred *ucred) {
+ _cleanup_free_ char *ident_buf = NULL;
struct iovec iovec[5];
char header_priority[DECIMAL_STR_MAX(priority) + 3],
header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
int n = 0;
- char *ident_buf = NULL;
assert(s);
assert(priority >= 0);
@@ -68,7 +69,7 @@ void server_forward_kmsg(
/* First: priority field */
xsprintf(header_priority, "<%i>", priority);
- IOVEC_SET_STRING(iovec[n++], header_priority);
+ iovec[n++] = IOVEC_MAKE_STRING(header_priority);
/* Second: identifier and PID */
if (ucred) {
@@ -80,22 +81,20 @@ void server_forward_kmsg(
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
if (identifier)
- IOVEC_SET_STRING(iovec[n++], identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
- IOVEC_SET_STRING(iovec[n++], header_pid);
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
} else if (identifier) {
- IOVEC_SET_STRING(iovec[n++], identifier);
- IOVEC_SET_STRING(iovec[n++], ": ");
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
}
/* Fourth: message */
- IOVEC_SET_STRING(iovec[n++], message);
- IOVEC_SET_STRING(iovec[n++], "\n");
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
if (writev(s->dev_kmsg_fd, iovec, n) < 0)
log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
-
- free(ident_buf);
}
static bool is_us(const char *pid) {
@@ -111,11 +110,11 @@ static bool is_us(const char *pid) {
static void dev_kmsg_record(Server *s, const char *p, size_t l) {
struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
- char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
+ _cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL;
int priority, r;
unsigned n = 0, z = 0, j;
unsigned long long usec;
- char *identifier = NULL, *pid = NULL, *e, *f, *k;
+ char *e, *f, *k;
uint64_t serial;
size_t pl;
char *kernel_device = NULL;
@@ -216,7 +215,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (startswith(m, "_KERNEL_DEVICE="))
kernel_device = m + 15;
- IOVEC_SET_STRING(iovec[n++], m);
+ iovec[n++] = IOVEC_MAKE_STRING(m);
z++;
l -= (e - k) + 1;
@@ -236,7 +235,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (g) {
b = strappend("_UDEV_DEVNODE=", g);
if (b) {
- IOVEC_SET_STRING(iovec[n++], b);
+ iovec[n++] = IOVEC_MAKE_STRING(b);
z++;
}
}
@@ -245,7 +244,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (g) {
b = strappend("_UDEV_SYSNAME=", g);
if (b) {
- IOVEC_SET_STRING(iovec[n++], b);
+ iovec[n++] = IOVEC_MAKE_STRING(b);
z++;
}
}
@@ -261,7 +260,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (g) {
b = strappend("_UDEV_DEVLINK=", g);
if (b) {
- IOVEC_SET_STRING(iovec[n++], b);
+ iovec[n++] = IOVEC_MAKE_STRING(b);
z++;
}
}
@@ -274,18 +273,18 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
}
if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
- IOVEC_SET_STRING(iovec[n++], source_time);
+ iovec[n++] = IOVEC_MAKE_STRING(source_time);
- IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");
if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
- IOVEC_SET_STRING(iovec[n++], syslog_priority);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
- IOVEC_SET_STRING(iovec[n++], syslog_facility);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
if ((priority & LOG_FACMASK) == LOG_KERN)
- IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
else {
pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
@@ -297,33 +296,24 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (identifier) {
syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
if (syslog_identifier)
- IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
}
if (pid) {
syslog_pid = strappend("SYSLOG_PID=", pid);
if (syslog_pid)
- IOVEC_SET_STRING(iovec[n++], syslog_pid);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
}
}
if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
- IOVEC_SET_STRING(iovec[n++], message);
+ iovec[n++] = IOVEC_MAKE_STRING(message);
server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority, 0);
finish:
for (j = 0; j < z; j++)
free(iovec[j].iov_base);
-
- free(message);
- free(syslog_priority);
- free(syslog_identifier);
- free(syslog_pid);
- free(syslog_facility);
- free(source_time);
- free(identifier);
- free(pid);
}
static int server_read_dev_kmsg(Server *s) {
diff --git a/src/journal/journald-native.c b/src/journal/journald-native.c
index 23afe59bd5..554f91460d 100644
--- a/src/journal/journald-native.c
+++ b/src/journal/journald-native.c
@@ -282,7 +282,7 @@ static int server_process_entry(
}
tn = n++;
- IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
+ iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
entry_size += strlen("_TRANSPORT=journal");
if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index 27c2571cfc..2d51be7c89 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -724,14 +724,14 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
char *k; \
k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
sprintf(k, field "=" format, value); \
- IOVEC_SET_STRING(iovec[n++], k); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
}
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
if (!isempty(value)) { \
char *k; \
k = strjoina(field "=", value); \
- IOVEC_SET_STRING(iovec[n++], k); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
}
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
@@ -739,7 +739,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
char *k; \
k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
sd_id128_to_string(value, stpcpy(k, field "=")); \
- IOVEC_SET_STRING(iovec[n++], k); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
}
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
@@ -747,7 +747,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
char *k; \
k = newa(char, strlen(field "=") + value_size + 1); \
*((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
- IOVEC_SET_STRING(iovec[n++], k); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
} \
static void dispatch_message_real(
@@ -826,20 +826,20 @@ static void dispatch_message_real(
if (tv) {
sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
- IOVEC_SET_STRING(iovec[n++], source_time);
+ iovec[n++] = IOVEC_MAKE_STRING(source_time);
}
/* Note that strictly speaking storing the boot id here is
* redundant since the entry includes this in-line
* anyway. However, we need this indexed, too. */
if (!isempty(s->boot_id_field))
- IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
+ iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
if (!isempty(s->machine_id_field))
- IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
+ iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
if (!isempty(s->hostname_field))
- IOVEC_SET_STRING(iovec[n++], s->hostname_field);
+ iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
assert(n <= m);
@@ -870,15 +870,15 @@ void server_driver_message(Server *s, const char *message_id, const char *format
assert(format);
assert_cc(3 == LOG_FAC(LOG_DAEMON));
- IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
- IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
- IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
assert_cc(6 == LOG_INFO);
- IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
+ iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
if (message_id)
- IOVEC_SET_STRING(iovec[n++], message_id);
+ iovec[n++] = IOVEC_MAKE_STRING(message_id);
m = n;
va_start(ap, format);
@@ -899,8 +899,8 @@ void server_driver_message(Server *s, const char *message_id, const char *format
xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
n = 3;
- IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
- IOVEC_SET_STRING(iovec[n++], buf);
+ iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
+ iovec[n++] = IOVEC_MAKE_STRING(buf);
dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
}
}
diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c
index a44c540f67..d0b95ea02c 100644
--- a/src/journal/journald-stream.c
+++ b/src/journal/journald-stream.c
@@ -282,22 +282,21 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
if (s->server->forward_to_wall)
server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);
- IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
-
- IOVEC_SET_STRING(iovec[n++], s->id_field);
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout");
+ iovec[n++] = IOVEC_MAKE_STRING(s->id_field);
syslog_priority[strlen("PRIORITY=")] = '0' + LOG_PRI(priority);
- IOVEC_SET_STRING(iovec[n++], syslog_priority);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
if (priority & LOG_FACMASK) {
xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
- IOVEC_SET_STRING(iovec[n++], syslog_facility);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
}
if (s->identifier) {
syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
if (syslog_identifier)
- IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
}
if (line_break != LINE_BREAK_NEWLINE) {
@@ -309,12 +308,12 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
c = line_break == LINE_BREAK_NUL ? "_LINE_BREAK=nul" :
line_break == LINE_BREAK_LINE_MAX ? "_LINE_BREAK=line-max" :
"_LINE_BREAK=eof";
- IOVEC_SET_STRING(iovec[n++], c);
+ iovec[n++] = IOVEC_MAKE_STRING(c);
}
message = strappend("MESSAGE=", p);
if (message)
- IOVEC_SET_STRING(iovec[n++], message);
+ iovec[n++] = IOVEC_MAKE_STRING(message);
if (s->context)
(void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY);
diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c
index a03c36df34..fa597e47a2 100644
--- a/src/journal/journald-syslog.c
+++ b/src/journal/journald-syslog.c
@@ -124,7 +124,7 @@ static void forward_syslog_raw(Server *s, int priority, const char *buffer, cons
if (LOG_PRI(priority) > s->max_level_syslog)
return;
- IOVEC_SET_STRING(iovec, buffer);
+ iovec = IOVEC_MAKE_STRING(buffer);
forward_syslog_iovec(s, &iovec, 1, ucred, tv);
}
@@ -135,7 +135,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
int n = 0;
time_t t;
struct tm *tm;
- char *ident_buf = NULL;
+ _cleanup_free_ char *ident_buf = NULL;
assert(s);
assert(priority >= 0);
@@ -147,7 +147,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
/* First: priority field */
xsprintf(header_priority, "<%i>", priority);
- IOVEC_SET_STRING(iovec[n++], header_priority);
+ iovec[n++] = IOVEC_MAKE_STRING(header_priority);
/* Second: timestamp */
t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
@@ -156,7 +156,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
return;
if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
return;
- IOVEC_SET_STRING(iovec[n++], header_time);
+ iovec[n++] = IOVEC_MAKE_STRING(header_time);
/* Third: identifier and PID */
if (ucred) {
@@ -168,20 +168,18 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
if (identifier)
- IOVEC_SET_STRING(iovec[n++], identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
- IOVEC_SET_STRING(iovec[n++], header_pid);
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
} else if (identifier) {
- IOVEC_SET_STRING(iovec[n++], identifier);
- IOVEC_SET_STRING(iovec[n++], ": ");
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
}
/* Fourth: message */
- IOVEC_SET_STRING(iovec[n++], message);
+ iovec[n++] = IOVEC_MAKE_STRING(message);
forward_syslog_iovec(s, iovec, n, ucred, tv);
-
- free(ident_buf);
}
int syslog_fixup_facility(int priority) {
@@ -353,29 +351,29 @@ void server_process_syslog_message(
if (s->forward_to_wall)
server_forward_wall(s, priority, identifier, buf, ucred);
- IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog");
xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
- IOVEC_SET_STRING(iovec[n++], syslog_priority);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
if (priority & LOG_FACMASK) {
xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
- IOVEC_SET_STRING(iovec[n++], syslog_facility);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
}
if (identifier) {
syslog_identifier = strjoina("SYSLOG_IDENTIFIER=", identifier);
- IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
}
if (pid) {
syslog_pid = strjoina("SYSLOG_PID=", pid);
- IOVEC_SET_STRING(iovec[n++], syslog_pid);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
}
message = strjoina("MESSAGE=", buf);
if (message)
- IOVEC_SET_STRING(iovec[n++], message);
+ iovec[n++] = IOVEC_MAKE_STRING(message);
if (ucred && pid_is_valid(ucred->pid)) {
r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
diff --git a/src/libsystemd-network/sd-dhcp-lease.c b/src/libsystemd-network/sd-dhcp-lease.c
index 6f0e51720a..1ab569765d 100644
--- a/src/libsystemd-network/sd-dhcp-lease.c
+++ b/src/libsystemd-network/sd-dhcp-lease.c
@@ -471,7 +471,7 @@ static int lease_parse_routes(
struct sd_dhcp_route *route = *routes + *routes_size;
int r;
- r = in_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
+ r = in4_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
if (r < 0) {
log_debug("Failed to determine destination prefix length from class based IP, ignoring");
continue;
@@ -1253,7 +1253,7 @@ int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease) {
address.s_addr = lease->address;
/* fall back to the default subnet masks based on address class */
- r = in_addr_default_subnet_mask(&address, &mask);
+ r = in4_addr_default_subnet_mask(&address, &mask);
if (r < 0)
return r;
diff --git a/src/libsystemd-network/sd-dhcp-server.c b/src/libsystemd-network/sd-dhcp-server.c
index 5a59c377f8..727cc16ab5 100644
--- a/src/libsystemd-network/sd-dhcp-server.c
+++ b/src/libsystemd-network/sd-dhcp-server.c
@@ -56,7 +56,7 @@ int sd_dhcp_server_configure_pool(sd_dhcp_server *server, struct in_addr *addres
assert_return(address->s_addr != INADDR_ANY, -EINVAL);
assert_return(prefixlen <= 32, -ERANGE);
- assert_se(in_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
+ assert_se(in4_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
netmask = netmask_addr.s_addr;
server_off = be32toh(address->s_addr & ~netmask);
diff --git a/src/network/networkd-address.c b/src/network/networkd-address.c
index 7f536b4ba9..8f625975fb 100644
--- a/src/network/networkd-address.c
+++ b/src/network/networkd-address.c
@@ -768,7 +768,7 @@ int config_parse_address(const char *unit,
}
if (!e && f == AF_INET) {
- r = in_addr_default_prefixlen(&buffer.in, &n->prefixlen);
+ r = in4_addr_default_prefixlen(&buffer.in, &n->prefixlen);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Prefix length not specified, and a default one can not be deduced for '%s', ignoring assignment", address);
return 0;
diff --git a/src/network/networkd-dhcp4.c b/src/network/networkd-dhcp4.c
index 9c69979c7b..3b5bacd13b 100644
--- a/src/network/networkd-dhcp4.c
+++ b/src/network/networkd-dhcp4.c
@@ -237,7 +237,7 @@ static int dhcp_lease_lost(Link *link) {
if (r >= 0) {
r = sd_dhcp_lease_get_netmask(link->dhcp_lease, &netmask);
if (r >= 0)
- prefixlen = in_addr_netmask_to_prefixlen(&netmask);
+ prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
address->family = AF_INET;
address->in_addr.in = addr;
@@ -316,7 +316,7 @@ static int dhcp4_update_address(Link *link,
assert(netmask);
assert(lifetime);
- prefixlen = in_addr_netmask_to_prefixlen(netmask);
+ prefixlen = in4_addr_netmask_to_prefixlen(netmask);
r = address_new(&addr);
if (r < 0)
@@ -406,7 +406,7 @@ static int dhcp_lease_acquired(sd_dhcp_client *client, Link *link) {
if (r < 0)
return log_link_error_errno(link, r, "DHCP error: No netmask: %m");
- prefixlen = in_addr_netmask_to_prefixlen(&netmask);
+ prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
r = sd_dhcp_lease_get_router(lease, &gateway);
if (r < 0 && r != -ENODATA)
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index cf804ed1b3..5ba09a994a 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -1616,6 +1616,27 @@ static int setup_dev_console(const char *dest, const char *console) {
return mount_verbose(LOG_ERR, console, to, NULL, MS_BIND, NULL);
}
+static int setup_keyring(void) {
+ key_serial_t keyring;
+
+ /* Allocate a new session keyring for the container. This makes sure the keyring of the session systemd-nspawn
+ * was invoked from doesn't leak into the container. Note that by default we block keyctl() and request_key()
+ * anyway via seccomp so doing this operation isn't strictly necessary, but in case people explicitly whitelist
+ * these system calls let's make sure we don't leak anything into the container. */
+
+ keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
+ if (keyring == -1) {
+ if (errno == ENOSYS)
+ log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
+ else if (IN_SET(errno, EACCES, EPERM))
+ log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
+ else
+ return log_error_errno(errno, "Setting up kernel keyring failed: %m");
+ }
+
+ return 0;
+}
+
static int setup_kmsg(const char *dest, int kmsg_socket) {
const char *from, *to;
_cleanup_umask_ mode_t u;
@@ -2642,6 +2663,10 @@ static int outer_child(
if (r < 0)
return r;
+ r = setup_keyring();
+ if (r < 0)
+ return r;
+
r = setup_seccomp(arg_caps_retain, arg_syscall_whitelist, arg_syscall_blacklist);
if (r < 0)
return r;
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index d6b119987c..d216df465f 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -28,6 +28,8 @@
#include "errno-list.h"
#include "escape.h"
#include "hashmap.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
#include "list.h"
#include "locale-util.h"
#include "mount-util.h"
@@ -66,6 +68,31 @@ int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
&u->job_path);
}
+static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
+ int r;
+
+ assert(m);
+ assert(prefix);
+
+ r = sd_bus_message_open_container(m, 'r', "iayu");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "i", family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "u", prefixlen);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(m);
+}
+
int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignment) {
const char *eq, *field;
UnitDependency dep;
@@ -207,13 +234,13 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur);
} else if (STR_IN_SET(field,
- "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting",
- "SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies",
- "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", "RemainAfterExit",
- "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
- "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
- "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
- "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
+ "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting",
+ "TasksAccounting", "IPAccounting", "SendSIGHUP", "SendSIGKILL", "WakeSystem",
+ "DefaultDependencies", "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
+ "RemainAfterExit", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
+ "NoNewPrivileges", "SyslogLevelPrefix", "Delegate", "RemainAfterElapse",
+ "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
+ "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
"CPUSchedulingResetOnFork", "LockPersonality")) {
r = parse_boolean(eq);
@@ -433,6 +460,98 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "a(st)", 1, path, u);
}
+ } else if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "v", "a(iayu)", 0);
+ else {
+ unsigned char prefixlen;
+ union in_addr_union prefix = {};
+ int family;
+
+ r = sd_bus_message_open_container(m, 'v', "a(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(eq, "any")) {
+ /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (is_localhost(eq)) {
+ /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+ prefix.in.s_addr = htobe32(0x7f000000);
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
+ if (r < 0)
+ return r;
+
+ } else if (streq(eq, "link-local")) {
+
+ /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (streq(eq, "multicast")) {
+
+ /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .__in6_u.__u6_addr32[0] = htobe32(0xff000000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else {
+ r = in_addr_prefix_from_string_auto(eq, &family, &prefix, &prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse IP address prefix: %s", eq);
+
+ r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
} else if (streq(field, "CPUSchedulingPolicy")) {
int n;
diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c
index 952fc48c45..3a6e987ee1 100644
--- a/src/shared/firewall-util.c
+++ b/src/shared/firewall-util.c
@@ -72,7 +72,7 @@ static int entry_fill_basics(
}
if (source) {
entry->ip.src = source->in;
- in_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
+ in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
}
if (out_interface) {
@@ -84,7 +84,7 @@ static int entry_fill_basics(
}
if (destination) {
entry->ip.dst = destination->in;
- in_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
+ in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
}
return 0;
diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h
new file mode 100644
index 0000000000..8477b44609
--- /dev/null
+++ b/src/shared/linux/bpf.h
@@ -0,0 +1,673 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __LINUX_BPF_H__
+#define __LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+};
+
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* If the BPF_F_ALLOW_OVERRIDE flag is used in a BPF_PROG_ATTACH command
+ * on the given target_fd cgroup, descendant cgroups will be able to
+ * override the effective bpf program that was inherited from this cgroup.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+#define BPF_PSEUDO_MAP_FD 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* prealloc or not */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* checked when prog_type=kprobe */
+ __u32 prog_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in;
+ __u32 data_size_out;
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ } test;
+} __attribute__((aligned(8)));
+
+/* BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(&map, &key)
+ * Return: Map value or NULL
+ *
+ * int bpf_map_update_elem(&map, &key, &value, flags)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_map_delete_elem(&map, &key)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_probe_read(void *dst, int size, void *src)
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Return: current ktime
+ *
+ * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
+ * Return: length of buffer written or negative error
+ *
+ * u32 bpf_prandom_u32(void)
+ * Return: random value
+ *
+ * u32 bpf_raw_smp_processor_id(void)
+ * Return: SMP processor ID
+ *
+ * int bpf_skb_store_bytes(skb, offset, from, len, flags)
+ * store bytes into packet
+ * @skb: pointer to skb
+ * @offset: offset within packet from skb->mac_header
+ * @from: pointer where to copy bytes from
+ * @len: number of bytes to store into packet
+ * @flags: bit 0 - if true, recompute skb->csum
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_l3_csum_replace(skb, offset, from, to, flags)
+ * recompute IP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where IP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_l4_csum_replace(skb, offset, from, to, flags)
+ * recompute TCP/UDP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where TCP/UDP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * bit 4 - is pseudo header
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_tail_call(ctx, prog_array_map, index)
+ * jump into another BPF program
+ * @ctx: context pointer passed to next program
+ * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+ * @index: index inside array that selects specific program to run
+ * Return: 0 on success or negative error
+ *
+ * int bpf_clone_redirect(skb, ifindex, flags)
+ * redirect to another netdev
+ * @skb: pointer to skb
+ * @ifindex: ifindex of the net device
+ * @flags: bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return: current->tgid << 32 | current->pid
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return: current_gid << 32 | current_uid
+ *
+ * int bpf_get_current_comm(char *buf, int size_of_buf)
+ * stores current->comm into buf
+ * Return: 0 on success or negative error
+ *
+ * u32 bpf_get_cgroup_classid(skb)
+ * retrieve a proc's classid
+ * @skb: pointer to skb
+ * Return: classid if != 0
+ *
+ * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_vlan_pop(skb)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_get_tunnel_key(skb, key, size, flags)
+ * int bpf_skb_set_tunnel_key(skb, key, size, flags)
+ * retrieve or populate tunnel metadata
+ * @skb: pointer to skb
+ * @key: pointer to 'struct bpf_tunnel_key'
+ * @size: size of 'struct bpf_tunnel_key'
+ * @flags: room for future extensions
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_perf_event_read(&map, index)
+ * Return: Number events read or error code
+ *
+ * int bpf_redirect(ifindex, flags)
+ * redirect to another netdev
+ * @ifindex: ifindex of the net device
+ * @flags: bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * Return: TC_ACT_REDIRECT
+ *
+ * u32 bpf_get_route_realm(skb)
+ * retrieve a dst's tclassid
+ * @skb: pointer to skb
+ * Return: realm if != 0
+ *
+ * int bpf_perf_event_output(ctx, map, index, data, size)
+ * output perf raw sample
+ * @ctx: struct pt_regs*
+ * @map: pointer to perf_event_array map
+ * @index: index of event in the map
+ * @data: data on stack to be output as raw data
+ * @size: size of data
+ * Return: 0 on success or negative error
+ *
+ * int bpf_get_stackid(ctx, map, flags)
+ * walk user or kernel stack and return id
+ * @ctx: struct pt_regs*
+ * @map: pointer to stack_trace map
+ * @flags: bits 0-7 - number of stack frames to skip
+ * bit 8 - collect user stack instead of kernel
+ * bit 9 - compare stacks by hash only
+ * bit 10 - if two different stacks hash into the same stackid
+ * discard old
+ * other bits - reserved
+ * Return: >= 0 stackid on success or negative error
+ *
+ * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
+ * calculate csum diff
+ * @from: raw from buffer
+ * @from_size: length of from buffer
+ * @to: raw to buffer
+ * @to_size: length of to buffer
+ * @seed: optional seed
+ * Return: csum result or negative error code
+ *
+ * int bpf_skb_get_tunnel_opt(skb, opt, size)
+ * retrieve tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: option size
+ *
+ * int bpf_skb_set_tunnel_opt(skb, opt, size)
+ * populate tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_proto(skb, proto, flags)
+ * Change protocol of the skb. Currently supported is v4 -> v6,
+ * v6 -> v4 transitions. The helper will also resize the skb. eBPF
+ * program is expected to fill the new headers via skb_store_bytes
+ * and lX_csum_replace.
+ * @skb: pointer to skb
+ * @proto: new skb->protocol type
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_type(skb, type)
+ * Change packet type of skb.
+ * @skb: pointer to skb
+ * @type: new skb->pkt_type type
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_under_cgroup(skb, map, index)
+ * Check cgroup2 membership of skb
+ * @skb: pointer to skb
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 skb failed the cgroup2 descendant test
+ * == 1 skb succeeded the cgroup2 descendant test
+ * < 0 error
+ *
+ * u32 bpf_get_hash_recalc(skb)
+ * Retrieve and possibly recalculate skb->hash.
+ * @skb: pointer to skb
+ * Return: hash
+ *
+ * u64 bpf_get_current_task(void)
+ * Returns current task_struct
+ * Return: current
+ *
+ * int bpf_probe_write_user(void *dst, void *src, int len)
+ * safely attempt to write to a location
+ * @dst: destination address in userspace
+ * @src: source address on stack
+ * @len: number of bytes to copy
+ * Return: 0 on success or negative error
+ *
+ * int bpf_current_task_under_cgroup(map, index)
+ * Check cgroup2 membership of current task
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 current failed the cgroup2 descendant test
+ * == 1 current succeeded the cgroup2 descendant test
+ * < 0 error
+ *
+ * int bpf_skb_change_tail(skb, len, flags)
+ * The helper will resize the skb to the given new size, to be used f.e.
+ * with control messages.
+ * @skb: pointer to skb
+ * @len: new skb length
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_pull_data(skb, len)
+ * The helper will pull in non-linear data in case the skb is non-linear
+ * and not all of len are part of the linear section. Only needed for
+ * read/write with direct packet access.
+ * @skb: pointer to skb
+ * @len: len to make read/writeable
+ * Return: 0 on success or negative error
+ *
+ * s64 bpf_csum_update(skb, csum)
+ * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+ * @skb: pointer to skb
+ * @csum: csum to add
+ * Return: csum on success or negative error
+ *
+ * void bpf_set_hash_invalid(skb)
+ * Invalidate current skb->hash.
+ * @skb: pointer to skb
+ *
+ * int bpf_get_numa_node_id()
+ * Return: Id of current NUMA node.
+ *
+ * int bpf_skb_change_head(skb, len, flags)
+ * Grows headroom of skb and adjusts MAC header offset accordingly.
+ * Will extend/reallocate as required automatically.
+ * May change skb data pointer and will thus invalidate any check
+ * performed for direct packet access.
+ * @skb: pointer to skb
+ * @len: length of header to be pushed in front
+ * @flags: Flags (unused for now)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ * Adjust the xdp_md.data by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: A positive/negative integer to be added to xdp_md.data
+ * Return: 0 on success or negative on error
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Copy a NUL terminated string from unsafe address. In case the string
+ * length is smaller than size, the target is not padded with further NUL
+ * bytes. In case the string length is larger than size, just size-1
+ * bytes are copied and the last byte is set to NUL.
+ * @dst: destination address
+ * @size: maximum number of bytes to copy, including the trailing NUL
+ * @unsafe_ptr: unsafe address
+ * Return:
+ * > 0 length of the string including the trailing NUL on success
+ * < 0 error
+ *
+ * u64 bpf_get_socket_cookie(skb)
+ * Get the cookie for the socket stored inside sk_buff.
+ * @skb: pointer to skb
+ * Return: 8 Bytes non-decreasing number on success or 0 if the socket
+ * field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ * Get the owner uid of the socket stored inside sk_buff.
+ * @skb: pointer to skb
+ * Return: uid of the socket owner on success or overflowuid if failed.
+ */
+#define __BPF_FUNC_MAPPER(FN) \
+ FN(unspec), \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* BPF_FUNC_get_stackid flags. */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext;
+ __u32 tunnel_label;
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled separately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+};
+
+#endif /* __LINUX_BPF_H__ */
diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h
new file mode 100644
index 0000000000..afe7433b98
--- /dev/null
+++ b/src/shared/linux/bpf_common.h
@@ -0,0 +1,55 @@
+#ifndef __LINUX_BPF_COMMON_H__
+#define __LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* __LINUX_BPF_COMMON_H__ */
diff --git a/src/shared/linux/libbpf.h b/src/shared/linux/libbpf.h
new file mode 100644
index 0000000000..1989e3a869
--- /dev/null
+++ b/src/shared/linux/libbpf.h
@@ -0,0 +1,198 @@
+/* eBPF mini library */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+#include <linux/bpf.h>
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#endif
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 318cd35bf6..897fc48b98 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -3878,6 +3878,9 @@ typedef struct UnitStatusInfo {
uint64_t tasks_current;
uint64_t tasks_max;
+ uint64_t ip_ingress_bytes;
+ uint64_t ip_egress_bytes;
+
LIST_HEAD(ExecStatusInfo, exec);
} UnitStatusInfo;
@@ -4194,6 +4197,14 @@ static void print_status_info(
if (i->status_errno > 0)
printf(" Error: %i (%s)\n", i->status_errno, strerror(i->status_errno));
+ if (i->ip_ingress_bytes != (uint64_t) -1 && i->ip_egress_bytes != (uint64_t) -1) {
+ char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
+
+ printf(" IP: %s in, %s out\n",
+ format_bytes(buf_in, sizeof(buf_in), i->ip_ingress_bytes),
+ format_bytes(buf_out, sizeof(buf_out), i->ip_egress_bytes));
+ }
+
if (i->tasks_current != (uint64_t) -1) {
printf(" Tasks: %" PRIu64, i->tasks_current);
@@ -4484,6 +4495,10 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
i->next_elapse_monotonic = u;
else if (streq(name, "NextElapseUSecRealtime"))
i->next_elapse_real = u;
+ else if (streq(name, "IPIngressBytes"))
+ i->ip_ingress_bytes = u;
+ else if (streq(name, "IPEgressBytes"))
+ i->ip_egress_bytes = u;
break;
}
@@ -4998,6 +5013,8 @@ static int show_one(
.cpu_usage_nsec = (uint64_t) -1,
.tasks_current = (uint64_t) -1,
.tasks_max = (uint64_t) -1,
+ .ip_ingress_bytes = (uint64_t) -1,
+ .ip_egress_bytes = (uint64_t) -1,
};
int r;
diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h
index 4bc248a4b1..8c23486779 100644
--- a/src/systemd/sd-messages.h
+++ b/src/systemd/sd-messages.h
@@ -103,6 +103,9 @@ _SD_BEGIN_DECLARATIONS;
#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR \
SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
+#define SD_MESSAGE_UNIT_RESOURCES SD_ID128_MAKE(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+#define SD_MESSAGE_UNIT_RESOURCES_STR SD_ID128_MAKE_STR(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+
#define SD_MESSAGE_SPAWN_FAILED SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
#define SD_MESSAGE_SPAWN_FAILED_STR SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
diff --git a/src/test/meson.build b/src/test/meson.build
index 57f76559a7..1f3db65781 100644
--- a/src/test/meson.build
+++ b/src/test/meson.build
@@ -277,6 +277,10 @@ tests += [
[],
[]],
+ [['src/test/test-in-addr-util.c'],
+ [],
+ []],
+
[['src/test/test-barrier.c'],
[],
[]],
@@ -335,6 +339,17 @@ tests += [
[libbasic],
[]],
+ [['src/test/test-bpf.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [libmount,
+ threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libblkid]],
+
[['src/test/test-hashmap.c',
'src/test/test-hashmap-plain.c',
test_hashmap_ordered_c],
diff --git a/src/test/test-bpf.c b/src/test/test-bpf.c
new file mode 100644
index 0000000000..74e9d50561
--- /dev/null
+++ b/src/test/test-bpf.c
@@ -0,0 +1,162 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Daniel Mack
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <linux/libbpf.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "load-fragment.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "service.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit.h"
+
+int main(int argc, char *argv[]) {
+        struct bpf_insn exit_insn[] = {
+                BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1: the value the program returns */
+                BPF_EXIT_INSN()
+        };
+
+        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+        CGroupContext *cc = NULL;
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+        Manager *m = NULL;
+        Unit *u;
+        char log_buf[65535] = ""; /* starts out empty so it can be logged even if the loader writes nothing */
+        int r;
+
+        log_set_max_level(LOG_DEBUG);
+        log_parse_environment();
+        log_open();
+
+        enter_cgroup_subroot();
+        assert_se(set_unit_path(get_testdata_dir("")) >= 0);
+        assert_se(runtime_dir = setup_fake_runtime_dir());
+
+        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+        assert_se(r == 0); /* assert_se() rather than assert(): test checks must survive NDEBUG builds */
+
+        r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
+        assert_se(r == 0);
+
+        if (getuid() != 0) {
+                log_notice("Not running as root, skipping kernel related tests.");
+                return EXIT_TEST_SKIP;
+        }
+
+        r = bpf_firewall_supported();
+        if (r == 0) {
+                log_notice("BPF firewalling not supported, skipping");
+                return EXIT_TEST_SKIP;
+        }
+        assert_se(r > 0);
+
+        r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
+        assert_se(r >= 0);
+
+        p = bpf_program_unref(p);
+
+        /* The simple tests succeeded. Now let's try a full unit-based use-case. */
+
+        assert_se(manager_new(UNIT_FILE_USER, true, &m) >= 0);
+        assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+        assert_se(u = unit_new(m, sizeof(Service)));
+        assert_se(unit_add_name(u, "foo.service") == 0);
+        assert_se(cc = unit_get_cgroup_context(u));
+        u->perpetual = true;
+
+        cc->ip_accounting = true;
+
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "10.0.1.0/24", &cc->ip_address_allow, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "127.0.0.2", &cc->ip_address_allow, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.3", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "10.0.3.2/24", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.1/25", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.4", &cc->ip_address_deny, NULL) == 0);
+
+        assert_se(cc->ip_address_allow); /* two allow entries were parsed: expect exactly two list items */
+        assert_se(cc->ip_address_allow->items_next);
+        assert_se(!cc->ip_address_allow->items_next->items_next);
+
+        /* The deny list is defined redundantly, let's ensure it got properly reduced */
+        assert_se(cc->ip_address_deny);
+        assert_se(cc->ip_address_deny->items_next);
+        assert_se(!cc->ip_address_deny->items_next->items_next);
+
+        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.2 -W 5", SERVICE(u)->exec_command, u) == 0);
+        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.3 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]);
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next);
+        assert_se(!SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->command_next);
+
+        SERVICE(u)->type = SERVICE_ONESHOT;
+        u->load_state = UNIT_LOADED;
+
+        unit_dump(u, stdout, NULL);
+
+        r = bpf_firewall_compile(u);
+        if (IN_SET(r, -ENOTTY, -ENOSYS, -EPERM)) {
+                /* Kernel doesn't support the necessary bpf bits, or masked out via seccomp? */
+                manager_free(m);
+                return EXIT_TEST_SKIP;
+        }
+        assert_se(r >= 0);
+
+        assert_se(u->ip_bpf_ingress);
+        assert_se(u->ip_bpf_egress);
+
+        r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
+
+        log_notice("log:"); /* print the log before checking r, so it is visible on failure too */
+        log_notice("-------");
+        log_notice("%s", log_buf);
+        log_notice("-------");
+
+        assert_se(r >= 0);
+
+        r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
+
+        log_notice("log:");
+        log_notice("-------");
+        log_notice("%s", log_buf);
+        log_notice("-------");
+
+        assert_se(r >= 0);
+
+        assert_se(unit_start(u) >= 0); /* has a side effect: must never be compiled out, or the loop below never ends */
+
+        while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+                assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code == CLD_EXITED && /* ping to allowed 127.0.0.2 succeeds */
+                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status == EXIT_SUCCESS);
+
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED || /* ping to denied 127.0.0.3 must not succeed */
+                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
+
+        manager_free(m);
+
+        return 0;
+}
diff --git a/src/test/test-fileio.c b/src/test/test-fileio.c
index 4f319b3c6b..b5b6391cdd 100644
--- a/src/test/test-fileio.c
+++ b/src/test/test-fileio.c
@@ -609,9 +609,9 @@ static void test_writing_tmpfile(void) {
int fd, r;
struct iovec iov[3];
- IOVEC_SET_STRING(iov[0], "abc\n");
- IOVEC_SET_STRING(iov[1], ALPHANUMERICAL "\n");
- IOVEC_SET_STRING(iov[2], "");
+ iov[0] = IOVEC_MAKE_STRING("abc\n");
+ iov[1] = IOVEC_MAKE_STRING(ALPHANUMERICAL "\n");
+ iov[2] = IOVEC_MAKE_STRING("");
fd = mkostemp_safe(name);
printf("tmpfile: %s", name);
diff --git a/src/test/test-in-addr-util.c b/src/test/test-in-addr-util.c
new file mode 100644
index 0000000000..8b7a1229fe
--- /dev/null
+++ b/src/test/test-in-addr-util.c
@@ -0,0 +1,75 @@
+/***
+ This file is part of systemd
+
+ Copyright 2017 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <netinet/in.h>
+
+#include "in-addr-util.h"
+
+static void test_in_addr_prefix_from_string(const char *p, int family, int ret, const union in_addr_union *u, unsigned char prefixlen) { /* Asserts that parsing p for the given family returns exactly ret and, on success, yields the address *u and prefixlen. u is only read when parsing succeeds. */
+ union in_addr_union q;
+ unsigned char l;
+ int r;
+ /* Parse with the family fixed by the caller; the return code must match ret exactly. */
+ r = in_addr_prefix_from_string(p, family, &q, &l);
+ assert_se(r == ret);
+ /* On success, check the parsed address and prefix length, then repeat through the _auto variant. */
+ if (r >= 0) {
+ int f;
+ /* Results of the family-specific parse above. */
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen);
+ /* The _auto variant reports the family through f; it must succeed on the same string. */
+ r = in_addr_prefix_from_string_auto(p, &f, &q, &l);
+ assert_se(r >= 0);
+ /* It must report the same family, address and prefix length as the explicit-family parse. */
+ assert_se(f == family);
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen);
+ }
+}
+
+int main(int argc, char *argv[]) { /* Runs test_in_addr_prefix_from_string() over a fixed table, first AF_INET and then AF_INET6; argc/argv are unused. AF_INET expectations: no address part -> -EINVAL, no prefix -> length 32, /0 through /32 accepted, /33 and /-1 -> -ERANGE, an IPv6 literal -> -EINVAL. */
+ test_in_addr_prefix_from_string("", AF_INET, -EINVAL, NULL, 0);
+ test_in_addr_prefix_from_string("/", AF_INET, -EINVAL, NULL, 0);
+ test_in_addr_prefix_from_string("/8", AF_INET, -EINVAL, NULL, 0);
+ test_in_addr_prefix_from_string("1.2.3.4", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
+ test_in_addr_prefix_from_string("1.2.3.4/0", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0);
+ test_in_addr_prefix_from_string("1.2.3.4/1", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1);
+ test_in_addr_prefix_from_string("1.2.3.4/2", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2);
+ test_in_addr_prefix_from_string("1.2.3.4/32", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
+ test_in_addr_prefix_from_string("1.2.3.4/33", AF_INET, -ERANGE, NULL, 0);
+ test_in_addr_prefix_from_string("1.2.3.4/-1", AF_INET, -ERANGE, NULL, 0);
+ test_in_addr_prefix_from_string("::1", AF_INET, -EINVAL, NULL, 0);
+ /* AF_INET6 expectations: no address part -> -EINVAL, no prefix -> length 128, /0 through /128 accepted (so /33 is valid here), /129 and /-1 -> -ERANGE. */
+ test_in_addr_prefix_from_string("", AF_INET6, -EINVAL, NULL, 0);
+ test_in_addr_prefix_from_string("/", AF_INET6, -EINVAL, NULL, 0);
+ test_in_addr_prefix_from_string("/8", AF_INET6, -EINVAL, NULL, 0);
+ test_in_addr_prefix_from_string("::1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
+ test_in_addr_prefix_from_string("::1/0", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
+ test_in_addr_prefix_from_string("::1/1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1);
+ test_in_addr_prefix_from_string("::1/2", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2);
+ test_in_addr_prefix_from_string("::1/32", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32);
+ test_in_addr_prefix_from_string("::1/33", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33);
+ test_in_addr_prefix_from_string("::1/64", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64);
+ test_in_addr_prefix_from_string("::1/128", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
+ test_in_addr_prefix_from_string("::1/129", AF_INET6, -ERANGE, NULL, 0);
+ test_in_addr_prefix_from_string("::1/-1", AF_INET6, -ERANGE, NULL, 0);
+
+ return 0;
+}