diff options
author | David S. Miller <davem@davemloft.net> | 2017-11-05 15:26:51 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-11-05 15:26:51 +0100 |
commit | 2798b80b385384d51a81832556ee9ad25d175f9b (patch) | |
tree | a6b3aebc786eeb512dfde7d6bae130cc136ede4b | |
parent | Merge tag 'mlx5-updates-2017-11-04' of git://git.kernel.org/pub/scm/linux/ker... (diff) | |
parent | selftests/bpf: add a test for device cgroup controller (diff) | |
download | linux-2798b80b385384d51a81832556ee9ad25d175f9b.tar.xz linux-2798b80b385384d51a81832556ee9ad25d175f9b.zip |
Merge branch 'eBPF-based-device-cgroup-controller'
Roman Gushchin says:
====================
eBPF-based device cgroup controller
This patchset introduces an eBPF-based device controller for cgroup v2.
Patches (1) and (2) are a preparational work required to share some code
with the existing device controller implementation.
Patch (3) is the main patch, which introduces a new bpf prog type
and all necessary infrastructure.
Patch (4) moves cgroup_helpers.c/h to use them by patch (4).
Patch (5) implements an example of eBPF program which controls access
to device files and corresponding userspace test.
v3:
Renamed constants introduced by patch (3) to BPF_DEVCG_*
v2:
Added patch (1).
v1:
https://lkml.org/lkml/2017/11/1/363
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/bpf-cgroup.h | 15 | ||||
-rw-r--r-- | include/linux/bpf_types.h | 3 | ||||
-rw-r--r-- | include/linux/device_cgroup.h | 67 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 15 | ||||
-rw-r--r-- | kernel/bpf/cgroup.c | 67 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 7 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 1 | ||||
-rw-r--r-- | samples/bpf/Makefile | 5 | ||||
-rw-r--r-- | security/device_cgroup.c | 91 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 15 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/Makefile | 6 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/cgroup_helpers.c (renamed from samples/bpf/cgroup_helpers.c) | 0 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/cgroup_helpers.h (renamed from samples/bpf/cgroup_helpers.h) | 0 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/dev_cgroup.c | 60 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/test_dev_cgroup.c | 93 |
15 files changed, 369 insertions, 76 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 87a7db9feb38..a7f16e0f8d68 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, enum bpf_attach_type type); +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } \ __ret; \ }) + +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ + access, \ + BPF_CGROUP_DEVICE); \ + \ + __ret; \ +}) #else struct cgroup_bpf {}; @@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 53c5b9ad7220..978c1d9c9383 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index cdbc344a92e4..8557efe096dc 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,17 +1,76 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/fs.h> +#include <linux/bpf-cgroup.h> + +#define DEVCG_ACC_MKNOD 1 +#define DEVCG_ACC_READ 2 +#define DEVCG_ACC_WRITE 4 +#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE) + +#define DEVCG_DEV_BLOCK 1 +#define DEVCG_DEV_CHAR 2 +#define DEVCG_DEV_ALL 4 /* this represents all devices */ #ifdef CONFIG_CGROUP_DEVICE -extern int __devcgroup_inode_permission(struct inode *inode, int mask); -extern int devcgroup_inode_mknod(int mode, dev_t dev); +extern int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access); +#else +static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ return 0; } +#endif + +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) +static inline int devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ + int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); + + if (rc) + return -EPERM; + + return __devcgroup_check_permission(type, major, minor, access); +} + static inline int devcgroup_inode_permission(struct inode *inode, int mask) { + short type, access = 0; + if (likely(!inode->i_rdev)) return 0; - if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + + if (S_ISBLK(inode->i_mode)) + type = DEVCG_DEV_BLOCK; + else if (S_ISCHR(inode->i_mode)) + type = DEVCG_DEV_CHAR; + else + return 0; + + if (mask & MAY_WRITE) + access |= DEVCG_ACC_WRITE; + if (mask & MAY_READ) + access |= DEVCG_ACC_READ; + + return devcgroup_check_permission(type, imajor(inode), iminor(inode), + access); +} + +static inline int devcgroup_inode_mknod(int mode, dev_t dev) +{ + short type; + + if (!S_ISBLK(mode) && !S_ISCHR(mode)) return 0; - return __devcgroup_inode_permission(inode, mask); + + if (S_ISBLK(mode)) + type = DEVCG_DEV_BLOCK; + else + type = DEVCG_DEV_CHAR; + + return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), + DEVCG_ACC_MKNOD); } + #else static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4455dd195201..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -132,6 +132,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -141,6 +142,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -991,4 +993,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 3db5a17fcfe8..b789ab78d28f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); + +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type) +{ + struct cgroup *cgrp; + struct bpf_cgroup_dev_ctx ctx = { + .access_type = (access << 16) | dev_type, + .major = major, + .minor = minor, + }; + int allow = 1; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, + BPF_PROG_RUN); + rcu_read_unlock(); + + return !allow; +} +EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); + +static const struct bpf_func_proto * +cgroup_dev_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_trace_printk: + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); + default: + return NULL; + } +} + +static bool cgroup_dev_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) + return false; + + if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) + return false; + /* The verifier guarantees that size > 0. */ + if (off % size != 0) + return false; + if (size != sizeof(__u32)) + return false; + + return true; +} + +const struct bpf_prog_ops cg_dev_prog_ops = { +}; + +const struct bpf_verifier_ops cg_dev_verifier_ops = { + .get_func_proto = cgroup_dev_func_proto, + .is_valid_access = cgroup_dev_is_valid_access, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 416d70cdfc76..09badc37e864 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, true); @@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; + case BPF_CGROUP_DEVICE: + ptype = BPF_PROG_TYPE_CGROUP_DEVICE; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, false); @@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: break; default: return -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index add845fe788a..4a942e2e753d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_CGROUP_DEVICE: break; default: return 0; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6a9321ec348a..5994075b080d 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -46,6 +46,7 @@ hostprogs-y += syscall_tp # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o +CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o test_lru_dist-objs := test_lru_dist.o $(LIBBPF) sock_example-objs := sock_example.o $(LIBBPF) @@ -69,13 +70,13 @@ map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o -test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o +test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS) test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o -test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \ +test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 5ef7e5240563..c65b39bafdfe 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -15,15 +15,6 @@ #include <linux/rcupdate.h> #include <linux/mutex.h> -#define ACC_MKNOD 1 -#define ACC_READ 2 -#define ACC_WRITE 4 -#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) - -#define DEV_BLOCK 1 -#define DEV_CHAR 2 -#define DEV_ALL 4 /* this represents all devices */ - static DEFINE_MUTEX(devcgroup_mutex); enum devcg_behavior { @@ -246,21 +237,21 @@ static void set_access(char *acc, short access) { int idx = 0; memset(acc, 0, ACCLEN); - if (access & ACC_READ) + if (access & DEVCG_ACC_READ) acc[idx++] = 'r'; - if (access & ACC_WRITE) + if (access & DEVCG_ACC_WRITE) acc[idx++] = 'w'; - if (access & ACC_MKNOD) + if (access & DEVCG_ACC_MKNOD) acc[idx++] = 'm'; } static char type_to_char(short type) { - if (type == DEV_ALL) + if (type == DEVCG_DEV_ALL) return 'a'; - if (type == DEV_CHAR) + if (type == DEVCG_DEV_CHAR) return 'c'; - if (type == DEV_BLOCK) + if (type == DEVCG_DEV_BLOCK) return 'b'; return 'X'; } @@ -287,10 +278,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v) * This way, the file remains as a "whitelist of devices" */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { - set_access(acc, ACC_MASK); + set_access(acc, DEVCG_ACC_MASK); set_majmin(maj, ~0); set_majmin(min, ~0); - seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL), + seq_printf(m, "%c %s:%s %s\n", type_to_char(DEVCG_DEV_ALL), maj, min, acc); } else { list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) { @@ -309,10 +300,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v) /** * match_exception - iterates the exception list trying to find a complete match * @exceptions: list of exceptions - * @type: device type (DEV_BLOCK or DEV_CHAR) + * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR) * @major: device file major number, ~0 to match all * @minor: device file minor number, ~0 to match all - * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) + * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD) * * It is considered a complete match if an exception is found that will * contain the entire range of provided parameters. @@ -325,9 +316,9 @@ static bool match_exception(struct list_head *exceptions, short type, struct dev_exception_item *ex; list_for_each_entry_rcu(ex, exceptions, list) { - if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) + if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK)) continue; - if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) + if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR)) continue; if (ex->major != ~0 && ex->major != major) continue; @@ -344,10 +335,10 @@ static bool match_exception(struct list_head *exceptions, short type, /** * match_exception_partial - iterates the exception list trying to find a partial match * @exceptions: list of exceptions - * @type: device type (DEV_BLOCK or DEV_CHAR) + * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR) * @major: device file major number, ~0 to match all * @minor: device file minor number, ~0 to match all - * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) + * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD) * * It is considered a partial match if an exception's range is found to * contain *any* of the devices specified by provided parameters. This is @@ -362,9 +353,9 @@ static bool match_exception_partial(struct list_head *exceptions, short type, struct dev_exception_item *ex; list_for_each_entry_rcu(ex, exceptions, list) { - if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) + if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK)) continue; - if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) + if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR)) continue; /* * We must be sure that both the exception and the provided @@ -647,10 +638,10 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, } return 0; case 'b': - ex.type = DEV_BLOCK; + ex.type = DEVCG_DEV_BLOCK; break; case 'c': - ex.type = DEV_CHAR; + ex.type = DEVCG_DEV_CHAR; break; default: return -EINVAL; @@ -703,13 +694,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, for (b++, count = 0; count < 3; count++, b++) { switch (*b) { case 'r': - ex.access |= ACC_READ; + ex.access |= DEVCG_ACC_READ; break; case 'w': - ex.access |= ACC_WRITE; + ex.access |= DEVCG_ACC_WRITE; break; case 'm': - ex.access |= ACC_MKNOD; + ex.access |= DEVCG_ACC_MKNOD; break; case '\n': case '\0': @@ -806,12 +797,12 @@ struct cgroup_subsys devices_cgrp_subsys = { * @type: device type * @major: device major number * @minor: device minor number - * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD + * @access: combination of DEVCG_ACC_WRITE, DEVCG_ACC_READ and DEVCG_ACC_MKNOD * * returns 0 on success, -EPERM case the operation is not permitted */ -static int __devcgroup_check_permission(short type, u32 major, u32 minor, - short access) +int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access) { struct dev_cgroup *dev_cgroup; bool rc; @@ -833,37 +824,3 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor, return 0; } - -int __devcgroup_inode_permission(struct inode *inode, int mask) -{ - short type, access = 0; - - if (S_ISBLK(inode->i_mode)) - type = DEV_BLOCK; - if (S_ISCHR(inode->i_mode)) - type = DEV_CHAR; - if (mask & MAY_WRITE) - access |= ACC_WRITE; - if (mask & MAY_READ) - access |= ACC_READ; - - return __devcgroup_check_permission(type, imajor(inode), iminor(inode), - access); -} - -int devcgroup_inode_mknod(int mode, dev_t dev) -{ - short type; - - if (!S_ISBLK(mode) && !S_ISCHR(mode)) - return 0; - - if (S_ISBLK(mode)) - type = DEV_BLOCK; - else - type = DEV_CHAR; - - return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), - ACC_MKNOD); - -} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e92f62cf933a..b280f37cd057 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -131,6 +131,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -140,6 +141,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -990,4 +992,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4f0734aa6e93..333a48655ee0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -13,17 +13,17 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i LDLIBS += -lcap -lelf TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log + test_align test_verifier_log test_dev_cgroup TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o + sockmap_verdict_prog.o dev_cgroup.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh include ../lib.mk -BPFOBJ := $(OUTPUT)/libbpf.a +BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c $(TEST_GEN_PROGS): $(BPFOBJ) diff --git a/samples/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index f3bca3ade0f3..f3bca3ade0f3 100644 --- a/samples/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c diff --git a/samples/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 06485e0002b3..06485e0002b3 100644 --- a/samples/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/dev_cgroup.c new file mode 100644 index 000000000000..ce41a3475f27 --- /dev/null +++ b/tools/testing/selftests/bpf/dev_cgroup.c @@ -0,0 +1,60 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +SEC("cgroup/dev") +int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) +{ + short type = ctx->access_type & 0xFFFF; +#ifdef DEBUG + short access = ctx->access_type >> 16; + char fmt[] = " %d:%d \n"; + + switch (type) { + case BPF_DEVCG_DEV_BLOCK: + fmt[0] = 'b'; + break; + case BPF_DEVCG_DEV_CHAR: + fmt[0] = 'c'; + break; + default: + fmt[0] = '?'; + break; + } + + if (access & BPF_DEVCG_ACC_READ) + fmt[8] = 'r'; + + if (access & BPF_DEVCG_ACC_WRITE) + fmt[9] = 'w'; + + if (access & BPF_DEVCG_ACC_MKNOD) + fmt[10] = 'm'; + + bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor); +#endif + + /* Allow access to /dev/zero and /dev/random. + * Forbid everything else. + */ + if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR) + return 0; + + switch (ctx->minor) { + case 5: /* 1:5 /dev/zero */ + case 9: /* 1:9 /dev/urandom */ + return 1; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c new file mode 100644 index 000000000000..02c85d6c89b0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "cgroup_helpers.h" + +#define DEV_CGROUP_PROG "./dev_cgroup.o" + +#define TEST_CGROUP "test-bpf-based-device-cgroup/" + +int main(int argc, char **argv) +{ + struct bpf_object *obj; + int error = EXIT_FAILURE; + int prog_fd, cgroup_fd; + __u32 prog_cnt; + + if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, + &obj, &prog_fd)) { + printf("Failed to load DEV_CGROUP program\n"); + goto err; + } + + if (setup_cgroup_environment()) { + printf("Failed to load DEV_CGROUP program\n"); + goto err; + } + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + if (!cgroup_fd) { + printf("Failed to create test cgroup\n"); + goto err; + } + + if (join_cgroup(TEST_CGROUP)) { + printf("Failed to join cgroup\n"); + goto err; + } + + /* Attach bpf program */ + if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) { + printf("Failed to attach DEV_CGROUP program"); + goto err; + } + + if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL, + &prog_cnt)) { + printf("Failed to query attached programs"); + goto err; + } + + /* All operations with /dev/zero and and /dev/urandom are allowed, + * everything else is forbidden. + */ + assert(system("rm -f /tmp/test_dev_cgroup_null") == 0); + assert(system("mknod /tmp/test_dev_cgroup_null c 1 3")); + assert(system("rm -f /tmp/test_dev_cgroup_null") == 0); + + /* /dev/zero is whitelisted */ + assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0); + assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0); + assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0); + + assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0); + + /* src is allowed, target is forbidden */ + assert(system("dd if=/dev/urandom of=/dev/full count=64")); + + /* src is forbidden, target is allowed */ + assert(system("dd if=/dev/random of=/dev/zero count=64")); + + error = 0; + printf("test_dev_cgroup:PASS\n"); + +err: + cleanup_cgroup_environment(); + + return error; +} |