summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-11-05 15:26:51 +0100
committerDavid S. Miller <davem@davemloft.net>2017-11-05 15:26:51 +0100
commit2798b80b385384d51a81832556ee9ad25d175f9b (patch)
treea6b3aebc786eeb512dfde7d6bae130cc136ede4b
parentMerge tag 'mlx5-updates-2017-11-04' of git://git.kernel.org/pub/scm/linux/ker... (diff)
parentselftests/bpf: add a test for device cgroup controller (diff)
downloadlinux-2798b80b385384d51a81832556ee9ad25d175f9b.tar.xz
linux-2798b80b385384d51a81832556ee9ad25d175f9b.zip
Merge branch 'eBPF-based-device-cgroup-controller'
Roman Gushchin says: ==================== eBPF-based device cgroup controller This patchset introduces an eBPF-based device controller for cgroup v2. Patches (1) and (2) are a preparational work required to share some code with the existing device controller implementation. Patch (3) is the main patch, which introduces a new bpf prog type and all necessary infrastructure. Patch (4) moves cgroup_helpers.c/h to use them by patch (4). Patch (5) implements an example of eBPF program which controls access to device files and corresponding userspace test. v3: Renamed constants introduced by patch (3) to BPF_DEVCG_* v2: Added patch (1). v1: https://lkml.org/lkml/2017/11/1/363 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/bpf-cgroup.h15
-rw-r--r--include/linux/bpf_types.h3
-rw-r--r--include/linux/device_cgroup.h67
-rw-r--r--include/uapi/linux/bpf.h15
-rw-r--r--kernel/bpf/cgroup.c67
-rw-r--r--kernel/bpf/syscall.c7
-rw-r--r--kernel/bpf/verifier.c1
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--security/device_cgroup.c91
-rw-r--r--tools/include/uapi/linux/bpf.h15
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c (renamed from samples/bpf/cgroup_helpers.c)0
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h (renamed from samples/bpf/cgroup_helpers.h)0
-rw-r--r--tools/testing/selftests/bpf/dev_cgroup.c60
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c93
15 files changed, 369 insertions, 76 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 87a7db9feb38..a7f16e0f8d68 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
enum bpf_attach_type type);
+int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
+ short access, enum bpf_attach_type type);
+
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
@@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
} \
__ret; \
})
+
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
+ access, \
+ BPF_CGROUP_DEVICE); \
+ \
+ __ret; \
+})
#else
struct cgroup_bpf {};
@@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#endif /* CONFIG_CGROUP_BPF */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 53c5b9ad7220..978c1d9c9383 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
#endif
+#ifdef CONFIG_CGROUP_BPF
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
+#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index cdbc344a92e4..8557efe096dc 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -1,17 +1,76 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/fs.h>
+#include <linux/bpf-cgroup.h>
+
+#define DEVCG_ACC_MKNOD 1
+#define DEVCG_ACC_READ 2
+#define DEVCG_ACC_WRITE 4
+#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE)
+
+#define DEVCG_DEV_BLOCK 1
+#define DEVCG_DEV_CHAR 2
+#define DEVCG_DEV_ALL 4 /* this represents all devices */
#ifdef CONFIG_CGROUP_DEVICE
-extern int __devcgroup_inode_permission(struct inode *inode, int mask);
-extern int devcgroup_inode_mknod(int mode, dev_t dev);
+extern int __devcgroup_check_permission(short type, u32 major, u32 minor,
+ short access);
+#else
+static inline int __devcgroup_check_permission(short type, u32 major, u32 minor,
+ short access)
+{ return 0; }
+#endif
+
+#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
+static inline int devcgroup_check_permission(short type, u32 major, u32 minor,
+ short access)
+{
+ int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
+
+ if (rc)
+ return -EPERM;
+
+ return __devcgroup_check_permission(type, major, minor, access);
+}
+
static inline int devcgroup_inode_permission(struct inode *inode, int mask)
{
+ short type, access = 0;
+
if (likely(!inode->i_rdev))
return 0;
- if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))
+
+ if (S_ISBLK(inode->i_mode))
+ type = DEVCG_DEV_BLOCK;
+ else if (S_ISCHR(inode->i_mode))
+ type = DEVCG_DEV_CHAR;
+ else
+ return 0;
+
+ if (mask & MAY_WRITE)
+ access |= DEVCG_ACC_WRITE;
+ if (mask & MAY_READ)
+ access |= DEVCG_ACC_READ;
+
+ return devcgroup_check_permission(type, imajor(inode), iminor(inode),
+ access);
+}
+
+static inline int devcgroup_inode_mknod(int mode, dev_t dev)
+{
+ short type;
+
+ if (!S_ISBLK(mode) && !S_ISCHR(mode))
return 0;
- return __devcgroup_inode_permission(inode, mask);
+
+ if (S_ISBLK(mode))
+ type = DEVCG_DEV_BLOCK;
+ else
+ type = DEVCG_DEV_CHAR;
+
+ return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
+ DEVCG_ACC_MKNOD);
}
+
#else
static inline int devcgroup_inode_permission(struct inode *inode, int mask)
{ return 0; }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4455dd195201..e880ae6434ee 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -132,6 +132,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_XMIT,
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
};
enum bpf_attach_type {
@@ -141,6 +142,7 @@ enum bpf_attach_type {
BPF_CGROUP_SOCK_OPS,
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
__MAX_BPF_ATTACH_TYPE
};
@@ -991,4 +993,17 @@ struct bpf_perf_event_value {
__u64 running;
};
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ __u32 access_type; /* (access << 16) | type */
+ __u32 major;
+ __u32 minor;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 3db5a17fcfe8..b789ab78d28f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
+
+int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
+ short access, enum bpf_attach_type type)
+{
+ struct cgroup *cgrp;
+ struct bpf_cgroup_dev_ctx ctx = {
+ .access_type = (access << 16) | dev_type,
+ .major = major,
+ .minor = minor,
+ };
+ int allow = 1;
+
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(current);
+ allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
+ BPF_PROG_RUN);
+ rcu_read_unlock();
+
+ return !allow;
+}
+EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
+
+static const struct bpf_func_proto *
+cgroup_dev_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_map_lookup_elem:
+ return &bpf_map_lookup_elem_proto;
+ case BPF_FUNC_map_update_elem:
+ return &bpf_map_update_elem_proto;
+ case BPF_FUNC_map_delete_elem:
+ return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_trace_printk:
+ if (capable(CAP_SYS_ADMIN))
+ return bpf_get_trace_printk_proto();
+ default:
+ return NULL;
+ }
+}
+
+static bool cgroup_dev_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
+ return false;
+ /* The verifier guarantees that size > 0. */
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u32))
+ return false;
+
+ return true;
+}
+
+const struct bpf_prog_ops cg_dev_prog_ops = {
+};
+
+const struct bpf_verifier_ops cg_dev_verifier_ops = {
+ .get_func_proto = cgroup_dev_func_proto,
+ .is_valid_access = cgroup_dev_is_valid_access,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 416d70cdfc76..09badc37e864 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
+ case BPF_CGROUP_DEVICE:
+ ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
+ break;
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
return sockmap_get_from_fd(attr, true);
@@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
+ case BPF_CGROUP_DEVICE:
+ ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
+ break;
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
return sockmap_get_from_fd(attr, false);
@@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET_EGRESS:
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_SOCK_OPS:
+ case BPF_CGROUP_DEVICE:
break;
default:
return -EINVAL;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index add845fe788a..4a942e2e753d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
break;
default:
return 0;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6a9321ec348a..5994075b080d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -46,6 +46,7 @@ hostprogs-y += syscall_tp
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
+CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
sock_example-objs := sock_example.o $(LIBBPF)
@@ -69,13 +70,13 @@ map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o
-test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o
+test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS)
test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
+test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 5ef7e5240563..c65b39bafdfe 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -15,15 +15,6 @@
#include <linux/rcupdate.h>
#include <linux/mutex.h>
-#define ACC_MKNOD 1
-#define ACC_READ 2
-#define ACC_WRITE 4
-#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)
-
-#define DEV_BLOCK 1
-#define DEV_CHAR 2
-#define DEV_ALL 4 /* this represents all devices */
-
static DEFINE_MUTEX(devcgroup_mutex);
enum devcg_behavior {
@@ -246,21 +237,21 @@ static void set_access(char *acc, short access)
{
int idx = 0;
memset(acc, 0, ACCLEN);
- if (access & ACC_READ)
+ if (access & DEVCG_ACC_READ)
acc[idx++] = 'r';
- if (access & ACC_WRITE)
+ if (access & DEVCG_ACC_WRITE)
acc[idx++] = 'w';
- if (access & ACC_MKNOD)
+ if (access & DEVCG_ACC_MKNOD)
acc[idx++] = 'm';
}
static char type_to_char(short type)
{
- if (type == DEV_ALL)
+ if (type == DEVCG_DEV_ALL)
return 'a';
- if (type == DEV_CHAR)
+ if (type == DEVCG_DEV_CHAR)
return 'c';
- if (type == DEV_BLOCK)
+ if (type == DEVCG_DEV_BLOCK)
return 'b';
return 'X';
}
@@ -287,10 +278,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
* This way, the file remains as a "whitelist of devices"
*/
if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
- set_access(acc, ACC_MASK);
+ set_access(acc, DEVCG_ACC_MASK);
set_majmin(maj, ~0);
set_majmin(min, ~0);
- seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
+ seq_printf(m, "%c %s:%s %s\n", type_to_char(DEVCG_DEV_ALL),
maj, min, acc);
} else {
list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
@@ -309,10 +300,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
/**
* match_exception - iterates the exception list trying to find a complete match
* @exceptions: list of exceptions
- * @type: device type (DEV_BLOCK or DEV_CHAR)
+ * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
* @major: device file major number, ~0 to match all
* @minor: device file minor number, ~0 to match all
- * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
+ * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
*
* It is considered a complete match if an exception is found that will
* contain the entire range of provided parameters.
@@ -325,9 +316,9 @@ static bool match_exception(struct list_head *exceptions, short type,
struct dev_exception_item *ex;
list_for_each_entry_rcu(ex, exceptions, list) {
- if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
+ if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK))
continue;
- if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
+ if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR))
continue;
if (ex->major != ~0 && ex->major != major)
continue;
@@ -344,10 +335,10 @@ static bool match_exception(struct list_head *exceptions, short type,
/**
* match_exception_partial - iterates the exception list trying to find a partial match
* @exceptions: list of exceptions
- * @type: device type (DEV_BLOCK or DEV_CHAR)
+ * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
* @major: device file major number, ~0 to match all
* @minor: device file minor number, ~0 to match all
- * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
+ * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
*
* It is considered a partial match if an exception's range is found to
* contain *any* of the devices specified by provided parameters. This is
@@ -362,9 +353,9 @@ static bool match_exception_partial(struct list_head *exceptions, short type,
struct dev_exception_item *ex;
list_for_each_entry_rcu(ex, exceptions, list) {
- if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
+ if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK))
continue;
- if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
+ if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR))
continue;
/*
* We must be sure that both the exception and the provided
@@ -647,10 +638,10 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
}
return 0;
case 'b':
- ex.type = DEV_BLOCK;
+ ex.type = DEVCG_DEV_BLOCK;
break;
case 'c':
- ex.type = DEV_CHAR;
+ ex.type = DEVCG_DEV_CHAR;
break;
default:
return -EINVAL;
@@ -703,13 +694,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
for (b++, count = 0; count < 3; count++, b++) {
switch (*b) {
case 'r':
- ex.access |= ACC_READ;
+ ex.access |= DEVCG_ACC_READ;
break;
case 'w':
- ex.access |= ACC_WRITE;
+ ex.access |= DEVCG_ACC_WRITE;
break;
case 'm':
- ex.access |= ACC_MKNOD;
+ ex.access |= DEVCG_ACC_MKNOD;
break;
case '\n':
case '\0':
@@ -806,12 +797,12 @@ struct cgroup_subsys devices_cgrp_subsys = {
* @type: device type
* @major: device major number
* @minor: device minor number
- * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
+ * @access: combination of DEVCG_ACC_WRITE, DEVCG_ACC_READ and DEVCG_ACC_MKNOD
*
* returns 0 on success, -EPERM case the operation is not permitted
*/
-static int __devcgroup_check_permission(short type, u32 major, u32 minor,
- short access)
+int __devcgroup_check_permission(short type, u32 major, u32 minor,
+ short access)
{
struct dev_cgroup *dev_cgroup;
bool rc;
@@ -833,37 +824,3 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
return 0;
}
-
-int __devcgroup_inode_permission(struct inode *inode, int mask)
-{
- short type, access = 0;
-
- if (S_ISBLK(inode->i_mode))
- type = DEV_BLOCK;
- if (S_ISCHR(inode->i_mode))
- type = DEV_CHAR;
- if (mask & MAY_WRITE)
- access |= ACC_WRITE;
- if (mask & MAY_READ)
- access |= ACC_READ;
-
- return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
- access);
-}
-
-int devcgroup_inode_mknod(int mode, dev_t dev)
-{
- short type;
-
- if (!S_ISBLK(mode) && !S_ISCHR(mode))
- return 0;
-
- if (S_ISBLK(mode))
- type = DEV_BLOCK;
- else
- type = DEV_CHAR;
-
- return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
- ACC_MKNOD);
-
-}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e92f62cf933a..b280f37cd057 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -131,6 +131,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_XMIT,
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
};
enum bpf_attach_type {
@@ -140,6 +141,7 @@ enum bpf_attach_type {
BPF_CGROUP_SOCK_OPS,
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
__MAX_BPF_ATTACH_TYPE
};
@@ -990,4 +992,17 @@ struct bpf_perf_event_value {
__u64 running;
};
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ __u32 access_type; /* (access << 16) | type */
+ __u32 major;
+ __u32 minor;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4f0734aa6e93..333a48655ee0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -13,17 +13,17 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i
LDLIBS += -lcap -lelf
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align test_verifier_log
+ test_align test_verifier_log test_dev_cgroup
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
- sockmap_verdict_prog.o
+ sockmap_verdict_prog.o dev_cgroup.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh
include ../lib.mk
-BPFOBJ := $(OUTPUT)/libbpf.a
+BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c
$(TEST_GEN_PROGS): $(BPFOBJ)
diff --git a/samples/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index f3bca3ade0f3..f3bca3ade0f3 100644
--- a/samples/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
diff --git a/samples/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 06485e0002b3..06485e0002b3 100644
--- a/samples/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/dev_cgroup.c
new file mode 100644
index 000000000000..ce41a3475f27
--- /dev/null
+++ b/tools/testing/selftests/bpf/dev_cgroup.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+SEC("cgroup/dev")
+int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
+{
+ short type = ctx->access_type & 0xFFFF;
+#ifdef DEBUG
+ short access = ctx->access_type >> 16;
+ char fmt[] = " %d:%d \n";
+
+ switch (type) {
+ case BPF_DEVCG_DEV_BLOCK:
+ fmt[0] = 'b';
+ break;
+ case BPF_DEVCG_DEV_CHAR:
+ fmt[0] = 'c';
+ break;
+ default:
+ fmt[0] = '?';
+ break;
+ }
+
+ if (access & BPF_DEVCG_ACC_READ)
+ fmt[8] = 'r';
+
+ if (access & BPF_DEVCG_ACC_WRITE)
+ fmt[9] = 'w';
+
+ if (access & BPF_DEVCG_ACC_MKNOD)
+ fmt[10] = 'm';
+
+ bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
+#endif
+
+ /* Allow access to /dev/zero and /dev/random.
+ * Forbid everything else.
+ */
+ if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
+ return 0;
+
+ switch (ctx->minor) {
+ case 5: /* 1:5 /dev/zero */
+ case 9: /* 1:9 /dev/urandom */
+ return 1;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
new file mode 100644
index 000000000000..02c85d6c89b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+
+#define DEV_CGROUP_PROG "./dev_cgroup.o"
+
+#define TEST_CGROUP "test-bpf-based-device-cgroup/"
+
+int main(int argc, char **argv)
+{
+ struct bpf_object *obj;
+ int error = EXIT_FAILURE;
+ int prog_fd, cgroup_fd;
+ __u32 prog_cnt;
+
+ if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
+ &obj, &prog_fd)) {
+ printf("Failed to load DEV_CGROUP program\n");
+ goto err;
+ }
+
+ if (setup_cgroup_environment()) {
+ printf("Failed to load DEV_CGROUP program\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) {
+ printf("Failed to attach DEV_CGROUP program");
+ goto err;
+ }
+
+ if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL,
+ &prog_cnt)) {
+ printf("Failed to query attached programs");
+ goto err;
+ }
+
+ /* All operations with /dev/zero and and /dev/urandom are allowed,
+ * everything else is forbidden.
+ */
+ assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+ assert(system("mknod /tmp/test_dev_cgroup_null c 1 3"));
+ assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+
+ /* /dev/zero is whitelisted */
+ assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+ assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0);
+ assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0);
+
+ /* src is allowed, target is forbidden */
+ assert(system("dd if=/dev/urandom of=/dev/full count=64"));
+
+ /* src is forbidden, target is allowed */
+ assert(system("dd if=/dev/random of=/dev/zero count=64"));
+
+ error = 0;
+ printf("test_dev_cgroup:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+
+ return error;
+}