diff options
Diffstat (limited to 'net/xdp')
-rw-r--r-- | net/xdp/Kconfig | 8 | ||||
-rw-r--r-- | net/xdp/Makefile | 1 | ||||
-rw-r--r-- | net/xdp/xdp_umem.c | 25 | ||||
-rw-r--r-- | net/xdp/xsk.c | 56 | ||||
-rw-r--r-- | net/xdp/xsk.h | 12 | ||||
-rw-r--r-- | net/xdp/xsk_diag.c | 191 |
6 files changed, 282 insertions, 11 deletions
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig index 90e4a7152854..0255b33cff4b 100644 --- a/net/xdp/Kconfig +++ b/net/xdp/Kconfig @@ -5,3 +5,11 @@ config XDP_SOCKETS help XDP sockets allows a channel between XDP programs and userspace applications. + +config XDP_SOCKETS_DIAG + tristate "XDP sockets: monitoring interface" + depends on XDP_SOCKETS + default n + help + Support for PF_XDP sockets monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/xdp/Makefile b/net/xdp/Makefile index 04f073146256..59dbfdf93dca 100644 --- a/net/xdp/Makefile +++ b/net/xdp/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o +obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index d4de871e7d4d..77520eacee8f 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -13,12 +13,15 @@ #include <linux/mm.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> +#include <linux/idr.h> #include "xdp_umem.h" #include "xsk_queue.h" #define XDP_UMEM_MIN_CHUNK_SIZE 2048 +static DEFINE_IDA(umem_ida); + void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) { unsigned long flags; @@ -67,6 +70,7 @@ struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, return NULL; } +EXPORT_SYMBOL(xdp_get_umem_from_qid); static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id) { @@ -125,9 +129,10 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, return 0; err_unreg_umem: - xdp_clear_umem_at_qid(dev, queue_id); if (!force_zc) err = 0; /* fallback to copy mode */ + if (err) + xdp_clear_umem_at_qid(dev, queue_id); out_rtnl_unlock: rtnl_unlock(); return err; @@ -193,6 +198,8 @@ static void xdp_umem_release(struct xdp_umem *umem) xdp_umem_clear_dev(umem); + ida_simple_remove(&umem_ida, umem->id); + if (umem->fq) { xskq_destroy(umem->fq); umem->fq = NULL; @@ -259,10 +266,10 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem) if (!umem->pgs) return -ENOMEM; - down_write(¤t->mm->mmap_sem); - npgs = get_user_pages(umem->address, umem->npgs, - gup_flags, &umem->pgs[0], NULL); - up_write(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); + npgs = get_user_pages_longterm(umem->address, umem->npgs, + gup_flags, &umem->pgs[0], NULL); + up_read(¤t->mm->mmap_sem); if (npgs != umem->npgs) { if (npgs >= 0) { @@ -399,8 +406,16 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) if (!umem) return ERR_PTR(-ENOMEM); + err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); + if (err < 0) { + kfree(umem); + return ERR_PTR(err); + } + umem->id = err; + err = xdp_umem_reg(umem, mr); if (err) { + ida_simple_remove(&umem_ida, umem->id); kfree(umem); return ERR_PTR(err); } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index a03268454a27..6697084e3fdf 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -27,14 +27,10 @@ #include "xsk_queue.h" #include "xdp_umem.h" +#include "xsk.h" #define TX_BATCH_SIZE 16 -static struct xdp_sock *xdp_sk(struct sock *sk) -{ - return (struct xdp_sock *)sk; -} - bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) && @@ -350,6 +346,10 @@ static int xsk_release(struct socket *sock) net = sock_net(sk); + mutex_lock(&net->xdp.lock); + sk_del_node_init_rcu(sk); + mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); @@ -366,7 +366,6 @@ static int xsk_release(struct socket *sock) xskq_destroy(xs->rx); xskq_destroy(xs->tx); - xdp_put_umem(xs->umem); sock_orphan(sk); sock->sk = NULL; @@ -669,6 +668,8 @@ static int xsk_mmap(struct file *file, struct socket *sock, if (!umem) return -EINVAL; + /* Matches the smp_wmb() in XDP_UMEM_REG */ + smp_rmb(); if (offset == XDP_UMEM_PGOFF_FILL_RING) q = READ_ONCE(umem->fq); else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) @@ -678,6 +679,8 @@ static int xsk_mmap(struct file *file, struct socket *sock, if (!q) return -EINVAL; + /* Matches the smp_wmb() in xsk_init_queue */ + smp_rmb(); qpg = virt_to_head_page(q->ring); if (size > (PAGE_SIZE << compound_order(qpg))) return -EINVAL; @@ -714,6 +717,18 @@ static const struct proto_ops xsk_proto_ops = { .sendpage = sock_no_sendpage, }; +static void xsk_destruct(struct sock *sk) +{ + struct xdp_sock *xs = xdp_sk(sk); + + if (!sock_flag(sk, SOCK_DEAD)) + return; + + xdp_put_umem(xs->umem); + + sk_refcnt_debug_dec(sk); +} + static int xsk_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -740,12 +755,19 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sk->sk_family = PF_XDP; + sk->sk_destruct = xsk_destruct; + sk_refcnt_debug_inc(sk); + sock_set_flag(sk, SOCK_RCU_FREE); xs = xdp_sk(sk); mutex_init(&xs->mutex); spin_lock_init(&xs->tx_completion_lock); + mutex_lock(&net->xdp.lock); + sk_add_node_rcu(sk, &net->xdp.list); + mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, &xsk_proto, 1); local_bh_enable(); @@ -759,6 +781,23 @@ static const struct net_proto_family xsk_family_ops = { .owner = THIS_MODULE, }; +static int __net_init xsk_net_init(struct net *net) +{ + mutex_init(&net->xdp.lock); + INIT_HLIST_HEAD(&net->xdp.list); + return 0; +} + +static void __net_exit xsk_net_exit(struct net *net) +{ + WARN_ON_ONCE(!hlist_empty(&net->xdp.list)); +} + +static struct pernet_operations xsk_net_ops = { + .init = xsk_net_init, + .exit = xsk_net_exit, +}; + static int __init xsk_init(void) { int err; @@ -771,8 +810,13 @@ static int __init xsk_init(void) if (err) goto out_proto; + err = register_pernet_subsys(&xsk_net_ops); + if (err) + goto out_sk; return 0; +out_sk: + sock_unregister(PF_XDP); out_proto: proto_unregister(&xsk_proto); out: diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h new file mode 100644 index 000000000000..ba8120610426 --- /dev/null +++ b/net/xdp/xsk.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2019 Intel Corporation. */ + +#ifndef XSK_H_ +#define XSK_H_ + +static inline struct xdp_sock *xdp_sk(struct sock *sk) +{ + return (struct xdp_sock *)sk; +} + +#endif /* XSK_H_ */ diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c new file mode 100644 index 000000000000..661d007c3b28 --- /dev/null +++ b/net/xdp/xsk_diag.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 +/* XDP sockets monitoring support + * + * Copyright(c) 2019 Intel Corporation. + * + * Author: Björn Töpel <bjorn.topel@intel.com> + */ + +#include <linux/module.h> +#include <net/xdp_sock.h> +#include <linux/xdp_diag.h> +#include <linux/sock_diag.h> + +#include "xsk_queue.h" +#include "xsk.h" + +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb) +{ + struct xdp_diag_info di = {}; + + di.ifindex = xs->dev ? xs->dev->ifindex : 0; + di.queue_id = xs->queue_id; + return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di); +} + +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type, + struct sk_buff *nlskb) +{ + struct xdp_diag_ring dr = {}; + + dr.entries = queue->nentries; + return nla_put(nlskb, nl_type, sizeof(dr), &dr); +} + +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, + struct sk_buff *nlskb) +{ + int err = 0; + + if (xs->rx) + err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb); + if (!err && xs->tx) + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb); + return err; +} + +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) +{ + struct xdp_umem *umem = xs->umem; + struct xdp_diag_umem du = {}; + int err; + + if (!umem) + return 0; + + du.id = umem->id; + du.size = umem->size; + du.num_pages = umem->npgs; + du.chunk_size = (__u32)(~umem->chunk_mask + 1); + du.headroom = umem->headroom; + du.ifindex = umem->dev ? umem->dev->ifindex : 0; + du.queue_id = umem->queue_id; + du.flags = 0; + if (umem->zc) + du.flags |= XDP_DU_F_ZEROCOPY; + du.refs = refcount_read(&umem->users); + + err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); + + if (!err && umem->fq) + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); + if (!err && umem->cq) { + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, + nlskb); + } + return err; +} + +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, + struct xdp_diag_req *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct xdp_sock *xs = xdp_sk(sk); + struct xdp_diag_msg *msg; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg), + flags); + if (!nlh) + return -EMSGSIZE; + + msg = nlmsg_data(nlh); + memset(msg, 0, sizeof(*msg)); + msg->xdiag_family = AF_XDP; + msg->xdiag_type = sk->sk_type; + msg->xdiag_ino = sk_ino; + sock_diag_save_cookie(sk, msg->xdiag_cookie); + + if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_INFO) && + nla_put_u32(nlskb, XDP_DIAG_UID, + from_kuid_munged(user_ns, sock_i_uid(sk)))) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_RING_CFG) && + xsk_diag_put_rings_cfg(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_UMEM) && + xsk_diag_put_umem(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + nlmsg_end(nlskb, nlh); + return 0; + +out_nlmsg_trim: + nlmsg_cancel(nlskb, nlh); + return -EMSGSIZE; +} + +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb) +{ + struct xdp_diag_req *req = nlmsg_data(cb->nlh); + struct net *net = sock_net(nlskb->sk); + int num = 0, s_num = cb->args[0]; + struct sock *sk; + + mutex_lock(&net->xdp.lock); + + sk_for_each(sk, &net->xdp.list) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num++ < s_num) + continue; + + if (xsk_diag_fill(sk, nlskb, req, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + num--; + break; + } + } + + mutex_unlock(&net->xdp.lock); + cb->args[0] = num; + return nlskb->len; +} + +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) +{ + struct netlink_dump_control c = { .dump = xsk_diag_dump }; + int hdrlen = sizeof(struct xdp_diag_req); + struct net *net = sock_net(nlskb->sk); + + if (nlmsg_len(hdr) < hdrlen) + return -EINVAL; + + if (!(hdr->nlmsg_flags & NLM_F_DUMP)) + return -EOPNOTSUPP; + + return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c); +} + +static const struct sock_diag_handler xsk_diag_handler = { + .family = AF_XDP, + .dump = xsk_diag_handler_dump, +}; + +static int __init xsk_diag_init(void) +{ + return sock_diag_register(&xsk_diag_handler); +} + +static void __exit xsk_diag_exit(void) +{ + sock_diag_unregister(&xsk_diag_handler); +} + +module_init(xsk_diag_init); +module_exit(xsk_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP); |