diff options
Diffstat (limited to 'net/ipv6')
67 files changed, 3863 insertions, 808 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2343e4f2e0bf..e2afe677a9d9 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -75,6 +75,19 @@ config INET6_ESP If unsure, say Y. +config INET6_ESP_OFFLOAD + tristate "IPv6: ESP transformation offload" + depends on INET6_ESP + select XFRM_OFFLOAD + default n + ---help--- + Support for ESP transformation offload. This makes sense + only if this system really does IPsec and want to do it + with high throughput. A typical desktop system does not + need it, even if it does IPsec. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL @@ -208,6 +221,7 @@ config IPV6_TUNNEL tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" select INET6_TUNNEL select DST_CACHE + select GRO_CELLS ---help--- Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in RFC 2473. @@ -289,4 +303,39 @@ config IPV6_PIMSM_V2 Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. +config IPV6_SEG6_LWTUNNEL + bool "IPv6: Segment Routing Header encapsulation support" + depends on IPV6 + select LWTUNNEL + ---help--- + Support for encapsulation of packets within an outer IPv6 + header and a Segment Routing Header using the lightweight + tunnels mechanism. + + If unsure, say N. + +config IPV6_SEG6_INLINE + bool "IPv6: direct Segment Routing Header insertion " + depends on IPV6_SEG6_LWTUNNEL + ---help--- + Support for direct insertion of the Segment Routing Header, + also known as inline mode. Be aware that direct insertion of + extension headers (as opposed to encapsulation) may break + multiple mechanisms such as PMTUD or IPSec AH. Use this feature + only if you know exactly what you are doing. + + If unsure, say N. + +config IPV6_SEG6_HMAC + bool "IPv6: Segment Routing HMAC support" + depends on IPV6 + select CRYPTO_HMAC + select CRYPTO_SHA1 + select CRYPTO_SHA256 + ---help--- + Support for HMAC signature generation and verification + of SR-enabled packets. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c174ccb340a1..217e9ff0e24b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o + udp_offload.o seg6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -23,11 +23,14 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o ipv6-$(CONFIG_NETLABEL) += calipso.o +ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o +ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o +obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4bc5ba3ae452..80ce478c4851 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -43,6 +43,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> @@ -238,6 +239,12 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif + .enhanced_dad = 1, + .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -284,6 +291,12 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif + .enhanced_dad = 1, + .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, }; /* Check if a valid qdisc is available */ @@ -376,9 +389,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); if (ndev->cnf.stable_secret.initialized) - ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; else - ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; + ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode; ndev->cnf.mtu6 = dev->mtu; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -2134,12 +2147,14 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) case ARPHRD_SIT: return addrconf_ifid_sit(eui, dev); case ARPHRD_IPGRE: + case ARPHRD_TUNNEL: return addrconf_ifid_gre(eui, dev); case ARPHRD_6LOWPAN: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); case ARPHRD_TUNNEL6: + case ARPHRD_IP6GRE: return addrconf_ifid_ip6tnl(eui, dev); } return -1; @@ -2377,8 +2392,8 @@ static void manage_tempaddrs(struct inet6_dev *idev, static bool is_addr_mode_generate_stable(struct inet6_dev *idev) { - return idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || - idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; + return idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; } int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, @@ -3142,7 +3157,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - switch (idev->addr_gen_mode) { + switch (idev->cnf.addr_gen_mode) { case IN6_ADDR_GEN_MODE_RANDOM: ipv6_gen_mode_random_init(idev); /* fallthrough */ @@ -3183,6 +3198,9 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN) && + (dev->type != ARPHRD_IP6GRE) && + (dev->type != ARPHRD_IPGRE) && + (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE)) { /* Alas, we support only Ethernet autoconfiguration. */ return; @@ -3194,8 +3212,8 @@ static void addrconf_dev_config(struct net_device *dev) /* this device type has no EUI support */ if (dev->type == ARPHRD_NONE && - idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) - idev->addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; addrconf_addr_gen(idev, false); } @@ -3376,9 +3394,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } if (idev) { - if (idev->if_flags & IF_READY) - /* device is already configured. */ + if (idev->if_flags & IF_READY) { + /* device is already configured - + * but resend MLD reports, we might + * have roamed and need to update + * multicast snooping switches + */ + ipv6_mc_up(idev); break; + } idev->if_flags |= IF_READY; } @@ -3602,14 +3626,19 @@ restart: INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { struct rt6_info *rt = NULL; + bool keep; addrconf_del_dad_work(ifa); + keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && + !addr_is_local(&ifa->addr); + if (!keep) + list_move(&ifa->if_list, &del_list); + write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); - if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && - !addr_is_local(&ifa->addr)) { + if (keep) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; ifa->state = 0; @@ -3621,8 +3650,6 @@ restart: } else { state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; - - list_move(&ifa->if_list, &del_list); } spin_unlock_bh(&ifa->lock); @@ -3727,12 +3754,21 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { unsigned long rand_num; struct inet6_dev *idev = ifp->idev; + u64 nonce; if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); + nonce = 0; + if (idev->cnf.enhanced_dad || + dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) { + do + get_random_bytes(&nonce, 6); + while (nonce == 0); + } + ifp->dad_nonce = nonce; ifp->dad_probes = idev->cnf.dad_transmits; addrconf_mod_dad_work(ifp, rand_num); } @@ -3910,7 +3946,8 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any); + ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, + ifp->dad_nonce); out: in6_ifa_put(ifp); rtnl_unlock(); @@ -3989,6 +4026,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) if (bump_id) rt_genid_bump_ipv6(dev_net(dev)); + + /* Make sure that a new temporary address will be created + * before this temporary address becomes deprecated. + */ + if (ifp->flags & IFA_F_TEMPORARY) + addrconf_verify_rtnl(); } static void addrconf_dad_run(struct inet6_dev *idev) @@ -4868,6 +4911,13 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) struct net *net = dev_net(ifa->idev->dev); int err = -ENOBUFS; + /* Don't send DELADDR notification for TENTATIVE address, + * since NEWADDR notification is sent only after removing + * TENTATIVE flag. + */ + if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR) + return; + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); if (!skb) goto errout; @@ -4950,6 +5000,12 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; + array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; +#endif + array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; + array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; } static inline size_t inet6_ifla6_size(void) @@ -5061,7 +5117,7 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, if (!nla) goto nla_put_failure; - if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) goto nla_put_failure; read_lock_bh(&idev->lock); @@ -5179,6 +5235,26 @@ static int inet6_validate_link_af(const struct net_device *dev, return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); } +static int check_addr_gen_mode(int mode) +{ + if (mode != IN6_ADDR_GEN_MODE_EUI64 && + mode != IN6_ADDR_GEN_MODE_NONE && + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + mode != IN6_ADDR_GEN_MODE_RANDOM) + return -EINVAL; + return 1; +} + +static int check_stable_privacy(struct inet6_dev *idev, struct net *net, + int mode) +{ + if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !idev->cnf.stable_secret.initialized && + !net->ipv6.devconf_dflt->stable_secret.initialized) + return -EINVAL; + return 1; +} + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) { int err = -EINVAL; @@ -5200,18 +5276,11 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); - if (mode != IN6_ADDR_GEN_MODE_EUI64 && - mode != IN6_ADDR_GEN_MODE_NONE && - mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && - mode != IN6_ADDR_GEN_MODE_RANDOM) - return -EINVAL; - - if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && - !idev->cnf.stable_secret.initialized && - !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized) + if (check_addr_gen_mode(mode) < 0 || + check_stable_privacy(idev, dev_net(dev), mode) < 0) return -EINVAL; - idev->addr_gen_mode = mode; + idev->cnf.addr_gen_mode = mode; err = 0; } @@ -5515,8 +5584,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -5525,7 +5593,6 @@ static void addrconf_disable_change(struct net *net, __s32 newf) dev_disable_change(idev); } } - rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) @@ -5620,6 +5687,55 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return ret; } +static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret = 0; + int new_val; + struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; + struct net *net = (struct net *)ctl->extra2; + + if (!rtnl_trylock()) + return restart_syscall(); + + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + if (write) { + new_val = *((int *)ctl->data); + + if (check_addr_gen_mode(new_val) < 0) { + ret = -EINVAL; + goto out; + } + + /* request for default */ + if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { + ipv6_devconf_dflt.addr_gen_mode = new_val; + + /* request for individual net device */ + } else { + if (!idev) + goto out; + + if (check_stable_privacy(idev, net, new_val) < 0) { + ret = -EINVAL; + goto out; + } + + if (idev->cnf.addr_gen_mode != new_val) { + idev->cnf.addr_gen_mode = new_val; + addrconf_dev_config(idev->dev); + } + } + } + +out: + rtnl_unlock(); + + return ret; +} + static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -5670,14 +5786,14 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, struct inet6_dev *idev = __in6_dev_get(dev); if (idev) { - idev->addr_gen_mode = + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; } } } else { struct inet6_dev *idev = ctl->extra1; - idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; } out: @@ -6042,6 +6158,36 @@ static const struct ctl_table addrconf_sysctl[] = { }, { + .procname = "seg6_enabled", + .data = &ipv6_devconf.seg6_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IPV6_SEG6_HMAC + { + .procname = "seg6_require_hmac", + .data = &ipv6_devconf.seg6_require_hmac, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif + { + .procname = "enhanced_dad", + .data = &ipv6_devconf.enhanced_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "addr_gen_mode", + .data = &ipv6_devconf.addr_gen_mode, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_addr_gen_mode, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..a9a9553ee63d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,8 +61,9 @@ #include <net/ip6_tunnel.h> #endif #include <net/calipso.h> +#include <net/seg6.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/mroute6.h> #include "ip6_offload.h" @@ -257,6 +258,14 @@ lookup_protocol: goto out; } } + + if (!kern) { + err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); + if (err) { + sk_common_release(sk); + goto out; + } + } out: return err; out_rcu_unlock: @@ -293,7 +302,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; snum = ntohs(addr->sin6_port); - if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) + if (snum && snum < inet_prot_sock(net) && + !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; lock_sock(sk); @@ -678,6 +688,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); @@ -909,12 +920,12 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - err = icmpv6_init(); - if (err) - goto icmp_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; + err = icmpv6_init(); + if (err) + goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -990,6 +1001,10 @@ static int __init inet6_init(void) if (err) goto calipso_fail; + err = seg6_init(); + if (err) + goto seg6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1000,8 +1015,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - calipso_exit(); + seg6_exit(); #endif +seg6_fail: + calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: @@ -1044,10 +1061,10 @@ igmp_fail: ndisc_cleanup(); ndisc_fail: ip6_mr_cleanup(); -ipmr_fail: - icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); +ipmr_fail: + icmpv6_cleanup(); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0630a4d5daaa..dda6035e3b84 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -474,6 +474,9 @@ static void ah6_input_done(struct crypto_async_request *base, int err) int hdr_len = skb_network_header_len(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + if (err) + goto out; + work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); @@ -662,9 +665,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ccf40550c475..e011122ebd43 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -33,7 +33,7 @@ #include <net/dsfield.h> #include <linux/errqueue.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> static bool ipv6_mapped_addr_any(const struct in6_addr *a) { @@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = np->flow_label; + fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; @@ -166,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (np->sndflow) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type == IPV6_ADDR_ANY) { + if (ipv6_addr_any(&usin->sin6_addr)) { /* * connect to self */ - usin->sin6_addr.s6_addr[15] = 0x01; + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; } + addr_type = ipv6_addr_type(&usin->sin6_addr); + daddr = &usin->sin6_addr; - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { struct sockaddr_in sin; if (__ipv6_only_sock(sk)) { @@ -400,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) * At one point, excluding local errors was a quick test to identify icmp/icmp6 * errors. This is no longer true, but the test remained, so the v6 stack, * unlike v4, also honors cmsg requests on all wifi and timestamp errors. - * - * Timestamp code paths do not initialize the fields expected by cmsg: - * the PKTINFO fields in skb->cb[]. Fill those in here. */ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, struct sock_exterr_skb *serr) @@ -414,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) return false; - if (!skb->dev) + if (!IP6CB(skb)->iif) return false; - if (skb->protocol == htons(ETH_P_IPV6)) - IP6CB(skb)->iif = skb->dev->ifindex; - else - PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex; - return true; } @@ -700,7 +697,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, struct sockaddr_in6 sin6; __be16 *ports = (__be16 *) skb_transport_header(skb); - if (skb_transport_offset(skb) + 4 <= skb->len) { + if (skb_transport_offset(skb) + 4 <= (int)skb->len) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. @@ -717,6 +714,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } + if (np->rxopt.bits.recvfragsize && opt->frag_max_size) { + int val = opt->frag_max_size; + + put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val); + } } void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 111ba55fd512..ff54faa75631 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -44,6 +44,8 @@ #include <net/protocol.h> #include <linux/icmpv6.h> +#include <linux/highmem.h> + struct esp_skb_cb { struct xfrm_skb_cb xfrm; void *tmp; @@ -114,11 +116,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } +static void esp_ssg_unref(struct xfrm_state *x, void *tmp) +{ + __be32 *seqhi; + struct crypto_aead *aead = x->data; + int seqhilen = 0; + u8 *iv; + struct aead_request *req; + struct scatterlist *sg; + + if (x->props.flags & XFRM_STATE_ESN) + seqhilen += sizeof(__be32); + + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); + req = esp_tmp_req(aead, iv); + + /* Unref skb_frag_pages in the src scatterlist if necessary. + * Skip the first sg which comes from skb->data. + */ + if (req->src != req->dst) + for (sg = sg_next(req->src); sg; sg = sg_next(sg)) + put_page(sg_page(sg)); +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + void *tmp; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; - kfree(ESP_SKB_CB(skb)->tmp); + tmp = ESP_SKB_CB(skb)->tmp; + esp_ssg_unref(x, tmp); + kfree(tmp); xfrm_output_resume(skb, err); } @@ -138,6 +169,27 @@ static void esp_output_restore_header(struct sk_buff *skb) esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); } +static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, + struct ip_esp_hdr *esph, + __be32 *seqhi) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * encryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); + *seqhi = esph->spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + } + + esph->spi = x->id.spi; + + return esph; +} + static void esp_output_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -146,14 +198,31 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } +static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) +{ + /* Fill padding... */ + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } + do { + int i; + for (i = 0; i < plen - 2; i++) + tail[i] = i + 1; + } while (0); + tail[plen - 2] = plen - 2; + tail[plen - 1] = proto; +} + static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; - struct scatterlist *sg; + struct scatterlist *sg, *dsg; struct sk_buff *trailer; + struct page *page; void *tmp; int blksize; int clen; @@ -164,10 +233,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int nfrags; int assoclen; int seqhilen; + int tailen; u8 *iv; u8 *tail; + u8 *vaddr; __be32 *seqhi; __be64 seqno; + __u8 proto = *skb_mac_header(skb); /* skb is pure payload to encrypt */ aead = x->data; @@ -186,11 +258,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); plen = clen - skb->len - tfclen; - - err = skb_cow_data(skb, tfclen + plen + alen, &trailer); - if (err < 0) - goto error; - nfrags = err; + tailen = tfclen + plen + alen; assoclen = sizeof(*esph); seqhilen = 0; @@ -200,59 +268,152 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) assoclen += seqhilen; } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); - if (!tmp) { - err = -ENOMEM; - goto error; + *skb_mac_header(skb) = IPPROTO_ESP; + esph = ip_esp_hdr(skb); + + if (!skb_cloned(skb)) { + if (tailen <= skb_availroom(skb)) { + nfrags = 1; + trailer = skb; + tail = skb_tail_pointer(trailer); + + goto skip_cow; + } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS) + && !skb_has_frag_list(skb)) { + int allocsize; + struct sock *sk = skb->sk; + struct page_frag *pfrag = &x->xfrag; + + allocsize = ALIGN(tailen, L1_CACHE_BYTES); + + spin_lock_bh(&x->lock); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + goto cow; + } + + page = pfrag->page; + get_page(page); + + vaddr = kmap_atomic(page); + + tail = vaddr + pfrag->offset; + + esp_output_fill_trailer(tail, tfclen, plen, proto); + + kunmap_atomic(vaddr); + + nfrags = skb_shinfo(skb)->nr_frags; + + __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, + tailen); + skb_shinfo(skb)->nr_frags = ++nfrags; + + pfrag->offset = pfrag->offset + allocsize; + nfrags++; + + skb->len += tailen; + skb->data_len += tailen; + skb->truesize += tailen; + if (sk) + atomic_add(tailen, &sk->sk_wmem_alloc); + + skb_push(skb, -skb_network_offset(skb)); + + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esph->spi = x->id.spi; + + tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen); + if (!tmp) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); + dsg = &sg[nfrags]; + + esph = esp_output_set_esn(skb, esph, seqhi); + + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } + + skb_shinfo(skb)->nr_frags = 1; + + page = pfrag->page; + get_page(page); + /* replace page frags in skb with new page */ + __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); + pfrag->offset = pfrag->offset + allocsize; + + sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); + skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + spin_unlock_bh(&x->lock); + + goto skip_cow2; + } } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); - req = esp_tmp_req(aead, iv); - sg = esp_req_sg(aead, req); +cow: + err = skb_cow_data(skb, tailen, &trailer); + if (err < 0) + goto error; + nfrags = err; - /* Fill padding... */ tail = skb_tail_pointer(trailer); - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = *skb_mac_header(skb); - pskb_put(skb, trailer, clen - skb->len + alen); + esph = ip_esp_hdr(skb); +skip_cow: + esp_output_fill_trailer(tail, tfclen, plen, proto); + + pskb_put(skb, trailer, clen - skb->len + alen); skb_push(skb, -skb_network_offset(skb)); - esph = ip_esp_hdr(skb); - *skb_mac_header(skb) = IPPROTO_ESP; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esph->spi = x->id.spi; - aead_request_set_callback(req, 0, esp_output_done, skb); - - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * encryption. - */ - if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); - aead_request_set_callback(req, 0, esp_output_done_esn, skb); + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + if (!tmp) { + err = -ENOMEM; + goto error; } - esph->spi = x->id.spi; + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); + dsg = sg; + + esph = esp_output_set_esn(skb, esph, seqhi); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, (unsigned char *)esph - skb->data, assoclen + ivlen + clen + alen); - aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); +skip_cow2: + if ((x->props.flags & XFRM_STATE_ESN)) + aead_request_set_callback(req, 0, esp_output_done_esn, skb); + else + aead_request_set_callback(req, 0, esp_output_done, skb); + + aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + @@ -278,6 +439,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esp_output_restore_header(skb); } + if (sg != dsg) + esp_ssg_unref(x, tmp); kfree(tmp); error: @@ -343,6 +506,23 @@ static void esp_input_restore_header(struct sk_buff *skb) __skb_pull(skb, 4); } +static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * decryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)skb_push(skb, 4); + *seqhi = esph->spi; + esph->spi = esph->seq_no; + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; + } +} + static void esp_input_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -378,14 +558,6 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } - nfrags = skb_cow_data(skb, 0, &trailer); - if (nfrags < 0) { - ret = -EINVAL; - goto out; - } - - ret = -ENOMEM; - assoclen = sizeof(*esph); seqhilen = 0; @@ -394,6 +566,27 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) assoclen += seqhilen; } + if (!skb_cloned(skb)) { + if (!skb_is_nonlinear(skb)) { + nfrags = 1; + + goto skip_cow; + } else if (!skb_has_frag_list(skb)) { + nfrags = skb_shinfo(skb)->nr_frags; + nfrags++; + + goto skip_cow; + } + } + + nfrags = skb_cow_data(skb, 0, &trailer); + if (nfrags < 0) { + ret = -EINVAL; + goto out; + } + +skip_cow: + ret = -ENOMEM; tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) goto out; @@ -404,26 +597,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); - skb->ip_summed = CHECKSUM_NONE; + esp_input_set_header(skb, seqhi); - esph = (struct ip_esp_hdr *)skb->data; + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); - aead_request_set_callback(req, 0, esp_input_done, skb); + skb->ip_summed = CHECKSUM_NONE; - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * decryption. - */ - if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)skb_push(skb, 4); - *seqhi = esph->spi; - esph->spi = esph->seq_no; - esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; + if ((x->props.flags & XFRM_STATE_ESN)) aead_request_set_callback(req, 0, esp_input_done_esn, skb); - } - - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + else + aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); @@ -474,9 +658,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c new file mode 100644 index 000000000000..d914eb93204a --- /dev/null +++ b/net/ipv6/esp6_offload.c @@ -0,0 +1,108 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET implementation + * + * Copyright (C) 2016 secunet Security Networks AG + * Author: Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * ESP GRO support + */ + +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <crypto/aead.h> +#include <crypto/authenc.h> +#include <linux/err.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/xfrm.h> +#include <net/esp.h> +#include <linux/scatterlist.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <net/ip6_route.h> +#include <net/ipv6.h> +#include <linux/icmpv6.h> + +static struct sk_buff **esp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + struct xfrm_offload *xo; + struct xfrm_state *x; + __be32 seq; + __be32 spi; + int err; + + skb_pull(skb, offset); + + if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + goto out; + + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET6); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } + xo->flags |= XFRM_GRO; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_SPI_SKB_CB(skb)->seq = seq; + + /* We don't need to handle errors from xfrm_input, it does all + * the error handling and frees the resources on error. */ + xfrm_input(skb, IPPROTO_ESP, spi, -2); + + return ERR_PTR(-EINPROGRESS); +out: + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + +static const struct net_offload esp6_offload = { + .callbacks = { + .gro_receive = esp6_gro_receive, + }, +}; + +static int __init esp6_offload_init(void) +{ + return inet6_add_offload(&esp6_offload, IPPROTO_ESP); +} + +static void __exit esp6_offload_exit(void) +{ + inet6_del_offload(&esp6_offload, IPPROTO_ESP); +} + +module_init(esp6_offload_init); +module_exit(esp6_offload_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 139ceb68bd37..25192a3b0cd7 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,6 +47,11 @@ #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif +#include <linux/seg6.h> +#include <net/seg6.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif #include <linux/uaccess.h> @@ -227,7 +232,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) ipv6h->saddr = hao->addr; hao->addr = tmp_addr; - if (skb->tstamp.tv64 == 0) + if (skb->tstamp == 0) __net_timestamp(skb); return true; @@ -286,6 +291,156 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } +static void seg6_update_csum(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *hdr; + struct in6_addr *addr; + __be32 from, to; + + /* srh is at transport offset and seg_left is already decremented + * but daddr is not yet updated with next segment + */ + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + addr = hdr->segments + hdr->segments_left; + + hdr->segments_left++; + from = *(__be32 *)hdr; + + hdr->segments_left--; + to = *(__be32 *)hdr; + + /* update skb csum with diff resulting from seg_left decrement */ + + update_csum_diff4(skb, from, to); + + /* compute csum diff between current and next segment and update */ + + update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), + (__be32 *)addr); +} + +static int ipv6_srh_rcv(struct sk_buff *skb) +{ + struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + struct ipv6_sr_hdr *hdr; + struct inet6_dev *idev; + struct in6_addr *addr; + int accept_seg6; + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + idev = __in6_dev_get(skb->dev); + + accept_seg6 = net->ipv6.devconf_all->seg6_enabled; + if (accept_seg6 > idev->cnf.seg6_enabled) + accept_seg6 = idev->cnf.seg6_enabled; + + if (!accept_seg6) { + kfree_skb(skb); + return -1; + } + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (!seg6_hmac_validate_skb(skb)) { + kfree_skb(skb); + return -1; + } +#endif + +looped_back: + if (hdr->segments_left == 0) { + if (hdr->nexthdr == NEXTHDR_IPV6) { + int offset = (hdr->hdrlen + 1) << 3; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); + + if (!pskb_pull(skb, offset)) { + kfree_skb(skb); + return -1; + } + skb_postpull_rcsum(skb, skb_transport_header(skb), + offset); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + __skb_tunnel_rx(skb, skb->dev, net); + + netif_rx(skb); + return -1; + } + + opt->srcrt = skb_network_header_len(skb); + opt->lastopt = opt->srcrt; + skb->transport_header += (hdr->hdrlen + 1) << 3; + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); + + return 1; + } + + if (hdr->segments_left >= (hdr->hdrlen >> 1)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); + return -1; + } + + if (skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -1; + } + } + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + hdr->segments_left--; + addr = hdr->segments + hdr->segments_left; + + skb_push(skb, sizeof(struct ipv6hdr)); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + seg6_update_csum(skb); + + ipv6_hdr(skb)->daddr = *addr; + + skb_dst_drop(skb); + + ip6_route_input(skb); + + if (skb_dst(skb)->error) { + dst_input(skb); + return -1; + } + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (ipv6_hdr(skb)->hop_limit <= 1) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + kfree_skb(skb); + return -1; + } + ipv6_hdr(skb)->hop_limit--; + + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; + } + + dst_input(skb); + + return -1; +} + /******************************** Routing header. ********************************/ @@ -326,6 +481,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) return -1; } + /* segment routing */ + if (hdr->type == IPV6_SRCRT_TYPE_4) + return ipv6_srh_rcv(skb); + looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { @@ -679,9 +838,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb) * for headers. */ -static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, - struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p) +static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) { struct rt0_hdr *phdr, *ihdr; int hops; @@ -704,6 +863,62 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, *proto = NEXTHDR_ROUTING; } +static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + struct ipv6_sr_hdr *sr_phdr, *sr_ihdr; + int plen, hops; + + sr_ihdr = (struct ipv6_sr_hdr *)opt; + plen = (sr_ihdr->hdrlen + 1) << 3; + + sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen); + memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr)); + + hops = sr_ihdr->first_segment + 1; + memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1, + (hops - 1) * sizeof(struct in6_addr)); + + sr_phdr->segments[0] = **addr_p; + *addr_p = &sr_ihdr->segments[hops - 1]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(sr_phdr)) { + struct net *net = NULL; + + if (skb->dev) + net = dev_net(skb->dev); + else if (skb->sk) + net = sock_net(skb->sk); + + WARN_ON(!net); + + if (net) + seg6_push_hmac(net, saddr, sr_phdr); + } +#endif + + sr_phdr->nexthdr = *proto; + *proto = NEXTHDR_ROUTING; +} + +static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + switch (opt->type) { + case IPV6_SRCRT_TYPE_0: + ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); + break; + case IPV6_SRCRT_TYPE_4: + ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr); + break; + default: + break; + } +} + static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) { struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); @@ -715,10 +930,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, - struct in6_addr **daddr) + struct in6_addr **daddr, struct in6_addr *saddr) { if (opt->srcrt) { - ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); + ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); /* * IPV6_RTHDRDSTOPTS is ignored * unless IPV6_RTHDR is set (RFC3542). @@ -945,7 +1160,22 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, return NULL; *orig = fl6->daddr; - fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + + switch (opt->srcrt->type) { + case IPV6_SRCRT_TYPE_0: + fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + break; + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; + + fl6->daddr = srh->segments[srh->first_segment]; + break; + } + default: + return NULL; + } + return orig; } EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2772004ba5a1..230b5aac9f03 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -70,7 +70,7 @@ #include <net/dsfield.h> #include <net/l3mdev.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * The ICMP socket(s). This is the most convenient way to flow control @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -109,19 +110,17 @@ static const struct inet6_protocol icmpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +/* Called with BH disabled */ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) { struct sock *sk; - local_bh_disable(); - sk = icmpv6_sk(net); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. */ - local_bh_enable(); return NULL; } return sk; @@ -129,7 +128,7 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) static __inline__ void icmpv6_xmit_unlock(struct sock *sk) { - spin_unlock_bh(&sk->sk_lock.slock); + spin_unlock(&sk->sk_lock.slock); } /* @@ -167,6 +166,30 @@ static bool is_ineligible(const struct sk_buff *skb) return false; } +static bool icmpv6_mask_allow(int type) +{ + /* Informational messages are not limited. */ + if (type & ICMPV6_INFOMSG_MASK) + return true; + + /* Do not limit pmtu discovery, it would break it. */ + if (type == ICMPV6_PKT_TOOBIG) + return true; + + return false; +} + +static bool icmpv6_global_allow(int type) +{ + if (icmpv6_mask_allow(type)) + return true; + + if (icmp_global_allow()) + return true; + + return false; +} + /* * Check the ICMP output rate limit */ @@ -177,12 +200,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct dst_entry *dst; bool res = false; - /* Informational messages are not limited. */ - if (type & ICMPV6_INFOMSG_MASK) - return true; - - /* Do not limit pmtu discovery, it would break it. */ - if (type == ICMPV6_PKT_TOOBIG) + if (icmpv6_mask_allow(type)) return true; /* @@ -199,20 +217,16 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; + struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - if (icmp_global_allow()) { - struct inet_peer *peer; - - peer = inet_getpeer_v6(net->ipv6.peers, - &fl6->daddr, 1); - res = inet_peer_xrlim_allow(peer, tmo); - if (peer) - inet_putpeer(peer); - } + peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1); + res = inet_peer_xrlim_allow(peer, tmo); + if (peer) + inet_putpeer(peer); } dst_release(dst); return res; @@ -473,6 +487,13 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, return; } + /* Needed by both icmp_global_allow and icmpv6_xmit_lock */ + local_bh_disable(); + + /* Check global sysctl_icmp_msgs_per_sec ratelimit */ + if (!icmpv6_global_allow(type)) + goto out_bh_enable; + mip6_addr_swap(skb); memset(&fl6, 0, sizeof(fl6)); @@ -486,11 +507,13 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (!sk) - return; + goto out_bh_enable; + sk->sk_mark = mark; np = inet6_sk(sk); @@ -550,6 +573,8 @@ out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); +out_bh_enable: + local_bh_enable(); } /* Slightly more convenient version of icmp6_send. @@ -660,11 +685,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + local_bh_disable(); sk = icmpv6_xmit_lock(net); if (!sk) - return; + goto out_bh_enable; sk->sk_mark = mark; np = inet6_sk(sk); @@ -706,6 +733,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) dst_release(dst); out: icmpv6_xmit_unlock(sk); +out_bh_enable: + local_bh_enable(); } void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index e50c27a93e17..ce1aae4a7fc8 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -6,29 +6,88 @@ #include <linux/socket.h> #include <linux/types.h> #include <net/checksum.h> +#include <net/dst_cache.h> #include <net/ip.h> #include <net/ip6_fib.h> +#include <net/ip6_route.h> #include <net/lwtunnel.h> #include <net/protocol.h> #include <uapi/linux/ila.h> #include "ila.h" +struct ila_lwt { + struct ila_params p; + struct dst_cache dst_cache; + u32 connected : 1; +}; + +static inline struct ila_lwt *ila_lwt_lwtunnel( + struct lwtunnel_state *lwt) +{ + return (struct ila_lwt *)lwt->data; +} + static inline struct ila_params *ila_params_lwtunnel( - struct lwtunnel_state *lwstate) + struct lwtunnel_state *lwt) { - return (struct ila_params *)lwstate->data; + return &ila_lwt_lwtunnel(lwt)->p; } static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *orig_dst = skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)orig_dst; + struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate); + struct dst_entry *dst; + int err = -EINVAL; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), + true); - return dst->lwtstate->orig_output(net, sk, skb); + if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { + /* Already have a next hop address in route, no need for + * dest cache route. + */ + return orig_dst->lwtstate->orig_output(net, sk, skb); + } + + dst = dst_cache_get(&ilwt->dst_cache); + if (unlikely(!dst)) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct flowi6 fl6; + + /* Lookup a route for the new destination. Take into + * account that the base route may already have a gateway. + */ + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = orig_dst->dev->ifindex; + fl6.flowi6_iif = LOOPBACK_IFINDEX; + fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst, + &ip6h->daddr); + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = -EHOSTUNREACH; + dst_release(dst); + goto drop; + } + + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto drop; + } + + if (ilwt->connected) + dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr); + } + + skb_dst_set(skb, dst); + return dst_output(net, sk, skb); drop: kfree_skb(skb); @@ -56,13 +115,13 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; -static int ila_build_state(struct net_device *dev, struct nlattr *nla, +static int ila_build_state(struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { + struct ila_lwt *ilwt; struct ila_params *p; struct nlattr *tb[ILA_ATTR_MAX + 1]; - size_t encap_len = sizeof(*p); struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; @@ -71,7 +130,7 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, if (family != AF_INET6) return -EINVAL; - if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) { + if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { /* Need to have full locator and at least type field * included in destination */ @@ -95,11 +154,17 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, if (!tb[ILA_ATTR_LOCATOR]) return -EINVAL; - newts = lwtunnel_state_alloc(encap_len); + newts = lwtunnel_state_alloc(sizeof(*ilwt)); if (!newts) return -ENOMEM; - newts->len = encap_len; + ilwt = ila_lwt_lwtunnel(newts); + ret = dst_cache_init(&ilwt->dst_cache, GFP_ATOMIC); + if (ret) { + kfree(newts); + return ret; + } + p = ila_params_lwtunnel(newts); p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); @@ -120,11 +185,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | LWTUNNEL_STATE_INPUT_REDIRECT; + if (cfg6->fc_dst_len == 8 * sizeof(struct in6_addr)) + ilwt->connected = 1; + *ts = newts; return 0; } +static void ila_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&ila_lwt_lwtunnel(lwt)->dst_cache); +} + static int ila_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { @@ -159,11 +232,13 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops ila_encap_ops = { .build_state = ila_build_state, + .destroy_state = ila_destroy_state, .output = ila_output, .input = ila_input, .fill_encap = ila_fill_encap_info, .get_encap_size = ila_encap_nlsize, .cmp_encap = ila_encap_cmp, + .owner = THIS_MODULE, }; int ila_lwt_init(void) diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index e604013dd814..af8f52ee7180 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -118,15 +118,7 @@ static const struct rhashtable_params rht_params = { .obj_cmpfn = ila_cmpfn, }; -static struct genl_family ila_nl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = ILA_GENL_NAME, - .version = ILA_GENL_VERSION, - .maxattr = ILA_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; +static struct genl_family ila_nl_family; static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, @@ -482,7 +474,15 @@ static int ila_nl_dump_start(struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, GFP_KERNEL); @@ -490,16 +490,18 @@ static int ila_nl_dump_start(struct netlink_callback *cb) static int ila_nl_dump_done(struct netlink_callback *cb) { - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; rhashtable_walk_exit(&iter->rhiter); + kfree(iter); + return 0; } static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; struct rhashtable_iter *rhiter = &iter->rhiter; struct ila_map *ila; int ret; @@ -561,6 +563,18 @@ static const struct genl_ops ila_nl_ops[] = { }, }; +static struct genl_family ila_nl_family __ro_after_init = { + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .ops = ila_nl_ops, + .n_ops = ARRAY_SIZE(ila_nl_ops), +}; + #define ILA_HASH_TABLE_SIZE 1024 static __net_init int ila_init_net(struct net *net) @@ -623,7 +637,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) return 0; } -int ila_xlat_init(void) +int __init ila_xlat_init(void) { int ret; @@ -631,8 +645,7 @@ int ila_xlat_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&ila_nl_family, - ila_nl_ops); + ret = genl_register_family(&ila_nl_family); if (ret < 0) goto unregister; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 532c3ef282c5..9a31d13bf180 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -28,45 +28,6 @@ #include <net/inet6_connection_sock.h> #include <net/sock_reuseport.h> -int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax) -{ - const struct sock *sk2; - int reuse = sk->sk_reuse; - int reuseport = sk->sk_reuseport; - kuid_t uid = sock_i_uid((struct sock *)sk); - - /* We must walk the whole port owner list in this case. -DaveM */ - /* - * See comment in inet_csk_bind_conflict about sock lookup - * vs net namespaces issues. - */ - sk_for_each_bound(sk2, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, - sock_i_uid((struct sock *)sk2))))) { - if (ipv6_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && - sk2->sk_state != TCP_LISTEN && - ipv6_rcv_saddr_equal(sk, sk2, true)) - break; - } - } - - return sk2 != NULL; -} -EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); - struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, @@ -88,6 +49,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -136,6 +98,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); @@ -173,7 +136,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; - res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), np->tclass); rcu_read_unlock(); return res; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 02761c9fe43e..d0900918a19e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -268,54 +268,10 @@ int inet6_hash(struct sock *sk) if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); + err = __inet_hash(sk, NULL); local_bh_enable(); } return err; } EXPORT_SYMBOL_GPL(inet6_hash); - -/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 - * only, and any IPv4 addresses if not IPv6 only - * match_wildcard == false: addresses must be exactly the same, i.e. - * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, - * and 0.0.0.0 equals to 0.0.0.0 only - */ -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) -{ - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); - int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; - - /* if both are mapped, treat as IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { - if (!sk2_ipv6only) { - if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) - return 1; - if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) - return match_wildcard; - } - return 0; - } - - if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) - return 1; - - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; - - if (addr_type == IPV6_ADDR_ANY && match_wildcard && - !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; - - if (sk2_rcv_saddr6 && - ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) - return 1; - - return 0; -} -EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ef5485204522..d4bf2c68a545 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -318,6 +318,16 @@ static int fib6_dump_node(struct fib6_walker *w) w->leaf = rt; return 1; } + + /* Multipath routes are dumped in one route with the + * RTA_MULTIPATH attribute. Jump 'rt' to point to the + * last sibling of this route (no need to dump the + * sibling routes again) + */ + if (rt->rt6i_nsiblings) + rt = list_last_entry(&rt->rt6i_siblings, + struct rt6_info, + rt6i_siblings); } w->leaf = NULL; return 0; @@ -746,6 +756,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, u16 nlflags = NLM_F_EXCL; int err; + if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND)) + nlflags |= NLM_F_APPEND; + ins = &fn->leaf; for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { @@ -868,7 +881,8 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); + if (!info->skip_notify) + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { @@ -894,7 +908,8 @@ add: rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); + if (!info->skip_notify) + inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; @@ -908,6 +923,8 @@ add: ins = &rt->dst.rt6_next; iter = *ins; while (iter) { + if (iter->rt6i_metric > rt->rt6i_metric) + break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); @@ -1439,7 +1456,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fib6_purge_rt(rt, fn, net); - inet6_rt_notify(RTM_DELROUTE, rt, info, 0); + if (!info->skip_notify) + inet6_rt_notify(RTM_DELROUTE, rt, info, 0); rt6_release(rt); } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b912f0dbaf72..8081bafe441b 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -29,7 +29,7 @@ #include <net/rawv6.h> #include <net/transp_v6.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified in old IPv6 RFC. Well, it was reasonable value. diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d7d6d3ae0b3b..6fcb7cb49bb2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -64,7 +64,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define IP6_GRE_HASH_SIZE_SHIFT 5 #define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) -static int ip6gre_net_id __read_mostly; +static unsigned int ip6gre_net_id __read_mostly; struct ip6gre_net { struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; @@ -367,35 +367,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(skb->data + offset); - int grehlen = offset + 4; + const struct gre_base_hdr *greh; + const struct ipv6hdr *ipv6h; + int grehlen = sizeof(*greh); struct ip6_tnl *t; + int key_off = 0; __be16 flags; + __be32 key; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } + if (!pskb_may_pull(skb, offset + grehlen)) + return; + greh = (const struct gre_base_hdr *)(skb->data + offset); + flags = greh->flags; + if (flags & (GRE_VERSION | GRE_ROUTING)) + return; + if (flags & GRE_CSUM) + grehlen += 4; + if (flags & GRE_KEY) { + key_off = grehlen + offset; + grehlen += 4; } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (!pskb_may_pull(skb, grehlen)) + if (!pskb_may_pull(skb, offset + grehlen)) return; ipv6h = (const struct ipv6hdr *)skb->data; - p = (__be16 *)(skb->data + offset); + greh = (const struct gre_base_hdr *)(skb->data + offset); + key = key_off ? *(__be32 *)(skb->data + key_off) : 0; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + key, greh->protocol); if (!t) return; @@ -484,11 +486,6 @@ drop: return 0; } -struct ipv6_tel_txoption { - struct ipv6_txoptions ops; - __u8 dst_opt[8]; -}; - static int gre_handle_offloads(struct sk_buff *skb, bool csum) { return iptunnel_handle_offloads(skb, @@ -548,6 +545,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) return -1; @@ -580,6 +579,9 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) return -1; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + ipv6h = ipv6_hdr(skb); + if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; @@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; @@ -994,6 +998,9 @@ static void ip6gre_tunnel_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); + /* This perm addr will be used as interface identifier by IPv6 */ + dev->addr_assign_type = NET_ADDR_RANDOM; + eth_random_addr(dev->perm_addr); } static int ip6gre_tunnel_init_common(struct net_device *dev) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacfb4bce153..c45b12b4431c 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -122,11 +122,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); /* * RFC4291 2.5.3 + * The loopback address must not be used as the source address in IPv6 + * packets that are sent outside of a single node. [..] * A packet received on an interface with a destination address * of loopback must be dropped. */ - if (!(dev->flags & IFF_LOOPBACK) && - ipv6_addr_loopback(&hdr->daddr)) + if ((ipv6_addr_loopback(&hdr->saddr) || + ipv6_addr_loopback(&hdr->daddr)) && + !(dev->flags & IFF_LOOPBACK)) goto err; /* RFC4291 Errata ID: 3480 diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 89c59e656f44..93e58a5e1837 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); + skb_gro_frag0_invalidate(skb); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); skb_reset_transport_header(skb); @@ -252,7 +253,7 @@ out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } @@ -293,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; - if (skb->encapsulation) + if (skb->encapsulation) { + skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6)); skb_set_inner_network_header(skb, nhoff); + } iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59eb4ed99ce8..58f6288e9ba5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -39,6 +39,7 @@ #include <linux/module.h> #include <linux/slab.h> +#include <linux/bpf-cgroup.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -118,7 +119,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { - ret = dst_neigh_output(dst, neigh, skb); + sock_confirm_neigh(skb, neigh); + ret = neigh_output(neigh, skb); rcu_read_unlock_bh(); return ret; } @@ -131,6 +133,14 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { + int ret; + + ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); + if (ret) { + kfree_skb(skb); + return ret; + } + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) @@ -163,7 +173,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) * which are using proper atomic operations or spinlocks. */ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - struct ipv6_txoptions *opt, int tclass) + __u32 mark, struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); const struct ipv6_pinfo *np = inet6_sk(sk); @@ -203,7 +213,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); + ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, + &fl6->saddr); } skb_push(skb, sizeof(struct ipv6hdr)); @@ -230,7 +241,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = mark; mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { @@ -624,7 +635,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { - int first_len = skb_pagelen(skb); + unsigned int first_len = skb_pagelen(skb); struct sk_buff *frag2; if (first_len - hlen > mtu || @@ -757,13 +768,14 @@ slow_path: * Fragment the datagram. */ - *prevhdr = NEXTHDR_FRAGMENT; troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. */ while (left > 0) { + u8 *fragnexthdr_offset; + len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -808,6 +820,10 @@ slow_path: */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); + fragnexthdr_offset = skb_network_header(frag); + fragnexthdr_offset += prevhdr - skb_network_header(skb); + *fragnexthdr_offset = NEXTHDR_FRAGMENT; + /* * Build fragment header. */ @@ -1011,6 +1027,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } } #endif + if (ipv6_addr_v4mapped(&fl6->saddr) && + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { + err = -EAFNOSUPPORT; + goto out_err_release; + } return 0; @@ -1134,6 +1155,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -1334,7 +1358,7 @@ emsgsize: */ if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && headersize == sizeof(struct ipv6hdr) && - length < mtu - headersize && + length <= mtu - headersize && !(flags & MSG_MORE) && rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; @@ -1363,10 +1387,10 @@ emsgsize: */ cork->length += length; - if (((length > mtu) || + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, @@ -1506,6 +1530,9 @@ alloc_new_skb: exthdrlen = 0; dst_exthdrlen = 0; + if ((flags & MSG_CONFIRM) && !skb_prev) + skb_set_dst_pending_confirm(skb, 1); + /* * Put the packet on the pending queue */ @@ -1672,7 +1699,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); + ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d76674efe523..75fac933c209 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -42,7 +42,7 @@ #include <linux/hash.h> #include <linux/etherdevice.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/atomic.h> #include <net/icmp.h> @@ -83,7 +83,7 @@ static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); static struct rtnl_link_ops ip6_link_ops __read_mostly; -static int ip6_tnl_net_id __read_mostly; +static unsigned int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; @@ -400,18 +400,19 @@ ip6_tnl_dev_uninit(struct net_device *dev) __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; - __u8 nexthdr = ipv6h->nexthdr; - __u16 off = sizeof(*ipv6h); + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; + unsigned int nhoff = raw - skb->data; + unsigned int off = nhoff + sizeof(*ipv6h); + u8 next, nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { - __u16 optlen = 0; struct ipv6_opt_hdr *hdr; - if (raw + off + sizeof(*hdr) > skb->data && - !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) + u16 optlen; + + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; - hdr = (struct ipv6_opt_hdr *) (raw + off); + hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; if (frag_hdr->frag_off) @@ -422,20 +423,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) } else { optlen = ipv6_optlen(hdr); } + /* cache hdr->nexthdr, since pskb_may_pull() might + * invalidate hdr + */ + next = hdr->nexthdr; if (nexthdr == NEXTHDR_DEST) { - __u16 i = off + 2; + u16 i = 2; + + /* Remember : hdr is no longer valid at this point. */ + if (!pskb_may_pull(skb, off + optlen)) + break; + while (1) { struct ipv6_tlv_tnl_enc_lim *tel; /* No more room for encapsulation limit */ - if (i + sizeof (*tel) > off + optlen) + if (i + sizeof(*tel) > optlen) break; - tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; + tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) - return i; + return i + off - nhoff; /* else jump to next option */ if (tel->type) i += tel->length + 2; @@ -443,7 +453,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) i++; } } - nexthdr = hdr->nexthdr; + nexthdr = next; off += optlen; } return 0; @@ -1108,7 +1118,7 @@ route_lookup: t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - psh_hlen; + mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1117,7 +1127,7 @@ route_lookup: mtu = IPV6_MIN_MTU; if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; @@ -1166,7 +1176,7 @@ route_lookup: if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL); } /* Calculate max headroom for all the headers and adjust @@ -1248,6 +1258,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1301,6 +1313,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowlabel = key->label; } else { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + ipv6h = ipv6_hdr(skb); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; @@ -1326,6 +1340,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1645,7 +1661,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) struct ip6_tnl *tnl = netdev_priv(dev); if (tnl->parms.proto == IPPROTO_IPIP) { - if (new_mtu < 68) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; } else { if (new_mtu < IPV6_MIN_MTU) @@ -1798,6 +1814,8 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 0xFFF8 - dev->hard_header_len; return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c299c1e2bbf0..3d8a3b63b4fd 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -49,6 +49,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/etherdevice.h> #define IP6_VTI_HASH_SIZE_SHIFT 5 #define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT) @@ -64,7 +65,7 @@ static int vti6_dev_init(struct net_device *dev); static void vti6_dev_setup(struct net_device *dev); static struct rtnl_link_ops vti6_link_ops __read_mostly; -static int vti6_net_id __read_mostly; +static unsigned int vti6_net_id __read_mostly; struct vti6_net { /* the vti6 tunnel fallback device */ struct net_device *fb_tnl_dev; @@ -189,12 +190,12 @@ static int vti6_tnl_create2(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id); int err; + dev->rtnl_link_ops = &vti6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &vti6_link_ops; dev_hold(dev); vti6_tnl_link(ip6n, t); @@ -484,11 +485,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!skb->ignore_df && skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); - if (skb->protocol == htons(ETH_P_IPV6)) + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - else + } else { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + } return -EMSGSIZE; } @@ -608,9 +613,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; @@ -691,6 +697,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; + if (u->i_key) + u->i_flags |= GRE_KEY; + if (u->o_key) + u->o_flags |= GRE_KEY; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name)); @@ -812,30 +822,11 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } -/** - * vti6_tnl_change_mtu - change mtu manually for tunnel device - * @dev: virtual device associated with tunnel - * @new_mtu: the new mtu - * - * Return: - * 0 on success, - * %-EINVAL if mtu too small - **/ -static int vti6_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < IPV6_MIN_MTU) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops vti6_netdev_ops = { .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, - .ndo_change_mtu = vti6_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -855,9 +846,14 @@ static void vti6_dev_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); dev->mtu = ETH_DATA_LEN; + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = IP_MAX_MTU; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); + /* This perm addr will be used as interface identifier by IPv6 */ + dev->addr_assign_type = NET_ADDR_RANDOM; + eth_random_addr(dev->perm_addr); } /** diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7f4265b1649b..bf34d0950752 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -16,7 +16,7 @@ * */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/errno.h> @@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || + if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, sizeof(*pim), IPPROTO_PIM, @@ -774,7 +774,8 @@ failure: * Delete a VIF entry */ -static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) +static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, + struct list_head *head) { struct mif_device *v; struct net_device *dev; @@ -820,7 +821,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) dev->ifindex, &in6_dev->cnf); } - if (v->flags & MIFF_REGISTER) + if ((v->flags & MIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); dev_put(dev); @@ -1331,7 +1332,6 @@ static int ip6mr_device_event(struct notifier_block *this, struct mr6_table *mrt; struct mif_device *v; int ct; - LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; @@ -1340,10 +1340,9 @@ static int ip6mr_device_event(struct notifier_block *this, v = &mrt->vif6_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(mrt, ct, &list); + mif6_delete(mrt, ct, 1, NULL); } } - unregister_netdevice_many(&list); return NOTIFY_DONE; } @@ -1552,7 +1551,7 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all) for (i = 0; i < mrt->maxvif; i++) { if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) continue; - mif6_delete(mrt, i, &list); + mif6_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -1666,6 +1665,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; @@ -1677,9 +1680,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns switch (optname) { case MRT6_INIT: - if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; if (optlen < sizeof(int)) return -EINVAL; @@ -1706,7 +1706,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(mrt, mifi, NULL); + ret = mif6_delete(mrt, mifi, 0, NULL); rtnl_unlock(); return ret; @@ -1815,6 +1815,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; @@ -2243,8 +2247,10 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mf6c_parent >= MAXMIFS) + if (c->mf6c_parent >= MAXMIFS) { + rtm->rtm_flags |= RTNH_F_UNRESOLVED; return -ENOENT; + } if (MIF_EXISTS(mrt, c->mf6c_parent) && nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) @@ -2286,7 +2292,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, } int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, - int nowait, u32 portid) + u32 portid) { int err; struct mr6_table *mrt; @@ -2313,11 +2319,6 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, struct net_device *dev; int vif; - if (nowait) { - read_unlock(&mrt_lock); - return -EAGAIN; - } - dev = skb->dev; if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { read_unlock(&mrt_lock); @@ -2355,7 +2356,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, return err; } - if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) + if (rtm->rtm_flags & RTM_F_NOTIFY) cache->mfc_flags |= MFC_NOTIFY; err = __ip6mr_fill_mroute(mrt, skb, cache, rtm); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1b9316e1386a..54d165b9845a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 636ec56f5f50..a531ba032b85 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -52,8 +52,9 @@ #include <net/udplite.h> #include <net/xfrm.h> #include <net/compat.h> +#include <net/seg6.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); @@ -430,6 +431,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; #endif + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *) + opt->srcrt; + + if (!seg6_validate_srh(srh, optlen)) + goto sticky_done; + break; + } default: goto sticky_done; } @@ -585,16 +595,24 @@ done: if (val) { struct net_device *dev; + int midx; - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) - goto e_inval; + rcu_read_lock(); - dev = dev_get_by_index(net, val); + dev = dev_get_by_index_rcu(net, val); if (!dev) { + rcu_read_unlock(); retv = -ENODEV; break; } - dev_put(dev); + midx = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != val && + (!midx || midx != sk->sk_bound_dev_if)) + goto e_inval; } np->mcast_oif = val; retv = 0; @@ -868,6 +886,10 @@ pref_skip_coa: np->autoflowlabel = valbool; retv = 0; break; + case IPV6_RECVFRAGSIZE: + np->rxopt.bits.recvfragsize = valbool; + retv = 0; + break; } release_sock(sk); @@ -1310,6 +1332,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->autoflowlabel; break; + case IPV6_RECVFRAGSIZE: + val = np->rxopt.bits.recvfragsize; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 14a3903f1c82..1bdc703cb966 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -81,7 +81,7 @@ static void mld_gq_timer_expire(unsigned long data); static void mld_ifc_timer_expire(unsigned long data); static void mld_ifc_event(struct inet6_dev *idev); static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); -static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr); +static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); static void mld_clear_delrec(struct inet6_dev *idev); static bool mld_in_v1_mode(const struct inet6_dev *idev); static int sf_setstate(struct ifmcaddr6 *pmc); @@ -692,9 +692,9 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) dev_mc_del(dev, buf); } - if (mc->mca_flags & MAF_NOREPORT) - goto done; spin_unlock_bh(&mc->mca_lock); + if (mc->mca_flags & MAF_NOREPORT) + return; if (!mc->idev->dead) igmp6_leave_group(mc); @@ -702,8 +702,6 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) spin_lock_bh(&mc->mca_lock); if (del_timer(&mc->mca_timer)) atomic_dec(&mc->mca_refcnt); -done: - ip6_mc_clear_src(mc); spin_unlock_bh(&mc->mca_lock); } @@ -748,10 +746,11 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) spin_unlock_bh(&idev->mc_lock); } -static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) +static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ifmcaddr6 *pmc, *pmc_prev; - struct ip6_sf_list *psf, *psf_next; + struct ip6_sf_list *psf; + struct in6_addr *pmca = &im->mca_addr; spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; @@ -768,14 +767,21 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) } spin_unlock_bh(&idev->mc_lock); + spin_lock_bh(&im->mca_lock); if (pmc) { - for (psf = pmc->mca_tomb; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); + im->idev = pmc->idev; + im->mca_crcount = idev->mc_qrv; + im->mca_sfmode = pmc->mca_sfmode; + if (pmc->mca_sfmode == MCAST_INCLUDE) { + im->mca_tomb = pmc->mca_tomb; + im->mca_sources = pmc->mca_sources; + for (psf = im->mca_sources; psf; psf = psf->sf_next) + psf->sf_crcount = im->mca_crcount; } in6_dev_put(pmc->idev); kfree(pmc); } + spin_unlock_bh(&im->mca_lock); } static void mld_clear_delrec(struct inet6_dev *idev) @@ -904,7 +910,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) mca_get(mc); write_unlock_bh(&idev->lock); - mld_del_delrec(idev, &mc->mca_addr); + mld_del_delrec(idev, mc); igmp6_group_added(mc); ma_put(mc); return 0; @@ -927,6 +933,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_unlock_bh(&idev->lock); igmp6_group_dropped(ma); + ip6_mc_clear_src(ma); ma_put(ma); return 0; @@ -2501,15 +2508,17 @@ void ipv6_mc_down(struct inet6_dev *idev) /* Withdraw multicast list */ read_lock_bh(&idev->lock); - mld_ifc_stop_timer(idev); - mld_gq_stop_timer(idev); - mld_dad_stop_timer(idev); for (i = idev->mc_list; i; i = i->next) igmp6_group_dropped(i); - read_unlock_bh(&idev->lock); - mld_clear_delrec(idev); + /* Should stop timer after group drop. or we will + * start timer again in mld_ifc_event() + */ + mld_ifc_stop_timer(idev); + mld_gq_stop_timer(idev); + mld_dad_stop_timer(idev); + read_unlock_bh(&idev->lock); } static void ipv6_mc_reset(struct inet6_dev *idev) @@ -2531,8 +2540,10 @@ void ipv6_mc_up(struct inet6_dev *idev) read_lock_bh(&idev->lock); ipv6_mc_reset(idev); - for (i = idev->mc_list; i; i = i->next) + for (i = idev->mc_list; i; i = i->next) { + mld_del_delrec(idev, i); igmp6_group_added(i); + } read_unlock_bh(&idev->lock); } @@ -2565,6 +2576,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) /* Deactivate timers */ ipv6_mc_down(idev); + mld_clear_delrec(idev); /* Delete all-nodes address. */ /* We cannot call ipv6_dev_mc_dec() directly, our caller in @@ -2579,11 +2591,9 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while ((i = idev->mc_list) != NULL) { idev->mc_list = i->next; - write_unlock_bh(&idev->lock); - igmp6_group_dropped(i); + write_unlock_bh(&idev->lock); ma_put(i); - write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 60c79a08e14a..64f0f7be9e5e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -191,7 +191,7 @@ static inline int mip6_report_rl_allow(ktime_t stamp, int allow = 0; spin_lock_bh(&mip6_report_rl.lock); - if (!ktime_equal(mip6_report_rl.stamp, stamp) || + if (mip6_report_rl.stamp != stamp || mip6_report_rl.iif != iif || !ipv6_addr_equal(&mip6_report_rl.src, src) || !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d8e671457d10..7ebac630d3c6 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -233,6 +233,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: case ND_OPT_MTU: + case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { ND_PRINTK(2, warn, @@ -568,7 +569,8 @@ static void ndisc_send_unsol_na(struct net_device *dev) } void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr) + const struct in6_addr *daddr, const struct in6_addr *saddr, + u64 nonce) { struct sk_buff *skb; struct in6_addr addr_buf; @@ -588,6 +590,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_SOLICITATION); + if (nonce != 0) + optlen += 8; skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -605,6 +609,13 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_SOLICITATION); + if (nonce != 0) { + u8 *opt = skb_put(skb, 8); + + opt[0] = ND_OPT_NONCE; + opt[1] = 8 >> 3; + memcpy(opt + 2, &nonce, 6); + } ndisc_send_skb(skb, daddr, saddr); } @@ -693,12 +704,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, target, target, saddr); + ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, target, &mcaddr, saddr); + ndisc_send_ns(dev, target, &mcaddr, saddr, 0); } } @@ -742,6 +753,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) int dad = ipv6_addr_any(saddr); bool inc; int is_router = -1; + u64 nonce = 0; if (skb->len < sizeof(struct nd_msg)) { ND_PRINTK(2, warn, "NS: packet too short\n"); @@ -786,6 +798,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) return; } } + if (ndopts.nd_opts_nonce) + memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6); inc = ipv6_addr_is_multicast(daddr); @@ -794,6 +808,15 @@ static void ndisc_recv_ns(struct sk_buff *skb) have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { + if (nonce != 0 && ifp->dad_nonce == nonce) { + u8 *np = (u8 *)&nonce; + /* Matching nonce if looped back */ + ND_PRINTK(2, notice, + "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", + ifp->idev->dev->name, + &ifp->addr, np); + goto out; + } /* * We are colliding with another node * who is doing DAD diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d61..39970e212ad5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index e10a04c9cdc7..6acb2eecd986 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV6 + tristate "IPv6 socket lookup support" + help + This option enables the IPv6 socket lookup infrastructure. This + is used by the ip6tables socket match. + if NF_TABLES config NF_TABLES_IPV6 @@ -54,6 +60,14 @@ config NFT_DUP_IPV6 help This module enables IPv6 packet duplication support for nf_tables. +config NFT_FIB_IPV6 + tristate "nf_tables fib / ipv6 route lookup support" + select NFT_FIB + help + This module enables IPv6 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV6 endif # NF_TABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b4f7d0b4e2af..fe180c96040e 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o + # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o @@ -40,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o +obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 55aacea24396..1e15c54fd5e2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -24,7 +24,7 @@ #include <linux/icmpv6.h> #include <net/ipv6.h> #include <net/compat.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/err.h> @@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.family = NFPROTO_IPV6; - acpar.hooknum = hook; + acpar.state = state; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -566,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name) static int find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, - unsigned int size) + unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; @@ -574,12 +571,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - unsigned long pcnt; - pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(pcnt)) + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; - e->counters.pcnt = pcnt; j = 0; mtpar.net = net; @@ -616,7 +610,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, cleanup_match(ematch, net); } - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); return ret; } @@ -703,8 +697,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in @@ -713,6 +706,7 @@ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ip6t_entry *iter; unsigned int *offsets; unsigned int i; @@ -772,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, repl->name, repl->size); + ret = find_check_entry(iter, net, repl->name, repl->size, + &alloc_state); if (ret != 0) break; ++i; @@ -860,10 +855,6 @@ copy_entries_to_user(unsigned int total_size, return PTR_ERR(counters); loc_cpu_entry = private->entries; - if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { - ret = -EFAULT; - goto free_counters; - } /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ @@ -873,6 +864,10 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = (struct ip6t_entry *)(loc_cpu_entry + off); + if (copy_to_user(userptr + off, e, sizeof(*e))) { + ret = -EFAULT; + goto free_counters; + } if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], @@ -886,23 +881,14 @@ copy_entries_to_user(unsigned int total_size, i += m->u.match_size) { m = (void *)e + i; - if (copy_to_user(userptr + off + i - + offsetof(struct xt_entry_match, - u.user.name), - m->u.kernel.match->name, - strlen(m->u.kernel.match->name)+1) - != 0) { + if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ip6t_get_target_c(e); - if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct xt_entry_target, - u.user.name), - t->u.kernel.target->name, - strlen(t->u.kernel.target->name)+1) != 0) { + if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } @@ -1007,7 +993,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1037,7 +1023,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(AF_INET6); @@ -1063,7 +1049,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1074,7 +1060,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; return ret; } @@ -1099,8 +1085,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free_newinfo_counters_untrans; } @@ -1214,8 +1200,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); - if (IS_ERR_OR_NULL(t)) { - ret = t ? PTR_ERR(t) : -ENOENT; + if (!t) { + ret = -ENOENT; goto free; } @@ -1651,7 +1637,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (!IS_ERR_OR_NULL(t)) { + if (t) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); @@ -1665,7 +1651,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = t ? PTR_ERR(t) : -ENOENT; + ret = -ENOENT; xt_compat_unlock(AF_INET6); return ret; diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 7f9f45d829d2..2b1a15846f9a 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -24,7 +24,7 @@ static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out); + return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); } static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 590f767db5d4..a379d2f79b19 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -112,6 +112,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), + .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_LOCAL_IN) | @@ -123,6 +124,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), + .usersize = offsetof(struct ip6t_npt_tginfo, adjustment), .checkentry = ip6t_npt_checkentry, .family = NFPROTO_IPV6, .hooks = (1 << NF_INET_PRE_ROUTING) | diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index db29bbf41b59..fa51a205918d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,35 +39,40 @@ static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par)); break; case IP6T_ICMP6_ADM_PROHIBITED: - nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, + xt_hooknum(par)); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, + xt_hooknum(par)); break; case IP6T_ICMP6_ADDR_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_PORT_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - nf_send_reset6(net, skb, par->hooknum); + nf_send_reset6(net, skb, xt_hooknum(par)); break; case IP6T_ICMP6_POLICY_FAIL: - nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par)); break; case IP6T_ICMP6_REJECT_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, + xt_hooknum(par)); break; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 06bed74cf5ee..4ef1ddd4bbbd 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -71,8 +71,7 @@ synproxy_send_tcp(struct net *net, skb_dst_set(nskb, dst); if (nfct) { - nskb->nfct = nfct; - nskb->nfctinfo = ctinfo; + nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } @@ -121,8 +120,8 @@ synproxy_send_client_synack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void @@ -244,8 +243,8 @@ synproxy_send_client_ack(struct net *net, synproxy_build_options(nth, opts); - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool @@ -277,12 +276,12 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; - if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); @@ -440,12 +439,12 @@ static int synproxy_tg6_check(const struct xt_tgchk_param *par) e->ipv6.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_l3proto_try_module_get(par->family); + return nf_ct_netns_get(par->net, par->family); } static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target synproxy_tg6_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 1ee1b25df096..b12e61b7b16c 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -72,10 +72,10 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, return ret; } -static bool rpfilter_is_local(const struct sk_buff *skb) +static bool +rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { - const struct rt6_info *rt = (const void *) skb_dst(skb); - return rt && (rt->rt6i_flags & RTF_LOCAL); + return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) @@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (rpfilter_is_local(skb)) + if (rpfilter_is_loopback(skb, xt_in(par))) return true ^ invert; iph = ipv6_hdr(skb); @@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ - return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par), + info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 963ee3848675..4e3402486833 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -34,6 +34,13 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_log.h> +static int conntrack6_net_id; +static DEFINE_MUTEX(register_ipv6_hooks); + +struct conntrack6_net { + unsigned int users; +}; + static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -308,6 +315,42 @@ static int ipv6_nlattr_tuple_size(void) } #endif +static int ipv6_hooks_register(struct net *net) +{ + struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); + int err = 0; + + mutex_lock(®ister_ipv6_hooks); + cnet->users++; + if (cnet->users > 1) + goto out_unlock; + + err = nf_defrag_ipv6_enable(net); + if (err < 0) { + cnet->users = 0; + goto out_unlock; + } + + err = nf_register_net_hooks(net, ipv6_conntrack_ops, + ARRAY_SIZE(ipv6_conntrack_ops)); + if (err) + cnet->users = 0; + out_unlock: + mutex_unlock(®ister_ipv6_hooks); + return err; +} + +static void ipv6_hooks_unregister(struct net *net) +{ + struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); + + mutex_lock(®ister_ipv6_hooks); + if (cnet->users && (--cnet->users == 0)) + nf_unregister_net_hooks(net, ipv6_conntrack_ops, + ARRAY_SIZE(ipv6_conntrack_ops)); + mutex_unlock(®ister_ipv6_hooks); +} + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, .name = "ipv6", @@ -321,6 +364,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .nlattr_to_tuple = ipv6_nlattr_to_tuple, .nla_policy = ipv6_nla_policy, #endif + .net_ns_get = ipv6_hooks_register, + .net_ns_put = ipv6_hooks_unregister, .me = THIS_MODULE, }; @@ -336,52 +381,51 @@ static struct nf_sockopt_ops so_getorigdst6 = { .owner = THIS_MODULE, }; +static struct nf_conntrack_l4proto *builtin_l4proto6[] = { + &nf_conntrack_l4proto_tcp6, + &nf_conntrack_l4proto_udp6, + &nf_conntrack_l4proto_icmpv6, +#ifdef CONFIG_NF_CT_PROTO_DCCP + &nf_conntrack_l4proto_dccp6, +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + &nf_conntrack_l4proto_sctp6, +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + &nf_conntrack_l4proto_udplite6, +#endif +}; + static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); - if (ret < 0) { - pr_err("nf_conntrack_tcp6: pernet registration failed\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); - if (ret < 0) { - pr_err("nf_conntrack_udp6: pernet registration failed\n"); - goto cleanup_tcp6; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); - if (ret < 0) { - pr_err("nf_conntrack_icmp6: pernet registration failed\n"); - goto cleanup_udp6; - } + ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); + if (ret < 0) + return ret; + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); - goto cleanup_icmpv6; + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } - return 0; - cleanup_icmpv6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); - cleanup_udp6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); - cleanup_tcp6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); - out: return ret; } static void ipv6_net_exit(struct net *net) { nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } static struct pernet_operations ipv6_net_ops = { .init = ipv6_net_init, .exit = ipv6_net_exit, + .id = &conntrack6_net_id, + .size = sizeof(struct conntrack6_net), }; static int __init nf_conntrack_l3proto_ipv6_init(void) @@ -389,7 +433,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) int ret = 0; need_conntrack(); - nf_defrag_ipv6_enable(); ret = nf_register_sockopt(&so_getorigdst6); if (ret < 0) { @@ -401,47 +444,20 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) if (ret < 0) goto cleanup_sockopt; - ret = nf_register_hooks(ipv6_conntrack_ops, - ARRAY_SIZE(ipv6_conntrack_ops)); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " - "hook.\n"); + ret = nf_ct_l4proto_register(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); + if (ret < 0) goto cleanup_pernet; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); - goto cleanup_hooks; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); - goto cleanup_tcp6; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); - goto cleanup_udp6; - } ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); - goto cleanup_icmpv6; + goto cleanup_l4proto; } return ret; - - cleanup_icmpv6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - cleanup_udp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); - cleanup_tcp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); - cleanup_hooks: - nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); +cleanup_l4proto: + nf_ct_l4proto_unregister(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); cleanup_pernet: unregister_pernet_subsys(&ipv6_net_ops); cleanup_sockopt: @@ -453,10 +469,8 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); + nf_ct_l4proto_unregister(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index f5a61bc3ec2b..d2c2ccbfbe72 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -145,15 +145,15 @@ static int icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, - enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(skb->nfct == NULL); + NF_CT_ASSERT(!skb_nfct(skb)); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuplepr(skb, @@ -176,7 +176,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), &intuple); @@ -185,19 +185,18 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } else { if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + ctinfo += IP_CT_IS_REPLY; } /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; + nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); return NF_ACCEPT; } static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) + u8 pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; @@ -222,9 +221,8 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); + nf_conntrack_get(skb_nfct(skb)); return NF_ACCEPT; } @@ -232,7 +230,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9948b5ce52da..986d4ca38832 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index f06b0471f39f..ada60d1a991b 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -30,12 +30,14 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +static DEFINE_MUTEX(defrag6_mutex); + static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) { + if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -59,7 +61,7 @@ static unsigned int ipv6_defrag(void *priv, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif @@ -87,6 +89,19 @@ static struct nf_hook_ops ipv6_defrag_ops[] = { }, }; +static void __net_exit defrag6_net_exit(struct net *net) +{ + if (net->nf.defrag_ipv6) { + nf_unregister_net_hooks(net, ipv6_defrag_ops, + ARRAY_SIZE(ipv6_defrag_ops)); + net->nf.defrag_ipv6 = false; + } +} + +static struct pernet_operations defrag6_net_ops = { + .exit = defrag6_net_exit, +}; + static int __init nf_defrag_init(void) { int ret = 0; @@ -96,9 +111,9 @@ static int __init nf_defrag_init(void) pr_err("nf_defrag_ipv6: can't initialize frag6.\n"); return ret; } - ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + ret = register_pernet_subsys(&defrag6_net_ops); if (ret < 0) { - pr_err("nf_defrag_ipv6: can't register hooks\n"); + pr_err("nf_defrag_ipv6: can't register pernet ops\n"); goto cleanup_frag6; } return ret; @@ -111,12 +126,31 @@ cleanup_frag6: static void __exit nf_defrag_fini(void) { - nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + unregister_pernet_subsys(&defrag6_net_ops); nf_ct_frag6_cleanup(); } -void nf_defrag_ipv6_enable(void) +int nf_defrag_ipv6_enable(struct net *net) { + int err = 0; + + might_sleep(); + + if (net->nf.defrag_ipv6) + return 0; + + mutex_lock(&defrag6_mutex); + if (net->nf.defrag_ipv6) + goto out_unlock; + + err = nf_register_net_hooks(net, ipv6_defrag_ops, + ARRAY_SIZE(ipv6_defrag_ops)); + if (err == 0) + net->nf.defrag_ipv6 = true; + + out_unlock: + mutex_unlock(&defrag6_mutex); + return err; } EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 4a84b5ad9ecb..888ecd106e5f 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -57,10 +57,9 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, return; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + nf_reset(skb); + nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); + nf_conntrack_get(skb_nfct(skb)); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 57d86066a13b..97c724224da7 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -64,7 +64,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ - nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", + nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", ntohs(ih->payload_len) + sizeof(struct ipv6hdr), (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, ih->hop_limit, @@ -351,7 +351,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, struct nf_log_buf *m; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; m = nf_log_buf_open(); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 10090400c72f..eedee5d108d9 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -157,6 +157,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); + fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { @@ -180,6 +181,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) skb_dst_set(nskb, dst); + nskb->mark = fl6.flowi6_mark; + skb_reserve(nskb, hh_len + dst->header_len); ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst)); diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c new file mode 100644 index 000000000000..ebb2bf84232a --- /dev/null +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <net/inet6_hashtables.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <net/netfilter/nf_socket.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static int +extract_icmp6_fields(const struct sk_buff *skb, + unsigned int outside_hdrlen, + int *protocol, + const struct in6_addr **raddr, + const struct in6_addr **laddr, + __be16 *rport, + __be16 *lport, + struct ipv6hdr *ipv6_var) +{ + const struct ipv6hdr *inside_iph; + struct icmp6hdr *icmph, _icmph; + __be16 *ports, _ports[2]; + u8 inside_nexthdr; + __be16 inside_fragoff; + int inside_hdrlen; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) + return 1; + + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); + if (inside_iph == NULL) + return 1; + inside_nexthdr = inside_iph->nexthdr; + + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), + &inside_nexthdr, &inside_fragoff); + if (inside_hdrlen < 0) + return 1; /* hjm: Packet has no/incomplete transport layer headers. */ + + if (inside_nexthdr != IPPROTO_TCP && + inside_nexthdr != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, inside_hdrlen, + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_nexthdr; + *laddr = &inside_iph->saddr; + *lport = ports[0]; + *raddr = &inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet6_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + + return NULL; +} + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct in6_addr *daddr = NULL, *saddr = NULL; + struct ipv6hdr *iph = ipv6_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; + int thoff = 0, tproto; + + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); + if (tproto < 0) { + pr_debug("unable to find transport header in IPv6 packet, dropping\n"); + return NULL; + } + + if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + saddr = &iph->saddr; + sport = hp->source; + daddr = &iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = tproto == IPPROTO_TCP ? + thoff + __tcp_hdrlen((struct tcphdr *)hp) : + thoff + sizeof(*hp); + + } else if (tproto == IPPROTO_ICMPV6) { + struct ipv6hdr ipv6_var; + + if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, + &sport, &dport, &ipv6_var)) + return NULL; + } else { + return NULL; + } + + return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, + sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure"); diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 831f86e1ec08..d8b5b60b7d53 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; - nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); + nf_dup_ipv6(nft_net(pkt), pkt->skb, nft_hook(pkt), gw, oif); } static int nft_dup_ipv6_init(const struct nft_ctx *ctx, diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c new file mode 100644 index 000000000000..765facf03d45 --- /dev/null +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -0,0 +1,270 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_fib.h> + +#include <net/ip6_fib.h> +#include <net/ip6_route.h> + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, + const struct nft_pktinfo *pkt, + const struct net_device *dev) +{ + const struct ipv6hdr *iph = ipv6_hdr(pkt->skb); + int lookup_flags = 0; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl6->daddr = iph->daddr; + fl6->saddr = iph->saddr; + } else { + fl6->daddr = iph->saddr; + fl6->saddr = iph->daddr; + } + + if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); + } + + if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) + lookup_flags |= RT6_LOOKUP_F_HAS_SADDR; + + if (priv->flags & NFTA_FIB_F_MARK) + fl6->flowi6_mark = pkt->skb->mark; + + fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; + + return lookup_flags; +} + +static u32 __nft_fib6_eval_type(const struct nft_fib *priv, + const struct nft_pktinfo *pkt) +{ + const struct net_device *dev = NULL; + const struct nf_ipv6_ops *v6ops; + const struct nf_afinfo *afinfo; + int route_err, addrtype; + struct rt6_info *rt; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + u32 ret = 0; + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (!afinfo) + return RTN_UNREACHABLE; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = nft_in(pkt); + else if (priv->flags & NFTA_FIB_F_OIF) + dev = nft_out(pkt); + + nft_fib6_flowi_init(&fl6, priv, pkt, dev); + + v6ops = nf_get_ipv6_ops(); + if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + ret = RTN_LOCAL; + + route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt, + flowi6_to_flowi(&fl6), false); + if (route_err) + goto err; + + if (rt->rt6i_flags & RTF_REJECT) { + route_err = rt->dst.error; + dst_release(&rt->dst); + goto err; + } + + if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) + ret = RTN_ANYCAST; + else if (!dev && rt->rt6i_flags & RTF_LOCAL) + ret = RTN_LOCAL; + + dst_release(&rt->dst); + + if (ret) + return ret; + + addrtype = ipv6_addr_type(&fl6.daddr); + + if (addrtype & IPV6_ADDR_MULTICAST) + return RTN_MULTICAST; + if (addrtype & IPV6_ADDR_UNICAST) + return RTN_UNICAST; + + return RTN_UNSPEC; + err: + switch (route_err) { + case -EINVAL: + return RTN_BLACKHOLE; + case -EACCES: + return RTN_PROHIBIT; + case -EAGAIN: + return RTN_THROW; + default: + break; + } + + return RTN_UNREACHABLE; +} + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + + *dest = __nft_fib6_eval_type(priv, pkt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval_type); + +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + const struct net_device *oif = NULL; + u32 *dest = ®s->data[priv->dreg]; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + struct rt6_info *rt; + int lookup_flags; + + if (priv->flags & NFTA_FIB_F_IIF) + oif = nft_in(pkt); + else if (priv->flags & NFTA_FIB_F_OIF) + oif = nft_out(pkt); + + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); + + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && + nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { + nft_fib_store_result(dest, priv->result, pkt, + nft_in(pkt)->ifindex); + return; + } + + *dest = 0; + again: + rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); + if (rt->dst.error) + goto put_rt_err; + + /* Should not see RTF_LOCAL here */ + if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) + goto put_rt_err; + + if (oif && oif != rt->rt6i_idev->dev) { + /* multipath route? Try again with F_IFACE */ + if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6.flowi6_oif = oif->ifindex; + ip6_rt_put(rt); + goto again; + } + } + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = rt->rt6i_idev->dev->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } + + put_rt_err: + ip6_rt_put(rt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval); + +static struct nft_expr_type nft_fib6_type; + +static const struct nft_expr_ops nft_fib6_type_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib6_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib6_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib6_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib6_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib6_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib6_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib6_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, +}; + +static int __init nft_fib6_module_init(void) +{ + return nft_register_expr(&nft_fib6_type); +} + +static void __exit nft_fib6_module_exit(void) +{ + nft_unregister_expr(&nft_fib6_type); +} +module_init(nft_fib6_module_init); +module_exit(nft_fib6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 9597ffb74077..4146536e9c15 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -27,12 +27,19 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, + nft_out(pkt)); +} + +static void +nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV6); } static struct nft_expr_type nft_masq_ipv6_type; @@ -41,6 +48,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), .eval = nft_masq_ipv6_eval, .init = nft_masq_init, + .destroy = nft_masq_ipv6_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, }; @@ -77,5 +85,5 @@ module_init(nft_masq_ipv6_module_init); module_exit(nft_masq_ipv6_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index aca44e89a881..a27e424f690d 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -26,16 +26,23 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min], - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max], + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } range.flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook); + regs->verdict.code = + nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); +} + +static void +nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV6); } static struct nft_expr_type nft_redir_ipv6_type; @@ -44,6 +51,7 @@ static const struct nft_expr_ops nft_redir_ipv6_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_ipv6_eval, .init = nft_redir_init, + .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, }; @@ -71,5 +79,5 @@ module_init(nft_redir_ipv6_module_init); module_exit(nft_redir_ipv6_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 92bda9908bb9..057deeaff1cb 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -27,11 +27,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, - pkt->hook); + nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code, + nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(pkt->net, pkt->skb, pkt->hook); + nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; default: break; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 66e2d9dfc43a..9b522fa90e6d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -125,12 +126,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return PTR_ERR(dst); rt = (struct rt6_info *) dst; - np = inet6_sk(sk); - if (!np) { - err = -EBADF; - goto dst_err_out; - } - if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; else if (!fl6.flowi6_oif) @@ -165,7 +160,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } release_sock(sk); -dst_err_out: dst_release(dst); if (err) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 054a1d84fc5e..f174e76e6505 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -65,11 +65,12 @@ #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ -static struct raw_hashinfo raw_v6_hashinfo = { +struct raw_hashinfo raw_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), }; +EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { @@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, found: return sk; } +EXPORT_SYMBOL_GPL(__raw_v6_lookup); /* * 0 - deliver @@ -589,7 +591,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); + err = skb_copy_bits(skb, offset, &csum, 2); + if (err < 0) { + ip6_flush_pending_frames(sk); + goto out; + } /* in case cksum was not initialized */ if (unlikely(csum)) @@ -648,6 +654,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); if (err) @@ -774,6 +783,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; @@ -927,7 +937,8 @@ out: txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; @@ -1259,6 +1270,7 @@ struct proto rawv6_prot = { .compat_getsockopt = compat_rawv6_getsockopt, .compat_ioctl = compat_rawv6_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3815e8505ed2..e1da5b888cc4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, { struct sk_buff *prev, *next; struct net_device *dev; - int offset, end; + int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); u8 ecn; @@ -336,6 +336,10 @@ found: fq->ecn |= ecn; add_frag_mem_limit(fq->q.net, skb->truesize); + fragsize = -skb_network_offset(skb) + skb->len; + if (fragsize > fq->q.max_size) + fq->q.max_size = fragsize; + /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ @@ -495,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; IP6CB(head)->flags |= IP6SKB_FRAGMENTED; + IP6CB(head)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ skb_postpush_rcsum(head, skb_network_header(head), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1b57e11e6e0d..fb174b590fd3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -64,7 +64,7 @@ #include <net/l3mdev.h> #include <trace/events/fib6.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> @@ -98,6 +98,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void rt6_dst_from_metrics_check(struct rt6_info *rt); static int rt6_score_route(struct rt6_info *rt, int oif, int strict); +static size_t rt6_nlmsg_size(struct rt6_info *rt); +static int rt6_fill_node(struct net *net, + struct sk_buff *skb, struct rt6_info *rt, + struct in6_addr *dst, struct in6_addr *src, + int iif, int type, u32 portid, u32 seq, + unsigned int flags); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -217,6 +223,21 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, return neigh_create(&nd_tbl, daddr, dst->dev); } +static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + struct rt6_info *rt = (struct rt6_info *)dst; + + daddr = choose_neigh_daddr(rt, NULL, daddr); + if (!daddr) + return; + if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) + return; + if (ipv6_addr_is_multicast((const struct in6_addr *)daddr)) + return; + __ipv6_confirm_neigh(dev, daddr); +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .gc = ip6_dst_gc, @@ -233,6 +254,7 @@ static struct dst_ops ip6_dst_ops_template = { .redirect = rt6_do_redirect, .local_out = __ip6_local_out, .neigh_lookup = ip6_neigh_lookup, + .confirm_neigh = ip6_confirm_neigh, }; static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) @@ -527,7 +549,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL); + ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); dev_put(work->dev); kfree(work); } @@ -1359,6 +1381,7 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, const struct ipv6hdr *iph, u32 mtu) { + const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; if (rt6->rt6i_flags & RTF_LOCAL) @@ -1367,26 +1390,26 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (dst_metric_locked(dst, RTAX_MTU)) return; - dst_confirm(dst); + if (iph) { + daddr = &iph->daddr; + saddr = &iph->saddr; + } else if (sk) { + daddr = &sk->sk_v6_daddr; + saddr = &inet6_sk(sk)->saddr; + } else { + daddr = NULL; + saddr = NULL; + } + dst_confirm_neigh(dst, daddr); mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; if (!rt6_cache_allowed_for_pmtu(rt6)) { rt6_do_update_pmtu(rt6, mtu); - } else { - const struct in6_addr *daddr, *saddr; + } else if (daddr) { struct rt6_info *nrt6; - if (iph) { - daddr = &iph->daddr; - saddr = &iph->saddr; - } else if (sk) { - daddr = &sk->sk_v6_daddr; - saddr = &inet6_sk(sk)->saddr; - } else { - return; - } nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); @@ -1408,7 +1431,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1420,6 +1443,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1433,7 +1457,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) struct dst_entry *dst; ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -1463,7 +1487,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_node *fn; /* Get the "current" route for this destination and - * check if the redirect has come from approriate router. + * check if the redirect has come from appropriate router. * * RFC 4861 specifies that redirects should only be * accepted if they come from the nexthop to the target. @@ -1525,7 +1549,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1538,6 +1563,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1559,6 +1585,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1567,7 +1594,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); @@ -1826,6 +1854,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) int addr_type; int err = -EINVAL; + /* RTF_PCPU is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_PCPU) + goto out; + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) goto out; #ifndef CONFIG_IPV6_SUBTREES @@ -1892,7 +1924,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; - err = lwtunnel_build_state(dev, cfg->fc_encap_type, + err = lwtunnel_build_state(cfg->fc_encap_type, cfg->fc_encap, AF_INET6, cfg, &lwtstate); if (err) @@ -1995,8 +2027,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) It is very good, but in some (rare!) circumstances (SIT, PtP, NBMA NOARP links) it is handy to allow some exceptions. --ANK + We allow IPv4-mapped nexthops to support RFC4798-type + addressing */ - if (!(gwa_type & IPV6_ADDR_UNICAST)) + if (!(gwa_type & (IPV6_ADDR_UNICAST | + IPV6_ADDR_MAPPED))) goto out; if (cfg->fc_table) { @@ -2135,6 +2170,58 @@ int ip6_del_rt(struct rt6_info *rt) return __ip6_del_rt(rt, &info); } +static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) +{ + struct nl_info *info = &cfg->fc_nlinfo; + struct net *net = info->nl_net; + struct sk_buff *skb = NULL; + struct fib6_table *table; + int err = -ENOENT; + + if (rt == net->ipv6.ip6_null_entry) + goto out_put; + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); + + if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { + struct rt6_info *sibling, *next_sibling; + + /* prefer to send a single notification with all hops */ + skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); + if (skb) { + u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; + + if (rt6_fill_node(net, skb, rt, + NULL, NULL, 0, RTM_DELROUTE, + info->portid, seq, 0) < 0) { + kfree_skb(skb); + skb = NULL; + } else + info->skip_notify = 1; + } + + list_for_each_entry_safe(sibling, next_sibling, + &rt->rt6i_siblings, + rt6i_siblings) { + err = fib6_del(sibling, info); + if (err) + goto out_unlock; + } + } + + err = fib6_del(rt, info); +out_unlock: + write_unlock_bh(&table->tb6_lock); +out_put: + ip6_rt_put(rt); + + if (skb) { + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); + } + return err; +} + static int ip6_route_del(struct fib6_config *cfg) { struct fib6_table *table; @@ -2166,10 +2253,16 @@ static int ip6_route_del(struct fib6_config *cfg) continue; if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; + if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) + continue; dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - return __ip6_del_rt(rt, &cfg->fc_nlinfo); + /* if gateway was specified only delete the one hop */ + if (cfg->fc_flags & RTF_GATEWAY) + return __ip6_del_rt(rt, &cfg->fc_nlinfo); + + return __ip6_del_rt_siblings(rt, cfg); } } read_unlock_bh(&table->tb6_lock); @@ -2248,7 +2341,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->dst); + dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) @@ -2624,6 +2717,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_idev = idev; + rt->rt6i_protocol = RTPROT_KERNEL; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) rt->rt6i_flags |= RTF_ANYCAST; @@ -2701,13 +2795,16 @@ struct arg_dev_net { struct net *net; }; +/* called with write lock held for table with rt */ static int fib6_ifdown(struct rt6_info *rt, void *arg) { const struct arg_dev_net *adn = arg; const struct net_device *dev = adn->dev; if ((rt->dst.dev == dev || !dev) && - rt != adn->net->ipv6.ip6_null_entry) + rt != adn->net->ipv6.ip6_null_entry && + (rt->rt6i_nsiblings == 0 || + !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) return -1; return 0; @@ -2758,7 +2855,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) old MTU is the lowest MTU in the path, update the route PMTU to reflect the increase. In this case if the other nodes' MTU also have the lowest MTU, TOO BIG MESSAGE will be lead to - PMTU discouvery. + PMTU discovery. */ if (rt->dst.dev == arg->dev && dst_metric_raw(&rt->dst, RTAX_MTU) && @@ -2801,6 +2898,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2885,6 +2984,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_MULTIPATH]) { cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + + err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, + cfg->fc_mp_len); + if (err < 0) + goto errout; } if (tb[RTA_PREF]) { @@ -2898,9 +3002,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP]) cfg->fc_encap = tb[RTA_ENCAP]; - if (tb[RTA_ENCAP_TYPE]) + if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; + } + if (tb[RTA_EXPIRES]) { unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); @@ -2927,7 +3036,7 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) struct rt6_nh *nh; list_for_each_entry(nh, rt6_nh_list, next) { - pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", + pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n", &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, nh->r_cfg.fc_ifindex); } @@ -2966,13 +3075,37 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, return 0; } +static void ip6_route_mpath_notify(struct rt6_info *rt, + struct rt6_info *rt_last, + struct nl_info *info, + __u16 nlflags) +{ + /* if this is an APPEND route, then rt points to the first route + * inserted and rt_last points to last route inserted. Userspace + * wants a consistent dump of the route which starts at the first + * nexthop. Since sibling routes are always added at the end of + * the list, find the first sibling of the last route appended + */ + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { + rt = list_first_entry(&rt_last->rt6i_siblings, + struct rt6_info, + rt6i_siblings); + } + + if (rt) + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); +} + static int ip6_route_multipath_add(struct fib6_config *cfg) { + struct rt6_info *rt_notif = NULL, *rt_last = NULL; + struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; struct rt6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; + __u16 nlflags; int remaining; int attrlen; int err = 1; @@ -2981,6 +3114,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); LIST_HEAD(rt6_nh_list); + nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE; + if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) + nlflags |= NLM_F_APPEND; + remaining = cfg->fc_mp_len; rtnh = (struct rtnexthop *)cfg->fc_mp; @@ -3023,9 +3160,20 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) rtnh = rtnh_next(rtnh, &remaining); } + /* for add and replace send one notification with all nexthops. + * Skip the notification in fib6_add_rt2node and send one with + * the full route when done + */ + info->skip_notify = 1; + err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); + rt_last = nh->rt6_info; + err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc); + /* save reference to first route for notification */ + if (!rt_notif && !err) + rt_notif = nh->rt6_info; + /* nh->rt6_info is used or freed at this point, reset to NULL*/ nh->rt6_info = NULL; if (err) { @@ -3047,9 +3195,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) nhn++; } + /* success ... tell user about new route */ + ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); goto cleanup; add_errout: + /* send notification for routes that were added so that + * the delete notifications sent by ip6_route_del are + * coherent + */ + if (rt_notif) + ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); + /* Delete routes that were already added */ list_for_each_entry(nh, &rt6_nh_list, next) { if (err_nh == nh) @@ -3117,8 +3274,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) if (cfg.fc_mp) return ip6_route_multipath_del(&cfg); - else + else { + cfg.fc_delete_all_nh = 1; return ip6_route_del(&cfg); + } } static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -3136,8 +3295,19 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) return ip6_route_add(&cfg); } -static inline size_t rt6_nlmsg_size(struct rt6_info *rt) +static size_t rt6_nlmsg_size(struct rt6_info *rt) { + int nexthop_len = 0; + + if (rt->rt6i_nsiblings) { + nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + + NLA_ALIGN(sizeof(struct rtnexthop)) + + nla_total_size(16) /* RTA_GATEWAY */ + + lwtunnel_get_encap_size(rt->dst.lwtstate); + + nexthop_len *= rt->rt6i_nsiblings; + } + return NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(16) /* RTA_SRC */ + nla_total_size(16) /* RTA_DST */ @@ -3151,14 +3321,71 @@ static inline size_t rt6_nlmsg_size(struct rt6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->dst.lwtstate); + + lwtunnel_get_encap_size(rt->dst.lwtstate) + + nexthop_len; +} + +static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, + unsigned int *flags, bool skip_oif) +{ + if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { + *flags |= RTNH_F_LINKDOWN; + if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) + *flags |= RTNH_F_DEAD; + } + + if (rt->rt6i_flags & RTF_GATEWAY) { + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) + goto nla_put_failure; + } + + /* not needed for multipath encoding b/c it has a rtnexthop struct */ + if (!skip_oif && rt->dst.dev && + nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) + goto nla_put_failure; + + if (rt->dst.lwtstate && + lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +/* add multipath next hop */ +static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) +{ + struct rtnexthop *rtnh; + unsigned int flags = 0; + + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (!rtnh) + goto nla_put_failure; + + rtnh->rtnh_hops = 0; + rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; + + if (rt6_nexthop_info(skb, rt, &flags, true) < 0) + goto nla_put_failure; + + rtnh->rtnh_flags = flags; + + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; + + return 0; + +nla_put_failure: + return -EMSGSIZE; } static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, - int prefix, int nowait, unsigned int flags) + unsigned int flags) { u32 metrics[RTAX_MAX]; struct rtmsg *rtm; @@ -3166,13 +3393,6 @@ static int rt6_fill_node(struct net *net, long expires; u32 table; - if (prefix) { /* user wants prefix routes only */ - if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { - /* success since this is not a prefix route */ - return 1; - } - } - nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -3207,16 +3427,13 @@ static int rt6_fill_node(struct net *net, } else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; + else if (rt->rt6i_flags & RTF_ANYCAST) + rtm->rtm_type = RTN_ANYCAST; else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) rtm->rtm_type = RTN_LOCAL; else rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; - if (!netif_carrier_ok(rt->dst.dev)) { - rtm->rtm_flags |= RTNH_F_LINKDOWN; - if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) - rtm->rtm_flags |= RTNH_F_DEAD; - } rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->rt6i_protocol; if (rt->rt6i_flags & RTF_DYNAMIC) @@ -3250,19 +3467,12 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait, - portid); - - if (err <= 0) { - if (!nowait) { - if (err == 0) - return 0; - goto nla_put_failure; - } else { - if (err == -EMSGSIZE) - goto nla_put_failure; - } - } + int err = ip6mr_get_route(net, skb, rtm, portid); + + if (err == 0) + return 0; + if (err < 0) + goto nla_put_failure; } else #endif if (nla_put_u32(skb, RTA_IIF, iif)) @@ -3287,17 +3497,35 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; - if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) - goto nla_put_failure; - } - - if (rt->dst.dev && - nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) - goto nla_put_failure; if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) goto nla_put_failure; + /* For multipath routes, walk the siblings list and add + * each as a nexthop within RTA_MULTIPATH. + */ + if (rt->rt6i_nsiblings) { + struct rt6_info *sibling, *next_sibling; + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + if (rt6_add_nexthop(skb, rt) < 0) + goto nla_put_failure; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->rt6i_siblings, rt6i_siblings) { + if (rt6_add_nexthop(skb, sibling) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, mp); + } else { + if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) + goto nla_put_failure; + } + expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) @@ -3306,7 +3534,6 @@ static int rt6_fill_node(struct net *net, if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) goto nla_put_failure; - lwtunnel_fill_encap(skb, rt->dst.lwtstate); nlmsg_end(skb, nlh); return 0; @@ -3319,18 +3546,26 @@ nla_put_failure: int rt6_dump_route(struct rt6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; - int prefix; + struct net *net = arg->net; + + if (rt == net->ipv6.ip6_null_entry) + return 0; if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); - prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; - } else - prefix = 0; - return rt6_fill_node(arg->net, + /* user wants prefix routes only */ + if (rtm->rtm_flags & RTM_F_PREFIX && + !(rt->rt6i_flags & RTF_PREFIX_RT)) { + /* success since this is not a prefix route */ + return 1; + } + } + + return rt6_fill_node(net, arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, - prefix, 0, NLM_F_MULTI); + NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) @@ -3375,6 +3610,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; @@ -3398,6 +3639,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } + if (rt == net->ipv6.ip6_null_entry) { + err = rt->dst.error; + ip6_rt_put(rt); + goto errout; + } + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); @@ -3405,17 +3652,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) goto errout; } - /* Reserve room for dummy headers, this skb can pass - through good chunk of routing engine. - */ - skb_reset_mac_header(skb); - skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, 0, 0); + nlh->nlmsg_seq, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -3442,7 +3683,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, 0, 0, nlm_flags); + event, info->portid, seq, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c new file mode 100644 index 000000000000..5f44ffed2576 --- /dev/null +++ b/net/ipv6/seg6.c @@ -0,0 +1,500 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/slab.h> + +#include <net/ipv6.h> +#include <net/protocol.h> + +#include <net/seg6.h> +#include <net/genetlink.h> +#include <linux/seg6.h> +#include <linux/seg6_genl.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) +{ + int trailing; + unsigned int tlv_offset; + + if (srh->type != IPV6_SRCRT_TYPE_4) + return false; + + if (((srh->hdrlen + 1) << 3) != len) + return false; + + if (srh->segments_left != srh->first_segment) + return false; + + tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); + + trailing = len - tlv_offset; + if (trailing < 0) + return false; + + while (trailing) { + struct sr6_tlv *tlv; + unsigned int tlv_len; + + if (trailing < sizeof(*tlv)) + return false; + + tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset); + tlv_len = sizeof(*tlv) + tlv->len; + + trailing -= tlv_len; + if (trailing < 0) + return false; + + tlv_offset += tlv_len; + } + + return true; +} + +static struct genl_family seg6_genl_family; + +static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { + [SEG6_ATTR_DST] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [SEG6_ATTR_DSTLEN] = { .type = NLA_S32, }, + [SEG6_ATTR_HMACKEYID] = { .type = NLA_U32, }, + [SEG6_ATTR_SECRET] = { .type = NLA_BINARY, }, + [SEG6_ATTR_SECRETLEN] = { .type = NLA_U8, }, + [SEG6_ATTR_ALGID] = { .type = NLA_U8, }, + [SEG6_ATTR_HMACINFO] = { .type = NLA_NESTED, }, +}; + +#ifdef CONFIG_IPV6_SEG6_HMAC + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct seg6_pernet_data *sdata; + struct seg6_hmac_info *hinfo; + u32 hmackeyid; + char *secret; + int err = 0; + u8 algid; + u8 slen; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_HMACKEYID] || + !info->attrs[SEG6_ATTR_SECRETLEN] || + !info->attrs[SEG6_ATTR_ALGID]) + return -EINVAL; + + hmackeyid = nla_get_u32(info->attrs[SEG6_ATTR_HMACKEYID]); + slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]); + algid = nla_get_u8(info->attrs[SEG6_ATTR_ALGID]); + + if (hmackeyid == 0) + return -EINVAL; + + if (slen > SEG6_HMAC_SECRET_LEN) + return -EINVAL; + + mutex_lock(&sdata->lock); + hinfo = seg6_hmac_info_lookup(net, hmackeyid); + + if (!slen) { + if (!hinfo) + err = -ENOENT; + + err = seg6_hmac_info_del(net, hmackeyid); + + goto out_unlock; + } + + if (!info->attrs[SEG6_ATTR_SECRET]) { + err = -EINVAL; + goto out_unlock; + } + + if (hinfo) { + err = seg6_hmac_info_del(net, hmackeyid); + if (err) + goto out_unlock; + } + + secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]); + + hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL); + if (!hinfo) { + err = -ENOMEM; + goto out_unlock; + } + + memcpy(hinfo->secret, secret, slen); + hinfo->slen = slen; + hinfo->alg_id = algid; + hinfo->hmackeyid = hmackeyid; + + err = seg6_hmac_info_add(net, hmackeyid, hinfo); + if (err) + kfree(hinfo); + +out_unlock: + mutex_unlock(&sdata->lock); + return err; +} + +#else + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + return -ENOTSUPP; +} + +#endif + +static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *val, *t_old, *t_new; + struct seg6_pernet_data *sdata; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_DST]) + return -EINVAL; + + val = nla_data(info->attrs[SEG6_ATTR_DST]); + t_new = kmemdup(val, sizeof(*val), GFP_KERNEL); + if (!t_new) + return -ENOMEM; + + mutex_lock(&sdata->lock); + + t_old = sdata->tun_src; + rcu_assign_pointer(sdata->tun_src, t_new); + + mutex_unlock(&sdata->lock); + + synchronize_net(); + kfree(t_old); + + return 0; +} + +static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *tun_src; + struct sk_buff *msg; + void *hdr; + + msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC); + if (!hdr) + goto free_msg; + + rcu_read_lock(); + tun_src = rcu_dereference(seg6_pernet(net)->tun_src); + + if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src)) + goto nla_put_failure; + + rcu_read_unlock(); + + genlmsg_end(msg, hdr); + genlmsg_reply(msg, info); + + return 0; + +nla_put_failure: + rcu_read_unlock(); + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -ENOMEM; +} + +#ifdef CONFIG_IPV6_SEG6_HMAC + +static int __seg6_hmac_fill_info(struct seg6_hmac_info *hinfo, + struct sk_buff *msg) +{ + if (nla_put_u32(msg, SEG6_ATTR_HMACKEYID, hinfo->hmackeyid) || + nla_put_u8(msg, SEG6_ATTR_SECRETLEN, hinfo->slen) || + nla_put(msg, SEG6_ATTR_SECRET, hinfo->slen, hinfo->secret) || + nla_put_u8(msg, SEG6_ATTR_ALGID, hinfo->alg_id)) + return -1; + + return 0; +} + +static int __seg6_genl_dumphmac_element(struct seg6_hmac_info *hinfo, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &seg6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (__seg6_hmac_fill_info(hinfo, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int seg6_genl_dumphmac_start(struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct seg6_pernet_data *sdata; + struct rhashtable_iter *iter; + + sdata = seg6_pernet(net); + iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&sdata->hmac_infos, iter); + + return 0; +} + +static int seg6_genl_dumphmac_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + + kfree(iter); + + return 0; +} + +static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + struct net *net = sock_net(skb->sk); + struct seg6_pernet_data *sdata; + struct seg6_hmac_info *hinfo; + int ret; + + sdata = seg6_pernet(net); + + ret = rhashtable_walk_start(iter); + if (ret && ret != -EAGAIN) + goto done; + + for (;;) { + hinfo = rhashtable_walk_next(iter); + + if (IS_ERR(hinfo)) { + if (PTR_ERR(hinfo) == -EAGAIN) + continue; + ret = PTR_ERR(hinfo); + goto done; + } else if (!hinfo) { + break; + } + + ret = __seg6_genl_dumphmac_element(hinfo, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, SEG6_CMD_DUMPHMAC); + if (ret) + goto done; + } + + ret = skb->len; + +done: + rhashtable_walk_stop(iter); + return ret; +} + +#else + +static int seg6_genl_dumphmac_start(struct netlink_callback *cb) +{ + return 0; +} + +static int seg6_genl_dumphmac_done(struct netlink_callback *cb) +{ + return 0; +} + +static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -ENOTSUPP; +} + +#endif + +static int __net_init seg6_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata; + + sdata = kzalloc(sizeof(*sdata), GFP_KERNEL); + if (!sdata) + return -ENOMEM; + + mutex_init(&sdata->lock); + + sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL); + if (!sdata->tun_src) { + kfree(sdata); + return -ENOMEM; + } + + net->ipv6.seg6_data = sdata; + +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_net_init(net); +#endif + + return 0; +} + +static void __net_exit seg6_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_net_exit(net); +#endif + + kfree(sdata->tun_src); + kfree(sdata); +} + +static struct pernet_operations ip6_segments_ops = { + .init = seg6_net_init, + .exit = seg6_net_exit, +}; + +static const struct genl_ops seg6_genl_ops[] = { + { + .cmd = SEG6_CMD_SETHMAC, + .doit = seg6_genl_sethmac, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_DUMPHMAC, + .start = seg6_genl_dumphmac_start, + .dumpit = seg6_genl_dumphmac, + .done = seg6_genl_dumphmac_done, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_SET_TUNSRC, + .doit = seg6_genl_set_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_GET_TUNSRC, + .doit = seg6_genl_get_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +static struct genl_family seg6_genl_family __ro_after_init = { + .hdrsize = 0, + .name = SEG6_GENL_NAME, + .version = SEG6_GENL_VERSION, + .maxattr = SEG6_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = seg6_genl_ops, + .n_ops = ARRAY_SIZE(seg6_genl_ops), + .module = THIS_MODULE, +}; + +int __init seg6_init(void) +{ + int err = -ENOMEM; + + err = genl_register_family(&seg6_genl_family); + if (err) + goto out; + + err = register_pernet_subsys(&ip6_segments_ops); + if (err) + goto out_unregister_genl; + +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL + err = seg6_iptunnel_init(); + if (err) + goto out_unregister_pernet; +#endif + +#ifdef CONFIG_IPV6_SEG6_HMAC + err = seg6_hmac_init(); + if (err) + goto out_unregister_iptun; +#endif + + pr_info("Segment Routing with IPv6\n"); + +out: + return err; +#ifdef CONFIG_IPV6_SEG6_HMAC +out_unregister_iptun: +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_iptunnel_exit(); +#endif +#endif +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); +#endif +out_unregister_genl: + genl_unregister_family(&seg6_genl_family); + goto out; +} + +void seg6_exit(void) +{ +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_exit(); +#endif +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_iptunnel_exit(); +#endif + unregister_pernet_subsys(&ip6_segments_ops); + genl_unregister_family(&seg6_genl_family); +} diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c new file mode 100644 index 000000000000..f950cb53d5e3 --- /dev/null +++ b/net/ipv6/seg6_hmac.c @@ -0,0 +1,448 @@ +/* + * SR-IPv6 implementation -- HMAC functions + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/icmpv6.h> +#include <linux/mroute6.h> +#include <linux/slab.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/rawv6.h> +#include <net/ndisc.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/xfrm.h> + +#include <linux/cryptohash.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <net/seg6.h> +#include <net/genetlink.h> +#include <net/seg6_hmac.h> +#include <linux/random.h> + +static DEFINE_PER_CPU(char [SEG6_HMAC_RING_SIZE], hmac_ring); + +static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct seg6_hmac_info *hinfo = obj; + + return (hinfo->hmackeyid != *(__u32 *)arg->key); +} + +static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo) +{ + kfree_rcu(hinfo, rcu); +} + +static void seg6_free_hi(void *ptr, void *arg) +{ + struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr; + + if (hinfo) + seg6_hinfo_release(hinfo); +} + +static const struct rhashtable_params rht_params = { + .head_offset = offsetof(struct seg6_hmac_info, node), + .key_offset = offsetof(struct seg6_hmac_info, hmackeyid), + .key_len = sizeof(u32), + .automatic_shrinking = true, + .obj_cmpfn = seg6_hmac_cmpfn, +}; + +static struct seg6_hmac_algo hmac_algos[] = { + { + .alg_id = SEG6_HMAC_ALGO_SHA1, + .name = "hmac(sha1)", + }, + { + .alg_id = SEG6_HMAC_ALGO_SHA256, + .name = "hmac(sha256)", + }, +}; + +static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) +{ + struct sr6_tlv_hmac *tlv; + + if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5) + return NULL; + + if (!sr_has_hmac(srh)) + return NULL; + + tlv = (struct sr6_tlv_hmac *) + ((char *)srh + ((srh->hdrlen + 1) << 3) - 40); + + if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38) + return NULL; + + return tlv; +} + +static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) +{ + struct seg6_hmac_algo *algo; + int i, alg_count; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + if (algo->alg_id == alg_id) + return algo; + } + + return NULL; +} + +static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, + u8 *output, int outlen) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int ret, dgsize; + + algo = __hmac_get_algo(hinfo->alg_id); + if (!algo) + return -ENOENT; + + tfm = *this_cpu_ptr(algo->tfms); + + dgsize = crypto_shash_digestsize(tfm); + if (dgsize > outlen) { + pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", + dgsize, outlen); + return -ENOMEM; + } + + ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); + goto failed; + } + + shash = *this_cpu_ptr(algo->shashs); + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, text, psize, output); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); + goto failed; + } + + return dgsize; + +failed: + return ret; +} + +int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, + struct in6_addr *saddr, u8 *output) +{ + __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); + u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; + int plen, i, dgsize, wrsize; + char *ring, *off; + + /* a 160-byte buffer for digest output allows to store highest known + * hash function (RadioGatun) with up to 1216 bits + */ + + /* saddr(16) + first_seg(1) + flags(1) + keyid(4) + seglist(16n) */ + plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; + + /* this limit allows for 14 segments */ + if (plen >= SEG6_HMAC_RING_SIZE) + return -EMSGSIZE; + + /* Let's build the HMAC text on the ring buffer. The text is composed + * as follows, in order: + * + * 1. Source IPv6 address (128 bits) + * 2. first_segment value (8 bits) + * 3. Flags (8 bits) + * 4. HMAC Key ID (32 bits) + * 5. All segments in the segments list (n * 128 bits) + */ + + local_bh_disable(); + ring = this_cpu_ptr(hmac_ring); + off = ring; + + /* source address */ + memcpy(off, saddr, 16); + off += 16; + + /* first_segment value */ + *off++ = hdr->first_segment; + + /* flags */ + *off++ = hdr->flags; + + /* HMAC Key ID */ + memcpy(off, &hmackeyid, 4); + off += 4; + + /* all segments in the list */ + for (i = 0; i < hdr->first_segment + 1; i++) { + memcpy(off, hdr->segments + i, 16); + off += 16; + } + + dgsize = __do_hmac(hinfo, ring, plen, tmp_out, + SEG6_HMAC_MAX_DIGESTSIZE); + local_bh_enable(); + + if (dgsize < 0) + return dgsize; + + wrsize = SEG6_HMAC_FIELD_LEN; + if (wrsize > dgsize) + wrsize = dgsize; + + memset(output, 0, SEG6_HMAC_FIELD_LEN); + memcpy(output, tmp_out, wrsize); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_compute); + +/* checks if an incoming SR-enabled packet's HMAC status matches + * the incoming policy. + * + * called with rcu_read_lock() + */ +bool seg6_hmac_validate_skb(struct sk_buff *skb) +{ + u8 hmac_output[SEG6_HMAC_FIELD_LEN]; + struct net *net = dev_net(skb->dev); + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + struct ipv6_sr_hdr *srh; + struct inet6_dev *idev; + + idev = __in6_dev_get(skb->dev); + + srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + tlv = seg6_get_tlv_hmac(srh); + + /* mandatory check but no tlv */ + if (idev->cnf.seg6_require_hmac > 0 && !tlv) + return false; + + /* no check */ + if (idev->cnf.seg6_require_hmac < 0) + return true; + + /* check only if present */ + if (idev->cnf.seg6_require_hmac == 0 && !tlv) + return true; + + /* now, seg6_require_hmac >= 0 && tlv */ + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + return false; + + if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) + return false; + + if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + return false; + + return true; +} +EXPORT_SYMBOL(seg6_hmac_validate_skb); + +/* called with rcu_read_lock() */ +struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + + return hinfo; +} +EXPORT_SYMBOL(seg6_hmac_info_lookup); + +int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + int err; + + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_add); + +int seg6_hmac_info_del(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + int err = -ENOENT; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + if (!hinfo) + goto out; + + err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + if (err) + goto out; + + seg6_hinfo_release(hinfo); + +out: + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_del); + +int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh) +{ + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + int err = -ENOENT; + + tlv = seg6_get_tlv_hmac(srh); + if (!tlv) + return -EINVAL; + + rcu_read_lock(); + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + goto out; + + memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN); + err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(seg6_push_hmac); + +static int seg6_hmac_init_algo(void) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int i, alg_count, cpu; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + + for (i = 0; i < alg_count; i++) { + struct crypto_shash **p_tfm; + int shsize; + + algo = &hmac_algos[i]; + algo->tfms = alloc_percpu(struct crypto_shash *); + if (!algo->tfms) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + p_tfm = per_cpu_ptr(algo->tfms, cpu); + *p_tfm = tfm; + } + + p_tfm = raw_cpu_ptr(algo->tfms); + tfm = *p_tfm; + + shsize = sizeof(*shash) + crypto_shash_descsize(tfm); + + algo->shashs = alloc_percpu(struct shash_desc *); + if (!algo->shashs) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + shash = kzalloc_node(shsize, GFP_KERNEL, + cpu_to_node(cpu)); + if (!shash) + return -ENOMEM; + *per_cpu_ptr(algo->shashs, cpu) = shash; + } + } + + return 0; +} + +int __init seg6_hmac_init(void) +{ + return seg6_hmac_init_algo(); +} +EXPORT_SYMBOL(seg6_hmac_init); + +int __net_init seg6_hmac_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_init(&sdata->hmac_infos, &rht_params); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_net_init); + +void seg6_hmac_exit(void) +{ + struct seg6_hmac_algo *algo = NULL; + int i, alg_count, cpu; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + for_each_possible_cpu(cpu) { + struct crypto_shash *tfm; + struct shash_desc *shash; + + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); + free_percpu(algo->shashs); + } +} +EXPORT_SYMBOL(seg6_hmac_exit); + +void __net_exit seg6_hmac_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL); +} +EXPORT_SYMBOL(seg6_hmac_net_exit); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c new file mode 100644 index 000000000000..85582257d3af --- /dev/null +++ b/net/ipv6/seg6_iptunnel.c @@ -0,0 +1,436 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/lwtunnel.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include <net/ip6_fib.h> +#include <net/route.h> +#include <net/seg6.h> +#include <linux/seg6.h> +#include <linux/seg6_iptunnel.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> +#ifdef CONFIG_DST_CACHE +#include <net/dst_cache.h> +#endif +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +struct seg6_lwt { +#ifdef CONFIG_DST_CACHE + struct dst_cache cache; +#endif + struct seg6_iptunnel_encap tuninfo[0]; +}; + +static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_lwt *)lwt->data; +} + +static inline struct seg6_iptunnel_encap * +seg6_encap_lwtunnel(struct lwtunnel_state *lwt) +{ + return seg6_lwt_lwtunnel(lwt)->tuninfo; +} + +static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { + [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, +}; + +static int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) +{ + struct seg6_iptunnel_encap *data; + struct nlattr *nla; + int len; + + len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, tuninfo, len); + + return 0; +} + +static void set_tun_src(struct net *net, struct net_device *dev, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct in6_addr *tun_src; + + rcu_read_lock(); + + tun_src = rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, + saddr); + } + + rcu_read_unlock(); +} + +/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + struct ipv6hdr *hdr, *inner_hdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, tot_len, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + + err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, tot_len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + hdr = ipv6_hdr(skb); + + /* inherit tc, flowlabel and hlim + * hlim will be decremented in ip6_forward() afterwards and + * decapsulation will overwrite inner hlim with outer hlim + */ + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = NEXTHDR_IPV6; + + hdr->daddr = isrh->segments[isrh->first_segment]; + set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, tot_len); + + return 0; +} + +/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +#ifdef CONFIG_IPV6_SEG6_INLINE +static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct ipv6hdr *hdr, *oldhdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + + err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), + sizeof(struct ipv6hdr)); + + skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + + memmove(hdr, oldhdr, sizeof(*hdr)); + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = hdr->nexthdr; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh->segments[0] = hdr->daddr; + hdr->daddr = isrh->segments[isrh->first_segment]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + struct net *net = dev_net(skb_dst(skb)->dev); + + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); + + return 0; +} +#endif + +static int seg6_do_srh(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct seg6_iptunnel_encap *tinfo; + int err = 0; + + tinfo = seg6_encap_lwtunnel(dst->lwtstate); + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + switch (tinfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + err = seg6_do_srh_inline(skb, tinfo->srh); + skb_reset_inner_headers(skb); + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + err = seg6_do_srh_encap(skb, tinfo->srh); + break; + } + + if (err) + return err; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_set_inner_protocol(skb, skb->protocol); + + return 0; +} + +static int seg6_input(struct sk_buff *skb) +{ + int err; + + err = seg6_do_srh(skb); + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); +} + +static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; + int err = -EINVAL; + + err = seg6_do_srh(skb); + if (unlikely(err)) + goto drop; + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +#ifdef CONFIG_DST_CACHE + preempt_disable(); + dst = dst_cache_get(&slwt->cache); + preempt_enable(); +#endif + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + +#ifdef CONFIG_DST_CACHE + preempt_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + preempt_enable(); +#endif + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); +drop: + kfree_skb(skb); + return err; +} + +static int seg6_build_state(struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; + struct seg6_iptunnel_encap *tuninfo; + struct lwtunnel_state *newts; + int tuninfo_len, min_size; + struct seg6_lwt *slwt; + int err; + + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy); + + if (err < 0) + return err; + + if (!tb[SEG6_IPTUNNEL_SRH]) + return -EINVAL; + + tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); + tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); + + /* tuninfo must contain at least the iptunnel encap structure, + * the SRH and one segment + */ + min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + + sizeof(struct in6_addr); + if (tuninfo_len < min_size) + return -EINVAL; + + switch (tuninfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + break; + default: + return -EINVAL; + } + + /* verify that SRH is consistent */ + if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo))) + return -EINVAL; + + newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_lwt_lwtunnel(newts); + +#ifdef CONFIG_DST_CACHE + err = dst_cache_init(&slwt->cache, GFP_KERNEL); + if (err) { + kfree(newts); + return err; + } +#endif + + memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + + newts->type = LWTUNNEL_ENCAP_SEG6; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); + + *ts = newts; + + return 0; +} + +#ifdef CONFIG_DST_CACHE +static void seg6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); +} +#endif + +static int seg6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) + return -EMSGSIZE; + + return 0; +} + +static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); +} + +static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); + struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); + + if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) + return 1; + + return memcmp(a_hdr, b_hdr, len); +} + +static const struct lwtunnel_encap_ops seg6_iptun_ops = { + .build_state = seg6_build_state, +#ifdef CONFIG_DST_CACHE + .destroy_state = seg6_destroy_state, +#endif + .output = seg6_output, + .input = seg6_input, + .fill_encap = seg6_fill_encap_info, + .get_encap_size = seg6_encap_nlsize, + .cmp_encap = seg6_encap_cmp, + .owner = THIS_MODULE, +}; + +int __init seg6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} + +void seg6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b1cdf8009d29..99853c6e33a8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -31,7 +31,7 @@ #include <linux/if_arp.h> #include <linux/icmp.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> @@ -76,7 +76,7 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; -static int sit_net_id __read_mostly; +static unsigned int sit_net_id __read_mostly; struct sit_net { struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; @@ -1318,23 +1318,11 @@ done: return err; } -static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - int t_hlen = tunnel->hlen + sizeof(struct iphdr); - - if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, - .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, }; @@ -1365,6 +1353,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = 0xFFF8 - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; @@ -1390,6 +1380,7 @@ static int ipip6_tunnel_init(struct net_device *dev) err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (err) { free_percpu(dev->tstats); + dev->tstats = NULL; return err; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 59c483937aec..895ff650db43 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -16,7 +16,7 @@ #include <linux/tcp.h> #include <linux/random.h> -#include <linux/cryptohash.h> +#include <linux/siphash.h> #include <linux/kernel.h> #include <net/ipv6.h> #include <net/tcp.h> @@ -24,7 +24,7 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; +static siphash_key_t syncookie6_secret[2] __read_mostly; /* RFC 2460, Section 8.3: * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] @@ -41,30 +41,27 @@ static __u16 const msstab[] = { 9000 - 60, }; -static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); - -static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, +static u32 cookie_hash(const struct in6_addr *saddr, + const struct in6_addr *daddr, __be16 sport, __be16 dport, u32 count, int c) { - __u32 *tmp; + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + u32 count; + __be16 sport; + __be16 dport; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *saddr, + .daddr = *daddr, + .count = count, + .sport = sport, + .dport = dport + }; net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); - - tmp = this_cpu_ptr(ipv6_cookie_scratch); - - /* - * we have 320 bits of information to hash, copy in the remaining - * 192 bits required for sha_transform, from the syncookie6_secret - * and overwrite the digest with the secret - */ - memcpy(tmp + 10, syncookie6_secret[c], 44); - memcpy(tmp, saddr, 16); - memcpy(tmp + 4, daddr, 16); - tmp[8] = ((__force u32)sport << 16) + (__force u32)dport; - tmp[9] = count; - sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5); - - return tmp[17]; + return siphash(&combined, offsetofend(typeof(combined), dport), + &syncookie6_secret[c]); } static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, @@ -209,6 +206,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->ts_off = 0; /* * We need to lookup the dst_entry to get the correct window size. @@ -227,6 +225,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b9f1fee9a886..49fa2e8c3fa9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) +static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); + tcp_hdr(skb)->source, tsoff); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -122,7 +122,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct flowi6 fl6; struct dst_entry *dst; int addr_type; + u32 seq; int err; + struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -148,8 +150,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 0x1; + if (ipv6_addr_any(&usin->sin6_addr)) { + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; + } addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -188,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * TCP over IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; @@ -233,6 +240,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -257,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL); - if (tcp_death_row.sysctl_tw_recycle && + if (tcp_death_row->sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); @@ -272,17 +280,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - err = inet6_hash_connect(&tcp_death_row, sk); + err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; sk_set_txhash(sk); - if (!tp->write_seq && likely(!tp->repair)) - tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport); + if (likely(!tp->repair)) { + seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport, + &tp->tsoffset); + if (!tp->write_seq) + tp->write_seq = seq; + } + + if (tcp_fastopen_defer_connect(sk, &err)) + return err; + if (err) + goto late_failure; err = tcp_connect(sk); if (err) @@ -292,7 +309,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); - __sk_dst_reset(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; @@ -375,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } @@ -397,7 +415,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, - &tp->tsq_flags)) + &sk->sk_tsq_flags)) sock_hold(sk); goto out; } @@ -467,7 +485,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -828,6 +846,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST @@ -837,7 +856,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); + ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); @@ -954,7 +973,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_time_stamp + tcp_rsk(req)->ts_off, + req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } @@ -987,6 +1007,16 @@ drop: return 0; /* don't send reset */ } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, @@ -1138,10 +1168,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_ca_openreq_child(newsk, dst); tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric_advmss(dst); - if (tcp_sk(sk)->rx_opt.user_mss && - tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) - newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); tcp_initialize_rcv_mss(newsk); @@ -1178,8 +1205,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) + if (newnp->pktoptions) { + tcp_v6_restore_cb(newnp->pktoptions); skb_set_owner_r(newnp->pktoptions, newsk); + } } } @@ -1194,16 +1223,6 @@ out: return NULL; } -static void tcp_v6_restore_cb(struct sk_buff *skb) -{ - /* We need to move header back to the beginning if xfrm6_policy_check() - * and tcp_v6_fill_cb() are going to be called again. - * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. - */ - memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, - sizeof(struct inet6_skb_parm)); -} - /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -1616,7 +1635,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, @@ -1647,7 +1665,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), - .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, @@ -1740,7 +1757,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) srcp = ntohs(inet->inet_sport); if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; @@ -1884,6 +1901,7 @@ struct proto tcpv6_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, @@ -1944,7 +1962,7 @@ static void __net_exit tcpv6_net_exit(struct net *net) static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6); + inet_twsk_purge(&tcp_hashinfo, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e4a8000d59ad..e28082f0a307 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -35,7 +35,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <net/addrconf.h> #include <net/ndisc.h> @@ -55,6 +55,16 @@ #include <trace/events/skb.h> #include "udp_impl.h" +static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if defined(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_udp_l3mdev_accept && + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return true; +#endif + return false; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -103,7 +113,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; - return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); + return udp_lib_get_port(sk, snum, hash2_nulladdr); } static void udp_v6_rehash(struct sock *sk) @@ -118,7 +128,7 @@ static void udp_v6_rehash(struct sock *sk) static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, - int dif) + int dif, bool exact_dif) { int score; struct inet_sock *inet; @@ -149,7 +159,7 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score++; @@ -165,7 +175,7 @@ static int compute_score(struct sock *sk, struct net *net, static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, + bool exact_dif, struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -176,7 +186,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -212,6 +222,7 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + bool exact_dif = udp6_lib_exact_dif_match(net, skb); int score, badness, matches = 0, reuseport = 0; u32 hash = 0; @@ -223,7 +234,7 @@ struct sock *__udp6_lib_lookup(struct net *net, goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; @@ -239,7 +250,8 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, skb); + exact_dif, hslot2, + skb); } return result; } @@ -247,7 +259,8 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, + exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -302,7 +315,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); * Does increment socket refcount. */ #if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ - IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \ + IS_ENABLED(CONFIG_NF_SOCKET_IPV6) struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { @@ -334,7 +348,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; int is_udp4; - bool slow; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -344,8 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -364,7 +376,8 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { + if (copied < ulen || peeking || + (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { checksum_valid = !udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; @@ -378,7 +391,6 @@ try_again: goto csum_copy_err; } if (unlikely(err)) { - trace_kfree_skb(skb, udpv6_recvmsg); if (!peeked) { atomic_inc(&sk->sk_drops); if (is_udp4) @@ -388,7 +400,7 @@ try_again: UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - skb_free_datagram_locked(sk, skb); + kfree_skb(skb); return err; } if (!peeked) { @@ -427,7 +439,7 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); } else { if (np->rxopt.all) @@ -438,12 +450,11 @@ try_again: if (flags & MSG_TRUNC) err = ulen; - __skb_free_datagram_locked(sk, skb, peeking ? -err : err); + skb_consume_udp(sk, skb, peeking ? -err : err); return err; csum_copy_err: - slow = lock_sock_fast(sk); - if (!skb_kill_datagram(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { if (is_udp4) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -456,7 +467,7 @@ csum_copy_err: UDP_MIB_INERRORS, is_udplite); } } - unlock_sock_fast(sk, slow); + kfree_skb(skb); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -522,9 +533,11 @@ int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); + } else { + sk_mark_napi_id_once(sk, skb); } - rc = __sock_queue_rcv_skb(sk, skb); + rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -536,6 +549,7 @@ int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + return 0; } @@ -557,7 +571,6 @@ EXPORT_SYMBOL(udpv6_encap_enable); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); - int rc; int is_udplite = IS_UDPLITE(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -623,25 +636,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; udp_csum_pull_header(skb); - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - __UDP6_INC_STATS(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } skb_dst_drop(skb); - bh_lock_sock(sk); - rc = 0; - if (!sock_owned_by_user(sk)) - rc = __udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { - bh_unlock_sock(sk); - goto drop; - } - bh_unlock_sock(sk); - - return rc; + return __udpv6_queue_rcv_skb(sk, skb); csum_error: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -1037,6 +1035,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = -1; ipc6.tclass = -1; ipc6.dontfrag = -1; + sockc.tsflags = sk->sk_tsflags; /* destination address check */ if (sin6) { @@ -1048,6 +1047,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; daddr = &sin6->sin6_addr; + if (ipv6_addr_any(daddr) && + ipv6_addr_v4mapped(&np->saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + daddr); break; case AF_INET: goto do_udp_sendmsg; @@ -1156,7 +1159,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; - sockc.tsflags = sk->sk_tsflags; + fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; @@ -1309,7 +1312,8 @@ out: return err; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; @@ -1434,12 +1438,12 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, + .init = udp_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = __udpv6_queue_rcv_skb, .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 2f5101a12283..2784cc363f2b 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -45,10 +45,11 @@ struct proto udplitev6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = __udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b5789562aded..08a807b29298 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -33,6 +33,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_transport_finish(struct sk_buff *skb, int async) { + struct xfrm_offload *xo = xfrm_offload(skb); + skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; @@ -44,6 +46,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); + if (xo && (xo->flags & XFRM_GRO)) { + skb_mac_header_rebuild(skb); + return -1; + } + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_rcv_finish); @@ -69,18 +76,9 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, struct xfrm_state *x = NULL; int i = 0; - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - - sp = secpath_dup(skb->sp); - if (!sp) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); - goto drop; - } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; + if (secpath_set(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 4e344105b3fd..4439ee44c8b0 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -47,6 +47,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); + struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -55,7 +56,8 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - skb_reset_transport_header(skb); + if (!xo || !(xo->flags & XFRM_GRO)) + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e0f71c01d728..79651bc71bf0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -25,8 +25,6 @@ #include <net/mip6.h> #endif -static struct xfrm_policy_afinfo xfrm6_policy_afinfo; - static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) @@ -220,7 +218,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - xfrm6_policy_afinfo.garbage_collect(net); + xfrm_garbage_collect_deferred(net); return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } @@ -291,8 +289,7 @@ static struct dst_ops xfrm6_dst_ops_template = { .gc_thresh = INT_MAX, }; -static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { - .family = AF_INET6, +static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, @@ -305,7 +302,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { static int __init xfrm6_policy_init(void) { - return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); + return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo, AF_INET6); } static void xfrm6_policy_fini(void) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 54d13f8dbbae..b2dc8ce49378 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -162,9 +162,8 @@ static const struct inet6_protocol ipcomp6_protocol = { .flags = INET6_PROTO_NOPOLICY, }; -static struct xfrm_input_afinfo xfrm6_input_afinfo = { +static const struct xfrm_input_afinfo xfrm6_input_afinfo = { .family = AF_INET6, - .owner = THIS_MODULE, .callback = xfrm6_rcv_cb, }; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index e1c0bbe7996c..d7b731a78d09 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -44,7 +44,7 @@ struct xfrm6_tunnel_net { u32 spi; }; -static int xfrm6_tunnel_net_id __read_mostly; +static unsigned int xfrm6_tunnel_net_id __read_mostly; static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net) { return net_generic(net, xfrm6_tunnel_net_id); |