From 4c75431ac3520631f1d9e74aa88407e6374dbbc4 Mon Sep 17 00:00:00 2001 From: "Alexander Y. Fomichev" Date: Mon, 25 Aug 2014 16:26:45 +0400 Subject: net: prevent of emerging cross-namespace symlinks Code manipulating sysfs symlinks on adjacent net_device(s) currently doesn't take into account that the devices may belong to different namespaces. This patch tries to fix the issue as follows: - Check for net_ns before creating / deleting a symlink. For now only netdev_adjacent_rename_links and __netdev_adjacent_dev_remove are affected; as far as I can see, __netdev_adjacent_dev_insert implies both net_devs belong to the same namespace. - Drop all existing symlinks to / from all adj_devs before switching namespace and recreate them just after. Signed-off-by: Alexander Y. Fomichev Signed-off-by: David S. Miller --- net/core/dev.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index b65a5051361f..66738e9d66e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4889,7 +4889,8 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (netdev_adjacent_is_neigh_list(dev, dev_list)) + if (netdev_adjacent_is_neigh_list(dev, dev_list) && + net_eq(dev_net(dev),dev_net(adj_dev))) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); @@ -5159,11 +5160,65 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +void netdev_adjacent_add_links(struct net_device *dev) +{ + struct netdev_adjacent *iter; + + struct net *net = dev_net(dev); + + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_add(dev, iter->dev, + &dev->adj_list.upper); + } + + 
list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_add(dev, iter->dev, + &dev->adj_list.lower); + } +} + +void netdev_adjacent_del_links(struct net_device *dev) +{ + struct netdev_adjacent *iter; + + struct net *net = dev_net(dev); + + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_del(iter->dev, dev->name, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_del(dev, iter->dev->name, + &dev->adj_list.upper); + } + + list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_del(iter->dev, dev->name, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_del(dev, iter->dev->name, + &dev->adj_list.lower); + } +} + void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) { struct netdev_adjacent *iter; + struct net *net = dev_net(dev); + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.lower); netdev_adjacent_sysfs_add(iter->dev, dev, @@ -5171,6 +5226,8 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) } list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.upper); netdev_adjacent_sysfs_add(iter->dev, dev, @@ -6773,6 +6830,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Send a netdev-removed uevent to the old namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); + netdev_adjacent_del_links(dev); /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -6787,6 +6845,7 @@ int dev_change_net_namespace(struct net_device *dev, 
struct net *net, const char /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + netdev_adjacent_add_links(dev); /* Fixup kobjects */ err = device_rename(&dev->dev, dev->name); -- cgit v1.2.3 From db115037bb57cdfe97078b13da762213f7980e81 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Mon, 25 Aug 2014 15:16:22 +0200 Subject: net: fix checksum features handling in netif_skb_features() This is a follow-up to da08143b8520 ("vlan: more careful checksum features handling") which introduced more careful feature intersection in vlan code, taking into account that HW_CSUM should be considered a superset of IP_CSUM/IPV6_CSUM. The same is needed in netif_skb_features() in order to avoid an offloading mismatch warning when a vlan is created on top of a bond consisting of slaves supporting IP/IPv6 checksumming but not vlan Tx offloading. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/dev.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 66738e9d66e4..ab9a16530c36 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2587,13 +2587,19 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + features = netdev_intersect_features(features, + skb->dev->vlan_features | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) - features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); return harmonize_features(skb, features); } -- cgit v1.2.3 From 
d9b2938aabf757da2d40153489b251d4fc3fdd18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2014 20:49:34 -0700 Subject: net: attempt a single high order allocation In commit ed98df3361f0 ("net: use __GFP_NORETRY for high order allocations") we tried to address one issue caused by order-3 allocations. We still observe high latencies and system overhead in situations where compaction is not successful. Instead of trying order-3, order-2, and order-1, do a single order-3 best-effort attempt and immediately fall back to plain order-0. This mimics the slub strategy of falling back to the slab min order if the high order allocation used for performance fails. Order-3 allocations give a performance boost only if they can be done without recurring and expensive memory scans. Quoting David : The page allocator relies on synchronous (sync light) memory compaction after direct reclaim for allocations that don't retry and deferred compaction doesn't work with this strategy because the allocation order is always decreasing from the previous failed attempt. This means sync light compaction will always be encountered if memory cannot be defragmented or reclaimed several times during the skb_page_frag_refill() iteration. Signed-off-by: Eric Dumazet Acked-by: David Rientjes Signed-off-by: David S. Miller --- net/core/sock.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 2714811afbd8..29870571c42f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1822,6 +1822,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, order); if (page) goto fill_page; + /* Do not retry other high order allocations */ + order = 1; + max_page_order = 0; } order--; } @@ -1869,10 +1872,8 @@ EXPORT_SYMBOL(sock_alloc_send_skb); * no guarantee that allocations succeed. Therefore, @sz MUST be * less or equal than PAGE_SIZE. 
*/ -bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) +bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) { - int order; - if (pfrag->page) { if (atomic_read(&pfrag->page->_count) == 1) { pfrag->offset = 0; @@ -1883,20 +1884,21 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) put_page(pfrag->page); } - order = SKB_FRAG_PAGE_ORDER; - do { - gfp_t gfp = prio; - - if (order) - gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; - pfrag->page = alloc_pages(gfp, order); + pfrag->offset = 0; + if (SKB_FRAG_PAGE_ORDER) { + pfrag->page = alloc_pages(gfp | __GFP_COMP | + __GFP_NOWARN | __GFP_NORETRY, + SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { - pfrag->offset = 0; - pfrag->size = PAGE_SIZE << order; + pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; return true; } - } while (--order >= 0); - + } + pfrag->page = alloc_page(gfp); + if (likely(pfrag->page)) { + pfrag->size = PAGE_SIZE; + return true; + } return false; } EXPORT_SYMBOL(skb_page_frag_refill); -- cgit v1.2.3 From e793c0f70e9bdf4a2e71c151a1a3cf85c4db92ad Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 4 Sep 2014 23:44:36 +0900 Subject: net: treewide: Fix typo found in DocBook/networking.xml This patch fixes spelling typos found in DocBook/networking.xml. Because networking.xml is generated from comments in the source, the typos have to be fixed in the comments within the source. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- include/net/wimax.h | 2 +- include/trace/events/irq.h | 4 ++-- net/core/datagram.c | 2 +- net/core/gen_estimator.c | 2 +- net/core/gen_stats.c | 2 +- net/core/skbuff.c | 4 ++-- net/core/sock.c | 4 ++-- net/socket.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 38377392d082..c8e388e5fccc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3176,7 +3176,7 @@ static inline int __dev_uc_sync(struct net_device *dev, } /** - * __dev_uc_unsync - Remove synchonized addresses from device + * __dev_uc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * @@ -3220,7 +3220,7 @@ static inline int __dev_mc_sync(struct net_device *dev, } /** - * __dev_mc_unsync - Remove synchonized addresses from device + * __dev_mc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * diff --git a/include/net/wimax.h b/include/net/wimax.h index e52ef5357e08..c52b68577cb0 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -290,7 +290,7 @@ struct wimax_dev; * This operation has to be synchronous, and return only when the * reset is complete. In case of having had to resort to bus/cold * reset implying a device disconnection, the call is allowed to - * return inmediately. + * return immediately. * NOTE: wimax_dev->mutex is NOT locked when this op is being * called; however, wimax_dev->mutex_reset IS locked to ensure * serialization of calls to wimax_reset(). 
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1c09820df585..3608bebd3d9c 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -107,7 +107,7 @@ DECLARE_EVENT_CLASS(softirq, * @vec_nr: softirq vector number * * When used in combination with the softirq_exit tracepoint - * we can determine the softirq handler runtine. + * we can determine the softirq handler runtime. */ DEFINE_EVENT(softirq, softirq_entry, @@ -121,7 +121,7 @@ DEFINE_EVENT(softirq, softirq_entry, * @vec_nr: softirq vector number * * When used in combination with the softirq_entry tracepoint - * we can determine the softirq handler runtine. + * we can determine the softirq handler runtime. */ DEFINE_EVENT(softirq, softirq_exit, diff --git a/net/core/datagram.c b/net/core/datagram.c index 488dd1a825c0..fdbc9a81d4c2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -775,7 +775,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) EXPORT_SYMBOL(__skb_checksum_complete); /** - * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec. + * skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec. * @skb: skbuff * @hlen: hardware length * @iov: io vector diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 6b5b6e7013ca..9d33dfffca19 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -197,7 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * as destination. A new timer with the interval specified in the * configuration TLV is created. Upon each interval, the latest statistics * will be read from &bstats and the estimated rate will be stored in - * &rate_est with the statistics lock grabed during this period. + * &rate_est with the statistics lock grabbed during this period. * * Returns 0 on success or a negative error code. 
* diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 9d3d9e78397b..2ddbce4cce14 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -206,7 +206,7 @@ EXPORT_SYMBOL(gnet_stats_copy_queue); * @st: application specific statistics data * @len: length of data * - * Appends the application sepecific statistics to the top level TLV created by + * Appends the application specific statistics to the top level TLV created by * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping * handle is in backward compatibility mode. * diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 163b673f9e62..da1378a3e2c7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2647,7 +2647,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read); * skb_seq_read() will return the remaining part of the block. * * Note 1: The size of each block of data returned can be arbitrary, - * this limitation is the cost for zerocopy seqeuental + * this limitation is the cost for zerocopy sequential * reads of potentially non linear data. * * Note 2: Fragment lists within fragments are not implemented @@ -2781,7 +2781,7 @@ EXPORT_SYMBOL(skb_find_text); /** * skb_append_datato_frags - append the user data to a skb * @sk: sock structure - * @skb: skb structure to be appened with user data. + * @skb: skb structure to be appended with user data. 
* @getfrag: call back function to be used for getting the user data * @from: pointer to user message iov * @length: length of the iov message diff --git a/net/core/sock.c b/net/core/sock.c index 29870571c42f..d372b4bd3f99 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -166,7 +166,7 @@ EXPORT_SYMBOL(sk_ns_capable); /** * sk_capable - Socket global capability test * @sk: Socket to use a capability on or through - * @cap: The global capbility to use + * @cap: The global capability to use * * Test to see if the opener of the socket had when the socket was * created and the current process has the capability @cap in all user @@ -183,7 +183,7 @@ EXPORT_SYMBOL(sk_capable); * @sk: Socket to use a capability on or through * @cap: The capability to use * - * Test to see if the opener of the socket had when the socke was created + * Test to see if the opener of the socket had when the socket was created * and the current process has the capability @cap over the network namespace * the socket is a member of. */ diff --git a/net/socket.c b/net/socket.c index 4eb09b34b2d3..2e2586e2dee1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2601,7 +2601,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) * * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the - * socket interface. The value ops->family coresponds to the + * socket interface. The value ops->family corresponds to the * socket system call protocol family. */ int sock_register(const struct net_proto_family *ops) -- cgit v1.2.3