summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_hash.c54
-rw-r--r--net/ipv4/fib_lookup.h5
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/gre.c5
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_diag.c27
-rw-r--r--net/ipv4/inet_hashtables.c3
-rw-r--r--net/ipv4/inetpeer.c138
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_sockglue.c10
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c40
-rw-r--r--net/ipv4/proc.c9
-rw-r--r--net/ipv4/protocol.c8
-rw-r--r--net/ipv4/route.c75
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c42
-rw-r--r--net/ipv4/tunnel4.c29
-rw-r--r--net/ipv4/udp.c6
27 files changed, 309 insertions, 210 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 36e27c2107de..eb6f69a8f27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net)
hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
hlist_del(node);
fib_table_flush(tb);
- kfree(tb);
+ fib_free_table(tb);
}
}
kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 43e1c594ce8f..b3acb0417b21 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -120,11 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
struct fib_node *f;
hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
- struct hlist_head __rcu *new_head;
+ struct hlist_head *new_head;
hlist_del_rcu(&f->fn_hash);
- new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+ new_head = rcu_dereference_protected(fz->fz_hash, 1) +
+ fn_hash(f->fn_key, fz);
hlist_add_head_rcu(&f->fn_hash, new_head);
}
}
@@ -179,8 +180,8 @@ static void fn_rehash_zone(struct fn_zone *fz)
memcpy(&nfz, fz, sizeof(nfz));
write_seqlock_bh(&fz->fz_lock);
- old_ht = fz->fz_hash;
- nfz.fz_hash = ht;
+ old_ht = rcu_dereference_protected(fz->fz_hash, 1);
+ RCU_INIT_POINTER(nfz.fz_hash, ht);
nfz.fz_hashmask = new_hashmask;
nfz.fz_divisor = new_divisor;
fn_rebuild_zone(&nfz, old_ht, old_divisor);
@@ -236,7 +237,7 @@ fn_new_zone(struct fn_hash *table, int z)
seqlock_init(&fz->fz_lock);
fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
fz->fz_hashmask = fz->fz_divisor - 1;
- fz->fz_hash = fz->fz_embedded_hash;
+ RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
fz->fz_order = z;
fz->fz_revorder = 32 - z;
fz->fz_mask = inet_make_mask(z);
@@ -272,7 +273,7 @@ int fib_table_lookup(struct fib_table *tb,
for (fz = rcu_dereference(t->fn_zone_list);
fz != NULL;
fz = rcu_dereference(fz->fz_next)) {
- struct hlist_head __rcu *head;
+ struct hlist_head *head;
struct hlist_node *node;
struct fib_node *f;
__be32 k;
@@ -282,7 +283,7 @@ int fib_table_lookup(struct fib_table *tb,
seq = read_seqbegin(&fz->fz_lock);
k = fz_key(flp->fl4_dst, fz);
- head = &fz->fz_hash[fn_hash(k, fz)];
+ head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
hlist_for_each_entry_rcu(f, node, head, fn_hash) {
if (f->fn_key != k)
continue;
@@ -311,6 +312,7 @@ void fib_table_select_default(struct fib_table *tb,
struct fib_info *last_resort;
struct fn_hash *t = (struct fn_hash *)tb->tb_data;
struct fn_zone *fz = t->fn_zones[0];
+ struct hlist_head *head;
if (fz == NULL)
return;
@@ -320,7 +322,8 @@ void fib_table_select_default(struct fib_table *tb,
order = -1;
rcu_read_lock();
- hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
+ head = rcu_dereference(fz->fz_hash);
+ hlist_for_each_entry_rcu(f, node, head, fn_hash) {
struct fib_alias *fa;
list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
@@ -374,7 +377,7 @@ out:
/* Insert node F to FZ. */
static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
{
- struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
hlist_add_head_rcu(&f->fn_hash, head);
}
@@ -382,7 +385,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
/* Return the node in FZ matching KEY. */
static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
{
- struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
struct hlist_node *node;
struct fib_node *f;
@@ -662,7 +665,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
static int fn_flush_list(struct fn_zone *fz, int idx)
{
- struct hlist_head *head = &fz->fz_hash[idx];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
struct hlist_node *node, *n;
struct fib_node *f;
int found = 0;
@@ -713,6 +716,24 @@ int fib_table_flush(struct fib_table *tb)
return found;
}
+void fib_free_table(struct fib_table *tb)
+{
+ struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+ struct fn_zone *fz, *next;
+
+ next = table->fn_zone_list;
+ while (next != NULL) {
+ fz = next;
+ next = fz->fz_next;
+
+ if (fz->fz_hash != fz->fz_embedded_hash)
+ fz_hash_free(fz->fz_hash, fz->fz_divisor);
+
+ kfree(fz);
+ }
+
+ kfree(tb);
+}
static inline int
fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -761,14 +782,15 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
struct fn_zone *fz)
{
int h, s_h;
+ struct hlist_head *head = rcu_dereference(fz->fz_hash);
- if (fz->fz_hash == NULL)
+ if (head == NULL)
return skb->len;
s_h = cb->args[3];
for (h = s_h; h < fz->fz_divisor; h++) {
- if (hlist_empty(&fz->fz_hash[h]))
+ if (hlist_empty(head + h))
continue;
- if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) {
+ if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
cb->args[3] = h;
return -1;
}
@@ -872,7 +894,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
if (!iter->zone->fz_nent)
continue;
- iter->hash_head = iter->zone->fz_hash;
+ iter->hash_head = rcu_dereference(iter->zone->fz_hash);
maxslot = iter->zone->fz_divisor;
for (iter->bucket = 0; iter->bucket < maxslot;
@@ -957,7 +979,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
goto out;
iter->bucket = 0;
- iter->hash_head = iter->zone->fz_hash;
+ iter->hash_head = rcu_dereference(iter->zone->fz_hash);
hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
list_for_each_entry(fa, &fn->fn_alias, fa_list) {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c8..c079cc0ec651 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
{
- if (res->fi != NULL)
- fib_info_put(res->fi);
+ /* we used to play games with refcounts, but we now use RCU */
res->fi = fi;
- if (fi != NULL)
- atomic_inc(&fi->fib_clntref);
}
#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b14450895102..0f280348e0fd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size)
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_KERNEL);
else
- return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ return vzalloc(size);
}
static void __tnode_vfree(struct work_struct *arg)
@@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb)
return found;
}
+void fib_free_table(struct fib_table *tb)
+{
+ kfree(tb);
+}
+
void fib_table_select_default(struct fib_table *tb,
const struct flowi *flp,
struct fib_result *res)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index caea6885fdbd..c6933f2ea310 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -22,7 +22,7 @@
#include <net/gre.h>
-static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
+static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
static DEFINE_SPINLOCK(gre_proto_lock);
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -51,7 +51,8 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
goto err_out;
spin_lock(&gre_proto_lock);
- if (gre_proto[version] != proto)
+ if (rcu_dereference_protected(gre_proto[version],
+ lockdep_is_held(&gre_proto_lock)) != proto)
goto err_out_unlock;
rcu_assign_pointer(gre_proto[version], NULL);
spin_unlock(&gre_proto_lock);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9475a3..e5d1a44bcbdf 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,6 +569,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
/* No need to clone since we're just using its address. */
rt2 = rt;
+ if (!fl.nl_u.ip4_u.saddr)
+ fl.nl_u.ip4_u.saddr = rt->rt_src;
+
err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
switch (err) {
case 0:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c7216..3c53c2d89e3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
(void) ip_mc_leave_src(sk, iml, in_dev);
- if (in_dev != NULL) {
+ if (in_dev != NULL)
ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
- in_dev_put(in_dev);
- }
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba8042665849..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
struct inet_sock *inet = inet_sk(sk);
entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
entry.dport = ntohs(inet->inet_dport);
entry.userlocks = sk->sk_userlocks;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
entry.dport = ntohs(tw->tw_dport);
entry.userlocks = 0;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- struct rtattr *bc = NULL;
+ const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
- bc = (struct rtattr *)(r + 1);
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
- if (!inet_diag_bc_run(RTA_DATA(bc),
- RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc),
+ nla_len(bc), &entry))
continue;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1b344f30b463..3c0369a3a663 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
}
}
}
- sk_add_bind_node(child, &tb->owners);
- inet_csk(child)->icsk_bind_hash = tb;
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
#define node_height(x) x->avl_height
#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
+#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
static const struct inet_peer peer_fake_node = {
- .avl_left = peer_avl_empty,
- .avl_right = peer_avl_empty,
+ .avl_left = peer_avl_empty_rcu,
+ .avl_right = peer_avl_empty_rcu,
.avl_height = 0
};
static struct {
- struct inet_peer *root;
+ struct inet_peer __rcu *root;
spinlock_t lock;
int total;
} peers = {
- .root = peer_avl_empty,
+ .root = peer_avl_empty_rcu,
.lock = __SPIN_LOCK_UNLOCKED(peers.lock),
.total = 0,
};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
*/
#define lookup(_daddr, _stack) \
({ \
- struct inet_peer *u, **v; \
+ struct inet_peer *u; \
+ struct inet_peer __rcu **v; \
\
stackptr = _stack; \
*stackptr++ = &peers.root; \
- for (u = peers.root; u != peer_avl_empty; ) { \
+ for (u = rcu_dereference_protected(peers.root, \
+ lockdep_is_held(&peers.lock)); \
+ u != peer_avl_empty; ) { \
if (_daddr == u->v4daddr) \
break; \
if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
else \
v = &u->avl_right; \
*stackptr++ = v; \
- u = *v; \
+ u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
} \
u; \
})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
/* Called with local BH disabled and the pool lock held. */
#define lookup_rightempty(start) \
({ \
- struct inet_peer *u, **v; \
+ struct inet_peer *u; \
+ struct inet_peer __rcu **v; \
*stackptr++ = &start->avl_left; \
v = &start->avl_left; \
- for (u = *v; u->avl_right != peer_avl_empty; ) { \
+ for (u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
+ u->avl_right != peer_avl_empty_rcu; ) { \
v = &u->avl_right; \
*stackptr++ = v; \
- u = *v; \
+ u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
} \
u; \
})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
* Variable names are the proof of operation correctness.
* Look into mm/map_avl.c for more detail description of the ideas.
*/
-static void peer_avl_rebalance(struct inet_peer **stack[],
- struct inet_peer ***stackend)
+static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
+ struct inet_peer __rcu ***stackend)
{
- struct inet_peer **nodep, *node, *l, *r;
+ struct inet_peer __rcu **nodep;
+ struct inet_peer *node, *l, *r;
int lh, rh;
while (stackend > stack) {
nodep = *--stackend;
- node = *nodep;
- l = node->avl_left;
- r = node->avl_right;
+ node = rcu_dereference_protected(*nodep,
+ lockdep_is_held(&peers.lock));
+ l = rcu_dereference_protected(node->avl_left,
+ lockdep_is_held(&peers.lock));
+ r = rcu_dereference_protected(node->avl_right,
+ lockdep_is_held(&peers.lock));
lh = node_height(l);
rh = node_height(r);
if (lh > rh + 1) { /* l: RH+2 */
struct inet_peer *ll, *lr, *lrl, *lrr;
int lrh;
- ll = l->avl_left;
- lr = l->avl_right;
+ ll = rcu_dereference_protected(l->avl_left,
+ lockdep_is_held(&peers.lock));
+ lr = rcu_dereference_protected(l->avl_right,
+ lockdep_is_held(&peers.lock));
lrh = node_height(lr);
if (lrh <= node_height(ll)) { /* ll: RH+1 */
- node->avl_left = lr; /* lr: RH or RH+1 */
- node->avl_right = r; /* r: RH */
+ RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
+ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
node->avl_height = lrh + 1; /* RH+1 or RH+2 */
- l->avl_left = ll; /* ll: RH+1 */
- l->avl_right = node; /* node: RH+1 or RH+2 */
+ RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
+ RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
l->avl_height = node->avl_height + 1;
- *nodep = l;
+ RCU_INIT_POINTER(*nodep, l);
} else { /* ll: RH, lr: RH+1 */
- lrl = lr->avl_left; /* lrl: RH or RH-1 */
- lrr = lr->avl_right; /* lrr: RH or RH-1 */
- node->avl_left = lrr; /* lrr: RH or RH-1 */
- node->avl_right = r; /* r: RH */
+ lrl = rcu_dereference_protected(lr->avl_left,
+ lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
+ lrr = rcu_dereference_protected(lr->avl_right,
+ lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
+ RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
+ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
node->avl_height = rh + 1; /* node: RH+1 */
- l->avl_left = ll; /* ll: RH */
- l->avl_right = lrl; /* lrl: RH or RH-1 */
+ RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
+ RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
l->avl_height = rh + 1; /* l: RH+1 */
- lr->avl_left = l; /* l: RH+1 */
- lr->avl_right = node; /* node: RH+1 */
+ RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
+ RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
lr->avl_height = rh + 2;
- *nodep = lr;
+ RCU_INIT_POINTER(*nodep, lr);
}
} else if (rh > lh + 1) { /* r: LH+2 */
struct inet_peer *rr, *rl, *rlr, *rll;
int rlh;
- rr = r->avl_right;
- rl = r->avl_left;
+ rr = rcu_dereference_protected(r->avl_right,
+ lockdep_is_held(&peers.lock));
+ rl = rcu_dereference_protected(r->avl_left,
+ lockdep_is_held(&peers.lock));
rlh = node_height(rl);
if (rlh <= node_height(rr)) { /* rr: LH+1 */
- node->avl_right = rl; /* rl: LH or LH+1 */
- node->avl_left = l; /* l: LH */
+ RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
+ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
node->avl_height = rlh + 1; /* LH+1 or LH+2 */
- r->avl_right = rr; /* rr: LH+1 */
- r->avl_left = node; /* node: LH+1 or LH+2 */
+ RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
+ RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
r->avl_height = node->avl_height + 1;
- *nodep = r;
+ RCU_INIT_POINTER(*nodep, r);
} else { /* rr: RH, rl: RH+1 */
- rlr = rl->avl_right; /* rlr: LH or LH-1 */
- rll = rl->avl_left; /* rll: LH or LH-1 */
- node->avl_right = rll; /* rll: LH or LH-1 */
- node->avl_left = l; /* l: LH */
+ rlr = rcu_dereference_protected(rl->avl_right,
+ lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
+ rll = rcu_dereference_protected(rl->avl_left,
+ lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
+ RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
+ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
node->avl_height = lh + 1; /* node: LH+1 */
- r->avl_right = rr; /* rr: LH */
- r->avl_left = rlr; /* rlr: LH or LH-1 */
+ RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
+ RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
r->avl_height = lh + 1; /* r: LH+1 */
- rl->avl_right = r; /* r: LH+1 */
- rl->avl_left = node; /* node: LH+1 */
+ RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
+ RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
rl->avl_height = lh + 2;
- *nodep = rl;
+ RCU_INIT_POINTER(*nodep, rl);
}
} else {
node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
#define link_to_pool(n) \
do { \
n->avl_height = 1; \
- n->avl_left = peer_avl_empty; \
- n->avl_right = peer_avl_empty; \
- smp_wmb(); /* lockless readers can catch us now */ \
- **--stackptr = n; \
+ n->avl_left = peer_avl_empty_rcu; \
+ n->avl_right = peer_avl_empty_rcu; \
+ /* lockless readers can catch us now */ \
+ rcu_assign_pointer(**--stackptr, n); \
peer_avl_rebalance(stack, stackptr); \
} while (0)
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
* We use refcnt=-1 to alert lockless readers this entry is deleted.
*/
if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
- struct inet_peer **stack[PEER_MAXDEPTH];
- struct inet_peer ***stackptr, ***delp;
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH];
+ struct inet_peer __rcu ***stackptr, ***delp;
if (lookup(p->v4daddr, stack) != p)
BUG();
delp = stackptr - 1; /* *delp[0] == p */
- if (p->avl_left == peer_avl_empty) {
+ if (p->avl_left == peer_avl_empty_rcu) {
*delp[0] = p->avl_right;
--stackptr;
} else {
/* look for a node to insert instead of p */
struct inet_peer *t;
t = lookup_rightempty(p);
- BUG_ON(*stackptr[-1] != t);
+ BUG_ON(rcu_dereference_protected(*stackptr[-1],
+ lockdep_is_held(&peers.lock)) != t);
**--stackptr = t->avl_left;
/* t is removed, t->v4daddr > x->v4daddr for any
* x in p->avl_left subtree.
* Put t in the old place of p. */
- *delp[0] = t;
+ RCU_INIT_POINTER(*delp[0], t);
t->avl_left = p->avl_left;
t->avl_right = p->avl_right;
t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
struct inet_peer *inet_getpeer(__be32 daddr, int create)
{
struct inet_peer *p;
- struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
/* Look up for the address quickly, lockless.
* Because of a concurrent writer, we might not find an existing entry.
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ffcbe369b7..70ff77f02eee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1072,6 +1072,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
ipgre_tunnel_unlink(ign, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
t->parms.i_key = p.i_key;
@@ -1324,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
- struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
@@ -1335,7 +1335,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
tunnel->hlen = sizeof(struct iphdr) + 4;
dev_hold(dev);
- rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
}
@@ -1382,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
if ((err = register_netdev(ign->fb_tunnel_dev)))
goto err_reg_dev;
+ rcu_assign_pointer(ign->tunnels_wc[0],
+ netdev_priv(ign->fb_tunnel_dev));
return 0;
err_reg_dev:
- free_netdev(ign->fb_tunnel_dev);
+ ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64b70ad162e3..3948c86e59ca 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -238,7 +238,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
but receiver should be enough clever f.e. to forward mtrace requests,
sent to multicast group to reach destination designated router.
*/
-struct ip_ra_chain *ip_ra_chain;
+struct ip_ra_chain __rcu *ip_ra_chain;
static DEFINE_SPINLOCK(ip_ra_lock);
@@ -253,7 +253,8 @@ static void ip_ra_destroy_rcu(struct rcu_head *head)
int ip_ra_control(struct sock *sk, unsigned char on,
void (*destructor)(struct sock *))
{
- struct ip_ra_chain *ra, *new_ra, **rap;
+ struct ip_ra_chain *ra, *new_ra;
+ struct ip_ra_chain __rcu **rap;
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
@@ -261,7 +262,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
spin_lock_bh(&ip_ra_lock);
- for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
+ for (rap = &ip_ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&ip_ra_lock))) != NULL;
+ rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
spin_unlock_bh(&ip_ra_lock);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e9b816e6cd73..cd300aaee78f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -676,6 +676,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
}
t = netdev_priv(dev);
ipip_tunnel_unlink(ipn, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace0..3fac340a28d5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d80..a846d633b3b6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e43..c04787ce1a71 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
return rcu_dereference(nf_nat_protos[protonum]);
}
-static const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
- const struct nf_nat_protocol *p;
-
- rcu_read_lock();
- p = __nf_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &nf_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-
-static void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
- module_put(p->me);
-}
-
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
+static const struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+ const struct nf_nat_protocol *p;
+
+ rcu_read_lock();
+ p = __nf_nat_proto_find(protonum);
+ if (!try_module_get(p->me))
+ p = &nf_nat_unknown_protocol;
+ rcu_read_unlock();
+
+ return p;
+}
+
+static void
+nf_nat_proto_put(const struct nf_nat_protocol *p)
+{
+ module_put(p->me);
+}
+
static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
[CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
[CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7cb..b14ec7d03b6e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
local_bh_enable();
socket_seq_show(seq);
- seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+ seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
tcp_death_row.tw_count, sockets,
- atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d mem %d\n",
+ atomic_long_read(&tcp_memory_allocated));
+ seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
- atomic_read(&udp_memory_allocated));
+ atomic_long_read(&udp_memory_allocated));
seq_printf(seq, "UDPLITE: inuse %d\n",
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
+ SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 65699c24411c..9ae5c01cd0b2 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
+const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
/*
* Add a protocol handler to the hash tables
@@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int hash = protocol & (MAX_INET_PROTOS - 1);
- return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
+ return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
+ NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet_add_protocol);
@@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int ret, hash = protocol & (MAX_INET_PROTOS - 1);
- ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
+ ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
+ prot, NULL) == prot) ? 0 : -1;
synchronize_net();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bfcd8e1..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
*/
struct rt_hash_bucket {
- struct rtable *chain;
+ struct rtable __rcu *chain;
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
struct rtable *r = NULL;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
- if (!rt_hash_table[st->bucket].chain)
+ if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
continue;
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
{
struct rt_cache_iter_state *st = seq->private;
- r = r->dst.rt_next;
+ r = rcu_dereference_bh(r->dst.rt_next);
while (!r) {
rcu_read_unlock_bh();
do {
if (--st->bucket < 0)
return NULL;
- } while (!rt_hash_table[st->bucket].chain);
+ } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
rcu_read_lock_bh();
- r = rt_hash_table[st->bucket].chain;
+ r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
}
- return rcu_dereference_bh(r);
+ return r;
}
static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context)
for (i = 0; i <= rt_hash_mask; i++) {
if (process_context && need_resched())
cond_resched();
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_raw(rt_hash_table[i].chain);
if (!rth)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
- struct rtable ** prev, * p;
+ struct rtable __rcu **prev;
+ struct rtable *p;
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */
- for (tail = rth; tail; tail = tail->dst.rt_next)
+ for (tail = rth; tail;
+ tail = rcu_dereference_protected(tail->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context)
/* call rt_free on entries after the tail requiring flush */
prev = &rt_hash_table[i].chain;
- for (p = *prev; p; p = next) {
- next = p->dst.rt_next;
+ for (p = rcu_dereference_protected(*prev,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ p != NULL;
+ p = next) {
+ next = rcu_dereference_protected(p->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
} else {
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context)
}
}
#else
- rth = rt_hash_table[i].chain;
- rt_hash_table[i].chain = NULL;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) {
- next = rth->dst.rt_next;
+ next = rcu_dereference_protected(rth->dst.rt_next, 1);
rt_free(rth);
}
}
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
while (aux != rth) {
if (compare_hash_inputs(&aux->fl, &rth->fl))
return 0;
- aux = aux->dst.rt_next;
+ aux = rcu_dereference_protected(aux->dst.rt_next, 1);
}
return ONE;
}
@@ -799,7 +808,8 @@ static void rt_check_expire(void)
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
unsigned long delta;
@@ -825,11 +835,12 @@ static void rt_check_expire(void)
samples++;
- if (*rthp == NULL)
+ if (rcu_dereference_raw(*rthp) == NULL)
continue;
length = 0;
spin_lock_bh(rt_hash_lock_addr(i));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
prefetch(rth->dst.rt_next);
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
static unsigned long last_gc;
static int rover;
static int equilibrium;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long now = jiffies;
int goal;
int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
k = (k + 1) & rt_hash_mask;
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
if (!rt_is_expired(rth) &&
!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
while (rth) {
length += has_noalias(head, rth);
- rth = rth->dst.rt_next;
+ rth = rcu_dereference_protected(rth->dst.rt_next, 1);
}
return length >> FRACT_BITS;
}
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb, int ifindex)
{
- struct rtable *rth, **rthp;
+ struct rtable *rth, *cand;
+ struct rtable __rcu **rthp, **candp;
unsigned long now;
- struct rtable *cand, **candp;
u32 min_score;
int chain_length;
int attempts = !in_softirq();
@@ -1128,7 +1141,8 @@ restart:
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
rt_free(rth);
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
static void rt_del(unsigned hash, struct rtable *rt)
{
- struct rtable **rthp, *aux;
+ struct rtable __rcu **rthp;
+ struct rtable *aux;
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
- while ((aux = *rthp) != NULL) {
+ while ((aux = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (aux == rt || rt_is_expired(aux)) {
*rthp = aux->dst.rt_next;
rt_free(aux);
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
{
int i, k;
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
struct netevent_redirect netevent;
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
rt_genid(net));
- rthp=&rt_hash_table[hash].chain;
+ rthp = &rt_hash_table[hash].chain;
while ((rth = rcu_dereference(*rthp)) != NULL) {
struct rtable *rt;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17c..1b4ec21497a4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_adv_win_scale_min = -31;
+static int tcp_adv_win_scale_max = 31;
/* Update system visible IP port range */
static void set_local_port_range(int range[2])
@@ -398,7 +400,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_mem,
.maxlen = sizeof(sysctl_tcp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "tcp_wmem",
@@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_adv_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_adv_win_scale_min,
+ .extra2 = &tcp_adv_win_scale_max,
},
{
.procname = "tcp_tw_reuse",
@@ -602,8 +606,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb8..f15c36a706ec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
-atomic_t tcp_memory_allocated; /* Current allocated memory. */
+atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
/*
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < 8 || val > MAX_TCP_WINDOW) {
+ if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
err = -EINVAL;
break;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353d..6d8ab1c4efc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
sizeof(struct sk_buff);
- if (sk->sk_sndbuf < 3 * sndmem)
- sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+ if (sk->sk_sndbuf < 3 * sndmem) {
+ sk->sk_sndbuf = 3 * sndmem;
+ if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+ sk->sk_sndbuf = sysctl_tcp_wmem[2];
+ }
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d41682..e13da6de1fc7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
!icsk->icsk_backoff)
break;
+ if (sock_owned_by_user(sk))
+ break;
+
icsk->icsk_backoff--;
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (remaining) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
- } else if (sock_owned_by_user(sk)) {
- /* RTO revert clocked out retransmission,
- * but socket is locked. Will defer. */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- HZ/20, TCP_RTO_MAX);
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now */
@@ -2045,7 +2043,9 @@ get_req:
}
get_sk:
sk_nulls_for_each_from(sk, node) {
- if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == st->family) {
cur = sk;
goto out;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 43cf901d7659..a66735f75963 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -347,7 +347,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n");
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
}
tcp_update_metrics(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 05b1ecf36763..61c2463e2753 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -231,11 +231,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
/* when initializing use the value from init_rcv_wnd
* rather than the default from above
*/
- if (init_rcv_wnd &&
- (*rcv_wnd > init_rcv_wnd * mss))
- *rcv_wnd = init_rcv_wnd * mss;
- else if (*rcv_wnd > init_cwnd * mss)
- *rcv_wnd = init_cwnd * mss;
+ if (init_rcv_wnd)
+ *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+ else
+ *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
}
/* Set the clamp no higher than max representable value */
@@ -386,27 +385,30 @@ struct tcp_out_options {
*/
static u8 tcp_cookie_size_check(u8 desired)
{
- if (desired > 0) {
+ int cookie_size;
+
+ if (desired > 0)
/* previously specified */
return desired;
- }
- if (sysctl_tcp_cookie_size <= 0) {
+
+ cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
+ if (cookie_size <= 0)
/* no default specified */
return 0;
- }
- if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
+
+ if (cookie_size <= TCP_COOKIE_MIN)
/* value too small, specify minimum */
return TCP_COOKIE_MIN;
- }
- if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
+
+ if (cookie_size >= TCP_COOKIE_MAX)
/* value too large, specify maximum */
return TCP_COOKIE_MAX;
- }
- if (0x1 & sysctl_tcp_cookie_size) {
+
+ if (cookie_size & 1)
/* 8-bit multiple, illegal, fix it */
- return (u8)(sysctl_tcp_cookie_size + 0x1);
- }
- return (u8)sysctl_tcp_cookie_size;
+ cookie_size++;
+
+ return (u8)cookie_size;
}
/* Write previously computed TCP options to the packet.
@@ -1513,6 +1515,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 send_win, cong_win, limit, in_flight;
+ int win_divisor;
if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
goto send_now;
@@ -1544,13 +1547,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now;
- if (sysctl_tcp_tso_win_divisor) {
+ win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+ if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
/* If at least some fraction of a window is available,
* just use it.
*/
- chunk /= sysctl_tcp_tso_win_divisor;
+ chunk /= win_divisor;
if (limit >= chunk)
goto send_now;
} else {
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 9a17bd2a0a37..ac3b3ee4b07c 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,27 +14,32 @@
#include <net/protocol.h>
#include <net/xfrm.h>
-static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
-static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
static DEFINE_MUTEX(tunnel4_mutex);
-static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
+static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
{
return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
}
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
{
- struct xfrm_tunnel **pprev;
+ struct xfrm_tunnel __rcu **pprev;
+ struct xfrm_tunnel *t;
+
int ret = -EEXIST;
int priority = handler->priority;
mutex_lock(&tunnel4_mutex);
- for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
- if ((*pprev)->priority > priority)
+ for (pprev = fam_handlers(family);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel4_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority > priority)
break;
- if ((*pprev)->priority == priority)
+ if (t->priority == priority)
goto err;
}
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
{
- struct xfrm_tunnel **pprev;
+ struct xfrm_tunnel __rcu **pprev;
+ struct xfrm_tunnel *t;
int ret = -ENOENT;
mutex_lock(&tunnel4_mutex);
- for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
- if (*pprev == handler) {
+ for (pprev = fam_handlers(family);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel4_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
*pprev = handler->next;
ret = 0;
break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b3f7e8cf18ac..5e0a3a582a59 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
-int sysctl_udp_mem[3] __read_mostly;
+long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);
-atomic_t udp_memory_allocated;
+atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
@@ -1413,7 +1413,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (sk->sk_filter) {
+ if (rcu_dereference_raw(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
goto drop;
}