From e8058a49e67fe7bc7e4a0308851a3ca3a6d2e45d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 20:31:45 +0100 Subject: netlink: introduce type-checking attribute iteration There are, especially with multi-attr arrays, many cases of needing to iterate all attributes of a specific type in a netlink message or a nested attribute. Add specific macros to support that case. Also convert many instances using this spatch: @@ iterator nla_for_each_attr; iterator name nla_for_each_attr_type; identifier nla; expression head, len, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_attr(nla, head, len, rem) +nla_for_each_attr_type(nla, ATTR, head, len, rem) { <... T x; ...> -if (nla_type(nla) == ATTR) { ... -} } @@ identifier nla; iterator nla_for_each_nested; iterator name nla_for_each_nested_type; expression attr, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_nested(nla, attr, rem) +nla_for_each_nested_type(nla, ATTR, attr, rem) { <... T x; ...> -if (nla_type(nla) == ATTR) { ... -} } @@ iterator nla_for_each_attr; iterator name nla_for_each_attr_type; identifier nla; expression head, len, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_attr(nla, head, len, rem) +nla_for_each_attr_type(nla, ATTR, head, len, rem) { <... T x; ...> -if (nla_type(nla) != ATTR) continue; ... } @@ identifier nla; iterator nla_for_each_nested; iterator name nla_for_each_nested_type; expression attr, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_nested(nla, attr, rem) +nla_for_each_nested_type(nla, ATTR, attr, rem) { <... T x; ...> -if (nla_type(nla) != ATTR) continue; ... } Although I had to undo one bad change this made, and I also adjusted some other code for whitespace and to use direct variable initialization now. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20240328203144.b5a6c895fb80.I1869b44767379f204998ff44dd239803f39c23e0@changeid Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a3d7847ce69d..283e42f48af6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5245,15 +5245,14 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { - if (nla_len(attr) < sizeof(flags)) - return -EINVAL; + nla_for_each_nested_type(attr, IFLA_BRIDGE_FLAGS, br_spec, + rem) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; - have_flags = true; - flags = nla_get_u16(attr); - break; - } + have_flags = true; + flags = nla_get_u16(attr); + break; } } -- cgit v1.2.3 From 136c2a9a2a8760d8dae83ae7c882c50be02bdb63 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 May 2024 11:37:47 +0000 Subject: rtnetlink: change rtnl_stats_dump() return value By returning 0 (or an error) instead of skb->len, we allow NLMSG_DONE to be appended to the current skb at the end of a dump, saving a couple of recvmsg() system calls. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240502113748.1622637-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 283e42f48af6..88980c8bcf33 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6024,7 +6024,7 @@ out: cb->args[1] = idx; cb->args[0] = h; - return skb->len; + return err; } void rtnl_offload_xstats_notify(struct net_device *dev) -- cgit v1.2.3 From 0feb396f7428b95710ea72c1dc33ae363019fae5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 May 2024 11:37:48 +0000 Subject: rtnetlink: use for_each_netdev_dump() in rtnl_stats_dump() Switch rtnl_stats_dump() to use for_each_netdev_dump() instead of net->dev_index_head[] hash table. This makes the code much easier to read, and fixes scalability issues. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240502113748.1622637-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 59 ++++++++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 88980c8bcf33..28050e53ecb0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5961,19 +5961,17 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; - int h, s_h, err, s_idx, s_idxattr, s_prividx; struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct if_stats_msg *ifsm; - struct hlist_head *head; + struct { + unsigned long ifindex; + int idxattr; + int prividx; + } *ctx = (void *)cb->ctx; struct net_device *dev; - int idx = 0; - - s_h = cb->args[0]; - s_idx = cb->args[1]; - s_idxattr = cb->args[2]; - s_prividx = cb->args[3]; + int err; cb->seq = net->dev_base_seq; @@ -5992,37 +5990,24 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - flags, &filters, - &s_idxattr, &s_prividx, - extack); - /* If we ran out of room on the first message, - * we're in trouble - */ - WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + for_each_netdev_dump(net, dev, ctx->ifindex) { + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + flags, &filters, + &ctx->idxattr, &ctx->prividx, + extack); + /* If we ran out of room on the first message, + * we're in trouble. + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err < 0) - goto out; - s_prividx = 0; - s_idxattr = 0; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } + if (err < 0) + break; + ctx->prividx = 0; + ctx->idxattr = 0; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } -out: - cb->args[3] = s_prividx; - cb->args[2] = s_idxattr; - cb->args[1] = idx; - cb->args[0] = h; return err; } -- cgit v1.2.3 From 698419ffb6fc83dd7b0359d9e8476e732967eed2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 May 2024 19:20:52 +0000 Subject: rtnetlink: do not depend on RTNL for IFLA_QDISC output dev->qdisc can be read using RCU protection. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 28050e53ecb0..ba5301d6df84 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1832,7 +1832,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) goto nla_put_failure; - qdisc = rtnl_dereference(dev->qdisc); if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u8(skb, IFLA_OPERSTATE, @@ -1857,8 +1856,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, #endif put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || - (qdisc && - nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) || nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_up_count) + @@ -1924,6 +1921,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; rcu_read_lock(); + qdisc = rcu_dereference(dev->qdisc); + if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) + goto nla_put_failure_rcu; if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) goto nla_put_failure_rcu; if (rtnl_fill_link_ifmap(skb, dev)) -- cgit v1.2.3 From 8a58268133622c3d50155ac5798ad1d51d6bd3be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 May 2024 19:20:53 +0000 Subject: rtnetlink: do not depend on RTNL for IFLA_IFNAME output We can use netdev_copy_name() to no longer rely on RTNL to fetch dev->name. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ba5301d6df84..eb1a151c6910 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1812,6 +1812,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, u32 event, int *new_nsid, int new_ifindex, int tgt_netnsid, gfp_t gfp) { + char devname[IFNAMSIZ]; struct ifinfomsg *ifm; struct nlmsghdr *nlh; struct Qdisc *qdisc; @@ -1832,8 +1833,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) goto nla_put_failure; - if (nla_put_string(skb, IFLA_IFNAME, dev->name) || - nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || + netdev_copy_name(dev, devname); + if (nla_put_string(skb, IFLA_IFNAME, devname)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? dev->operstate : IF_OPER_DOWN) || nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || -- cgit v1.2.3 From ad13b5b0d1f9eb8e048394919e6393e520b14552 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 May 2024 19:20:54 +0000 Subject: rtnetlink: do not depend on RTNL for IFLA_TXQLEN output rtnl_fill_ifinfo() can read dev->tx_queue_len locklessly, granted we add corresponding READ_ONCE()/WRITE_ONCE() annotations. Add missing READ_ONCE(dev->tx_queue_len) in teql_enqueue() Signed-off-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/dev.c | 4 ++-- net/core/rtnetlink.c | 2 +- net/sched/sch_api.c | 2 +- net/sched/sch_teql.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/dev.c b/net/core/dev.c index d6b24749eb2e..882ce6375b2e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8959,7 +8959,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) return -ERANGE; if (new_len != orig_len) { - dev->tx_queue_len = new_len; + WRITE_ONCE(dev->tx_queue_len, new_len); res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); res = notifier_to_errno(res); if (res) @@ -8973,7 +8973,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) err_rollback: netdev_err(dev, "refused to change device tx_queue_len\n"); - dev->tx_queue_len = orig_len; + WRITE_ONCE(dev->tx_queue_len, orig_len); return res; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eb1a151c6910..7e156f535bd8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1837,7 +1837,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (nla_put_string(skb, IFLA_IFNAME, devname)) goto nla_put_failure; - if (nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || + if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) || nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? dev->operstate : IF_OPER_DOWN) || nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6292d6d73b72..74afc210527d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, * before again attaching a qdisc. */ if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { - dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN); netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 59304611dc00..29850d0f0733 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); - if (q->q.qlen < dev->tx_queue_len) { + if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) { __skb_queue_tail(&q->q, skb); return NET_XMIT_SUCCESS; } -- cgit v1.2.3 From 6747a5d4990b8c8d7392f7a06b7a4bb5f4ada80e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 May 2024 19:20:56 +0000 Subject: rtnetlink: do not depend on RTNL for many attributes Following device fields can be read locklessly in rtnl_fill_ifinfo() : type, ifindex, operstate, link_mode, mtu, min_mtu, max_mtu, group, promiscuity, allmulti, num_tx_queues, gso_max_segs, gso_max_size, gro_max_size, gso_ipv4_max_size, gro_ipv4_max_size, tso_max_size, tso_max_segs, num_rx_queues. Signed-off-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 51 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 20 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7e156f535bd8..16b5f9c68bbf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1603,7 +1603,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); + ret = nla_put_u32(skb, IFLA_MASTER, + READ_ONCE(upper_dev->ifindex)); rcu_read_unlock(); return ret; @@ -1825,8 +1826,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ifm = nlmsg_data(nlh); ifm->ifi_family = AF_UNSPEC; ifm->__ifi_pad = 0; - ifm->ifi_type = dev->type; - ifm->ifi_index = dev->ifindex; + ifm->ifi_type = READ_ONCE(dev->type); + ifm->ifi_index = READ_ONCE(dev->ifindex); ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = change; @@ -1839,24 +1840,34 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) || nla_put_u8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN) || - nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || - nla_put_u32(skb, IFLA_MTU, dev->mtu) || - nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) || - nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) || - nla_put_u32(skb, IFLA_GROUP, dev->group) || - nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || - nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) || - nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || - nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || - nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || - nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || - nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) || - nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) || - nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || - nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || + netif_running(dev) ? READ_ONCE(dev->operstate) : + IF_OPER_DOWN) || + nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) || + nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) || + nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) || + nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) || + nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) || + nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) || + nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) || + nla_put_u32(skb, IFLA_NUM_TX_QUEUES, + READ_ONCE(dev->num_tx_queues)) || + nla_put_u32(skb, IFLA_GSO_MAX_SEGS, + READ_ONCE(dev->gso_max_segs)) || + nla_put_u32(skb, IFLA_GSO_MAX_SIZE, + READ_ONCE(dev->gso_max_size)) || + nla_put_u32(skb, IFLA_GRO_MAX_SIZE, + READ_ONCE(dev->gro_max_size)) || + nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, + READ_ONCE(dev->gso_ipv4_max_size)) || + nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, + READ_ONCE(dev->gro_ipv4_max_size)) || + nla_put_u32(skb, IFLA_TSO_MAX_SIZE, + READ_ONCE(dev->tso_max_size)) || + nla_put_u32(skb, IFLA_TSO_MAX_SEGS, + READ_ONCE(dev->tso_max_segs)) || #ifdef CONFIG_RPS - nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || + nla_put_u32(skb, IFLA_NUM_RX_QUEUES, + READ_ONCE(dev->num_rx_queues)) || #endif put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || -- cgit v1.2.3 From 6890ab31d1a35444741e6150db19d64797db2919 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 May 2024 19:20:57 +0000 Subject: rtnetlink: do not depend on RTNL in rtnl_fill_proto_down() Change dev_change_proto_down() and dev_change_proto_down_reason() to write once on dev->proto_down and dev->proto_down_reason. Then rtnl_fill_proto_down() can use READ_ONCE() annotations and run locklessly. rtnl_proto_down_size() should assume worst case, because readng dev->proto_down_reason multiple times would be racy without RTNL in the future. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/core/dev.c | 11 +++++++---- net/core/rtnetlink.c | 8 ++++---- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/dev.c b/net/core/dev.c index d9d9a0d91593..d2ce91a334c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9223,7 +9223,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) netif_carrier_off(dev); else netif_carrier_on(dev); - dev->proto_down = proto_down; + WRITE_ONCE(dev->proto_down, proto_down); return 0; } @@ -9237,18 +9237,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, u32 value) { + u32 proto_down_reason; int b; if (!mask) { - dev->proto_down_reason = value; + proto_down_reason = value; } else { + proto_down_reason = dev->proto_down_reason; for_each_set_bit(b, &mask, 32) { if (value & (1 << b)) - dev->proto_down_reason |= BIT(b); + proto_down_reason |= BIT(b); else - dev->proto_down_reason &= ~BIT(b); + proto_down_reason &= ~BIT(b); } } + WRITE_ONCE(dev->proto_down_reason, proto_down_reason); } struct bpf_xdp_link { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 16b5f9c68bbf..dfdd08006fae 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1036,8 +1036,8 @@ static size_t rtnl_proto_down_size(const struct net_device *dev) { size_t size = nla_total_size(1); - if (dev->proto_down_reason) - size += nla_total_size(0) + nla_total_size(4); + /* Assume dev->proto_down_reason is not zero. */ + size += nla_total_size(0) + nla_total_size(4); return size; } @@ -1737,10 +1737,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb, struct nlattr *pr; u32 preason; - if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) + if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down))) goto nla_put_failure; - preason = dev->proto_down_reason; + preason = READ_ONCE(dev->proto_down_reason); if (!preason) return 0; -- cgit v1.2.3 From 979aad40da9217d5e907ee4ad7c7f0dc555944a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 May 2024 19:20:58 +0000 Subject: rtnetlink: do not depend on RTNL in rtnl_xdp_prog_skb() dev->xdp_prog is protected by RCU, we can lift RTNL requirement from rtnl_xdp_prog_skb(). Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dfdd08006fae..41eb8bca5305 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, static u32 rtnl_xdp_prog_skb(struct net_device *dev) { const struct bpf_prog *generic_xdp_prog; + u32 res = 0; - ASSERT_RTNL(); + rcu_read_lock(); + generic_xdp_prog = rcu_dereference(dev->xdp_prog); + if (generic_xdp_prog) + res = generic_xdp_prog->aux->id; + rcu_read_unlock(); - generic_xdp_prog = rtnl_dereference(dev->xdp_prog); - if (!generic_xdp_prog) - return 0; - return generic_xdp_prog->aux->id; + return res; } static u32 rtnl_xdp_prog_drv(struct net_device *dev) -- cgit v1.2.3 From 9cf621bd5fcbeadc2804951d13d487e22e95b363 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 May 2024 19:20:59 +0000 Subject: rtnetlink: allow rtnl_fill_link_netnsid() to run under RCU protection We want to be able to run rtnl_fill_ifinfo() under RCU protection instead of RTNL in the future. All rtnl_link_ops->get_link_net() methods already using dev_net() are ready. I added READ_ONCE() annotations on others. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ppp/ppp_generic.c | 2 +- drivers/net/vxlan/vxlan_core.c | 2 +- net/core/rtnetlink.c | 5 ++--- net/ipv4/ip_tunnel.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/xfrm/xfrm_interface_core.c | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index fe380fe196e7..0a65b6d690fe 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1357,7 +1357,7 @@ static struct net *ppp_nl_get_link_net(const struct net_device *dev) { struct ppp *ppp = netdev_priv(dev); - return ppp->ppp_net; + return READ_ONCE(ppp->ppp_net); } static struct rtnl_link_ops ppp_link_ops __read_mostly = { diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 8884913e0473..7e3a7d1f2018 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -4569,7 +4569,7 @@ static struct net *vxlan_get_link_net(const struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - return vxlan->net; + return READ_ONCE(vxlan->net); } static struct rtnl_link_ops vxlan_link_ops __read_mostly = { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 41eb8bca5305..af8da8aeed39 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1923,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } - if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp)) - goto nla_put_failure; - if (new_nsid && nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) goto nla_put_failure; @@ -1938,6 +1935,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; rcu_read_lock(); + if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC)) + goto nla_put_failure_rcu; qdisc = rcu_dereference(dev->qdisc); if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) goto nla_put_failure_rcu; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ba46cf7612f4..f1c5f6c3f2f8 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1120,7 +1120,7 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - return tunnel->net; + return READ_ONCE(tunnel->net); } EXPORT_SYMBOL(ip_tunnel_get_link_net); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 57bb3b3ea0c5..5aec79c2af1a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2146,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev) { struct ip6_tnl *tunnel = netdev_priv(dev); - return tunnel->net; + return READ_ONCE(tunnel->net); } EXPORT_SYMBOL(ip6_tnl_get_link_net); diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 4df5c06e3ece..e50e4bf993fa 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -926,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - return xi->net; + return READ_ONCE(xi->net); } static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { -- cgit v1.2.3