From 9fdd04918a452980631ecc499317881c1d120b70 Mon Sep 17 00:00:00 2001 From: Dan Robertson Date: Fri, 23 Apr 2021 00:02:13 -0400 Subject: net: ieee802154: fix null deref in parse dev addr Fix a logic error that could result in a null deref if the user sets the mode incorrectly for the given addr type. Signed-off-by: Dan Robertson Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20210423040214.15438-2-dan@dlrobertson.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl802154.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 05f6bd89a7dd..0cf2374c143b 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1298,19 +1298,20 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla, if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL)) return -EINVAL; - if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || - !attrs[NL802154_DEV_ADDR_ATTR_MODE] || - !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] || - attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])) + if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || !attrs[NL802154_DEV_ADDR_ATTR_MODE]) return -EINVAL; addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]); addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]); switch (addr->mode) { case NL802154_DEV_ADDR_SHORT: + if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT]) + return -EINVAL; addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]); break; case NL802154_DEV_ADDR_EXTENDED: + if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]) + return -EINVAL; addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]); break; default: -- cgit v1.2.3 From 22cbdbcfb61acc78d5fc21ebb13ccc0d7e29f793 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 4 May 2021 16:40:00 +0200 Subject: netfilter: conntrack: unregister ipv4 sockopts on error unwind When ipv6 sockopt register fails, the ipv4 one needs to be removed. Fixes: a0ae2562c6c ("netfilter: conntrack: remove l3proto abstraction") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 89e5bac384d7..dc9ca12b0489 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -664,7 +664,7 @@ int nf_conntrack_proto_init(void) #if IS_ENABLED(CONFIG_IPV6) cleanup_sockopt: - nf_unregister_sockopt(&so_getorigdst6); + nf_unregister_sockopt(&so_getorigdst); #endif return ret; } -- cgit v1.2.3 From c781471d67a56d7d4c113669a11ede0463b5c719 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 19 May 2021 13:32:20 +0200 Subject: netfilter: nf_tables: missing error reporting for not selected expressions Sometimes users forget to turn on nftables extensions from Kconfig that they need. In such case, the error reporting from userspace is misleading: $ sudo nft add rule x y counter Error: Could not process rule: No such file or directory add rule x y counter ^^^^^^^^^^^^^^^^^^^^ Add missing NL_SET_BAD_ATTR() to provide a hint: $ nft add rule x y counter Error: Could not process rule: No such file or directory add rule x y counter ^^^^^^^ Fixes: 83d9dcba06c5 ("netfilter: nf_tables: extended netlink error reporting for expressions") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d63d2d8f769c..5a02b48af7fb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3328,8 +3328,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, if (n == NFT_RULE_MAXEXPRS) goto err1; err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, tmp); goto err1; + } size += expr_info[n].ops->size; n++; } -- cgit v1.2.3 From 983c4fcb81d6bd19c6035e5dda6bf1fca058c320 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 19 May 2021 21:16:40 +0200 Subject: netfilter: nf_tables: extended netlink error reporting for chain type Users that forget to select the NAT chain type in netfilter's Kconfig hit ENOENT when adding the basechain. This report is however sparse since it might be the table, the chain or the kernel module that is missing/does not exist. This patch provides extended netlink error reporting for the NFTA_CHAIN_TYPE netlink attribute, which conveys the basechain type. If the user selects a basechain that his custom kernel does not support, the netlink extended error provides a more accurate hint on the described issue. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5a02b48af7fb..c34a3c0a0d9c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1905,7 +1905,7 @@ static int nft_chain_parse_netdev(struct net *net, static int nft_chain_parse_hook(struct net *net, const struct nlattr * const nla[], struct nft_chain_hook *hook, u8 family, - bool autoload) + struct netlink_ext_ack *extack, bool autoload) { struct nftables_pernet *nft_net = nft_pernet(net); struct nlattr *ha[NFTA_HOOK_MAX + 1]; @@ -1935,8 +1935,10 @@ static int nft_chain_parse_hook(struct net *net, if (nla[NFTA_CHAIN_TYPE]) { type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], family, autoload); - if (IS_ERR(type)) + if (IS_ERR(type)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); return PTR_ERR(type); + } } if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; @@ -1945,8 +1947,11 @@ static int nft_chain_parse_hook(struct net *net, hook->priority <= NF_IP_PRI_CONNTRACK) return -EOPNOTSUPP; - if (!try_module_get(type->owner)) + if (!try_module_get(type->owner)) { + if (nla[NFTA_CHAIN_TYPE]) + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); return -ENOENT; + } hook->type = type; @@ -2057,7 +2062,8 @@ static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) static u64 chain_id; static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, - u8 policy, u32 flags) + u8 policy, u32 flags, + struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; @@ -2079,7 +2085,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; - err = nft_chain_parse_hook(net, nla, &hook, family, true); + err = nft_chain_parse_hook(net, nla, &hook, family, extack, + true); if (err < 0) return err; @@ -2234,7 +2241,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return -EEXIST; } err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, - false); + extack, false); if (err < 0) return err; @@ -2447,7 +2454,7 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, extack); } - return nf_tables_addchain(&ctx, family, genmask, policy, flags); + return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); } static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, -- cgit v1.2.3 From 179d9ba5559a756f4322583388b3213fe4e391b0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 24 May 2021 17:10:18 +0200 Subject: netfilter: nf_tables: fix table flag updates The dormant flag need to be updated from the preparation phase, otherwise, two consecutive requests to dorm a table in the same batch might try to remove the same hooks twice, resulting in the following warning: hook not found, pf 3 num 0 WARNING: CPU: 0 PID: 334 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 Modules linked in: CPU: 0 PID: 334 Comm: kworker/u4:5 Not tainted 5.12.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:__nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 This patch is a partial revert of 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase") to restore the previous behaviour. However, there is still another problem: A batch containing a series of dorm-wakeup-dorm table and vice-versa also trigger the warning above since hook unregistration happens from the preparation phase, while hook registration occurs from the commit phase. To fix this problem, this patch adds two internal flags to annotate the original dormant flag status which are __NFT_TABLE_F_WAS_DORMANT and __NFT_TABLE_F_WAS_AWAKEN, to restore it from the abort path. The __NFT_TABLE_F_UPDATE bitmask allows to handle the dormant flag update with one single transaction. Reported-by: syzbot+7ad5cd1615f2d89c6e7e@syzkaller.appspotmail.com Fixes: 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ---- net/netfilter/nf_tables_api.c | 59 ++++++++++++++++++++++++++------------- 2 files changed, 40 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 27eeb613bb4e..0a5655e300b5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1506,16 +1506,10 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - u8 state; - u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_state(trans) \ - (((struct nft_trans_table *)trans->data)->state) -#define nft_trans_table_flags(trans) \ - (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c34a3c0a0d9c..72bc759179ef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -736,7 +736,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, + htonl(table->flags & NFT_TABLE_F_MASK)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), NFTA_TABLE_PAD)) @@ -947,20 +948,22 @@ err_register_hooks: static void nf_tables_table_disable(struct net *net, struct nft_table *table) { + table->flags &= ~NFT_TABLE_F_DORMANT; nft_table_disable(net, table, 0); + table->flags |= NFT_TABLE_F_DORMANT; } -enum { - NFT_TABLE_STATE_UNCHANGED = 0, - NFT_TABLE_STATE_DORMANT, - NFT_TABLE_STATE_WAKEUP -}; +#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) +#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) +#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) +#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; u32 flags; - int ret = 0; + int ret; if (!ctx->nla[NFTA_TABLE_FLAGS]) return 0; @@ -985,21 +988,27 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; + ctx->table->flags |= NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) + ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) - nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) { + ret = nf_tables_table_enable(ctx->net, ctx->table); + if (ret < 0) + goto err_register_hooks; + + ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT; + } } - if (ret < 0) - goto err; - nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; nft_trans_commit_list_add_tail(ctx->net, trans); + return 0; -err: + +err_register_hooks: nft_trans_destroy(trans); return ret; } @@ -8556,10 +8565,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags = nft_trans_table_flags(trans); + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; } else { nft_clear(net, trans->ctx.table); } @@ -8777,9 +8790,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { nf_tables_table_disable(net, trans->ctx.table); - + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + } + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); -- cgit v1.2.3 From 56e4ee82e850026d71223262c07df7d6af3bd872 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 24 May 2021 22:54:57 +0300 Subject: ipvs: ignore IP_VS_SVC_F_HASHED flag when adding service syzbot reported memory leak [1] when adding service with HASHED flag. We should ignore this flag both from sockopt and netlink provided data, otherwise the service is not hashed and not visible while releasing resources. [1] BUG: memory leak unreferenced object 0xffff888115227800 (size 512): comm "syz-executor263", pid 8658, jiffies 4294951882 (age 12.560s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc include/linux/slab.h:556 [inline] [] kzalloc include/linux/slab.h:686 [inline] [] ip_vs_add_service+0x598/0x7c0 net/netfilter/ipvs/ip_vs_ctl.c:1343 [] do_ip_vs_set_ctl+0x810/0xa40 net/netfilter/ipvs/ip_vs_ctl.c:2570 [] nf_setsockopt+0x68/0xa0 net/netfilter/nf_sockopt.c:101 [] ip_setsockopt+0x259/0x1ff0 net/ipv4/ip_sockglue.c:1435 [] raw_setsockopt+0x18c/0x1b0 net/ipv4/raw.c:857 [] __sys_setsockopt+0x1b0/0x360 net/socket.c:2117 [] __do_sys_setsockopt net/socket.c:2128 [inline] [] __se_sys_setsockopt net/socket.c:2125 [inline] [] __x64_sys_setsockopt+0x22/0x30 net/socket.c:2125 [] do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47 [] entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-and-tested-by: syzbot+e562383183e4b1766930@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d45dbcba8b49..c25097092a06 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1367,7 +1367,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); svc->port = u->port; svc->fwmark = u->fwmark; - svc->flags = u->flags; + svc->flags = u->flags & ~IP_VS_SVC_F_HASHED; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; svc->ipvs = ipvs; -- cgit v1.2.3 From 6a137caec23aeb9e036cdfd8a46dd8a366460e5d Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 25 May 2021 14:39:02 +0200 Subject: Bluetooth: fix the erroneous flush_work() order In the cleanup routine for failed initialization of HCI device, the flush_work(&hdev->rx_work) need to be finished before the flush_work(&hdev->cmd_work). Otherwise, the hci_rx_work() can possibly invoke new cmd_work and cause a bug, like double free, in late processings. This was assigned CVE-2021-3564. This patch reorder the flush_work() to fix this bug. Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Luiz Augusto von Dentz Cc: "David S. Miller" Cc: Jakub Kicinski Cc: linux-bluetooth@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Lin Ma Signed-off-by: Hao Xiong Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fd12f1652bdf..7d71d104fdfd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1610,8 +1610,13 @@ setup_failed: } else { /* Init failed, cleanup */ flush_work(&hdev->tx_work); - flush_work(&hdev->cmd_work); + + /* Since hci_rx_work() is possible to awake new cmd_work + * it should be flushed first to avoid unexpected call of + * hci_cmd_work() + */ flush_work(&hdev->rx_work); + flush_work(&hdev->cmd_work); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->rx_q); -- cgit v1.2.3 From b28d8f0c25a9b0355116cace5f53ea52bd4020c8 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 26 May 2021 23:00:27 +0300 Subject: devlink: Correct VIRTUAL port to not have phys_port attributes Physical port name, port number attributes do not belong to virtual port flavour. When VF or SF virtual ports are registered they incorrectly append "np0" string in the netdevice name of the VF/SF. Before this fix, VF netdevice name were ens2f0np0v0, ens2f0np0v1 for VF 0 and 1 respectively. After the fix, they are ens2f0v0, ens2f0v1. With this fix, reading /sys/class/net/ens2f0v0/phys_port_name returns -EOPNOTSUPP. Also devlink port show example for 2 VFs on one PF to ensure that any physical port attributes are not exposed. $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false pci/0000:06:00.3/196608: type eth netdev ens2f0v0 flavour virtual splittable false pci/0000:06:00.4/262144: type eth netdev ens2f0v1 flavour virtual splittable false This change introduces a netdevice name change on systemd/udev version 245 and higher which honors phys_port_name sysfs file for generation of netdevice name. This also aligns to phys_port_name usage which is limited to switchdev ports as described in [1]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/Documentation/networking/switchdev.rst Fixes: acf1ee44ca5d ("devlink: Introduce devlink port flavour virtual") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20210526200027.14008-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 4eb969518ee0..051432ea4f69 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -705,7 +705,6 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, case DEVLINK_PORT_FLAVOUR_PHYSICAL: case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->phys.port_number)) return -EMSGSIZE; @@ -8631,7 +8630,6 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (!attrs->split) n = snprintf(name, len, "p%u", attrs->phys.port_number); else @@ -8679,6 +8677,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); break; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return -EOPNOTSUPP; } if (n >= len) -- cgit v1.2.3 From 0cc254e5aa37cf05f65bcdcdc0ac5c58010feb33 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 26 May 2021 14:44:09 +0300 Subject: net/sched: act_ct: Offload connections with commit action Currently established connections are not offloaded if the filter has a "ct commit" action. This behavior will not offload connections of the following scenario: $ tc_filter add dev $DEV ingress protocol ip prio 1 flower \ ct_state -trk \ action ct commit action goto chain 1 $ tc_filter add dev $DEV ingress protocol ip chain 1 prio 1 flower \ action mirred egress redirect dev $DEV2 $ tc_filter add dev $DEV2 ingress protocol ip prio 1 flower \ action ct commit action goto chain 1 $ tc_filter add dev $DEV2 ingress protocol ip prio 1 chain 1 flower \ ct_state +trk+est \ action mirred egress redirect dev $DEV Offload established connections, regardless of the commit flag. Fixes: 46475bb20f4b ("net/sched: act_ct: Software offload of established flows") Reviewed-by: Oz Shlomo Reviewed-by: Jiri Pirko Acked-by: Marcelo Ricardo Leitner Signed-off-by: Paul Blakey Link: https://lore.kernel.org/r/1622029449-27060-1-git-send-email-paulb@nvidia.com Signed-off-by: Jakub Kicinski --- net/sched/act_ct.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ec7a1c438df9..b1473a1aecdd 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -984,7 +984,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); if (!cached) { - if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) { + if (tcf_ct_flow_table_lookup(p, skb, family)) { skip_add = true; goto do_nat; } @@ -1022,10 +1022,11 @@ do_nat: * even if the connection is already confirmed. */ nf_conntrack_confirm(skb); - } else if (!skip_add) { - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); } + if (!skip_add) + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); + out_push: skb_push_rcsum(skb, nh_ofs); -- cgit v1.2.3 From fb91702b743dec78d6507c53a2dec8a8883f509d Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Wed, 26 May 2021 20:01:10 +0300 Subject: net/sched: act_ct: Fix ct template allocation for zone 0 Fix current behavior of skipping template allocation in case the ct action is in zone 0. Skipping the allocation may cause the datapath ct code to ignore the entire ct action with all its attributes (commit, nat) in case the ct action in zone 0 was preceded by a ct clear action. The ct clear action sets the ct_state to untracked and resets the skb->_nfct pointer. Under these conditions and without an allocated ct template, the skb->_nfct pointer will remain NULL which will cause the tc ct action handler to exit without handling commit and nat actions, if such exist. For example, the following rule in OVS dp: recirc_id(0x2),ct_state(+new-est-rel-rpl+trk),ct_label(0/0x1), \ in_port(eth0),actions:ct_clear,ct(commit,nat(src=10.11.0.12)), \ recirc(0x37a) Will result in act_ct skipping the commit and nat actions in zone 0. The change removes the skipping of template allocation for zone 0 and treats it the same as any other zone. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: Ariel Levkovich Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/20210526170110.54864-1-lariel@nvidia.com Signed-off-by: Jakub Kicinski --- net/sched/act_ct.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b1473a1aecdd..18edd9ad1410 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1203,9 +1203,6 @@ static int tcf_ct_fill_params(struct net *net, sizeof(p->zone)); } - if (p->zone == NF_CT_DEFAULT_ZONE_ID) - return 0; - nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); if (!tmpl) { -- cgit v1.2.3 From b5941f066b4ca331db225a976dae1d6ca8cf0ae3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:37 -0700 Subject: mptcp: fix sk_forward_memory corruption on retransmission MPTCP sk_forward_memory handling is a bit special, as such field is protected by the msk socket spin_lock, instead of the plain socket lock. Currently we have a code path updating such field without handling the relevant lock: __mptcp_retrans() -> __mptcp_clean_una_wakeup() Several helpers in __mptcp_clean_una_wakeup() will update sk_forward_alloc, possibly causing such field corruption, as reported by Matthieu. Address the issue providing and using a new variant of blamed function which explicitly acquires the msk spin lock. Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/172 Reported-by: Matthieu Baerts Tested-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2bc199549a88..5edc686faff1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); +#endif + if (!msk->wmem_reserved) return; @@ -1085,10 +1089,20 @@ out: static void __mptcp_clean_una_wakeup(struct sock *sk) { +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); +#endif __mptcp_clean_una(sk); mptcp_write_space(sk); } +static void mptcp_clean_una_wakeup(struct sock *sk) +{ + mptcp_data_lock(sk); + __mptcp_clean_una_wakeup(sk); + mptcp_data_unlock(sk); +} + static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk) struct sock *ssk; int ret; - __mptcp_clean_una_wakeup(sk); + mptcp_clean_una_wakeup(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) { if (mptcp_data_fin_enabled(msk)) { -- cgit v1.2.3 From 06f9a435b3aa12f4de6da91f11fdce8ce7b46205 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:38 -0700 Subject: mptcp: always parse mptcp options for MPC reqsk In subflow_syn_recv_sock() we currently skip options parsing for OoO packet, given that such packets may not carry the relevant MPC option. If the peer generates an MPC+data TSO packet and some of the early segments are lost or get reorder, we server will ignore the peer key, causing transient, unexpected fallback to TCP. The solution is always parsing the incoming MPTCP options, and do the fallback only for in-order packets. This actually cleans the existing code a bit. Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option") Reported-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bde6be77ea73..c6ee81149829 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* if the sk is MP_CAPABLE, we try to fetch the client key */ if (subflow_req->mp_capable) { - if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) { - /* here we can receive and accept an in-window, - * out-of-order pkt, which will not carry the MP_CAPABLE - * opt even on mptcp enabled paths - */ - goto create_msk; - } - + /* we can receive and accept an in-window, out-of-order pkt, + * which may not carry the MP_CAPABLE opt even on mptcp enabled + * paths: always try to extract the peer key, and fallback + * for packets missing it. + * Even OoO DSS packets coming legitly after dropped or + * reordered MPC will cause fallback, but we don't have other + * options. + */ mptcp_get_options(skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; } -create_msk: new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) fallback = true; -- cgit v1.2.3 From dea2b1ea9c705c5ba351a9174403fd83dbb68fc3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 May 2021 16:31:39 -0700 Subject: mptcp: do not reset MP_CAPABLE subflow on mapping errors When some mapping related errors occurs we close the main MPC subflow with a RST. We should instead fallback gracefully to TCP, and do the reset only for MPJ subflows. Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/192 Reported-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 62 +++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c6ee81149829..ef3d037f984a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1011,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); - if (status == MAPPING_INVALID) { - ssk->sk_err = EBADMSG; - goto fatal; - } - if (status == MAPPING_DUMMY) { - __mptcp_do_fallback(msk); - skb = skb_peek(&ssk->sk_receive_queue); - subflow->map_valid = 1; - subflow->map_seq = READ_ONCE(msk->ack_seq); - subflow->map_data_len = skb->len; - subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - return true; - } + if (unlikely(status == MAPPING_INVALID)) + goto fallback; + + if (unlikely(status == MAPPING_DUMMY)) + goto fallback; if (status != MAPPING_OK) goto no_data; @@ -1038,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk) * MP_CAPABLE-based mapping */ if (unlikely(!READ_ONCE(msk->can_ack))) { - if (!subflow->mpc_map) { - ssk->sk_err = EBADMSG; - goto fatal; - } + if (!subflow->mpc_map) + goto fallback; WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->ack_seq, subflow->map_seq); WRITE_ONCE(msk->can_ack, true); @@ -1069,17 +1057,31 @@ static bool subflow_check_data_avail(struct sock *ssk) no_data: subflow_sched_work_if_closed(msk, ssk); return false; -fatal: - /* fatal protocol error, close the socket */ - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - ssk->sk_error_report(ssk); - tcp_set_state(ssk, TCP_CLOSE); - subflow->reset_transient = 0; - subflow->reset_reason = MPTCP_RST_EMPTCP; - tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; - return false; + +fallback: + /* RFC 8684 section 3.7. */ + if (subflow->mp_join || subflow->fully_established) { + /* fatal protocol error, close the socket. + * subflow_error_report() will introduce the appropriate barriers + */ + ssk->sk_err = EBADMSG; + ssk->sk_error_report(ssk); + tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMPTCP; + tcp_send_active_reset(ssk, GFP_ATOMIC); + subflow->data_avail = 0; + return false; + } + + __mptcp_do_fallback(msk); + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + return true; } bool mptcp_subflow_data_available(struct sock *sk) -- cgit v1.2.3 From e305509e678b3a4af2b3cfd410f409f7cdaabb52 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 30 May 2021 21:37:43 +0800 Subject: Bluetooth: use correct lock to prevent UAF of hdev object The hci_sock_dev_event() function will cleanup the hdev object for sockets even if this object may still be in used within the hci_sock_bound_ioctl() function, result in UAF vulnerability. This patch replace the BH context lock to serialize these affairs and prevent the race condition. Signed-off-by: Lin Ma Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 251b9128f530..eed0dd066e12 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -762,7 +762,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) /* Detach sockets from device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - bh_lock_sock_nested(sk); + lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; @@ -771,7 +771,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) hci_dev_put(hdev); } - bh_unlock_sock(sk); + release_sock(sk); } read_unlock(&hci_sk_list.lock); } -- cgit v1.2.3 From 4ac06a1e013cf5fdd963317ffd3b968560f33bba Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 31 May 2021 09:21:38 +0200 Subject: nfc: fix NULL ptr dereference in llcp_sock_getname() after failed connect It's possible to trigger NULL pointer dereference by local unprivileged user, when calling getsockname() after failed bind() (e.g. the bind fails because LLCP_SAP_MAX used as SAP): BUG: kernel NULL pointer dereference, address: 0000000000000000 CPU: 1 PID: 426 Comm: llcp_sock_getna Not tainted 5.13.0-rc2-next-20210521+ #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 Call Trace: llcp_sock_getname+0xb1/0xe0 __sys_getpeername+0x95/0xc0 ? lockdep_hardirqs_on_prepare+0xd5/0x180 ? syscall_enter_from_user_mode+0x1c/0x40 __x64_sys_getpeername+0x11/0x20 do_syscall_64+0x36/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xae This can be reproduced with Syzkaller C repro (bind followed by getpeername): https://syzkaller.appspot.com/x/repro.c?x=14def446e00000 Cc: Fixes: d646960f7986 ("NFC: Initial LLCP support") Reported-by: syzbot+80fb126e7f7d8b1a5914@syzkaller.appspotmail.com Reported-by: butt3rflyh4ck Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20210531072138.5219-1-krzysztof.kozlowski@canonical.com Signed-off-by: Jakub Kicinski --- net/nfc/llcp_sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 53dbe733f998..6cfd30fc0798 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -110,6 +110,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) if (!llcp_sock->service_name) { nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; + llcp_sock->dev = NULL; ret = -ENOMEM; goto put_dev; } @@ -119,6 +120,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) llcp_sock->local = NULL; kfree(llcp_sock->service_name); llcp_sock->service_name = NULL; + llcp_sock->dev = NULL; ret = -EADDRINUSE; goto put_dev; } -- cgit v1.2.3 From 4ef8d857b5f494e62bce9085031563fda35f9563 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 31 May 2021 13:20:45 +0300 Subject: net: dsa: tag_8021q: fix the VLAN IDs used for encoding sub-VLANs When using sub-VLANs in the range of 1-7, the resulting value from: rx_vid = dsa_8021q_rx_vid_subvlan(ds, port, subvlan); is wrong according to the description from tag_8021q.c: | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | +-----------+-----+-----------------+-----------+-----------------------+ | DIR | SVL | SWITCH_ID | SUBVLAN | PORT | +-----------+-----+-----------------+-----------+-----------------------+ For example, when ds->index == 0, port == 3 and subvlan == 1, dsa_8021q_rx_vid_subvlan() returns 1027, same as it returns for subvlan == 0, but it should have returned 1043. This is because the low portion of the subvlan bits are not masked properly when writing into the 12-bit VLAN value. They are masked into bits 4:3, but they should be masked into bits 5:4. Fixes: 3eaae1d05f2b ("net: dsa: tag_8021q: support up to 8 VLANs per port using sub-VLANs") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/tag_8021q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 008c1ec6e20c..122ad5833fb1 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -64,7 +64,7 @@ #define DSA_8021Q_SUBVLAN_HI_SHIFT 9 #define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9) #define DSA_8021Q_SUBVLAN_LO_SHIFT 4 -#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(4, 3) +#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4) #define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2) #define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0)) #define DSA_8021Q_SUBVLAN(x) \ -- cgit v1.2.3 From dd9082f4a9f94280fbbece641bf8fc0a25f71f7a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 31 May 2021 17:00:30 -0400 Subject: net: sock: fix in-kernel mark setting This patch fixes the in-kernel mark setting by doing an additional sk_dst_reset() which was introduced by commit 50254256f382 ("sock: Reset dst when changing sk_mark via setsockopt"). The code is now shared to avoid any further suprises when changing the socket mark value. Fixes: 84d1c617402e ("net: sock: add sock_set_mark") Reported-by: Marcelo Ricardo Leitner Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- net/core/sock.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 958614ea16ed..946888afef88 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -815,10 +815,18 @@ void sock_set_rcvbuf(struct sock *sk, int val) } EXPORT_SYMBOL(sock_set_rcvbuf); +static void __sock_set_mark(struct sock *sk, u32 val) +{ + if (val != sk->sk_mark) { + sk->sk_mark = val; + sk_dst_reset(sk); + } +} + void sock_set_mark(struct sock *sk, u32 val) { lock_sock(sk); - sk->sk_mark = val; + __sock_set_mark(sk, val); release_sock(sk); } EXPORT_SYMBOL(sock_set_mark); @@ -1126,10 +1134,10 @@ set_sndbuf: case SO_MARK: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; - } else if (val != sk->sk_mark) { - sk->sk_mark = val; - sk_dst_reset(sk); + break; } + + __sock_set_mark(sk, val); break; case SO_RXQ_OVFL: -- cgit v1.2.3 From 05fc8b6cbd4f979a6f25759c4a17dd5f657f7ecd Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 1 Jun 2021 15:07:59 +0300 Subject: net/tls: Replace TLS_RX_SYNC_RUNNING with RCU RCU synchronization is guaranteed to finish in finite time, unlike a busy loop that polls a flag. This patch is a preparation for the bugfix in the next patch, where the same synchronize_net() call will also be used to sync with the TX datapath. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- include/net/tls.h | 1 - net/tls/tls_device.c | 10 +++------- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 3eccb525e8f7..6531ace2a68b 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -193,7 +193,6 @@ struct tls_offload_context_tx { (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) enum tls_context_flags { - TLS_RX_SYNC_RUNNING = 0, /* Unlike RX where resync is driven entirely by the core in TX only * the driver knows when things went out of sync, so we need the flag * to be atomic. diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 76a6f8c2eec4..171752cd6910 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -680,15 +680,13 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx, struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); struct net_device *netdev; - if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags))) - return; - trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type); + rcu_read_lock(); netdev = READ_ONCE(tls_ctx->netdev); if (netdev) netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, TLS_OFFLOAD_CTX_DIR_RX); - clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags); + rcu_read_unlock(); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC); } @@ -1300,9 +1298,7 @@ static int tls_device_down(struct net_device *netdev) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); WRITE_ONCE(ctx->netdev, NULL); - smp_mb__before_atomic(); /* pairs with test_and_set_bit() */ - while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags)) - usleep_range(10, 200); + synchronize_net(); dev_put(netdev); list_del_init(&ctx->list); -- cgit v1.2.3 From c55dcdd435aa6c6ad6ccac0a4c636d010ee367a4 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 1 Jun 2021 15:08:00 +0300 Subject: net/tls: Fix use-after-free after the TLS device goes down and up When a netdev with active TLS offload goes down, tls_device_down is called to stop the offload and tear down the TLS context. However, the socket stays alive, and it still points to the TLS context, which is now deallocated. If a netdev goes up, while the connection is still active, and the data flow resumes after a number of TCP retransmissions, it will lead to a use-after-free of the TLS context. This commit addresses this bug by keeping the context alive until its normal destruction, and implements the necessary fallbacks, so that the connection can resume in software (non-offloaded) kTLS mode. On the TX side tls_sw_fallback is used to encrypt all packets. The RX side already has all the necessary fallbacks, because receiving non-decrypted packets is supported. The thing needed on the RX side is to block resync requests, which are normally produced after receiving non-decrypted packets. The necessary synchronization is implemented for a graceful teardown: first the fallbacks are deployed, then the driver resources are released (it used to be possible to have a tls_dev_resync after tls_dev_del). A new flag called TLS_RX_DEV_DEGRADED is added to indicate the fallback mode. It's used to skip the RX resync logic completely, as it becomes useless, and some objects may be released (for example, resync_async, which is allocated and freed by the driver). Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- include/net/tls.h | 9 ++++++++ net/tls/tls_device.c | 52 ++++++++++++++++++++++++++++++++++++++----- net/tls/tls_device_fallback.c | 7 ++++++ net/tls/tls_main.c | 1 + 4 files changed, 64 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 6531ace2a68b..8341a8d1e807 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -193,6 +193,11 @@ struct tls_offload_context_tx { (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) enum tls_context_flags { + /* tls_device_down was called after the netdev went down, device state + * was released, and kTLS works in software, even though rx_conf is + * still TLS_HW (needed for transition). + */ + TLS_RX_DEV_DEGRADED = 0, /* Unlike RX where resync is driven entirely by the core in TX only * the driver knows when things went out of sync, so we need the flag * to be atomic. @@ -265,6 +270,7 @@ struct tls_context { /* cache cold stuff */ struct proto *sk_proto; + struct sock *sk; void (*sk_destruct)(struct sock *sk); @@ -447,6 +453,9 @@ static inline u16 tls_user_config(struct tls_context *ctx, bool tx) struct sk_buff * tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb); +struct sk_buff * +tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev, + struct sk_buff *skb); static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 171752cd6910..bd9f1567aa39 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -50,6 +50,7 @@ static void tls_device_gc_task(struct work_struct *work); static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task); static LIST_HEAD(tls_device_gc_list); static LIST_HEAD(tls_device_list); +static LIST_HEAD(tls_device_down_list); static DEFINE_SPINLOCK(tls_device_lock); static void tls_device_free_ctx(struct tls_context *ctx) @@ -759,6 +760,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) if (tls_ctx->rx_conf != TLS_HW) return; + if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) + return; prot = &tls_ctx->prot_info; rx_ctx = tls_offload_ctx_rx(tls_ctx); @@ -961,6 +964,17 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, ctx->sw.decrypted |= is_decrypted; + if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { + if (likely(is_encrypted || is_decrypted)) + return 0; + + /* After tls_device_down disables the offload, the next SKB will + * likely have initial fragments decrypted, and final ones not + * decrypted. We need to reencrypt that single SKB. + */ + return tls_device_reencrypt(sk, skb); + } + /* Return immediately if the record is either entirely plaintext or * entirely ciphertext. Otherwise handle reencrypt partially decrypted * record. @@ -1290,6 +1304,26 @@ static int tls_device_down(struct net_device *netdev) spin_unlock_irqrestore(&tls_device_lock, flags); list_for_each_entry_safe(ctx, tmp, &list, list) { + /* Stop offloaded TX and switch to the fallback. + * tls_is_sk_tx_device_offloaded will return false. + */ + WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw); + + /* Stop the RX and TX resync. + * tls_dev_resync must not be called after tls_dev_del. + */ + WRITE_ONCE(ctx->netdev, NULL); + + /* Start skipping the RX resync logic completely. */ + set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags); + + /* Sync with inflight packets. After this point: + * TX: no non-encrypted packets will be passed to the driver. + * RX: resync requests from the driver will be ignored. + */ + synchronize_net(); + + /* Release the offload context on the driver side. */ if (ctx->tx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); @@ -1297,13 +1331,21 @@ static int tls_device_down(struct net_device *netdev) !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); - WRITE_ONCE(ctx->netdev, NULL); - synchronize_net(); + dev_put(netdev); - list_del_init(&ctx->list); - if (refcount_dec_and_test(&ctx->refcount)) - tls_device_free_ctx(ctx); + /* Move the context to a separate list for two reasons: + * 1. When the context is deallocated, list_del is called. + * 2. It's no longer an offloaded context, so we don't want to + * run offload-specific code on this context. + */ + spin_lock_irqsave(&tls_device_lock, flags); + list_move_tail(&ctx->list, &tls_device_down_list); + spin_unlock_irqrestore(&tls_device_lock, flags); + + /* Device contexts for RX and TX will be freed in on sk_destruct + * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. + */ } up_write(&device_offload_lock); diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index cacf040872c7..e40bedd112b6 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -431,6 +431,13 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk, } EXPORT_SYMBOL_GPL(tls_validate_xmit_skb); +struct sk_buff *tls_validate_xmit_skb_sw(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb) +{ + return tls_sw_fallback(sk, skb); +} + struct sk_buff *tls_encrypt_skb(struct sk_buff *skb) { return tls_sw_fallback(skb->sk, skb); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 47b7c5334c34..fde56ff49163 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -636,6 +636,7 @@ struct tls_context *tls_ctx_create(struct sock *sk) mutex_init(&ctx->tx_lock); rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); + ctx->sk = sk; return ctx; } -- cgit v1.2.3 From 1710eb913bdcda3917f44d383c32de6bdabfc836 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 May 2021 21:54:42 +0200 Subject: netfilter: nft_ct: skip expectations for confirmed conntrack nft_ct_expect_obj_eval() calls nf_ct_ext_add() for a confirmed conntrack entry. However, nf_ct_ext_add() can only be called for !nf_ct_is_confirmed(). [ 1825.349056] WARNING: CPU: 0 PID: 1279 at net/netfilter/nf_conntrack_extend.c:48 nf_ct_xt_add+0x18e/0x1a0 [nf_conntrack] [ 1825.351391] RIP: 0010:nf_ct_ext_add+0x18e/0x1a0 [nf_conntrack] [ 1825.351493] Code: 41 5c 41 5d 41 5e 41 5f c3 41 bc 0a 00 00 00 e9 15 ff ff ff ba 09 00 00 00 31 f6 4c 89 ff e8 69 6c 3d e9 eb 96 45 31 ed eb cd <0f> 0b e9 b1 fe ff ff e8 86 79 14 e9 eb bf 0f 1f 40 00 0f 1f 44 00 [ 1825.351721] RSP: 0018:ffffc90002e1f1e8 EFLAGS: 00010202 [ 1825.351790] RAX: 000000000000000e RBX: ffff88814f5783c0 RCX: ffffffffc0e4f887 [ 1825.351881] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff88814f578440 [ 1825.351971] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff88814f578447 [ 1825.352060] R10: ffffed1029eaf088 R11: 0000000000000001 R12: ffff88814f578440 [ 1825.352150] R13: ffff8882053f3a00 R14: 0000000000000000 R15: 0000000000000a20 [ 1825.352240] FS: 00007f992261c900(0000) GS:ffff889faec00000(0000) knlGS:0000000000000000 [ 1825.352343] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1825.352417] CR2: 000056070a4d1158 CR3: 000000015efe0000 CR4: 0000000000350ee0 [ 1825.352508] Call Trace: [ 1825.352544] nf_ct_helper_ext_add+0x10/0x60 [nf_conntrack] [ 1825.352641] nft_ct_expect_obj_eval+0x1b8/0x1e0 [nft_ct] [ 1825.352716] nft_do_chain+0x232/0x850 [nf_tables] Add the ct helper extension only for unconfirmed conntrack. Skip rule evaluation if the ct helper extension does not exist. Thus, you can only create expectations from the first packet. It should be possible to remove this limitation by adding a new action to attach a generic ct helper to the first packet. Then, use this ct helper extension from follow up packets to create the ct expectation. While at it, add a missing check to skip the template conntrack too and remove check for IPCT_UNTRACK which is implicit to !ct. Fixes: 857b46027d6f ("netfilter: nft_ct: add ct expectations support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 0592a9456084..337e22d8b40b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1217,7 +1217,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj, struct nf_conn *ct; ct = nf_ct_get(pkt->skb, &ctinfo); - if (!ct || ctinfo == IP_CT_UNTRACKED) { + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) { regs->verdict.code = NFT_BREAK; return; } -- cgit v1.2.3 From 8971ee8b087750a23f3cd4dc55bff2d0303fd267 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 28 May 2021 13:45:16 +0200 Subject: netfilter: nfnetlink_cthelper: hit EBUSY on updates if size mismatches The private helper data size cannot be updated. However, updates that contain NFCTH_PRIV_DATA_LEN might bogusly hit EBUSY even if the size is the same. Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 322ac5dd5402..752b10cae524 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -380,10 +380,14 @@ static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { + u32 size; int ret; - if (tb[NFCTH_PRIV_DATA_LEN]) - return -EBUSY; + if (tb[NFCTH_PRIV_DATA_LEN]) { + size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + if (size != helper->data_len) + return -EBUSY; + } if (tb[NFCTH_POLICY]) { ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); -- cgit v1.2.3 From b508d5fb69c2211a1b860fc058aafbefc3b3c3cd Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 1 Jun 2021 18:38:41 -0700 Subject: net: ipconfig: Don't override command-line hostnames or domains If the user specifies a hostname or domain name as part of the ip= command-line option, preserve it and don't overwrite it with one supplied by DHCP/BOOTP. For instance, ip=::::myhostname::dhcp will use "myhostname" rather than ignoring and overwriting it. Fix the comment on ic_bootp_string that suggests it only copies a string "if not already set"; it doesn't have any such logic. Signed-off-by: Josh Triplett Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index bc2f6ca97152..816d8aad5a68 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -886,7 +886,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* - * Copy BOOTP-supplied string if not already set. + * Copy BOOTP-supplied string */ static int __init ic_bootp_string(char *dest, char *src, int len, int max) { @@ -935,12 +935,15 @@ static void __init ic_do_bootp_ext(u8 *ext) } break; case 12: /* Host name */ - ic_bootp_string(utsname()->nodename, ext+1, *ext, - __NEW_UTS_LEN); - ic_host_name_set = 1; + if (!ic_host_name_set) { + ic_bootp_string(utsname()->nodename, ext+1, *ext, + __NEW_UTS_LEN); + ic_host_name_set = 1; + } break; case 15: /* Domain name (DNS) */ - ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); + if (!ic_domain[0]) + ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); break; case 17: /* Root path */ if (!root_server_path[0]) -- cgit v1.2.3 From ad6f5cc5f6c261f881e44ecd750f17952df2b496 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Sun, 25 Apr 2021 18:24:59 +0800 Subject: net/ieee802154: drop unneeded assignment in llsec_iter_devkeys() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to keep the code style consistency of the whole file, redundant return value ‘rc’ and its assignments should be deleted The clang_analyzer complains as follows: net/ieee802154/nl-mac.c:1203:12: warning: Although the value stored to 'rc' is used in the enclosing expression, the value is never actually read from 'rc' No functional change, only more efficient. Reported-by: Abaci Robot Signed-off-by: Yang Li Link: https://lore.kernel.org/r/1619346299-40237-1-git-send-email-yang.lee@linux.alibaba.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl-mac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 0c1b0770c59e..a6a8cf62924c 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -1184,7 +1184,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data) { struct ieee802154_llsec_device *dpos; struct ieee802154_llsec_device_key *kpos; - int rc = 0, idx = 0, idx2; + int idx = 0, idx2; list_for_each_entry(dpos, &data->table->devices, list) { if (idx++ < data->s_idx) @@ -1200,7 +1200,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data) data->nlmsg_seq, dpos->hwaddr, kpos, data->dev)) { - return rc = -EMSGSIZE; + return -EMSGSIZE; } data->s_idx2++; @@ -1209,7 +1209,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data) data->s_idx++; } - return rc; + return 0; } int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, -- cgit v1.2.3 From 79c6b8ed30e54b401c873dbad2511f2a1c525fd5 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 14:25:17 +0800 Subject: ieee802154: fix error return code in ieee802154_add_iface() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: be51da0f3e34 ("ieee802154: Stop using NLA_PUT*().") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20210508062517.2574-1-thunder.leizhen@huawei.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl-phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 2cdc7e63fe17..88215b5c93aa 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -241,8 +241,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) } if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || - nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) + nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) { + rc = -EMSGSIZE; goto nla_put_failure; + } dev_put(dev); wpan_phy_put(phy); -- cgit v1.2.3 From 373e864cf52403b0974c2f23ca8faf9104234555 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 19 May 2021 14:16:14 +0000 Subject: ieee802154: fix error return code in ieee802154_llsec_getparams() Fix to return negative error code -ENOBUFS from the error handling case instead of 0, as done elsewhere in this function. Fixes: 3e9c156e2c21 ("ieee802154: add netlink interfaces for llsec") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20210519141614.3040055-1-weiyongjun1@huawei.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl-mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index a6a8cf62924c..29bf97640166 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -680,8 +680,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, be32_to_cpu(params.frame_counter)) || - ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) + ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) { + rc = -ENOBUFS; goto out_free; + } dev_put(dev); -- cgit v1.2.3 From a8db57c1d285c758adc7fb43d6e2bad2554106e1 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 2 Jun 2021 18:15:04 +0800 Subject: rtnetlink: Fix missing error code in rtnl_bridge_notify() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'err'. Eliminate the follow smatch warning: net/core/rtnetlink.c:4834 rtnl_bridge_notify() warn: missing error code 'err'. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 714d5fa38546..3e84279c4123 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4842,8 +4842,10 @@ static int rtnl_bridge_notify(struct net_device *dev) if (err < 0) goto errout; - if (!skb->len) + if (!skb->len) { + err = -EINVAL; goto errout; + } rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; -- cgit v1.2.3 From 261ba78cc364ad595cead555a7d2a61471eac165 Mon Sep 17 00:00:00 2001 From: zhang kai Date: Wed, 2 Jun 2021 18:36:26 +0800 Subject: sit: set name of device back to struct parms addrconf_set_sit_dstaddr will use parms->name. Signed-off-by: zhang kai Signed-off-by: David S. Miller --- net/ipv6/sit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index aa98294a3ad3..f7c8110ece5f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -271,6 +271,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (ipip6_tunnel_create(dev) < 0) goto failed_free; + if (!parms->name[0]) + strcpy(parms->name, dev->name); + return nt; failed_free: -- cgit v1.2.3 From c47cc304990a2813995b1a92bbc11d0bb9a19ea9 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 2 Jun 2021 22:26:40 +0300 Subject: net: kcm: fix memory leak in kcm_sendmsg Syzbot reported memory leak in kcm_sendmsg()[1]. The problem was in non-freed frag_list in case of error. In the while loop: if (head == skb) skb_shinfo(head)->frag_list = tskb; else skb->next = tskb; frag_list filled with skbs, but nothing was freeing them. backtrace: [<0000000094c02615>] __alloc_skb+0x5e/0x250 net/core/skbuff.c:198 [<00000000e5386cbd>] alloc_skb include/linux/skbuff.h:1083 [inline] [<00000000e5386cbd>] kcm_sendmsg+0x3b6/0xa50 net/kcm/kcmsock.c:967 [1] [<00000000f1613a8a>] sock_sendmsg_nosec net/socket.c:652 [inline] [<00000000f1613a8a>] sock_sendmsg+0x4c/0x60 net/socket.c:672 Reported-and-tested-by: syzbot+b039f5699bd82e1fb011@syzkaller.appspotmail.com Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Cc: stable@vger.kernel.org Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 6201965bd822..1c572c8daced 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1066,6 +1066,11 @@ out_error: goto partial_message; } + if (skb_has_frag_list(head)) { + kfree_skb_list(skb_shinfo(head)->frag_list); + skb_shinfo(head)->frag_list = NULL; + } + if (head != kcm->seq_skb) kfree_skb(head); -- cgit v1.2.3 From bce130e7f392ddde8cfcb09927808ebd5f9c8669 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:38:12 +0300 Subject: net: caif: added cfserl_release function Added cfserl_release() function. Cc: stable@vger.kernel.org Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- include/net/caif/cfserl.h | 1 + net/caif/cfserl.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h index 14a55e03bb3c..67cce8757175 100644 --- a/include/net/caif/cfserl.h +++ b/include/net/caif/cfserl.h @@ -9,4 +9,5 @@ #include struct cflayer *cfserl_create(int instance, bool use_stx); +void cfserl_release(struct cflayer *layer); #endif diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index e11725a4bb0e..40cd57ad0a0f 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -31,6 +31,11 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid); +void cfserl_release(struct cflayer *layer) +{ + kfree(layer); +} + struct cflayer *cfserl_create(int instance, bool use_stx) { struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC); -- cgit v1.2.3 From a2805dca5107d5603f4bbc027e81e20d93476e96 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:38:51 +0300 Subject: net: caif: add proper error handling caif_enroll_dev() can fail in some cases. Ingnoring these cases can lead to memory leak due to not assigning link_support pointer to anywhere. Fixes: 7c18d2205ea7 ("caif: Restructure how link caif link layer enroll") Cc: stable@vger.kernel.org Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- include/net/caif/caif_dev.h | 2 +- include/net/caif/cfcnfg.h | 2 +- net/caif/caif_dev.c | 8 +++++--- net/caif/cfcnfg.c | 16 +++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h index 48ecca8530ff..b655d8666f55 100644 --- a/include/net/caif/caif_dev.h +++ b/include/net/caif/caif_dev.h @@ -119,7 +119,7 @@ void caif_free_client(struct cflayer *adap_layer); * The link_support layer is used to add any Link Layer specific * framing. */ -void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, +int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, struct cflayer *link_support, int head_room, struct cflayer **layer, int (**rcv_func)( struct sk_buff *, struct net_device *, diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h index 2aa5e91d8457..8819ff4db35a 100644 --- a/include/net/caif/cfcnfg.h +++ b/include/net/caif/cfcnfg.h @@ -62,7 +62,7 @@ void cfcnfg_remove(struct cfcnfg *cfg); * @fcs: Specify if checksum is used in CAIF Framing Layer. * @head_room: Head space needed by link specific protocol. */ -void +int cfcnfg_add_phy_layer(struct cfcnfg *cnfg, struct net_device *dev, struct cflayer *phy_layer, enum cfcnfg_phy_preference pref, diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index c10e5a55758d..fffbe41440b3 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -308,7 +308,7 @@ static void dev_flowctrl(struct net_device *dev, int on) caifd_put(caifd); } -void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, +int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, struct cflayer *link_support, int head_room, struct cflayer **layer, int (**rcv_func)(struct sk_buff *, struct net_device *, @@ -319,11 +319,12 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, enum cfcnfg_phy_preference pref; struct cfcnfg *cfg = get_cfcnfg(dev_net(dev)); struct caif_device_entry_list *caifdevs; + int res; caifdevs = caif_device_list(dev_net(dev)); caifd = caif_device_alloc(dev); if (!caifd) - return; + return -ENOMEM; *layer = &caifd->layer; spin_lock_init(&caifd->flow_lock); @@ -344,7 +345,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, strlcpy(caifd->layer.name, dev->name, sizeof(caifd->layer.name)); caifd->layer.transmit = transmit; - cfcnfg_add_phy_layer(cfg, + res = cfcnfg_add_phy_layer(cfg, dev, &caifd->layer, pref, @@ -354,6 +355,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, mutex_unlock(&caifdevs->lock); if (rcv_func) *rcv_func = receive; + return res; } EXPORT_SYMBOL(caif_enroll_dev); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 399239a14420..cac30e676ac9 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -450,7 +450,7 @@ unlock: rcu_read_unlock(); } -void +int cfcnfg_add_phy_layer(struct cfcnfg *cnfg, struct net_device *dev, struct cflayer *phy_layer, enum cfcnfg_phy_preference pref, @@ -459,7 +459,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, { struct cflayer *frml; struct cfcnfg_phyinfo *phyinfo = NULL; - int i; + int i, res = 0; u8 phyid; mutex_lock(&cnfg->lock); @@ -473,12 +473,15 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, goto got_phyid; } pr_warn("Too many CAIF Link Layers (max 6)\n"); + res = -EEXIST; goto out; got_phyid: phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); - if (!phyinfo) + if (!phyinfo) { + res = -ENOMEM; goto out_err; + } phy_layer->id = phyid; phyinfo->pref = pref; @@ -492,8 +495,10 @@ got_phyid: frml = cffrml_create(phyid, fcs); - if (!frml) + if (!frml) { + res = -ENOMEM; goto out_err; + } phyinfo->frm_layer = frml; layer_set_up(frml, cnfg->mux); @@ -511,11 +516,12 @@ got_phyid: list_add_rcu(&phyinfo->node, &cnfg->phys); out: mutex_unlock(&cnfg->lock); - return; + return res; out_err: kfree(phyinfo); mutex_unlock(&cnfg->lock); + return res; } EXPORT_SYMBOL(cfcnfg_add_phy_layer); -- cgit v1.2.3 From b53558a950a89824938e9811eddfc8efcd94e1bb Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:39:11 +0300 Subject: net: caif: fix memory leak in caif_device_notify In case of caif_enroll_dev() fail, allocated link_support won't be assigned to the corresponding structure. So simply free allocated pointer in case of error Fixes: 7c18d2205ea7 ("caif: Restructure how link caif link layer enroll") Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+7ec324747ce876a29db6@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- net/caif/caif_dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index fffbe41440b3..440139706130 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -370,6 +370,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, struct cflayer *layer, *link_support; int head_room = 0; struct caif_device_entry_list *caifdevs; + int res; cfg = get_cfcnfg(dev_net(dev)); caifdevs = caif_device_list(dev_net(dev)); @@ -395,8 +396,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, break; } } - caif_enroll_dev(dev, caifdev, link_support, head_room, + res = caif_enroll_dev(dev, caifdev, link_support, head_room, &layer, NULL); + if (res) + cfserl_release(link_support); caifdev->flowctrl = dev_flowctrl; break; -- cgit v1.2.3 From 7f5d86669fa4d485523ddb1d212e0a2d90bd62bb Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:39:35 +0300 Subject: net: caif: fix memory leak in cfusbl_device_notify In case of caif_enroll_dev() fail, allocated link_support won't be assigned to the corresponding structure. So simply free allocated pointer in case of error. Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface") Cc: stable@vger.kernel.org Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- net/caif/caif_usb.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index a0116b9503d9..b02e1292f7f1 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -115,6 +115,11 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], return (struct cflayer *) this; } +static void cfusbl_release(struct cflayer *layer) +{ + kfree(layer); +} + static struct packet_type caif_usb_type __read_mostly = { .type = cpu_to_be16(ETH_P_802_EX1), }; @@ -127,6 +132,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, struct cflayer *layer, *link_support; struct usbnet *usbnet; struct usb_device *usbdev; + int res; /* Check whether we have a NCM device, and find its VID/PID. */ if (!(dev->dev.parent && dev->dev.parent->driver && @@ -169,8 +175,11 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, if (dev->num_tx_queues > 1) pr_warn("USB device uses more than one tx queue\n"); - caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN, + res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN, &layer, &caif_usb_type.func); + if (res) + goto err; + if (!pack_added) dev_add_pack(&caif_usb_type); pack_added = true; @@ -178,6 +187,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, strlcpy(layer->name, dev->name, sizeof(layer->name)); return 0; +err: + cfusbl_release(link_support); + return res; } static struct notifier_block caif_device_notifier = { -- cgit v1.2.3 From d7736958668c4facc15f421e622ffd718f5be80a Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 2 Jun 2021 22:06:30 +0800 Subject: net/x25: Return the correct errno code When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF. Signed-off-by: Zheng Yongjun Signed-off-by: David S. Miller --- net/x25/af_x25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 44d6566dd23e..1816899499ce 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -536,7 +536,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, if (protocol) goto out; - rc = -ENOBUFS; + rc = -ENOMEM; if ((sk = x25_alloc_socket(net, kern)) == NULL) goto out; -- cgit v1.2.3 From 49251cd00228a3c983651f6bb2f33f6a0b8f152e Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 2 Jun 2021 22:06:40 +0800 Subject: net: Return the correct errno code When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF. Signed-off-by: Zheng Yongjun Signed-off-by: David S. Miller --- net/compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index ddd15af3a283..210fc3b4d0d8 100644 --- a/net/compat.c +++ b/net/compat.c @@ -177,7 +177,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, if (kcmlen > stackbuf_size) kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL); if (kcmsg == NULL) - return -ENOBUFS; + return -ENOMEM; /* Now copy them over neatly. */ memset(kcmsg, 0, kcmlen); -- cgit v1.2.3 From 59607863c54e9eb3f69afc5257dfe71c38bb751e Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 2 Jun 2021 22:06:58 +0800 Subject: fib: Return the correct errno code When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF. Signed-off-by: Zheng Yongjun Signed-off-by: David S. Miller --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index cd80ffed6d26..a9f937975080 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -1168,7 +1168,7 @@ static void notify_rule_change(int event, struct fib_rule *rule, { struct net *net; struct sk_buff *skb; - int err = -ENOBUFS; + int err = -ENOMEM; net = ops->fro_net; skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); -- cgit v1.2.3 From 821bbf79fe46a8b1d18aa456e8ed0a3c208c3754 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 3 Jun 2021 07:32:58 +0000 Subject: ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions Reported by syzbot: HEAD commit: 90c911ad Merge tag 'fixes' of git://git.kernel.org/pub/scm.. git tree: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master dashboard link: https://syzkaller.appspot.com/bug?extid=123aa35098fd3c000eb7 compiler: Debian clang version 11.0.1-2 ================================================================== BUG: KASAN: slab-out-of-bounds in fib6_nh_get_excptn_bucket net/ipv6/route.c:1604 [inline] BUG: KASAN: slab-out-of-bounds in fib6_nh_flush_exceptions+0xbd/0x360 net/ipv6/route.c:1732 Read of size 8 at addr ffff8880145c78f8 by task syz-executor.4/17760 CPU: 0 PID: 17760 Comm: syz-executor.4 Not tainted 5.12.0-rc8-syzkaller #0 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x202/0x31e lib/dump_stack.c:120 print_address_description+0x5f/0x3b0 mm/kasan/report.c:232 __kasan_report mm/kasan/report.c:399 [inline] kasan_report+0x15c/0x200 mm/kasan/report.c:416 fib6_nh_get_excptn_bucket net/ipv6/route.c:1604 [inline] fib6_nh_flush_exceptions+0xbd/0x360 net/ipv6/route.c:1732 fib6_nh_release+0x9a/0x430 net/ipv6/route.c:3536 fib6_info_destroy_rcu+0xcb/0x1c0 net/ipv6/ip6_fib.c:174 rcu_do_batch kernel/rcu/tree.c:2559 [inline] rcu_core+0x8f6/0x1450 kernel/rcu/tree.c:2794 __do_softirq+0x372/0x7a6 kernel/softirq.c:345 invoke_softirq kernel/softirq.c:221 [inline] __irq_exit_rcu+0x22c/0x260 kernel/softirq.c:422 irq_exit_rcu+0x5/0x20 kernel/softirq.c:434 sysvec_apic_timer_interrupt+0x91/0xb0 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:632 RIP: 0010:lock_acquire+0x1f6/0x720 kernel/locking/lockdep.c:5515 Code: f6 84 24 a1 00 00 00 02 0f 85 8d 02 00 00 f7 c3 00 02 00 00 49 bd 00 00 00 00 00 fc ff df 74 01 fb 48 c7 44 24 40 0e 36 e0 45 <4b> c7 44 3d 00 00 00 00 00 4b c7 44 3d 09 00 00 00 00 43 c7 44 3d RSP: 0018:ffffc90009e06560 EFLAGS: 00000206 RAX: 1ffff920013c0cc0 RBX: 0000000000000246 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffc90009e066e0 R08: dffffc0000000000 R09: fffffbfff1f992b1 R10: fffffbfff1f992b1 R11: 0000000000000000 R12: 0000000000000000 R13: dffffc0000000000 R14: 0000000000000000 R15: 1ffff920013c0cb4 rcu_lock_acquire+0x2a/0x30 include/linux/rcupdate.h:267 rcu_read_lock include/linux/rcupdate.h:656 [inline] ext4_get_group_info+0xea/0x340 fs/ext4/ext4.h:3231 ext4_mb_prefetch+0x123/0x5d0 fs/ext4/mballoc.c:2212 ext4_mb_regular_allocator+0x8a5/0x28f0 fs/ext4/mballoc.c:2379 ext4_mb_new_blocks+0xc6e/0x24f0 fs/ext4/mballoc.c:4982 ext4_ext_map_blocks+0x2be3/0x7210 fs/ext4/extents.c:4238 ext4_map_blocks+0xab3/0x1cb0 fs/ext4/inode.c:638 ext4_getblk+0x187/0x6c0 fs/ext4/inode.c:848 ext4_bread+0x2a/0x1c0 fs/ext4/inode.c:900 ext4_append+0x1a4/0x360 fs/ext4/namei.c:67 ext4_init_new_dir+0x337/0xa10 fs/ext4/namei.c:2768 ext4_mkdir+0x4b8/0xc00 fs/ext4/namei.c:2814 vfs_mkdir+0x45b/0x640 fs/namei.c:3819 ovl_do_mkdir fs/overlayfs/overlayfs.h:161 [inline] ovl_mkdir_real+0x53/0x1a0 fs/overlayfs/dir.c:146 ovl_create_real+0x280/0x490 fs/overlayfs/dir.c:193 ovl_workdir_create+0x425/0x600 fs/overlayfs/super.c:788 ovl_make_workdir+0xed/0x1140 fs/overlayfs/super.c:1355 ovl_get_workdir fs/overlayfs/super.c:1492 [inline] ovl_fill_super+0x39ee/0x5370 fs/overlayfs/super.c:2035 mount_nodev+0x52/0xe0 fs/super.c:1413 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x86/0x270 fs/super.c:1497 do_new_mount fs/namespace.c:2903 [inline] path_mount+0x196f/0x2be0 fs/namespace.c:3233 do_mount fs/namespace.c:3246 [inline] __do_sys_mount fs/namespace.c:3454 [inline] __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3431 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x4665f9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f68f2b87188 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 000000000056bf60 RCX: 00000000004665f9 RDX: 00000000200000c0 RSI: 0000000020000000 RDI: 000000000040000a RBP: 00000000004bfbb9 R08: 0000000020000100 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056bf60 R13: 00007ffe19002dff R14: 00007f68f2b87300 R15: 0000000000022000 Allocated by task 17768: kasan_save_stack mm/kasan/common.c:38 [inline] kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:427 [inline] ____kasan_kmalloc+0xc2/0xf0 mm/kasan/common.c:506 kasan_kmalloc include/linux/kasan.h:233 [inline] __kmalloc+0xb4/0x380 mm/slub.c:4055 kmalloc include/linux/slab.h:559 [inline] kzalloc include/linux/slab.h:684 [inline] fib6_info_alloc+0x2c/0xd0 net/ipv6/ip6_fib.c:154 ip6_route_info_create+0x55d/0x1a10 net/ipv6/route.c:3638 ip6_route_add+0x22/0x120 net/ipv6/route.c:3728 inet6_rtm_newroute+0x2cd/0x2260 net/ipv6/route.c:5352 rtnetlink_rcv_msg+0xb34/0xe70 net/core/rtnetlink.c:5553 netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x7de/0x9b0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0xaa6/0xe90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x5a2/0x900 net/socket.c:2350 ___sys_sendmsg net/socket.c:2404 [inline] __sys_sendmsg+0x319/0x400 net/socket.c:2433 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Last potentially related work creation: kasan_save_stack+0x27/0x50 mm/kasan/common.c:38 kasan_record_aux_stack+0xee/0x120 mm/kasan/generic.c:345 __call_rcu kernel/rcu/tree.c:3039 [inline] call_rcu+0x1b1/0xa30 kernel/rcu/tree.c:3114 fib6_info_release include/net/ip6_fib.h:337 [inline] ip6_route_info_create+0x10c4/0x1a10 net/ipv6/route.c:3718 ip6_route_add+0x22/0x120 net/ipv6/route.c:3728 inet6_rtm_newroute+0x2cd/0x2260 net/ipv6/route.c:5352 rtnetlink_rcv_msg+0xb34/0xe70 net/core/rtnetlink.c:5553 netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x7de/0x9b0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0xaa6/0xe90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x5a2/0x900 net/socket.c:2350 ___sys_sendmsg net/socket.c:2404 [inline] __sys_sendmsg+0x319/0x400 net/socket.c:2433 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Second to last potentially related work creation: kasan_save_stack+0x27/0x50 mm/kasan/common.c:38 kasan_record_aux_stack+0xee/0x120 mm/kasan/generic.c:345 insert_work+0x54/0x400 kernel/workqueue.c:1331 __queue_work+0x981/0xcc0 kernel/workqueue.c:1497 queue_work_on+0x111/0x200 kernel/workqueue.c:1524 queue_work include/linux/workqueue.h:507 [inline] call_usermodehelper_exec+0x283/0x470 kernel/umh.c:433 kobject_uevent_env+0x1349/0x1730 lib/kobject_uevent.c:617 kvm_uevent_notify_change+0x309/0x3b0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4809 kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:877 [inline] kvm_put_kvm+0x9c/0xd10 arch/x86/kvm/../../../virt/kvm/kvm_main.c:920 kvm_vcpu_release+0x53/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3120 __fput+0x352/0x7b0 fs/file_table.c:280 task_work_run+0x146/0x1c0 kernel/task_work.c:140 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:174 [inline] exit_to_user_mode_prepare+0x10b/0x1e0 kernel/entry/common.c:208 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 [inline] syscall_exit_to_user_mode+0x26/0x70 kernel/entry/common.c:301 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff8880145c7800 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 56 bytes to the right of 192-byte region [ffff8880145c7800, ffff8880145c78c0) The buggy address belongs to the page: page:ffffea00005171c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x145c7 flags: 0xfff00000000200(slab) raw: 00fff00000000200 ffffea00006474c0 0000000200000002 ffff888010c41a00 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880145c7780: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8880145c7800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff8880145c7880: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff8880145c7900: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880145c7980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ================================================================== In the ip6_route_info_create function, in the case that the nh pointer is not NULL, the fib6_nh in fib6_info has not been allocated. Therefore, when trying to free fib6_info in this error case using fib6_info_release, the function will call fib6_info_destroy_rcu, which it will access fib6_nh_release(f6i->fib6_nh); However, f6i->fib6_nh doesn't have any refcount yet given the lack of allocation causing the reported memory issue above. Therefore, releasing the empty pointer directly instead would be the solution. Fixes: f88d8ea67fbdb ("ipv6: Plumb support for nexthop object in a fib6_info") Fixes: 706ec91916462 ("ipv6: Fix nexthop refcnt leak when creating ipv6 route info") Signed-off-by: Coco Li Cc: David Ahern Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a22822bdbf39..d417e514bd52 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3673,11 +3673,11 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (nh) { if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); - goto out; + goto out_free; } if (!nexthop_get(nh)) { NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); - goto out; + goto out_free; } rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); @@ -3714,6 +3714,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, out: fib6_info_release(rt); return ERR_PTR(err); +out_free: + ip_fib_metrics_put(rt->fib6_metrics); + kfree(rt); + return ERR_PTR(err); } int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, -- cgit v1.2.3 From 944d671d5faa0d78980a3da5c0f04960ef1ad893 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 4 Jun 2021 19:03:18 +0800 Subject: sch_htb: fix refcount leak in htb_parent_to_leaf_offload The commit ae81feb7338c ("sch_htb: fix null pointer dereference on a null new_q") fixes a NULL pointer dereference bug, but it is not correct. Because htb_graft_helper properly handles the case when new_q is NULL, and after the previous patch by skipping this call which creates an inconsistency : dev_queue->qdisc will still point to the old qdisc, but cl->parent->leaf.q will point to the new one (which will be noop_qdisc, because new_q was NULL). The code is based on an assumption that these two pointers are the same, so it can lead to refcount leaks. The correct fix is to add a NULL pointer check to protect qdisc_refcount_inc inside htb_parent_to_leaf_offload. Fixes: ae81feb7338c ("sch_htb: fix null pointer dereference on a null new_q") Signed-off-by: Yunjian Wang Suggested-by: Maxim Mikityanskiy Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 081c11d5717c..8827987ba903 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1488,7 +1488,8 @@ static void htb_parent_to_leaf_offload(struct Qdisc *sch, struct Qdisc *old_q; /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ - qdisc_refcount_inc(new_q); + if (new_q) + qdisc_refcount_inc(new_q); old_q = htb_graft_helper(dev_queue, new_q); WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); } @@ -1675,10 +1676,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, cl->parent->common.classid, NULL); if (q->offload) { - if (new_q) { + if (new_q) htb_set_lockdep_class_child(new_q); - htb_parent_to_leaf_offload(sch, dev_queue, new_q); - } + htb_parent_to_leaf_offload(sch, dev_queue, new_q); } } -- cgit v1.2.3