summaryrefslogtreecommitdiffstats
path: root/net/mptcp
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 01:43:10 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 01:43:10 +0200
commit3a8a670eeeaa40d87bd38a587438952741980c18 (patch)
treed5546d311271503eadf75b45d87e12720e72899f /net/mptcp
parentMerge tag 'v6.5-rc1-sysctl-next' of git://git.kernel.org/pub/scm/linux/kernel... (diff)
parentMerge branch 'af_unix-followup-fixes-for-so_passpidfd' (diff)
downloadlinux-3a8a670eeeaa40d87bd38a587438952741980c18.tar.xz
linux-3a8a670eeeaa40d87bd38a587438952741980c18.zip
Merge tag 'net-next-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking changes from Jakub Kicinski: "WiFi 7 and sendpage changes are the biggest pieces of work for this release. The latter will definitely require fixes but I think that we got it to a reasonable point. Core: - Rework the sendpage & splice implementations Instead of feeding data into sockets page by page extend sendmsg handlers to support taking a reference on the data, controlled by a new flag called MSG_SPLICE_PAGES Rework the handling of unexpected-end-of-file to invoke an additional callback instead of trying to predict what the right combination of MORE/NOTLAST flags is Remove the MSG_SENDPAGE_NOTLAST flag completely - Implement SCM_PIDFD, a new type of CMSG type analogous to SCM_CREDENTIALS, but it contains pidfd instead of plain pid - Enable socket busy polling with CONFIG_RT - Improve reliability and efficiency of reporting for ref_tracker - Auto-generate a user space C library for various Netlink families Protocols: - Allow TCP to shrink the advertised window when necessary, prevent sk_rcvbuf auto-tuning from growing the window all the way up to tcp_rmem[2] - Use per-VMA locking for "page-flipping" TCP receive zerocopy - Prepare TCP for device-to-device data transfers, by making sure that payloads are always attached to skbs as page frags - Make the backoff time for the first N TCP SYN retransmissions linear. Exponential backoff is unnecessarily conservative - Create a new MPTCP getsockopt to retrieve all info (MPTCP_FULL_INFO) - Avoid waking up applications using TLS sockets until we have a full record - Allow using kernel memory for protocol ioctl callbacks, paving the way to issuing ioctls over io_uring - Add nolocalbypass option to VxLAN, forcing packets to be fully encapsulated even if they are destined for a local IP address - Make TCPv4 use consistent hash in TIME_WAIT and SYN_RECV. Ensure in-kernel ECMP implementation (e.g. Open vSwitch) select the same link for all packets. Support L4 symmetric hashing in Open vSwitch - PPPoE: make number of hash bits configurable - Allow DNS to be overwritten by DHCPACK in the in-kernel DHCP client (ipconfig) - Add layer 2 miss indication and filtering, allowing higher layers (e.g. ACL filters) to make forwarding decisions based on whether packet matched forwarding state in lower devices (bridge) - Support matching on Connectivity Fault Management (CFM) packets - Hide the "link becomes ready" IPv6 messages by demoting their printk level to debug - HSR: don't enable promiscuous mode if device offloads the proto - Support active scanning in IEEE 802.15.4 - Continue work on Multi-Link Operation for WiFi 7 BPF: - Add precision propagation for subprogs and callbacks. This allows maintaining verification efficiency when subprograms are used, or in fact passing the verifier at all for complex programs, especially those using open-coded iterators - Improve BPF's {g,s}setsockopt() length handling. Previously BPF assumed the length is always equal to the amount of written data. But some protos allow passing a NULL buffer to discover what the output buffer *should* be, without writing anything - Accept dynptr memory as memory arguments passed to helpers - Add routing table ID to bpf_fib_lookup BPF helper - Support O_PATH FDs in BPF_OBJ_PIN and BPF_OBJ_GET commands - Drop bpf_capable() check in BPF_MAP_FREEZE command (used to mark maps as read-only) - Show target_{obj,btf}_id in tracing link fdinfo - Addition of several new kfuncs (most of the names are self-explanatory): - Add a set of new dynptr kfuncs: bpf_dynptr_adjust(), bpf_dynptr_is_null(), bpf_dynptr_is_rdonly(), bpf_dynptr_size() and bpf_dynptr_clone(). - bpf_task_under_cgroup() - bpf_sock_destroy() - force closing sockets - bpf_cpumask_first_and(), rework bpf_cpumask_any*() kfuncs Netfilter: - Relax set/map validation checks in nf_tables. Allow checking presence of an entry in a map without using the value - Increase ip_vs_conn_tab_bits range for 64BIT builds - Allow updating size of a set - Improve NAT tuple selection when connection is closing Driver API: - Integrate netdev with LED subsystem, to allow configuring HW "offloaded" blinking of LEDs based on link state and activity (i.e. packets coming in and out) - Support configuring rate selection pins of SFP modules - Factor Clause 73 auto-negotiation code out of the drivers, provide common helper routines - Add more fool-proof helpers for managing lifetime of MDIO devices associated with the PCS layer - Allow drivers to report advanced statistics related to Time Aware scheduler offload (taprio) - Allow opting out of VF statistics in link dump, to allow more VFs to fit into the message - Split devlink instance and devlink port operations New hardware / drivers: - Ethernet: - Synopsys EMAC4 IP support (stmmac) - Marvell 88E6361 8 port (5x1GE + 3x2.5GE) switches - Marvell 88E6250 7 port switches - Microchip LAN8650/1 Rev.B0 PHYs - MediaTek MT7981/MT7988 built-in 1GE PHY driver - WiFi: - Realtek RTL8192FU, 2.4 GHz, b/g/n mode, 2T2R, 300 Mbps - Realtek RTL8723DS (SDIO variant) - Realtek RTL8851BE - CAN: - Fintek F81604 Drivers: - Ethernet NICs: - Intel (100G, ice): - support dynamic interrupt allocation - use meta data match instead of VF MAC addr on slow-path - nVidia/Mellanox: - extend link aggregation to handle 4, rather than just 2 ports - spawn sub-functions without any features by default - OcteonTX2: - support HTB (Tx scheduling/QoS) offload - make RSS hash generation configurable - support selecting Rx queue using TC filters - Wangxun (ngbe/txgbe): - add basic Tx/Rx packet offloads - add phylink support (SFP/PCS control) - Freescale/NXP (enetc): - report TAPRIO packet statistics - Solarflare/AMD: - support matching on IP ToS and UDP source port of outer header - VxLAN and GENEVE tunnel encapsulation over IPv4 or IPv6 - add devlink dev info support for EF10 - Virtual NICs: - Microsoft vNIC: - size the Rx indirection table based on requested configuration - support VLAN tagging - Amazon vNIC: - try to reuse Rx buffers if not fully consumed, useful for ARM servers running with 16kB pages - Google vNIC: - support TCP segmentation of >64kB frames - Ethernet embedded switches: - Marvell (mv88e6xxx): - enable USXGMII (88E6191X) - Microchip: - lan966x: add support for Egress Stage 0 ACL engine - lan966x: support mapping packet priority to internal switch priority (based on PCP or DSCP) - Ethernet PHYs: - Broadcom PHYs: - support for Wake-on-LAN for BCM54210E/B50212E - report LPI counter - Microsemi PHYs: support RGMII delay configuration (VSC85xx) - Micrel PHYs: receive timestamp in the frame (LAN8841) - Realtek PHYs: support optional external PHY clock - Altera TSE PCS: merge the driver into Lynx PCS which it is a variant of - CAN: Kvaser PCIEcan: - support packet timestamping - WiFi: - Intel (iwlwifi): - major update for new firmware and Multi-Link Operation (MLO) - configuration rework to drop test devices and split the different families - support for segmented PNVM images and power tables - new vendor entries for PPAG (platform antenna gain) feature - Qualcomm 802.11ax (ath11k): - Multiple Basic Service Set Identifier (MBSSID) and Enhanced MBSSID Advertisement (EMA) support in AP mode - support factory test mode - RealTek (rtw89): - add RSSI based antenna diversity - support U-NII-4 channels on 5 GHz band - RealTek (rtl8xxxu): - AP mode support for 8188f - support USB RX aggregation for the newer chips" * tag 'net-next-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1602 commits) net: scm: introduce and use scm_recv_unix helper af_unix: Skip SCM_PIDFD if scm->pid is NULL. net: lan743x: Simplify comparison netlink: Add __sock_i_ino() for __netlink_diag_dump(). net: dsa: avoid suspicious RCU usage for synced VLAN-aware MAC addresses Revert "af_unix: Call scm_recv() only after scm_set_cred()." phylink: ReST-ify the phylink_pcs_neg_mode() kdoc libceph: Partially revert changes to support MSG_SPLICE_PAGES net: phy: mscc: fix packet loss due to RGMII delays net: mana: use vmalloc_array and vcalloc net: enetc: use vmalloc_array and vcalloc ionic: use vmalloc_array and vcalloc pds_core: use vmalloc_array and vcalloc gve: use vmalloc_array and vcalloc octeon_ep: use vmalloc_array and vcalloc net: usb: qmi_wwan: add u-blox 0x1312 composition perf trace: fix MSG_SPLICE_PAGES build error ipvlan: Fix return value of ipvlan_queue_xmit() netfilter: nf_tables: fix underflow in chain reference counter netfilter: nf_tables: unbind non-anonymous set if rule construction fails ...
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/mib.c6
-rw-r--r--net/mptcp/mib.h18
-rw-r--r--net/mptcp/options.c19
-rw-r--r--net/mptcp/pm.c47
-rw-r--r--net/mptcp/pm_netlink.c142
-rw-r--r--net/mptcp/pm_userspace.c5
-rw-r--r--net/mptcp/protocol.c65
-rw-r--r--net/mptcp/protocol.h20
-rw-r--r--net/mptcp/sockopt.c153
-rw-r--r--net/mptcp/subflow.c2
10 files changed, 347 insertions, 130 deletions
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 0dac2863c6e1..a0990c365a2e 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -34,7 +34,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
+ SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX),
+ SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
+ SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX),
+ SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP),
SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
@@ -44,6 +48,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
+ SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX),
+ SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 2be3596374f4..cae71d947252 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -27,7 +27,15 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */
MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
+ MPTCP_MIB_ADDADDRTX, /* Sent ADD_ADDR with echo-flag=0 */
+ MPTCP_MIB_ADDADDRTXDROP, /* ADD_ADDR with echo-flag=0 not send due to
+ * resource exhaustion
+ */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
+ MPTCP_MIB_ECHOADDTX, /* Send ADD_ADDR with echo-flag=1 */
+ MPTCP_MIB_ECHOADDTXDROP, /* ADD_ADDR with echo-flag=1 not send due
+ * to resource exhaustion
+ */
MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */
MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
@@ -37,6 +45,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */
+ MPTCP_MIB_RMADDRTX, /* Sent RM_ADDR */
+ MPTCP_MIB_RMADDRTXDROP, /* RM_ADDR not sent due to resource exhaustion */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
@@ -63,6 +73,14 @@ struct mptcp_mib {
unsigned long mibs[LINUX_MIB_MPTCP_MAX];
};
+static inline void MPTCP_ADD_STATS(struct net *net,
+ enum linux_mptcp_mib_field field,
+ int val)
+{
+ if (likely(net->mib.mptcp_statistics))
+ SNMP_ADD_STATS(net->mib.mptcp_statistics, field, val);
+}
+
static inline void MPTCP_INC_STATS(struct net *net,
enum linux_mptcp_mib_field field)
{
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 19a01b6566f1..c254accb14de 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -687,9 +687,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
}
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
opts->ahmac = add_addr_generate_hmac(msk->local_key,
msk->remote_key,
&opts->addr);
+ } else {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
}
pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
@@ -723,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
for (i = 0; i < opts->rm_list.nr; i++)
pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
-
+ MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr);
return true;
}
@@ -1023,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
return cur_seq;
}
+static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
+{
+ msk->bytes_acked += new_snd_una - msk->snd_una;
+ msk->snd_una = new_snd_una;
+}
+
static void ack_update_msk(struct mptcp_sock *msk,
struct sock *ssk,
struct mptcp_options_received *mp_opt)
@@ -1054,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
- msk->snd_una = new_snd_una;
+ __mptcp_snd_una_update(msk, new_snd_una);
__mptcp_data_acked(sk);
}
mptcp_data_unlock(sk);
@@ -1116,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mptcp_data_lock(subflow->conn);
if (sk_stream_memory_free(sk))
__mptcp_check_push(subflow->conn, sk);
+
+ /* on fallback we just need to ignore the msk-level snd_una, as
+ * this is really plain TCP
+ */
+ __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));
+
__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
return true;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 76612bca275a..7dbbad1e4f55 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -26,7 +26,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
if (add_addr &
(echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
- pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
+ MPTCP_INC_STATS(sock_net((struct sock *)msk),
+ echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP);
return -EINVAL;
}
@@ -48,7 +49,8 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
if (rm_addr) {
- pr_warn("addr_signal error, rm_addr=%d", rm_addr);
+ MPTCP_ADD_STATS(sock_net((struct sock *)msk),
+ MPTCP_MIB_RMADDRTXDROP, rm_list->nr);
return -EINVAL;
}
@@ -413,7 +415,46 @@ out_unlock:
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
- return mptcp_pm_nl_get_local_id(msk, skc);
+ struct mptcp_addr_info skc_local;
+ struct mptcp_addr_info msk_local;
+
+ if (WARN_ON_ONCE(!msk))
+ return -1;
+
+ /* The 0 ID mapping is defined by the first subflow, copied into the msk
+ * addr
+ */
+ mptcp_local_address((struct sock_common *)msk, &msk_local);
+ mptcp_local_address((struct sock_common *)skc, &skc_local);
+ if (mptcp_addresses_equal(&msk_local, &skc_local, false))
+ return 0;
+
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_local_id(msk, &skc_local);
+ return mptcp_pm_nl_get_local_id(msk, &skc_local);
+}
+
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ *flags = 0;
+ *ifindex = 0;
+
+ if (!id)
+ return 0;
+
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
+ return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
+}
+
+int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup)
+{
+ if (token)
+ return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup);
+ return mptcp_pm_nl_set_flags(net, loc, bkup);
}
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 1224dfca5bf3..5692daf57a4d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -25,9 +25,9 @@ static int pm_nl_pernet_id;
struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
+ u8 retrans_times;
struct timer_list add_timer;
struct mptcp_sock *sock;
- u8 retrans_times;
};
struct pm_nl_pernet {
@@ -86,8 +86,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
return a->port == b->port;
}
-static void local_address(const struct sock_common *skc,
- struct mptcp_addr_info *addr)
+void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr)
{
addr->family = skc->skc_family;
addr->port = htons(skc->skc_num);
@@ -122,7 +121,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
list_for_each_entry(subflow, list, node) {
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
- local_address(skc, &cur);
+ mptcp_local_address(skc, &cur);
if (mptcp_addresses_equal(&cur, saddr, saddr->port))
return true;
}
@@ -263,7 +262,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
struct mptcp_addr_info saddr;
bool ret = false;
- local_address((struct sock_common *)sk, &saddr);
+ mptcp_local_address((struct sock_common *)sk, &saddr);
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
@@ -342,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
}
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- const struct mptcp_pm_addr_entry *entry)
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
@@ -350,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
- add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
+ add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (add_entry) {
if (mptcp_pm_is_kernel(msk))
@@ -367,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
list_add(&add_entry->list, &msk->pm.anno_list);
- add_entry->addr = entry->addr;
+ add_entry->addr = *addr;
add_entry->sock = msk;
add_entry->retrans_times = 0;
@@ -541,7 +540,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
struct mptcp_addr_info mpc_addr;
bool backup = false;
- local_address((struct sock_common *)msk->first, &mpc_addr);
+ mptcp_local_address((struct sock_common *)msk->first, &mpc_addr);
rcu_read_lock();
entry = __lookup_addr(pernet, &mpc_addr, false);
if (entry) {
@@ -577,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
return;
if (local) {
- if (mptcp_pm_alloc_anno_list(msk, local)) {
+ if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
@@ -752,7 +751,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct mptcp_addr_info local, remote;
- local_address((struct sock_common *)ssk, &local);
+ mptcp_local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
@@ -1057,33 +1056,17 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
return 0;
}
-int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
+int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
struct mptcp_pm_addr_entry *entry;
- struct mptcp_addr_info skc_local;
- struct mptcp_addr_info msk_local;
struct pm_nl_pernet *pernet;
int ret = -1;
- if (WARN_ON_ONCE(!msk))
- return -1;
-
- /* The 0 ID mapping is defined by the first subflow, copied into the msk
- * addr
- */
- local_address((struct sock_common *)msk, &msk_local);
- local_address((struct sock_common *)skc, &skc_local);
- if (mptcp_addresses_equal(&msk_local, &skc_local, false))
- return 0;
-
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_local_id(msk, &skc_local);
-
pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
+ if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -1097,7 +1080,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
if (!entry)
return -ENOMEM;
- entry->addr = skc_local;
+ entry->addr = *skc;
entry->addr.id = 0;
entry->addr.port = 0;
entry->ifindex = 0;
@@ -1374,31 +1357,20 @@ out_free:
return ret;
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
+int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
+ u8 *flags, int *ifindex)
{
struct mptcp_pm_addr_entry *entry;
struct sock *sk = (struct sock *)msk;
struct net *net = sock_net(sk);
- *flags = 0;
- *ifindex = 0;
-
- if (id) {
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk,
- id,
- flags,
- ifindex);
-
- rcu_read_lock();
- entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
- if (entry) {
- *flags = entry->flags;
- *ifindex = entry->ifindex;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
+ if (entry) {
+ *flags = entry->flags;
+ *ifindex = entry->ifindex;
}
+ rcu_read_unlock();
return 0;
}
@@ -1492,7 +1464,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
- local_address((struct sock_common *)msk, &msk_local);
+ mptcp_local_address((struct sock_common *)msk, &msk_local);
if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
goto next;
@@ -1911,18 +1883,50 @@ next:
return ret;
}
+int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
+ u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
+ MPTCP_PM_ADDR_FLAG_FULLMESH;
+ struct mptcp_pm_addr_entry *entry;
+ u8 lookup_by_id = 0;
+
+ if (addr->addr.family == AF_UNSPEC) {
+ lookup_by_id = 1;
+ if (!addr->addr.id)
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_bh(&pernet->lock);
+ entry = __lookup_addr(pernet, &addr->addr, lookup_by_id);
+ if (!entry) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
+ }
+ if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+ (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
+ }
+
+ changed = (addr->flags ^ entry->flags) & mask;
+ entry->flags = (entry->flags & ~mask) | (addr->flags & mask);
+ *addr = *entry;
+ spin_unlock_bh(&pernet->lock);
+
+ mptcp_nl_set_flags(net, &addr->addr, bkup, changed);
+ return 0;
+}
+
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
- struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
+ struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
- u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
- MPTCP_PM_ADDR_FLAG_FULLMESH;
struct net *net = sock_net(skb->sk);
- u8 bkup = 0, lookup_by_id = 0;
+ u8 bkup = 0;
int ret;
ret = mptcp_pm_parse_entry(attr, info, false, &addr);
@@ -1937,34 +1941,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
- if (addr.addr.family == AF_UNSPEC) {
- lookup_by_id = 1;
- if (!addr.addr.id)
- return -EOPNOTSUPP;
- }
-
- if (token)
- return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup);
- spin_lock_bh(&pernet->lock);
- entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
- if (!entry) {
- spin_unlock_bh(&pernet->lock);
- return -EINVAL;
- }
- if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
- (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- spin_unlock_bh(&pernet->lock);
- return -EINVAL;
- }
-
- changed = (addr.flags ^ entry->flags) & mask;
- entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
- addr = *entry;
- spin_unlock_bh(&pernet->lock);
-
- mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
- return 0;
+ return mptcp_pm_set_flags(net, token, &addr, &remote, bkup);
}
static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index b06aa58dfcf2..b5a8aa4c1ebd 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -111,9 +111,6 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
{
struct mptcp_pm_addr_entry *entry, *match = NULL;
- *flags = 0;
- *ifindex = 0;
-
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
if (id == entry->addr.id) {
@@ -196,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
lock_sock((struct sock *)msk);
spin_lock_bh(&msk->pm.lock);
- if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
+ if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a6c7f2d24909..e892673deb73 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
subflow->request_mptcp = 1;
+ subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
@@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
+ msk->bytes_received += copy_len;
WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail && mptcp_try_coalesce(sk, tail, skb))
@@ -757,6 +759,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
MPTCP_SKB_CB(skb)->map_seq += delta;
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
+ msk->bytes_received += end_seq - msk->ack_seq;
msk->ack_seq = end_seq;
moved = true;
}
@@ -842,6 +845,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_socket && !ssk->sk_socket)
mptcp_sock_graft(ssk, sk->sk_socket);
+ mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
return true;
@@ -958,12 +962,6 @@ static void __mptcp_clean_una(struct sock *sk)
struct mptcp_data_frag *dtmp, *dfrag;
u64 snd_una;
- /* on fallback we just need to ignore snd_una, as this is really
- * plain TCP
- */
- if (__mptcp_check_fallback(msk))
- msk->snd_una = READ_ONCE(msk->snd_nxt);
-
snd_una = msk->snd_una;
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -1491,8 +1489,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
* that has been handed to the subflow for transmission
* and skip update in case it was old dfrag.
*/
- if (likely(after64(snd_nxt_new, msk->snd_nxt)))
+ if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
+ msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
msk->snd_nxt = snd_nxt_new;
+ }
}
void mptcp_check_and_set_pending(struct sock *sk)
@@ -2549,6 +2549,7 @@ static void __mptcp_retrans(struct sock *sk)
}
if (copied) {
dfrag->already_sent = max(dfrag->already_sent, info.sent);
+ msk->bytes_retrans += copied;
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
WRITE_ONCE(msk->allow_infinite_fallback, false);
@@ -2607,6 +2608,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
+ inet_sk_state_store(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@@ -2640,10 +2642,9 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_should_close(sk)) {
- inet_sk_state_store(sk, TCP_CLOSE);
+ if (mptcp_should_close(sk))
mptcp_do_fastclose(sk);
- }
+
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
goto unlock;
@@ -2682,6 +2683,7 @@ static int __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
+ msk->subflow_id = 1;
mptcp_pm_data_init(msk);
@@ -2878,7 +2880,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
void __mptcp_unaccepted_force_close(struct sock *sk)
{
sock_set_flag(sk, SOCK_DEAD);
- inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
__mptcp_destroy_sock(sk);
}
@@ -2934,7 +2935,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
/* If the msk has read data, or the caller explicitly ask it,
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
*/
- inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
timeout = 0;
} else if (mptcp_close_state(sk)) {
@@ -3069,6 +3069,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
+ msk->bytes_acked = 0;
+ msk->bytes_received = 0;
+ msk->bytes_sent = 0;
+ msk->bytes_retrans = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3119,6 +3123,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+ /* passive msk is created after the first/MPC subflow */
+ msk->subflow_id = 2;
+
sock_reset_flag(nsk, SOCK_RCU_FREE);
security_inet_csk_clone(nsk, req);
@@ -3538,11 +3545,10 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
return (int)delta;
}
-static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
{
struct mptcp_sock *msk = mptcp_sk(sk);
bool slow;
- int answ;
switch (cmd) {
case SIOCINQ:
@@ -3551,24 +3557,24 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
lock_sock(sk);
__mptcp_move_skbs(msk);
- answ = mptcp_inq_hint(sk);
+ *karg = mptcp_inq_hint(sk);
release_sock(sk);
break;
case SIOCOUTQ:
slow = lock_sock_fast(sk);
- answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
+ *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
unlock_sock_fast(sk, slow);
break;
case SIOCOUTQNSD:
slow = lock_sock_fast(sk);
- answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
+ *karg = mptcp_ioctl_outq(msk, msk->snd_nxt);
unlock_sock_fast(sk, slow);
break;
default:
return -ENOIOCTLCMD;
}
- return put_user(answ, (int __user *)arg);
+ return 0;
}
static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
@@ -3726,6 +3732,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct socket *ssock;
+ struct sock *newsk;
int err;
pr_debug("msk=%p", msk);
@@ -3737,17 +3744,20 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssock)
return -EINVAL;
- err = ssock->ops->accept(sock, newsock, flags, kern);
- if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
- struct mptcp_sock *msk = mptcp_sk(newsock->sk);
+ newsk = mptcp_accept(sock->sk, flags, &err, kern);
+ if (!newsk)
+ return err;
+
+ lock_sock(newsk);
+
+ __inet_accept(sock, newsock, newsk);
+ if (!mptcp_is_tcpsk(newsock->sk)) {
+ struct mptcp_sock *msk = mptcp_sk(newsk);
struct mptcp_subflow_context *subflow;
- struct sock *newsk = newsock->sk;
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
msk->in_accept_queue = 0;
- lock_sock(newsk);
-
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/
@@ -3768,11 +3778,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (unlikely(list_empty(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
-
- release_sock(newsk);
}
+ release_sock(newsk);
- return err;
+ return 0;
}
static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
@@ -3857,7 +3866,6 @@ static const struct proto_ops mptcp_stream_ops = {
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
- .sendpage = inet_sendpage,
};
static struct inet_protosw mptcp_protosw = {
@@ -3952,7 +3960,6 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.sendmsg = inet6_sendmsg,
.recvmsg = inet6_recvmsg,
.mmap = sock_no_mmap,
- .sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
#endif
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3783a7056e1..37fbe22e2433 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -261,10 +261,13 @@ struct mptcp_sock {
u64 local_key;
u64 remote_key;
u64 write_seq;
+ u64 bytes_sent;
u64 snd_nxt;
+ u64 bytes_received;
u64 ack_seq;
atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
+ u64 bytes_retrans;
int rmem_fwd_alloc;
struct sock *last_snd;
int snd_burst;
@@ -273,6 +276,7 @@ struct mptcp_sock {
* recovery related fields are under data_lock
* protection
*/
+ u64 bytes_acked;
u64 snd_una;
u64 wnd_end;
unsigned long timer_ival;
@@ -318,7 +322,8 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
- u32 setsockopt_seq;
+ u32 subflow_id;
+ u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
struct mptcp_sock *dl_next;
};
@@ -498,6 +503,8 @@ struct mptcp_subflow_context {
u8 reset_reason:4;
u8 stale_count;
+ u32 subflow_id;
+
long delegated_status;
unsigned long fail_tout;
@@ -636,6 +643,7 @@ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
+void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
@@ -806,7 +814,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *rem,
u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- const struct mptcp_pm_addr_entry *entry);
+ const struct mptcp_addr_info *addr);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
@@ -818,9 +826,15 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
+int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
+ u8 *flags, int *ifindex);
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
+int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup);
+int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup);
int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
struct mptcp_pm_addr_entry *loc,
struct mptcp_pm_addr_entry *rem, u8 bkup);
@@ -913,13 +927,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
+int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
-int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index d4258869ac48..63f7a09335c5 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -14,7 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"
-#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_FULL_INFO_OPTLEN_SIZE 40
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
@@ -355,6 +356,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BROADCAST:
case SO_BSDCOMPAT:
case SO_PASSCRED:
+ case SO_PASSPIDFD:
case SO_PASSSEC:
case SO_RXQ_OVFL:
case SO_WIFI_STATUS:
@@ -888,7 +890,9 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
+ struct sock *sk = (struct sock *)msk;
u32 flags = 0;
+ bool slow;
memset(info, 0, sizeof(*info));
@@ -897,6 +901,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return;
+
/* The following limits only make sense for the in-kernel PM */
if (mptcp_pm_is_kernel(msk)) {
info->mptcpi_subflows_max =
@@ -914,11 +921,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
if (READ_ONCE(msk->can_ack))
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
info->mptcpi_flags = flags;
- info->mptcpi_token = READ_ONCE(msk->token);
- info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
- info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
- info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
- info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
+ mptcp_data_lock(sk);
+ info->mptcpi_snd_una = msk->snd_una;
+ info->mptcpi_rcv_nxt = msk->ack_seq;
+ info->mptcpi_bytes_acked = msk->bytes_acked;
+ mptcp_data_unlock(sk);
+
+ slow = lock_sock_fast(sk);
+ info->mptcpi_csum_enabled = msk->csum_enabled;
+ info->mptcpi_token = msk->token;
+ info->mptcpi_write_seq = msk->write_seq;
+ info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
+ info->mptcpi_bytes_sent = msk->bytes_sent;
+ info->mptcpi_bytes_received = msk->bytes_received;
+ info->mptcpi_bytes_retrans = msk->bytes_retrans;
+ unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -965,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
}
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
- char __user *optval, int __user *optlen)
+ char __user *optval,
+ int __user *optlen)
{
int len, copylen;
@@ -1146,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
return 0;
}
+static int mptcp_get_full_info(struct mptcp_full_info *mfi,
+ char __user *optval,
+ int __user *optlen)
+{
+ int len;
+
+ BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
+ MIN_FULL_INFO_OPTLEN_SIZE);
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ if (len < MIN_FULL_INFO_OPTLEN_SIZE)
+ return -EINVAL;
+
+ memset(mfi, 0, sizeof(*mfi));
+ if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
+ return -EFAULT;
+
+ if (mfi->size_tcpinfo_kernel ||
+ mfi->size_sfinfo_kernel ||
+ mfi->num_subflows)
+ return -EINVAL;
+
+ if (mfi->size_sfinfo_user > INT_MAX ||
+ mfi->size_tcpinfo_user > INT_MAX)
+ return -EINVAL;
+
+ return len - MIN_FULL_INFO_OPTLEN_SIZE;
+}
+
+static int mptcp_put_full_info(struct mptcp_full_info *mfi,
+ char __user *optval,
+ u32 copylen,
+ int __user *optlen)
+{
+ copylen += MIN_FULL_INFO_OPTLEN_SIZE;
+ if (put_user(copylen, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, mfi, copylen))
+ return -EFAULT;
+ return 0;
+}
+
+static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
+ int __user *optlen)
+{
+ unsigned int sfcount = 0, copylen = 0;
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ void __user *tcpinfoptr, *sfinfoptr;
+ struct mptcp_full_info mfi;
+ int len;
+
+ len = mptcp_get_full_info(&mfi, optval, optlen);
+ if (len < 0)
+ return len;
+
+ /* don't bother filling the mptcp info if there is not enough
+ * user-space-provided storage
+ */
+ if (len > 0) {
+ mptcp_diag_fill_info(msk, &mfi.mptcp_info);
+ copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
+ }
+
+ mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
+ mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
+ sizeof(struct tcp_info));
+ sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
+ mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
+ mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
+ sizeof(struct mptcp_subflow_info));
+ tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct mptcp_subflow_info sfinfo;
+ struct tcp_info tcp_info;
+
+ if (sfcount++ >= mfi.size_arrays_user)
+ continue;
+
+ /* fetch addr/tcp_info only if the user space buffers
+ * are wide enough
+ */
+ memset(&sfinfo, 0, sizeof(sfinfo));
+ sfinfo.id = subflow->subflow_id;
+ if (mfi.size_sfinfo_user >
+ offsetof(struct mptcp_subflow_info, addrs))
+ mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
+ if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
+ goto fail_release;
+
+ if (mfi.size_tcpinfo_user) {
+ tcp_get_info(ssk, &tcp_info);
+ if (copy_to_user(tcpinfoptr, &tcp_info,
+ mfi.size_tcpinfo_user))
+ goto fail_release;
+ }
+
+ tcpinfoptr += mfi.size_tcpinfo_user;
+ sfinfoptr += mfi.size_sfinfo_user;
+ }
+ release_sock(sk);
+
+ mfi.num_subflows = sfcount;
+ if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
+ return -EFAULT;
+
+ return 0;
+
+fail_release:
+ release_sock(sk);
+ return -EFAULT;
+}
+
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
int __user *optlen, int val)
{
@@ -1219,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
switch (optname) {
case MPTCP_INFO:
return mptcp_getsockopt_info(msk, optval, optlen);
+ case MPTCP_FULL_INFO:
+ return mptcp_getsockopt_full_info(msk, optval, optlen);
case MPTCP_TCPINFO:
return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
case MPTCP_SUBFLOW_ADDRS:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8ff5c9fab55b..9ee3b7abbaf6 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -819,6 +819,7 @@ create_child:
if (!ctx->conn)
goto fallback;
+ ctx->subflow_id = 1;
owner = mptcp_sk(ctx->conn);
mptcp_pm_new_connection(owner, child, 1);
@@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ subflow->subflow_id = msk->subflow_id++;
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
sock_hold(ssk);