summaryrefslogtreecommitdiffstats
path: root/net/mptcp/subflow.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r--net/mptcp/subflow.c403
1 files changed, 357 insertions, 46 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 65122edf60aa..b5180c81588e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <crypto/algapi.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -19,17 +20,42 @@
#endif
#include <net/mptcp.h>
#include "protocol.h"
+#include "mib.h"
+
+static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
+ enum linux_mptcp_mib_field field)
+{
+ MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
+}
static int subflow_rebuild_header(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- int err = 0;
+ int local_id, err = 0;
if (subflow->request_mptcp && !subflow->token) {
pr_debug("subflow=%p", sk);
err = mptcp_token_new_connect(sk);
+ } else if (subflow->request_join && !subflow->local_nonce) {
+ struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;
+
+ pr_debug("subflow=%p", sk);
+
+ do {
+ get_random_bytes(&subflow->local_nonce, sizeof(u32));
+ } while (!subflow->local_nonce);
+
+ if (subflow->local_id)
+ goto out;
+
+ local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
+ if (local_id < 0)
+ return -EINVAL;
+
+ subflow->local_id = local_id;
}
+out:
if (err)
return err;
@@ -47,6 +73,51 @@ static void subflow_req_destructor(struct request_sock *req)
tcp_request_sock_ops.destructor(req);
}
+static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
+ void *hmac)
+{
+ u8 msg[8];
+
+ put_unaligned_be32(nonce1, &msg[0]);
+ put_unaligned_be32(nonce2, &msg[4]);
+
+ mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
+}
+
+/* validate received token and create truncated hmac and nonce for SYN-ACK */
+static bool subflow_token_join_request(struct request_sock *req,
+ const struct sk_buff *skb)
+{
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ u8 hmac[MPTCPOPT_HMAC_LEN];
+ struct mptcp_sock *msk;
+ int local_id;
+
+ msk = mptcp_token_get_sock(subflow_req->token);
+ if (!msk) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
+ return false;
+ }
+
+ local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
+ if (local_id < 0) {
+ sock_put((struct sock *)msk);
+ return false;
+ }
+ subflow_req->local_id = local_id;
+
+ get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
+
+ subflow_generate_hmac(msk->local_key, msk->remote_key,
+ subflow_req->local_nonce,
+ subflow_req->remote_nonce, hmac);
+
+ subflow_req->thmac = get_unaligned_be64(hmac);
+
+ sock_put((struct sock *)msk);
+ return true;
+}
+
static void subflow_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -61,6 +132,7 @@ static void subflow_init_req(struct request_sock *req,
mptcp_get_options(skb, &rx_opt);
subflow_req->mp_capable = 0;
+ subflow_req->mp_join = 0;
subflow_req->remote_key_valid = 0;
#ifdef CONFIG_TCP_MD5SIG
@@ -71,6 +143,15 @@ static void subflow_init_req(struct request_sock *req,
return;
#endif
+ if (rx_opt.mptcp.mp_capable) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
+
+ if (rx_opt.mptcp.mp_join)
+ return;
+ } else if (rx_opt.mptcp.mp_join) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
+ }
+
if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
int err;
@@ -79,6 +160,19 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
+ } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) {
+ subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
+ subflow_req->mp_join = 1;
+ subflow_req->backup = rx_opt.mptcp.backup;
+ subflow_req->remote_id = rx_opt.mptcp.join_id;
+ subflow_req->token = rx_opt.mptcp.token;
+ subflow_req->remote_nonce = rx_opt.mptcp.nonce;
+ pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
+ subflow_req->remote_nonce);
+ if (!subflow_token_join_request(req, skb)) {
+ subflow_req->mp_join = 0;
+ // @@ need to trigger RST
+ }
}
}
@@ -106,13 +200,41 @@ static void subflow_v6_init_req(struct request_sock *req,
}
#endif
+/* validate received truncated hmac and create hmac for third ACK */
+static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
+{
+ u8 hmac[MPTCPOPT_HMAC_LEN];
+ u64 thmac;
+
+ subflow_generate_hmac(subflow->remote_key, subflow->local_key,
+ subflow->remote_nonce, subflow->local_nonce,
+ hmac);
+
+ thmac = get_unaligned_be64(hmac);
+ pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
+ subflow, subflow->token,
+ (unsigned long long)thmac,
+ (unsigned long long)subflow->thmac);
+
+ return thmac == subflow->thmac;
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct sock *parent = subflow->conn;
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
- if (subflow->conn && !subflow->conn_finished) {
+ if (inet_sk_state_load(parent) != TCP_ESTABLISHED) {
+ inet_sk_state_store(parent, TCP_ESTABLISHED);
+ parent->sk_state_change(parent);
+ }
+
+ if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp)
+ return;
+
+ if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);
@@ -122,6 +244,33 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
}
+ } else if (subflow->mp_join) {
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
+ subflow, subflow->thmac,
+ subflow->remote_nonce);
+ if (!subflow_thmac_valid(subflow)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
+ subflow->mp_join = 0;
+ goto do_reset;
+ }
+
+ subflow_generate_hmac(subflow->local_key, subflow->remote_key,
+ subflow->local_nonce,
+ subflow->remote_nonce,
+ subflow->hmac);
+
+ if (skb)
+ subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
+
+ if (!mptcp_finish_join(sk))
+ goto do_reset;
+
+ subflow->conn_finished = 1;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
+ } else {
+do_reset:
+ tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_done(sk);
}
}
@@ -172,6 +321,32 @@ drop:
}
#endif
+/* validate hmac received in third ACK */
+static bool subflow_hmac_valid(const struct request_sock *req,
+ const struct tcp_options_received *rx_opt)
+{
+ const struct mptcp_subflow_request_sock *subflow_req;
+ u8 hmac[MPTCPOPT_HMAC_LEN];
+ struct mptcp_sock *msk;
+ bool ret;
+
+ subflow_req = mptcp_subflow_rsk(req);
+ msk = mptcp_token_get_sock(subflow_req->token);
+ if (!msk)
+ return false;
+
+ subflow_generate_hmac(msk->remote_key, msk->local_key,
+ subflow_req->remote_nonce,
+ subflow_req->local_nonce, hmac);
+
+ ret = true;
+ if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac)))
+ ret = false;
+
+ sock_put((struct sock *)msk);
+ return ret;
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -182,6 +357,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
struct mptcp_subflow_request_sock *subflow_req;
struct tcp_options_received opt_rx;
+ bool fallback_is_fatal = false;
+ struct sock *new_msk = NULL;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -197,7 +374,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* out-of-order pkt, which will not carry the MP_CAPABLE
* opt even on mptcp enabled paths
*/
- goto create_child;
+ goto create_msk;
}
opt_rx.mptcp.mp_capable = 0;
@@ -207,6 +384,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
subflow_req->remote_key_valid = 1;
} else {
subflow_req->mp_capable = 0;
+ goto create_child;
+ }
+
+create_msk:
+ new_msk = mptcp_sk_clone(listener->conn, req);
+ if (!new_msk)
+ subflow_req->mp_capable = 0;
+ } else if (subflow_req->mp_join) {
+ fallback_is_fatal = true;
+ opt_rx.mptcp.mp_join = 0;
+ mptcp_get_options(skb, &opt_rx);
+ if (!opt_rx.mptcp.mp_join ||
+ !subflow_hmac_valid(req, &opt_rx)) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
+ return NULL;
}
}
@@ -217,22 +409,45 @@ create_child:
if (child && *own_req) {
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
- /* we have null ctx on TCP fallback, not fatal on MPC
- * handshake
+ /* we have null ctx on TCP fallback, which is fatal on
+ * MPJ handshake
*/
- if (!ctx)
- return child;
+ if (!ctx) {
+ if (fallback_is_fatal)
+ goto close_child;
+ goto out;
+ }
if (ctx->mp_capable) {
- if (mptcp_token_new_accept(ctx->token))
+ /* new mpc subflow takes ownership of the newly
+ * created mptcp socket
+ */
+ inet_sk_state_store(new_msk, TCP_ESTABLISHED);
+ mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
+ ctx->conn = new_msk;
+ new_msk = NULL;
+ } else if (ctx->mp_join) {
+ struct mptcp_sock *owner;
+
+ owner = mptcp_token_get_sock(ctx->token);
+ if (!owner)
goto close_child;
+
+ ctx->conn = (struct sock *)owner;
+ if (!mptcp_finish_join(child))
+ goto close_child;
+
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
}
}
+out:
+ /* dispose of the left over mptcp master, if any */
+ if (unlikely(new_msk))
+ sock_put(new_msk);
return child;
close_child:
- pr_debug("closing child socket");
tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_forced_close(child);
tcp_done(child);
@@ -338,6 +553,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
data_len = mpext->data_len;
if (data_len == 0) {
pr_err("Infinite mapping not handled");
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
return MAPPING_INVALID;
}
@@ -381,8 +597,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
/* If this skb data are fully covered by the current mapping,
* the new map would need caching, which is not supported
*/
- if (skb_is_fully_mapped(ssk, skb))
+ if (skb_is_fully_mapped(ssk, skb)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
return MAPPING_INVALID;
+ }
/* will validate the next map after consuming the current one */
return MAPPING_OK;
@@ -408,6 +626,18 @@ validate_seq:
return MAPPING_OK;
}
+static int subflow_read_actor(read_descriptor_t *desc,
+ struct sk_buff *skb,
+ unsigned int offset, size_t len)
+{
+ size_t copy_len = min(desc->count, len);
+
+ desc->count -= copy_len;
+
+ pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
+ return copy_len;
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -420,9 +650,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
if (subflow->data_avail)
return true;
- if (!subflow->conn)
- return false;
-
msk = mptcp_sk(subflow->conn);
for (;;) {
u32 map_remaining;
@@ -482,16 +709,12 @@ static bool subflow_check_data_avail(struct sock *ssk)
pr_debug("discarding %zu bytes, current map len=%d", delta,
map_remaining);
if (delta) {
- struct mptcp_read_arg arg = {
- .msg = NULL,
- };
read_descriptor_t desc = {
.count = delta,
- .arg.data = &arg,
};
int ret;
- ret = tcp_read_sock(ssk, &desc, mptcp_read_actor);
+ ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
if (ret < 0) {
ssk->sk_err = -ret;
goto fatal;
@@ -546,19 +769,15 @@ static void subflow_data_ready(struct sock *sk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
- if (!parent || !subflow->mp_capable) {
+ if (!subflow->mp_capable && !subflow->mp_join) {
subflow->tcp_data_ready(sk);
- if (parent)
- parent->sk_data_ready(parent);
+ parent->sk_data_ready(parent);
return;
}
- if (mptcp_subflow_data_available(sk)) {
- set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags);
-
- parent->sk_data_ready(parent);
- }
+ if (mptcp_subflow_data_available(sk))
+ mptcp_data_ready(parent, sk);
}
static void subflow_write_space(struct sock *sk)
@@ -567,7 +786,7 @@ static void subflow_write_space(struct sock *sk)
struct sock *parent = subflow->conn;
sk_stream_write_space(sk);
- if (parent && sk_stream_is_writeable(sk)) {
+ if (sk_stream_is_writeable(sk)) {
set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
smp_mb__after_atomic();
/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
@@ -605,6 +824,85 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
}
#endif
+static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
+ struct sockaddr_storage *addr)
+{
+ memset(addr, 0, sizeof(*addr));
+ addr->ss_family = info->family;
+ if (addr->ss_family == AF_INET) {
+ struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
+
+ in_addr->sin_addr = info->addr;
+ in_addr->sin_port = info->port;
+ }
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (addr->ss_family == AF_INET6) {
+ struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;
+
+ in6_addr->sin6_addr = info->addr6;
+ in6_addr->sin6_port = info->port;
+ }
+#endif
+}
+
+int __mptcp_subflow_connect(struct sock *sk, int ifindex,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *remote)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_subflow_context *subflow;
+ struct sockaddr_storage addr;
+ struct socket *sf;
+ u32 remote_token;
+ int addrlen;
+ int err;
+
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -ENOTCONN;
+
+ err = mptcp_subflow_create_socket(sk, &sf);
+ if (err)
+ return err;
+
+ subflow = mptcp_subflow_ctx(sf->sk);
+ subflow->remote_key = msk->remote_key;
+ subflow->local_key = msk->local_key;
+ subflow->token = msk->token;
+ mptcp_info2sockaddr(loc, &addr);
+
+ addrlen = sizeof(struct sockaddr_in);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (loc->family == AF_INET6)
+ addrlen = sizeof(struct sockaddr_in6);
+#endif
+ sf->sk->sk_bound_dev_if = ifindex;
+ err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
+ if (err)
+ goto failed;
+
+ mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
+ pr_debug("msk=%p remote_token=%u", msk, remote_token);
+ subflow->remote_token = remote_token;
+ subflow->local_id = loc->id;
+ subflow->request_join = 1;
+ subflow->request_bkup = 1;
+ mptcp_info2sockaddr(remote, &addr);
+
+ err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
+ if (err && err != -EINPROGRESS)
+ goto failed;
+
+ spin_lock_bh(&msk->join_list_lock);
+ list_add_tail(&subflow->node, &msk->join_list);
+ spin_unlock_bh(&msk->join_list_lock);
+
+ return err;
+
+failed:
+ sock_release(sf);
+ return err;
+}
+
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
@@ -682,7 +980,7 @@ static bool subflow_is_done(const struct sock *sk)
static void subflow_state_change(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct sock *parent = READ_ONCE(subflow->conn);
+ struct sock *parent = subflow->conn;
__subflow_state_change(sk);
@@ -690,13 +988,10 @@ static void subflow_state_change(struct sock *sk)
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
*/
- if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) {
- set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags);
-
- parent->sk_data_ready(parent);
- }
+ if (subflow->mp_capable && mptcp_subflow_data_available(sk))
+ mptcp_data_ready(parent, sk);
- if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) &&
+ if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;
parent->sk_shutdown |= RCV_SHUTDOWN;
@@ -772,7 +1067,8 @@ static void subflow_ulp_clone(const struct request_sock *req,
struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
struct mptcp_subflow_context *new_ctx;
- if (!tcp_rsk(req)->is_mptcp || !subflow_req->mp_capable) {
+ if (!tcp_rsk(req)->is_mptcp ||
+ (!subflow_req->mp_capable && !subflow_req->mp_join)) {
subflow_ulp_fallback(newsk, old_ctx);
return;
}
@@ -783,22 +1079,35 @@ static void subflow_ulp_clone(const struct request_sock *req,
return;
}
- /* see comments in subflow_syn_recv_sock(), MPTCP connection is fully
- * established only after we receive the remote key
- */
new_ctx->conn_finished = 1;
new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_write_space = old_ctx->tcp_write_space;
- new_ctx->mp_capable = 1;
- new_ctx->fourth_ack = subflow_req->remote_key_valid;
- new_ctx->can_ack = subflow_req->remote_key_valid;
- new_ctx->remote_key = subflow_req->remote_key;
- new_ctx->local_key = subflow_req->local_key;
- new_ctx->token = subflow_req->token;
- new_ctx->ssn_offset = subflow_req->ssn_offset;
- new_ctx->idsn = subflow_req->idsn;
+ new_ctx->rel_write_seq = 1;
+ new_ctx->tcp_sock = newsk;
+
+ if (subflow_req->mp_capable) {
+ /* see comments in subflow_syn_recv_sock(), MPTCP connection
+ * is fully established only after we receive the remote key
+ */
+ new_ctx->mp_capable = 1;
+ new_ctx->fully_established = subflow_req->remote_key_valid;
+ new_ctx->can_ack = subflow_req->remote_key_valid;
+ new_ctx->remote_key = subflow_req->remote_key;
+ new_ctx->local_key = subflow_req->local_key;
+ new_ctx->token = subflow_req->token;
+ new_ctx->ssn_offset = subflow_req->ssn_offset;
+ new_ctx->idsn = subflow_req->idsn;
+ } else if (subflow_req->mp_join) {
+ new_ctx->ssn_offset = subflow_req->ssn_offset;
+ new_ctx->mp_join = 1;
+ new_ctx->fully_established = 1;
+ new_ctx->backup = subflow_req->backup;
+ new_ctx->local_id = subflow_req->local_id;
+ new_ctx->token = subflow_req->token;
+ new_ctx->thmac = subflow_req->thmac;
+ }
}
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
@@ -860,6 +1169,8 @@ void mptcp_subflow_init(void)
subflow_v6m_specific.net_frag_header_len = 0;
#endif
+ mptcp_diag_subflow_init(&subflow_ulp_ops);
+
if (tcp_register_ulp(&subflow_ulp_ops) != 0)
panic("MPTCP: failed to register subflows to ULP\n");
}