Commit 08a45c59 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-part-two'

Christoph Paasch says:

====================
Multipath TCP part 2: Single subflow & RFC8684 support

v2 -> v3: Added RFC8684-style handshake (see below fore more details) and some minor fixes
v1 -> v2: Rebased on latest "Multipath TCP: Prerequisites" v3 series

This set adds MPTCP connection establishment, writing & reading MPTCP
options on data packets, a sysctl to allow MPTCP per-namespace, and self
tests. This is sufficient to establish and maintain a connection with a
MPTCP peer, but will not yet allow or initiate establishment of
additional MPTCP subflows.

We also add the necessary code for the RFC8684-style handshake.
RFC8684 obsoletes the experimental RFC6824 and makes MPTCP move-on to
version 1.

Originally our plan was to submit single-subflow and RFC8684 support in
two patchsets, but to simplify the merging-process and ensure that a coherent
MPTCP-version lands in Linux we decided to merge the two sets into a single
one.

The MPTCP patchset exclusively supports RFC 8684. Although all MPTCP
deployments are currently based on RFC 6824, future deployments will be
migrating to MPTCP version 1. 3GPP's 5G standardization also solely supports
RFC 8684. In addition, we believe that this initial submission of MPTCP will be
cleaner by solely supporting RFC 8684. If later on support for the old
MPTCP-version is required it can always be added in the future.

The major difference between RFC 8684 and RFC 6824 is that it has a better
support for servers using TCP SYN-cookies by reliably retransmitting the
MP_CAPABLE option.

Before ending this cover letter with some refs, it is worth mentioning
that we promise David Miller that merging this series will be rewarded by
Twitter dopamine hits :-D

Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v3-part2)

Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v3-part2

Thank you for your review. You can find us at mptcp@lists.01.org and
https://is.gd/mptcp_upstream
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 23f4eacd 8ab183de
......@@ -11583,6 +11583,8 @@ W: https://github.com/multipath-tcp/mptcp_net-next/wiki
B: https://github.com/multipath-tcp/mptcp_net-next/issues
S: Maintained
F: include/net/mptcp.h
F: net/mptcp/
F: tools/testing/selftests/net/mptcp/
NETWORKING [TCP]
M: Eric Dumazet <edumazet@google.com>
......
......@@ -78,6 +78,27 @@ struct tcp_sack_block {
#define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */
#define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/
#if IS_ENABLED(CONFIG_MPTCP)
struct mptcp_options_received {
u64 sndr_key;
u64 rcvr_key;
u64 data_ack;
u64 data_seq;
u32 subflow_seq;
u16 data_len;
u8 mp_capable : 1,
mp_join : 1,
dss : 1;
u8 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
ack64:1,
mpc_map:1,
__unused:2;
};
#endif
struct tcp_options_received {
/* PAWS/RTTM data */
int ts_recent_stamp;/* Time we stored ts_recent (for aging) */
......@@ -95,6 +116,9 @@ struct tcp_options_received {
u8 num_sacks; /* Number of SACK blocks */
u16 user_mss; /* mss requested by user in ioctl */
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
#if IS_ENABLED(CONFIG_MPTCP)
struct mptcp_options_received mptcp;
#endif
};
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
......@@ -104,6 +128,11 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
#if IS_ENABLED(CONFIG_SMC)
rx_opt->smc_ok = 0;
#endif
#if IS_ENABLED(CONFIG_MPTCP)
rx_opt->mptcp.mp_capable = 0;
rx_opt->mptcp.mp_join = 0;
rx_opt->mptcp.dss = 0;
#endif
}
/* This is the max number of SACKS that we'll generate and process. It's safe
......@@ -119,6 +148,9 @@ struct tcp_request_sock {
const struct tcp_request_sock_ops *af_specific;
u64 snt_synack; /* first SYNACK sent time */
bool tfo_listener;
#if IS_ENABLED(CONFIG_MPTCP)
bool is_mptcp;
#endif
u32 txhash;
u32 rcv_isn;
u32 snt_isn;
......@@ -379,6 +411,9 @@ struct tcp_sock {
u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
* while socket was owned by user.
*/
#if IS_ENABLED(CONFIG_MPTCP)
bool is_mptcp;
#endif
#ifdef CONFIG_TCP_MD5SIG
/* TCP AF-Specific parts; only used by MD5 Signature support so far */
......
......@@ -9,6 +9,7 @@
#define __NET_MPTCP_H
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <linux/types.h>
/* MPTCP sk_buff extension data */
......@@ -22,12 +23,49 @@ struct mptcp_ext {
data_fin:1,
use_ack:1,
ack64:1,
__unused:3;
mpc_map:1,
__unused:2;
/* one byte hole */
};
struct mptcp_out_options {
#if IS_ENABLED(CONFIG_MPTCP)
u16 suboptions;
u64 sndr_key;
u64 rcvr_key;
struct mptcp_ext ext_copy;
#endif
};
#ifdef CONFIG_MPTCP
void mptcp_init(void);
static inline bool sk_is_mptcp(const struct sock *sk)
{
return tcp_sk(sk)->is_mptcp;
}
static inline bool rsk_is_mptcp(const struct request_sock *req)
{
return tcp_rsk(req)->is_mptcp;
}
void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
int opsize, struct tcp_options_received *opt_rx);
bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
unsigned int *size, struct mptcp_out_options *opts);
void mptcp_rcv_synsent(struct sock *sk);
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
struct mptcp_out_options *opts);
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts);
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
struct tcp_options_received *opt_rx);
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
/* move the skb extension owership, with the assumption that 'to' is
* newly allocated
*/
......@@ -70,6 +108,59 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
#else
static inline void mptcp_init(void)
{
}
static inline bool sk_is_mptcp(const struct sock *sk)
{
return false;
}
static inline bool rsk_is_mptcp(const struct request_sock *req)
{
return false;
}
static inline void mptcp_parse_option(const struct sk_buff *skb,
const unsigned char *ptr, int opsize,
struct tcp_options_received *opt_rx)
{
}
static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
unsigned int *size,
struct mptcp_out_options *opts)
{
return false;
}
static inline void mptcp_rcv_synsent(struct sock *sk)
{
}
static inline bool mptcp_synack_options(const struct request_sock *req,
unsigned int *size,
struct mptcp_out_options *opts)
{
return false;
}
static inline bool mptcp_established_options(struct sock *sk,
struct sk_buff *skb,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
return false;
}
static inline void mptcp_incoming_options(struct sock *sk,
struct sk_buff *skb,
struct tcp_options_received *opt_rx)
{
}
static inline void mptcp_skb_ext_move(struct sk_buff *to,
const struct sk_buff *from)
{
......@@ -82,4 +173,16 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
}
#endif /* CONFIG_MPTCP */
void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int mptcpv6_init(void);
#elif IS_ENABLED(CONFIG_IPV6)
static inline int mptcpv6_init(void)
{
return 0;
}
#endif
#endif /* __NET_MPTCP_H */
......@@ -91,6 +91,7 @@ if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
source "net/netlabel/Kconfig"
source "net/mptcp/Kconfig"
endif # if INET
......
......@@ -87,3 +87,4 @@ endif
obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
obj-$(CONFIG_MPTCP) += mptcp/
......@@ -271,6 +271,7 @@
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
......@@ -4021,4 +4022,5 @@ void __init tcp_init(void)
tcp_metrics_init();
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
tcp_tasklet_init();
mptcp_init();
}
......@@ -79,6 +79,7 @@
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>
#include <net/mptcp.h>
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
......@@ -3924,6 +3925,10 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
case TCPOPT_MPTCP:
mptcp_parse_option(skb, ptr, opsize, opt_rx);
break;
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
......@@ -4765,6 +4770,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen;
int eaten;
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb, &tp->rx_opt);
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
return;
......@@ -5973,6 +5981,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
if (sk_is_mptcp(sk))
mptcp_rcv_synsent(sk);
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
......@@ -6338,8 +6349,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSE_WAIT:
case TCP_CLOSING:
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb, &tp->rx_opt);
break;
}
/* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
......@@ -6595,6 +6609,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
#if IS_ENABLED(CONFIG_MPTCP)
tcp_rsk(req)->is_mptcp = 0;
#endif
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
......
......@@ -38,6 +38,7 @@
#define pr_fmt(fmt) "TCP: " fmt
#include <net/tcp.h>
#include <net/mptcp.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
......@@ -414,6 +415,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
#define OPTION_SMC (1 << 9)
#define OPTION_MPTCP (1 << 10)
static void smc_options_write(__be32 *ptr, u16 *options)
{
......@@ -439,8 +441,17 @@ struct tcp_out_options {
__u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
struct mptcp_out_options mptcp;
};
static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
{
#if IS_ENABLED(CONFIG_MPTCP)
if (unlikely(OPTION_MPTCP & opts->options))
mptcp_write_options(ptr, &opts->mptcp);
#endif
}
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
......@@ -549,6 +560,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
}
smc_options_write(ptr, &options);
mptcp_options_write(ptr, opts);
}
static void smc_set_option(const struct tcp_sock *tp,
......@@ -584,6 +597,22 @@ static void smc_set_option_cond(const struct tcp_sock *tp,
#endif
}
static void mptcp_set_option_cond(const struct request_sock *req,
struct tcp_out_options *opts,
unsigned int *remaining)
{
if (rsk_is_mptcp(req)) {
unsigned int size;
if (mptcp_synack_options(req, &size, &opts->mptcp)) {
if (*remaining >= size) {
opts->options |= OPTION_MPTCP;
*remaining -= size;
}
}
}
}
/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
......@@ -653,6 +682,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
smc_set_option(tp, opts, &remaining);
if (sk_is_mptcp(sk)) {
unsigned int size;
if (mptcp_syn_options(sk, skb, &size, &opts->mptcp)) {
opts->options |= OPTION_MPTCP;
remaining -= size;
}
}
return MAX_TCP_OPTION_SPACE - remaining;
}
......@@ -714,6 +752,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
}
mptcp_set_option_cond(req, opts, &remaining);
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
return MAX_TCP_OPTION_SPACE - remaining;
......@@ -751,6 +791,23 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}
/* MPTCP options have precedence over SACK for the limited TCP
* option space because a MPTCP connection would be forced to
* fall back to regular TCP if a required multipath option is
* missing. SACK still gets a chance to use whatever space is
* left.
*/
if (sk_is_mptcp(sk)) {
unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
unsigned int opt_size = 0;
if (mptcp_established_options(sk, skb, &opt_size, remaining,
&opts->mptcp)) {
opts->options |= OPTION_MPTCP;
size += opt_size;
}
}
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
......
......@@ -238,6 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
icsk->icsk_af_ops = &ipv6_mapped;
if (sk_is_mptcp(sk))
mptcp_handle_ipv6_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
......@@ -248,6 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &ipv6_specific;
if (sk_is_mptcp(sk))
mptcp_handle_ipv6_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_specific;
......@@ -1203,6 +1207,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->saddr = newsk->sk_v6_rcv_saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
if (sk_is_mptcp(newsk))
mptcp_handle_ipv6_mapped(newsk, true);
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
......@@ -2163,9 +2169,16 @@ int __init tcpv6_init(void)
ret = register_pernet_subsys(&tcpv6_net_ops);
if (ret)
goto out_tcpv6_protosw;
ret = mptcpv6_init();
if (ret)
goto out_tcpv6_pernet_subsys;
out:
return ret;
out_tcpv6_pernet_subsys:
unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
......
config MPTCP
bool "MPTCP: Multipath TCP"
depends on INET
select SKB_EXTENSIONS
select CRYPTO_LIB_SHA256
help
Multipath TCP (MPTCP) connections send and receive data over multiple
subflows in order to utilize multiple network paths. Each subflow
uses the TCP protocol, and TCP options carry header information for
MPTCP.
config MPTCP_IPV6
bool "MPTCP: IPv6 support for Multipath TCP"
depends on MPTCP
select IPV6
default y
config MPTCP_HMAC_TEST
bool "Tests for MPTCP HMAC implementation"
default n
help
This option enable boot time self-test for the HMAC implementation
used by the MPTCP code
Say N if you are unsure.
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP cryptographic functions
* Copyright (c) 2017 - 2019, Intel Corporation.
*
* Note: This code is based on mptcp_ctrl.c, mptcp_ipv4.c, and
* mptcp_ipv6 from multipath-tcp.org, authored by:
*
* Sébastien Barré <sebastien.barre@uclouvain.be>
* Christoph Paasch <christoph.paasch@uclouvain.be>
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*/
#include <linux/kernel.h>
#include <crypto/sha.h>
#include <asm/unaligned.h>
#include "protocol.h"
#define SHA256_DIGEST_WORDS (SHA256_DIGEST_SIZE / 4)
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
{
__be32 mptcp_hashed_key[SHA256_DIGEST_WORDS];
__be64 input = cpu_to_be64(key);
struct sha256_state state;
sha256_init(&state);
sha256_update(&state, (__force u8 *)&input, sizeof(input));
sha256_final(&state, (u8 *)mptcp_hashed_key);
if (token)
*token = be32_to_cpu(mptcp_hashed_key[0]);
if (idsn)
*idsn = be64_to_cpu(*((__be64 *)&mptcp_hashed_key[6]));
}
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
void *hmac)
{
u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
__be32 mptcp_hashed_key[SHA256_DIGEST_WORDS];
__be32 *hash_out = (__force __be32 *)hmac;
struct sha256_state state;
u8 key1be[8];
u8 key2be[8];
int i;
put_unaligned_be64(key1, key1be);
put_unaligned_be64(key2, key2be);
/* Generate key xored with ipad */
memset(input, 0x36, SHA_MESSAGE_BYTES);
for (i = 0; i < 8; i++)
input[i] ^= key1be[i];
for (i = 0; i < 8; i++)
input[i + 8] ^= key2be[i];
put_unaligned_be32(nonce1, &input[SHA256_BLOCK_SIZE]);
put_unaligned_be32(nonce2, &input[SHA256_BLOCK_SIZE + 4]);
sha256_init(&state);
sha256_update(&state, input, SHA256_BLOCK_SIZE + 8);
/* emit sha256(K1 || msg) on the second input block, so we can
* reuse 'input' for the last hashing
*/
sha256_final(&state, &input[SHA256_BLOCK_SIZE]);
/* Prepare second part of hmac */
memset(input, 0x5C, SHA_MESSAGE_BYTES);
for (i = 0; i < 8; i++)
input[i] ^= key1be[i];
for (i = 0; i < 8; i++)
input[i + 8] ^= key2be[i];
sha256_init(&state);
sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE);
sha256_final(&state, (u8 *)mptcp_hashed_key);
/* takes only first 160 bits */
for (i = 0; i < 5; i++)
hash_out[i] = mptcp_hashed_key[i];
}
#ifdef CONFIG_MPTCP_HMAC_TEST
struct test_cast {
char *key;
char *msg;
char *result;
};
/* we can't reuse RFC 4231 test vectors, as we have constraint on the
* input and key size, and we truncate the output.
*/
static struct test_cast tests[] = {
{
.key = "0b0b0b0b0b0b0b0b",
.msg = "48692054",
.result = "8385e24fb4235ac37556b6b886db106284a1da67",
},
{
.key = "aaaaaaaaaaaaaaaa",
.msg = "dddddddd",
.result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492",
},
{
.key = "0102030405060708",
.msg = "cdcdcdcd",
.result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6",
},
};
static int __init test_mptcp_crypto(void)
{
char hmac[20], hmac_hex[41];
u32 nonce1, nonce2;
u64 key1, key2;
int i, j;
for (i = 0; i < ARRAY_SIZE(tests); ++i) {
/* mptcp hmap will convert to be before computing the hmac */
key1 = be64_to_cpu(*((__be64 *)&tests[i].key[0]));
key2 = be64_to_cpu(*((__be64 *)&tests[i].key[8]));
nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0]));
nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4]));
mptcp_crypto_hmac_sha(key1, key2, nonce1, nonce2, hmac);
for (j = 0; j < 20; ++j)
sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
hmac_hex[40] = 0;
if (memcmp(hmac_hex, tests[i].result, 40))
pr_err("test %d failed, got %s expected %s", i,
hmac_hex, tests[i].result);
else
pr_info("test %d [ ok ]", i);
}
return 0;
}
late_initcall(test_mptcp_crypto);
#endif
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
*
* Copyright (c) 2019, Tessares SA.
*/
#include <linux/sysctl.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "protocol.h"
#define MPTCP_SYSCTL_PATH "net/mptcp"
static int mptcp_pernet_id;
struct mptcp_pernet {
struct ctl_table_header *ctl_table_hdr;
int mptcp_enabled;
};
static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
{
return net_generic(net, mptcp_pernet_id);
}
int mptcp_is_enabled(struct net *net)
{
return mptcp_get_pernet(net)->mptcp_enabled;
}
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
.maxlen = sizeof(int),
.mode = 0644,
/* users with CAP_NET_ADMIN or root (not and) can change this
* value, same as other sysctl or the 'net' tree.
*/
.proc_handler = proc_dointvec,
},
{}
};
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
}
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
struct ctl_table_header *hdr;
struct ctl_table *table;
table = mptcp_sysctl_table;
if (!net_eq(net, &init_net)) {
table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL);
if (!table)
goto err_alloc;
}
table[0].data = &pernet->mptcp_enabled;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
goto err_reg;
pernet->ctl_table_hdr = hdr;
return 0;
err_reg:
if (!net_eq(net, &init_net))
kfree(table);
err_alloc:
return -ENOMEM;
}
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
unregister_net_sysctl_table(pernet->ctl_table_hdr);
kfree(table);
}
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
mptcp_pernet_set_defaults(pernet);
return mptcp_pernet_new_table(net, pernet);
}
/* Note: the callback will only be called per extra netns */
static void __net_exit mptcp_net_exit(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
mptcp_pernet_del_table(pernet);
}
static struct pernet_operations mptcp_pernet_ops = {
.init = mptcp_net_init,
.exit = mptcp_net_exit,
.id = &mptcp_pernet_id,
.size = sizeof(struct mptcp_pernet),
};
void __init mptcp_init(void)
{
mptcp_proto_init();
if (register_pernet_subsys(&mptcp_pernet_ops) < 0)
panic("Failed to register MPTCP pernet subsystem.\n");
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcpv6_init(void)
{
int err;
err = mptcp_proto_v6_init();
return err;
}
#endif
This diff is collapsed.
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
/* Multipath TCP
*
* Copyright (c) 2017 - 2019, Intel Corporation.
*/
#ifndef __MPTCP_PROTOCOL_H
#define __MPTCP_PROTOCOL_H
#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#define MPTCP_SUPPORTED_VERSION 1
/* MPTCP option bits */
#define OPTION_MPTCP_MPC_SYN BIT(0)
#define OPTION_MPTCP_MPC_SYNACK BIT(1)
#define OPTION_MPTCP_MPC_ACK BIT(2)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
#define MPTCPOPT_MP_JOIN 1
#define MPTCPOPT_DSS 2
#define MPTCPOPT_ADD_ADDR 3
#define MPTCPOPT_RM_ADDR 4
#define MPTCPOPT_MP_PRIO 5
#define MPTCPOPT_MP_FAIL 6
#define MPTCPOPT_MP_FASTCLOSE 7
/* MPTCP suboption lengths */
#define TCPOLEN_MPTCP_MPC_SYN 4
#define TCPOLEN_MPTCP_MPC_SYNACK 12
#define TCPOLEN_MPTCP_MPC_ACK 20
#define TCPOLEN_MPTCP_MPC_ACK_DATA 22
#define TCPOLEN_MPTCP_DSS_BASE 4
#define TCPOLEN_MPTCP_DSS_ACK32 4
#define TCPOLEN_MPTCP_DSS_ACK64 8
#define TCPOLEN_MPTCP_DSS_MAP32 10
#define TCPOLEN_MPTCP_DSS_MAP64 14
#define TCPOLEN_MPTCP_DSS_CHECKSUM 2
/* MPTCP MP_CAPABLE flags */
#define MPTCP_VERSION_MASK (0x0F)
#define MPTCP_CAP_CHECKSUM_REQD BIT(7)
#define MPTCP_CAP_EXTENSIBILITY BIT(6)
#define MPTCP_CAP_HMAC_SHA256 BIT(0)
#define MPTCP_CAP_FLAG_MASK (0x3F)
/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN BIT(4)
#define MPTCP_DSS_DSN64 BIT(3)
#define MPTCP_DSS_HAS_MAP BIT(2)
#define MPTCP_DSS_ACK64 BIT(1)
#define MPTCP_DSS_HAS_ACK BIT(0)
#define MPTCP_DSS_FLAG_MASK (0x1F)
/* MPTCP socket flags */
#define MPTCP_DATA_READY BIT(0)
#define MPTCP_SEND_SPACE BIT(1)
/* MPTCP connection sock */
struct mptcp_sock {
/* inet_connection_sock must be the first member */
struct inet_connection_sock sk;
u64 local_key;
u64 remote_key;
u64 write_seq;
u64 ack_seq;
u32 token;
unsigned long flags;
bool can_ack;
struct list_head conn_list;
struct skb_ext *cached_ext; /* for the next sendmsg */
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
struct sock *first;
};
#define mptcp_for_each_subflow(__msk, __subflow) \
list_for_each_entry(__subflow, &((__msk)->conn_list), node)
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
return (struct mptcp_sock *)sk;
}
struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
remote_key_valid : 1;
u64 local_key;
u64 remote_key;
u64 idsn;
u32 token;
u32 ssn_offset;
};
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
return (struct mptcp_subflow_request_sock *)rsk;
}
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
u64 local_key;
u64 remote_key;
u64 idsn;
u64 map_seq;
u32 snd_isn;
u32 token;
u32 rel_write_seq;
u32 map_subflow_seq;
u32 ssn_offset;
u32 map_data_len;
u32 request_mptcp : 1, /* send MP_CAPABLE */
mp_capable : 1, /* remote is MPTCP capable */
fourth_ack : 1, /* send initial DSS */
conn_finished : 1,
map_valid : 1,
mpc_map : 1,
data_avail : 1,
rx_eof : 1,
can_ack : 1; /* only after processing the remote a key */
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
const struct inet_connection_sock_af_ops *icsk_af_ops;
void (*tcp_data_ready)(struct sock *sk);
void (*tcp_state_change)(struct sock *sk);
void (*tcp_write_space)(struct sock *sk);
struct rcu_head rcu;
};
static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/* Use RCU on icsk_ulp_data only for sock diag code */
return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
}
static inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
return subflow->tcp_sock;
}
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq -
subflow->ssn_offset -
subflow->map_subflow_seq;
}
static inline u64
mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
{
return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}
int mptcp_is_enabled(struct net *net);
bool mptcp_subflow_data_available(struct sock *sk);
void mptcp_subflow_init(void);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
sk->sk_data_ready = ctx->tcp_data_ready;
sk->sk_state_change = ctx->tcp_state_change;
sk->sk_write_space = ctx->tcp_write_space;
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
extern const struct inet_connection_sock_af_ops ipv6_specific;
#endif
void mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int mptcp_proto_v6_init(void);
#endif
struct mptcp_read_arg {
struct msghdr *msg;
};
int mptcp_read_actor(read_descriptor_t *desc, struct sk_buff *skb,
unsigned int offset, size_t len);
void mptcp_get_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx);
void mptcp_finish_connect(struct sock *sk);
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(u32 token);
int mptcp_token_new_connect(struct sock *sk);
int mptcp_token_new_accept(u32 token);
void mptcp_token_update_accept(struct sock *sk, struct sock *conn);
void mptcp_token_destroy(u32 token);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
{
/* we might consider a faster version that computes the key as a
* hash of some information available in the MPTCP socket. Use
* random data at the moment, as it's probably the safest option
* in case multiple sockets are opened in different namespaces at
* the same time.
*/
get_random_bytes(key, sizeof(u64));
mptcp_crypto_key_sha(*key, token, idsn);
}
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
void *hash_out);
static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
{
return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
}
static inline bool before64(__u64 seq1, __u64 seq2)
{
return (__s64)(seq1 - seq2) < 0;
}
#define after64(seq2, seq1) before64(seq1, seq2)
#endif /* __MPTCP_PROTOCOL_H */
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP token management
* Copyright (c) 2017 - 2019, Intel Corporation.
*
* Note: This code is based on mptcp_ctrl.c from multipath-tcp.org,
* authored by:
*
* Sébastien Barré <sebastien.barre@uclouvain.be>
* Christoph Paasch <christoph.paasch@uclouvain.be>
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*/
#define pr_fmt(fmt) "MPTCP: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/radix-tree.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/protocol.h>
#include <net/mptcp.h>
#include "protocol.h"
static RADIX_TREE(token_tree, GFP_ATOMIC);
static RADIX_TREE(token_req_tree, GFP_ATOMIC);
static DEFINE_SPINLOCK(token_tree_lock);
static int token_used __read_mostly;
/**
* mptcp_token_new_request - create new key/idsn/token for subflow_request
* @req - the request socket
*
* This function is called when a new mptcp connection is coming in.
*
* It creates a unique token to identify the new mptcp connection,
* a secret local key and the initial data sequence number (idsn).
*
* Returns 0 on success.
*/
int mptcp_token_new_request(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
int err;
while (1) {
u32 token;
mptcp_crypto_key_gen_sha(&subflow_req->local_key,
&subflow_req->token,
&subflow_req->idsn);
pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n",
req, subflow_req->local_key, subflow_req->token,
subflow_req->idsn);
token = subflow_req->token;
spin_lock_bh(&token_tree_lock);
if (!radix_tree_lookup(&token_req_tree, token) &&
!radix_tree_lookup(&token_tree, token))
break;
spin_unlock_bh(&token_tree_lock);
}
err = radix_tree_insert(&token_req_tree,
subflow_req->token, &token_used);
spin_unlock_bh(&token_tree_lock);
return err;
}
/**
* mptcp_token_new_connect - create new key/idsn/token for subflow
* @sk - the socket that will initiate a connection
*
* This function is called when a new outgoing mptcp connection is
* initiated.
*
* It creates a unique token to identify the new mptcp connection,
* a secret local key and the initial data sequence number (idsn).
*
* On success, the mptcp connection can be found again using
* the computed token at a later time, this is needed to process
* join requests.
*
* returns 0 on success.
*/
int mptcp_token_new_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *mptcp_sock = subflow->conn;
int err;
while (1) {
u32 token;
mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
&subflow->idsn);
pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
sk, subflow->local_key, subflow->token, subflow->idsn);
token = subflow->token;
spin_lock_bh(&token_tree_lock);
if (!radix_tree_lookup(&token_req_tree, token) &&
!radix_tree_lookup(&token_tree, token))
break;
spin_unlock_bh(&token_tree_lock);
}
err = radix_tree_insert(&token_tree, subflow->token, mptcp_sock);
spin_unlock_bh(&token_tree_lock);
return err;
}
/**
* mptcp_token_new_accept - insert token for later processing
* @token: the token to insert to the tree
*
* Called when a SYN packet creates a new logical connection, i.e.
* is not a join request.
*
* We don't have an mptcp socket yet at that point.
* This is paired with mptcp_token_update_accept, called on accept().
*/
int mptcp_token_new_accept(u32 token)
{
int err;
spin_lock_bh(&token_tree_lock);
err = radix_tree_insert(&token_tree, token, &token_used);
spin_unlock_bh(&token_tree_lock);
return err;
}
/**
* mptcp_token_update_accept - update token to map to mptcp socket
* @conn: the new struct mptcp_sock
* @sk: the initial subflow for this mptcp socket
*
* Called when the first mptcp socket is created on accept to
* refresh the dummy mapping (done to reserve the token) with
* the mptcp_socket structure that wasn't allocated before.
*/
void mptcp_token_update_accept(struct sock *sk, struct sock *conn)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
void __rcu **slot;
spin_lock_bh(&token_tree_lock);
slot = radix_tree_lookup_slot(&token_tree, subflow->token);
WARN_ON_ONCE(!slot);
if (slot) {
WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used);
radix_tree_replace_slot(&token_tree, slot, conn);
}
spin_unlock_bh(&token_tree_lock);
}
/**
* mptcp_token_destroy_request - remove mptcp connection/token
* @token - token of mptcp connection to remove
*
* Remove not-yet-fully-established incoming connection identified
* by @token.
*/
void mptcp_token_destroy_request(u32 token)
{
spin_lock_bh(&token_tree_lock);
radix_tree_delete(&token_req_tree, token);
spin_unlock_bh(&token_tree_lock);
}
/**
* mptcp_token_destroy - remove mptcp connection/token
* @token - token of mptcp connection to remove
*
* Remove the connection identified by @token.
*/
void mptcp_token_destroy(u32 token)
{
spin_lock_bh(&token_tree_lock);
radix_tree_delete(&token_tree, token);
spin_unlock_bh(&token_tree_lock);
}
......@@ -32,6 +32,7 @@ TARGETS += memory-hotplug
TARGETS += mount
TARGETS += mqueue
TARGETS += net
TARGETS += net/mptcp
TARGETS += netfilter
TARGETS += networking/timestamping
TARGETS += nsfs
......
# SPDX-License-Identifier: GPL-2.0
top_srcdir = ../../../../..
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
TEST_PROGS := mptcp_connect.sh
TEST_GEN_FILES = mptcp_connect
EXTRA_CLEAN := *.pcap
include ../../lib.mk
CONFIG_MPTCP=y
CONFIG_MPTCP_IPV6=y
CONFIG_VETH=y
CONFIG_NET_SCH_NETEM=m
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment