Commit c189b548 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-multiple-subflows-path-management'

Mat Martineau says:

====================
Multipath TCP part 3: Multiple subflows and path management

v2 -> v3: Remove 'inline' in .c files, fix uapi bit macros, and rebase.

v1 -> v2: Rebase on current net-next, fix for netlink limit setting,
and update .gitignore for selftest.

This patch set allows more than one TCP subflow to be established and
used for a multipath TCP connection. Subflows are added to an existing
connection using the MP_JOIN option during the 3-way handshake. With
multiple TCP subflows available, sent data is now stored in the MPTCP
socket so it may be retransmitted on any TCP subflow if there is no
DATA_ACK before a timeout. If an MPTCP-level timeout occurs, data is
retransmitted using an available subflow. Storing this sent data
requires the addition of memory accounting at the MPTCP level, which was
previously delegated to the single subflow. Incoming DATA_ACKs now free
data from the MPTCP-level retransmit buffer.

IP addresses available for new subflow connections can now be advertised
and received with the ADD_ADDR option, and the corresponding REMOVE_ADDR
option likewise advertises that an address is no longer available.

The MPTCP path manager netlink interface has commands to set in-kernel
limits for the number of concurrent subflows and control the
advertisement of IP addresses between peers.

To track and debug MPTCP connections there are new MPTCP MIB counters,
and subflow context can be requested using inet_diag. The MPTCP
self-tests now validate multiple-subflow operation and the netlink path
manager interface.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 41b14502 b08fbf24
......@@ -11727,6 +11727,7 @@ W: https://github.com/multipath-tcp/mptcp_net-next/wiki
B: https://github.com/multipath-tcp/mptcp_net-next/issues
S: Maintained
F: include/net/mptcp.h
F: include/uapi/linux/mptcp.h
F: net/mptcp/
F: tools/testing/selftests/net/mptcp/
......
......@@ -86,9 +86,19 @@ struct mptcp_options_received {
u64 data_seq;
u32 subflow_seq;
u16 data_len;
u8 mp_capable : 1,
u16 mp_capable : 1,
mp_join : 1,
dss : 1;
dss : 1,
add_addr : 1,
rm_addr : 1,
family : 4,
echo : 1,
backup : 1;
u32 token;
u32 nonce;
u64 thmac;
u8 hmac[20];
u8 join_id;
u8 use_map:1,
dsn64:1,
data_fin:1,
......@@ -96,6 +106,16 @@ struct mptcp_options_received {
ack64:1,
mpc_map:1,
__unused:2;
u8 addr_id;
u8 rm_id;
union {
struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
struct in6_addr addr6;
#endif
};
u64 ahmac;
u16 port;
};
#endif
......@@ -131,6 +151,8 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
#if IS_ENABLED(CONFIG_MPTCP)
rx_opt->mptcp.mp_capable = 0;
rx_opt->mptcp.mp_join = 0;
rx_opt->mptcp.add_addr = 0;
rx_opt->mptcp.rm_addr = 0;
rx_opt->mptcp.dss = 0;
#endif
}
......
......@@ -12,6 +12,8 @@
#include <linux/tcp.h>
#include <linux/types.h>
struct seq_file;
/* MPTCP sk_buff extension data */
struct mptcp_ext {
u64 data_ack;
......@@ -33,6 +35,21 @@ struct mptcp_out_options {
u16 suboptions;
u64 sndr_key;
u64 rcvr_key;
union {
struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
struct in6_addr addr6;
#endif
};
u8 addr_id;
u64 ahmac;
u8 rm_id;
u8 join_id;
u8 backup;
u32 nonce;
u64 thmac;
u32 token;
u8 hmac[20];
struct mptcp_ext ext_copy;
#endif
};
......@@ -106,6 +123,9 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
skb_ext_find(from, SKB_EXT_MPTCP));
}
bool mptcp_sk_is_subflow(const struct sock *sk);
void mptcp_seq_show(struct seq_file *seq);
#else
static inline void mptcp_init(void)
......@@ -172,6 +192,12 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
return true;
}
static inline bool mptcp_sk_is_subflow(const struct sock *sk)
{
return false;
}
static inline void mptcp_seq_show(struct seq_file *seq) { }
#endif /* CONFIG_MPTCP */
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
......
......@@ -27,6 +27,9 @@ struct netns_mib {
#if IS_ENABLED(CONFIG_TLS)
DEFINE_SNMP_STAT(struct linux_tls_mib, tls_statistics);
#endif
#ifdef CONFIG_MPTCP
DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
#endif
};
#endif
......@@ -166,6 +166,7 @@ enum {
INET_ULP_INFO_UNSPEC,
INET_ULP_INFO_NAME,
INET_ULP_INFO_TLS,
INET_ULP_INFO_MPTCP,
__INET_ULP_INFO_MAX,
};
#define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1)
......
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _UAPI_MPTCP_H
#define _UAPI_MPTCP_H
#include <linux/const.h>
#include <linux/types.h>
#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0)
#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1)
#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2)
#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3)
#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4)
#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5)
#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6)
#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7)
#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8)
enum {
MPTCP_SUBFLOW_ATTR_UNSPEC,
MPTCP_SUBFLOW_ATTR_TOKEN_REM,
MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
MPTCP_SUBFLOW_ATTR_MAP_SEQ,
MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
MPTCP_SUBFLOW_ATTR_FLAGS,
MPTCP_SUBFLOW_ATTR_ID_REM,
MPTCP_SUBFLOW_ATTR_ID_LOC,
MPTCP_SUBFLOW_ATTR_PAD,
__MPTCP_SUBFLOW_ATTR_MAX
};
#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
/* netlink interface */
#define MPTCP_PM_NAME "mptcp_pm"
#define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds"
#define MPTCP_PM_VER 0x1
/*
* ATTR types defined for MPTCP
*/
enum {
MPTCP_PM_ATTR_UNSPEC,
MPTCP_PM_ATTR_ADDR, /* nested address */
MPTCP_PM_ATTR_RCV_ADD_ADDRS, /* u32 */
MPTCP_PM_ATTR_SUBFLOWS, /* u32 */
__MPTCP_PM_ATTR_MAX
};
#define MPTCP_PM_ATTR_MAX (__MPTCP_PM_ATTR_MAX - 1)
enum {
MPTCP_PM_ADDR_ATTR_UNSPEC,
MPTCP_PM_ADDR_ATTR_FAMILY, /* u16 */
MPTCP_PM_ADDR_ATTR_ID, /* u8 */
MPTCP_PM_ADDR_ATTR_ADDR4, /* struct in_addr */
MPTCP_PM_ADDR_ATTR_ADDR6, /* struct in6_addr */
MPTCP_PM_ADDR_ATTR_PORT, /* u16 */
MPTCP_PM_ADDR_ATTR_FLAGS, /* u32 */
MPTCP_PM_ADDR_ATTR_IF_IDX, /* s32 */
__MPTCP_PM_ADDR_ATTR_MAX
};
#define MPTCP_PM_ADDR_ATTR_MAX (__MPTCP_PM_ADDR_ATTR_MAX - 1)
#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
enum {
MPTCP_PM_CMD_UNSPEC,
MPTCP_PM_CMD_ADD_ADDR,
MPTCP_PM_CMD_DEL_ADDR,
MPTCP_PM_CMD_GET_ADDR,
MPTCP_PM_CMD_FLUSH_ADDRS,
MPTCP_PM_CMD_SET_LIMITS,
MPTCP_PM_CMD_GET_LIMITS,
__MPTCP_PM_CMD_AFTER_LAST
};
#endif /* _UAPI_MPTCP_H */
......@@ -1793,6 +1793,10 @@ static __net_exit void ipv4_mib_exit_net(struct net *net)
free_percpu(net->mib.net_statistics);
free_percpu(net->mib.ip_statistics);
free_percpu(net->mib.tcp_statistics);
#ifdef CONFIG_MPTCP
/* allocated on demand, see mptcp_init_sock() */
free_percpu(net->mib.mptcp_statistics);
#endif
}
static __net_initdata struct pernet_operations ipv4_mib_ops = {
......
......@@ -32,6 +32,7 @@
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <linux/bottom_half.h>
......@@ -485,6 +486,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
offsetof(struct ipstats_mib, syncp)));
seq_putc(seq, '\n');
mptcp_seq_show(seq);
return 0;
}
......
......@@ -774,6 +774,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!child)
goto listen_overflow;
if (own_req && sk_is_mptcp(child) && mptcp_sk_is_subflow(child)) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
inet_csk_reqsk_queue_drop_and_put(sk, req);
return child;
}
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
*req_stolen = !own_req;
......
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
mib.o pm_netlink.o
......@@ -44,8 +44,7 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
*idsn = be64_to_cpu(*((__be64 *)&mptcp_hashed_key[6]));
}
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
void *hmac)
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
{
u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
__be32 mptcp_hashed_key[SHA256_DIGEST_WORDS];
......@@ -55,6 +54,9 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
u8 key2be[8];
int i;
if (WARN_ON_ONCE(len > SHA256_DIGEST_SIZE))
len = SHA256_DIGEST_SIZE;
put_unaligned_be64(key1, key1be);
put_unaligned_be64(key2, key2be);
......@@ -65,11 +67,10 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
for (i = 0; i < 8; i++)
input[i + 8] ^= key2be[i];
put_unaligned_be32(nonce1, &input[SHA256_BLOCK_SIZE]);
put_unaligned_be32(nonce2, &input[SHA256_BLOCK_SIZE + 4]);
memcpy(&input[SHA256_BLOCK_SIZE], msg, len);
sha256_init(&state);
sha256_update(&state, input, SHA256_BLOCK_SIZE + 8);
sha256_update(&state, input, SHA256_BLOCK_SIZE + len);
/* emit sha256(K1 || msg) on the second input block, so we can
* reuse 'input' for the last hashing
......@@ -125,6 +126,7 @@ static int __init test_mptcp_crypto(void)
char hmac[20], hmac_hex[41];
u32 nonce1, nonce2;
u64 key1, key2;
u8 msg[8];
int i, j;
for (i = 0; i < ARRAY_SIZE(tests); ++i) {
......@@ -134,7 +136,10 @@ static int __init test_mptcp_crypto(void)
nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0]));
nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4]));
mptcp_crypto_hmac_sha(key1, key2, nonce1, nonce2, hmac);
put_unaligned_be32(nonce1, &msg[0]);
put_unaligned_be32(nonce2, &msg[4]);
mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
for (j = 0; j < 20; ++j)
sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
hmac_hex[40] = 0;
......
// SPDX-License-Identifier: GPL-2.0
/* MPTCP socket monitoring support
*
* Copyright (c) 2019 Red Hat
*
* Author: Davide Caratti <dcaratti@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/inet_diag.h>
#include <net/netlink.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
int err;
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
err = 0;
goto nla_failure;
}
if (sf->mp_capable)
flags |= MPTCP_SUBFLOW_FLAG_MCAP_REM;
if (sf->request_mptcp)
flags |= MPTCP_SUBFLOW_FLAG_MCAP_LOC;
if (sf->mp_join)
flags |= MPTCP_SUBFLOW_FLAG_JOIN_REM;
if (sf->request_join)
flags |= MPTCP_SUBFLOW_FLAG_JOIN_LOC;
if (sf->backup)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM;
if (sf->request_bkup)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC;
if (sf->fully_established)
flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED;
if (sf->conn_finished)
flags |= MPTCP_SUBFLOW_FLAG_CONNECTED;
if (sf->map_valid)
flags |= MPTCP_SUBFLOW_FLAG_MAPVALID;
if (nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_TOKEN_REM, sf->remote_token) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_TOKEN_LOC, sf->token) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
sf->rel_write_seq) ||
nla_put_u64_64bit(skb, MPTCP_SUBFLOW_ATTR_MAP_SEQ, sf->map_seq,
MPTCP_SUBFLOW_ATTR_PAD) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
sf->map_subflow_seq) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_SSN_OFFSET, sf->ssn_offset) ||
nla_put_u16(skb, MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
err = -EMSGSIZE;
goto nla_failure;
}
rcu_read_unlock();
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
nla_nest_cancel(skb, start);
return err;
}
static size_t subflow_get_info_size(const struct sock *sk)
{
size_t size = 0;
size += nla_total_size(0) + /* INET_ULP_INFO_MPTCP */
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_TOKEN_REM */
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_TOKEN_LOC */
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */
nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */
nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */
nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */
nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */
nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */
nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_LOC */
0;
return size;
}
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops)
{
ops->get_info = subflow_get_info;
ops->get_info_size = subflow_get_info_size;
}
// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/seq_file.h>
#include <net/ip.h>
#include <net/mptcp.h>
#include <net/snmp.h>
#include <net/net_namespace.h>
#include "mib.h"
static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE),
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX),
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_SENTINEL
};
/* mptcp_mib_alloc - allocate percpu mib counters
*
* These are allocated when the first mptcp socket is created so
* we do not waste percpu memory if mptcp isn't in use.
*/
bool mptcp_mib_alloc(struct net *net)
{
struct mptcp_mib __percpu *mib = alloc_percpu(struct mptcp_mib);
if (!mib)
return false;
if (cmpxchg(&net->mib.mptcp_statistics, NULL, mib))
free_percpu(mib);
return true;
}
void mptcp_seq_show(struct seq_file *seq)
{
struct net *net = seq->private;
int i;
seq_puts(seq, "MPTcpExt:");
for (i = 0; mptcp_snmp_list[i].name; i++)
seq_printf(seq, " %s", mptcp_snmp_list[i].name);
seq_puts(seq, "\nMPTcpExt:");
if (!net->mib.mptcp_statistics) {
for (i = 0; mptcp_snmp_list[i].name; i++)
seq_puts(seq, " 0");
return;
}
for (i = 0; mptcp_snmp_list[i].name; i++)
seq_printf(seq, " %lu",
snmp_fold_field(net->mib.mptcp_statistics,
mptcp_snmp_list[i].entry));
seq_putc(seq, '\n');
}
/* SPDX-License-Identifier: GPL-2.0-or-later */
enum linux_mptcp_mib_field {
MPTCP_MIB_NUM = 0,
MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */
MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
__MPTCP_MIB_MAX
};
#define LINUX_MIB_MPTCP_MAX __MPTCP_MIB_MAX
struct mptcp_mib {
unsigned long mibs[LINUX_MIB_MPTCP_MAX];
};
static inline void MPTCP_INC_STATS(struct net *net,
enum linux_mptcp_mib_field field)
{
if (likely(net->mib.mptcp_statistics))
SNMP_INC_STATS(net->mib.mptcp_statistics, field);
}
static inline void __MPTCP_INC_STATS(struct net *net,
enum linux_mptcp_mib_field field)
{
if (likely(net->mib.mptcp_statistics))
__SNMP_INC_STATS(net->mib.mptcp_statistics, field);
}
bool mptcp_mib_alloc(struct net *net);
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
*
* Copyright (c) 2019, Intel Corporation.
*/
#include <linux/kernel.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
static struct workqueue_struct *pm_wq;
/* path manager command handlers */
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
pr_debug("msk=%p, local_id=%d", msk, addr->id);
msk->pm.local = *addr;
WRITE_ONCE(msk->pm.addr_signal, true);
return 0;
}
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id)
{
return -ENOTSUPP;
}
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id)
{
return -ENOTSUPP;
}
/* path manager event handlers */
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
WRITE_ONCE(pm->server_side, server_side);
}
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
int ret;
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
pm->subflows_max, READ_ONCE(pm->accept_subflow));
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->accept_subflow))
return false;
spin_lock_bh(&pm->lock);
ret = pm->subflows < pm->subflows_max;
if (ret && ++pm->subflows == pm->subflows_max)
WRITE_ONCE(pm->accept_subflow, false);
spin_unlock_bh(&pm->lock);
return ret;
}
/* return true if the new status bit is currently cleared, that is, this event
* can be server, eventually by an already scheduled work
*/
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
enum mptcp_pm_status new_status)
{
pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status,
BIT(new_status));
if (msk->pm.status & BIT(new_status))
return false;
msk->pm.status |= BIT(new_status);
if (queue_work(pm_wq, &msk->pm.work))
sock_hold((struct sock *)msk);
return true;
}
void mptcp_pm_fully_established(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p", msk);
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->work_pending))
return;
spin_lock_bh(&pm->lock);
if (READ_ONCE(pm->work_pending))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
pr_debug("msk=%p", msk);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p", msk);
if (!READ_ONCE(pm->work_pending))
return;
spin_lock_bh(&pm->lock);
if (READ_ONCE(pm->work_pending))
mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id)
{
pr_debug("msk=%p", msk);
}
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
/* avoid acquiring the lock if there is no room for fouther addresses */
if (!READ_ONCE(pm->accept_addr))
return;
spin_lock_bh(&pm->lock);
/* be sure there is something to signal re-checking under PM lock */
if (READ_ONCE(pm->accept_addr) &&
mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED))
pm->remote = *addr;
spin_unlock_bh(&pm->lock);
}
/* path manager helpers */
bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_addr_info *saddr)
{
int ret = false;
spin_lock_bh(&msk->pm.lock);
/* double check after the lock is acquired */
if (!mptcp_pm_should_signal(msk))
goto out_unlock;
if (remaining < mptcp_add_addr_len(msk->pm.local.family))
goto out_unlock;
*saddr = msk->pm.local;
WRITE_ONCE(msk->pm.addr_signal, false);
ret = true;
out_unlock:
spin_unlock_bh(&msk->pm.lock);
return ret;
}
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
return mptcp_pm_nl_get_local_id(msk, skc);
}
static void pm_worker(struct work_struct *work)
{
struct mptcp_pm_data *pm = container_of(work, struct mptcp_pm_data,
work);
struct mptcp_sock *msk = container_of(pm, struct mptcp_sock, pm);
struct sock *sk = (struct sock *)msk;
lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
mptcp_pm_nl_add_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
mptcp_pm_nl_fully_established(msk);
}
if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
mptcp_pm_nl_subflow_established(msk);
}
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
sock_put(sk);
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
msk->pm.add_addr_accepted = 0;
msk->pm.local_addr_used = 0;
msk->pm.subflows = 0;
WRITE_ONCE(msk->pm.work_pending, false);
WRITE_ONCE(msk->pm.addr_signal, false);
WRITE_ONCE(msk->pm.accept_addr, false);
WRITE_ONCE(msk->pm.accept_subflow, false);
msk->pm.status = 0;
spin_lock_init(&msk->pm.lock);
INIT_WORK(&msk->pm.work, pm_worker);
mptcp_pm_nl_data_init(msk);
}
void mptcp_pm_close(struct mptcp_sock *msk)
{
if (cancel_work_sync(&msk->pm.work))
sock_put((struct sock *)msk);
}
void mptcp_pm_init(void)
{
pm_wq = alloc_workqueue("pm_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8);
if (!pm_wq)
panic("Failed to allocate workqueue");
mptcp_pm_nl_init();
}
This diff is collapsed.
This diff is collapsed.
......@@ -17,6 +17,12 @@
#define OPTION_MPTCP_MPC_SYN BIT(0)
#define OPTION_MPTCP_MPC_SYNACK BIT(1)
#define OPTION_MPTCP_MPC_ACK BIT(2)
#define OPTION_MPTCP_MPJ_SYN BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK BIT(4)
#define OPTION_MPTCP_MPJ_ACK BIT(5)
#define OPTION_MPTCP_ADD_ADDR BIT(6)
#define OPTION_MPTCP_ADD_ADDR6 BIT(7)
#define OPTION_MPTCP_RM_ADDR BIT(8)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
......@@ -33,12 +39,30 @@
#define TCPOLEN_MPTCP_MPC_SYNACK 12
#define TCPOLEN_MPTCP_MPC_ACK 20
#define TCPOLEN_MPTCP_MPC_ACK_DATA 22
#define TCPOLEN_MPTCP_MPJ_SYN 12
#define TCPOLEN_MPTCP_MPJ_SYNACK 16
#define TCPOLEN_MPTCP_MPJ_ACK 24
#define TCPOLEN_MPTCP_DSS_BASE 4
#define TCPOLEN_MPTCP_DSS_ACK32 4
#define TCPOLEN_MPTCP_DSS_ACK64 8
#define TCPOLEN_MPTCP_DSS_MAP32 10
#define TCPOLEN_MPTCP_DSS_MAP64 14
#define TCPOLEN_MPTCP_DSS_CHECKSUM 2
#define TCPOLEN_MPTCP_ADD_ADDR 16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE 8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10
#define TCPOLEN_MPTCP_ADD_ADDR6 28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22
#define TCPOLEN_MPTCP_PORT_LEN 2
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
#define MPTCPOPT_HMAC_LEN 20
#define MPTCPOPT_THMAC_LEN 8
/* MPTCP MP_CAPABLE flags */
#define MPTCP_VERSION_MASK (0x0F)
......@@ -55,9 +79,75 @@
#define MPTCP_DSS_HAS_ACK BIT(0)
#define MPTCP_DSS_FLAG_MASK (0x1F)
/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO BIT(0)
#define MPTCP_ADDR_HMAC_LEN 20
#define MPTCP_ADDR_IPVERSION_4 4
#define MPTCP_ADDR_IPVERSION_6 6
/* MPTCP socket flags */
#define MPTCP_DATA_READY 0
#define MPTCP_SEND_SPACE 1
#define MPTCP_WORK_RTX 2
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
((nib & 0xF) << 8) | field);
}
#define MPTCP_PM_MAX_ADDR 4
struct mptcp_addr_info {
sa_family_t family;
__be16 port;
u8 id;
union {
struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
struct in6_addr addr6;
#endif
};
};
enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_RECEIVED,
MPTCP_PM_ESTABLISHED,
MPTCP_PM_SUBFLOW_ESTABLISHED,
};
struct mptcp_pm_data {
struct mptcp_addr_info local;
struct mptcp_addr_info remote;
spinlock_t lock; /*protects the whole PM data */
bool addr_signal;
bool server_side;
bool work_pending;
bool accept_addr;
bool accept_subflow;
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
u8 subflows;
u8 add_addr_signal_max;
u8 add_addr_accept_max;
u8 local_addr_max;
u8 subflows_max;
u8 status;
struct work_struct work;
};
struct mptcp_data_frag {
struct list_head list;
u64 data_seq;
int data_len;
int offset;
int overhead;
struct page *page;
};
/* MPTCP connection sock */
struct mptcp_sock {
......@@ -67,14 +157,20 @@ struct mptcp_sock {
u64 remote_key;
u64 write_seq;
u64 ack_seq;
atomic64_t snd_una;
unsigned long timer_ival;
u32 token;
unsigned long flags;
bool can_ack;
spinlock_t join_list_lock;
struct work_struct work;
struct list_head conn_list;
struct list_head rtx_queue;
struct list_head join_list;
struct skb_ext *cached_ext; /* for the next sendmsg */
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
struct sock *first;
struct mptcp_pm_data pm;
};
#define mptcp_for_each_subflow(__msk, __subflow) \
......@@ -85,17 +181,42 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
return (struct mptcp_sock *)sk;
}
static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
if (list_empty(&msk->rtx_queue))
return NULL;
return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
if (list_empty(&msk->rtx_queue))
return NULL;
return list_first_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
remote_key_valid : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
u64 remote_key;
u64 idsn;
u32 token;
u32 ssn_offset;
u64 thmac;
u32 local_nonce;
u32 remote_nonce;
};
static inline struct mptcp_subflow_request_sock *
......@@ -118,16 +239,28 @@ struct mptcp_subflow_context {
u32 ssn_offset;
u32 map_data_len;
u32 request_mptcp : 1, /* send MP_CAPABLE */
request_join : 1, /* send MP_JOIN */
request_bkup : 1,
mp_capable : 1, /* remote is MPTCP capable */
mp_join : 1, /* remote is JOINing */
fully_established : 1, /* path validated */
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
mpc_map : 1,
backup : 1,
data_avail : 1,
rx_eof : 1,
data_fin_tx_enable : 1,
can_ack : 1; /* only after processing the remote a key */
u64 data_fin_tx_seq;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
u32 remote_token;
u8 hmac[MPTCPOPT_HMAC_LEN];
u8 local_id;
u8 remote_id;
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
......@@ -171,6 +304,11 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
int mptcp_is_enabled(struct net *net);
bool mptcp_subflow_data_available(struct sock *sk);
void mptcp_subflow_init(void);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
......@@ -199,11 +337,14 @@ void mptcp_get_options(const struct sk_buff *skb,
void mptcp_finish_connect(struct sock *sk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
void mptcp_data_acked(struct sock *sk);
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(u32 token);
int mptcp_token_new_connect(struct sock *sk);
int mptcp_token_new_accept(u32 token, struct sock *conn);
struct mptcp_sock *mptcp_token_get_sock(u32 token);
void mptcp_token_destroy(u32 token);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
......@@ -219,8 +360,48 @@ static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
mptcp_crypto_key_sha(*key, token, idsn);
}
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
void *hash_out);
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_close(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow);
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id);
static inline bool mptcp_pm_should_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal);
}
static inline unsigned int mptcp_add_addr_len(int family)
{
if (family == AF_INET)
return TCPOLEN_MPTCP_ADD_ADDR;
return TCPOLEN_MPTCP_ADD_ADDR6;
}
bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_addr_info *saddr);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
void mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
{
......@@ -234,4 +415,6 @@ static inline bool before64(__u64 seq1, __u64 seq2)
#define after64(seq2, seq1) before64(seq1, seq2)
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
#endif /* __MPTCP_PROTOCOL_H */
This diff is collapsed.
......@@ -140,6 +140,33 @@ int mptcp_token_new_accept(u32 token, struct sock *conn)
return err;
}
/**
* mptcp_token_get_sock - retrieve mptcp connection sock using its token
* @token: token of the mptcp connection to retrieve
*
* This function returns the mptcp connection structure with the given token.
* A reference count on the mptcp socket returned is taken.
*
* returns NULL if no connection with the given token value exists.
*/
struct mptcp_sock *mptcp_token_get_sock(u32 token)
{
struct sock *conn;
spin_lock_bh(&token_tree_lock);
conn = radix_tree_lookup(&token_tree, token);
if (conn) {
/* token still reserved? */
if (conn == (struct sock *)&token_used)
conn = NULL;
else
sock_hold(conn);
}
spin_unlock_bh(&token_tree_lock);
return mptcp_sk(conn);
}
/**
* mptcp_token_destroy_request - remove mptcp connection/token
* @token - token of mptcp connection to remove
......
# SPDX-License-Identifier: GPL-2.0
top_srcdir = ../../../../..
KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
TEST_PROGS := mptcp_connect.sh
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
TEST_GEN_FILES = mptcp_connect
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
TEST_FILES := settings
......
......@@ -51,6 +51,7 @@ static bool tcpulp_audit;
static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
static void die_usage(void)
{
......@@ -250,6 +251,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
static size_t do_rnd_write(const int fd, char *buf, const size_t len)
{
static bool first = true;
unsigned int do_w;
ssize_t bw;
......@@ -257,10 +259,19 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (do_w == 0 || do_w > len)
do_w = len;
if (cfg_join && first && do_w > 100)
do_w = 100;
bw = write(fd, buf, do_w);
if (bw < 0)
perror("write");
/* let the join handshake complete, before going on */
if (cfg_join && first) {
usleep(200000);
first = false;
}
return bw;
}
......@@ -385,8 +396,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
break;
/* ... but we still receive.
* Close our write side.
* Close our write side, ev. give some time
* for address notification
*/
if (cfg_join)
usleep(400000);
shutdown(peerfd, SHUT_WR);
} else {
if (errno == EINTR)
......@@ -403,6 +417,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
}
/* leave some time for late join/announce */
if (cfg_join)
usleep(400000);
close(peerfd);
return 0;
}
......@@ -658,7 +676,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
if (r & 1)
if (!cfg_join && (r & 1))
close(fd);
}
......@@ -794,8 +812,12 @@ static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "6lp:s:hut:m:S:R:")) != -1) {
while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
cfg_mode = CFG_MODE_POLL;
break;
case 'l':
listen_mode = true;
break;
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
ret=0
sin=""
sout=""
cin=""
cout=""
ksft_skip=4
timeout=30
capture=0
TEST_COUNT=0
init()
{
capout=$(mktemp)
rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
ns1="ns1-$rndh"
ns2="ns2-$rndh"
for netns in "$ns1" "$ns2";do
ip netns add $netns || exit $ksft_skip
ip -net $netns link set lo up
ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
done
# ns1 ns2
# ns1eth1 ns2eth1
# ns1eth2 ns2eth2
# ns1eth3 ns2eth3
# ns1eth4 ns2eth4
for i in `seq 1 4`; do
ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
ip -net "$ns1" link set ns1eth$i up
ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
ip -net "$ns2" link set ns2eth$i up
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
done
}
cleanup_partial()
{
rm -f "$capout"
for netns in "$ns1" "$ns2"; do
ip netns del $netns
done
}
cleanup()
{
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
cleanup_partial
}
reset()
{
cleanup_partial
init
}
for arg in "$@"; do
if [ "$arg" = "-c" ]; then
capture=1
fi
done
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
check_transfer()
{
in=$1
out=$2
what=$3
cmp "$in" "$out" > /dev/null 2>&1
if [ $? -ne 0 ] ;then
echo "[ FAIL ] $what does not match (in, out):"
print_file_err "$in"
print_file_err "$out"
return 1
fi
return 0
}
do_ping()
{
listener_ns="$1"
connector_ns="$2"
connect_addr="$3"
ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
if [ $? -ne 0 ] ; then
echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
ret=1
fi
}
do_transfer()
{
listener_ns="$1"
connector_ns="$2"
cl_proto="$3"
srv_proto="$4"
connect_addr="$5"
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
:> "$cout"
:> "$sout"
:> "$capout"
if [ $capture -eq 1 ]; then
if [ -z $SUDO_USER ] ; then
capuser=""
else
capuser="-Z $SUDO_USER"
fi
capfile="mp_join-${listener_ns}.pcap"
echo "Capturing traffic for test $TEST_COUNT into $capfile"
ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
cappid=$!
sleep 1
fi
ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
spid=$!
sleep 1
ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
cpid=$!
wait $cpid
retc=$?
wait $spid
rets=$?
if [ $capture -eq 1 ]; then
sleep 1
kill $cappid
fi
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo " client exit code $retc, server $rets" 1>&2
echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
cat "$capout"
return 1
fi
check_transfer $sin $cout "file received by client"
retc=$?
check_transfer $cin $sout "file received by server"
rets=$?
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
cat "$capout"
return 0
fi
cat "$capout"
return 1
}
make_file()
{
name=$1
who=$2
SIZE=1
dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
echo "Created $name (size $SIZE KB) containing data sent by $who"
}
run_tests()
{
listener_ns="$1"
connector_ns="$2"
connect_addr="$3"
lret=0
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
return
fi
}
chk_join_nr()
{
local msg="$1"
local syn_nr=$2
local syn_ack_nr=$3
local ack_nr=$4
local count
local dump_stats
printf "%-36s %s" "$msg" "syn"
count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$syn_nr" ]; then
echo "[fail] got $count JOIN[s] syn expected $syn_nr"
ret=1
dump_stats=1
else
echo -n "[ ok ]"
fi
echo -n " - synack"
count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$syn_ack_nr" ]; then
echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
ret=1
dump_stats=1
else
echo -n "[ ok ]"
fi
echo -n " - ack"
count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$ack_nr" ]; then
echo "[fail] got $count JOIN[s] ack expected $ack_nr"
ret=1
dump_stats=1
else
echo "[ ok ]"
fi
if [ "${dump_stats}" = 1 ]; then
echo Server ns stats
ip netns exec $ns1 nstat -as | grep MPTcp
echo Client ns stats
ip netns exec $ns2 nstat -as | grep MPTcp
fi
}
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
init
make_file "$cin" "client"
make_file "$sin" "server"
trap cleanup EXIT
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "no JOIN" "0" "0" "0"
# subflow limted by client
reset
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow, limited by client" 0 0 0
# subflow limted by server
reset
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow, limited by server" 1 1 0
# subflow
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow" 1 1 1
# multiple subflows
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows" 2 2 2
# multiple subflows limited by serverf
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows, limited by server" 2 2 1
# add_address, unused
reset
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "unused signal address" 0 0 0
# accept and use add_addr
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address" 1 1 1
# accept and use add_addr with an additional subflow
# note: signal address in server ns and local addresses in client ns must
# belong to different subnets or one of the listed local address could be
# used for 'add_addr' subflow
reset
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal" 2 2 2
# accept and use add_addr with additional subflows
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
exit $ret
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
ksft_skip=4
ret=0
usage() {
echo "Usage: $0 [ -h ]"
}
while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
exit 0
;;
"?")
usage $0
exit 1
;;
esac
done
sec=$(date +%s)
rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
ns1="ns1-$rndh"
err=$(mktemp)
ret=0
cleanup()
{
rm -f $out
ip netns del $ns1
}
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
trap cleanup EXIT
ip netns add $ns1 || exit $ksft_skip
ip -net $ns1 link set lo up
ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
check()
{
local cmd="$1"
local expected="$2"
local msg="$3"
local out=`$cmd 2>$err`
local cmd_ret=$?
printf "%-50s %s" "$msg"
if [ $cmd_ret -ne 0 ]; then
echo "[FAIL] command execution '$cmd' stderr "
cat $err
ret=1
elif [ "$out" = "$expected" ]; then
echo "[ OK ]"
else
echo -n "[FAIL] "
echo "expected '$expected' got '$out'"
ret=1
fi
}
check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
subflows 0" "defaults limits"
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1 " "simple add/get addr"
check "ip netns exec $ns1 ./pm_nl_ctl dump" \
"id 1 flags 10.0.1.1
id 2 flags subflow dev lo 10.0.1.2
id 3 flags signal,backup 10.0.1.3 " "dump addrs"
ip netns exec $ns1 ./pm_nl_ctl del 2
check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
check "ip netns exec $ns1 ./pm_nl_ctl dump" \
"id 1 flags 10.0.1.1
id 3 flags signal,backup 10.0.1.3 " "dump addrs after del"
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3
check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal
check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4 " "id addr increment"
for i in `seq 5 9`; do
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
done
check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9 " "hard addr limit"
check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
for i in `seq 9 256`; do
ip netns exec $ns1 ./pm_nl_ctl del $i
ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9
done
check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
id 3 flags signal,backup 10.0.1.3
id 4 flags signal 10.0.1.4
id 5 flags signal 10.0.1.5
id 6 flags signal 10.0.1.6
id 7 flags signal 10.0.1.7
id 8 flags signal 10.0.1.8 " "id limit"
ip netns exec $ns1 ./pm_nl_ctl flush
check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
ip netns exec $ns1 ./pm_nl_ctl limits 9 1
check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
subflows 0" "rcv addrs above hard limit"
ip netns exec $ns1 ./pm_nl_ctl limits 1 9
check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
subflows 0" "subflows above hard limit"
ip netns exec $ns1 ./pm_nl_ctl limits 8 8
check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
subflows 8" "set limits"
exit $ret
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment