Commit cb0f8b03 authored by David S. Miller

Merge branch 'mptcp-next'

Mat Martineau says:

====================
mptcp: Optimize output options and add MP_FAIL

This patch set contains two groups of changes that we've been testing in
the MPTCP tree.

The first optimizes the code path and data structure for populating
MPTCP option headers when transmitting.

Patch 1 reorganizes code to reduce the number of conditionals that need
to be evaluated in common cases.

Patch 2 rearranges struct mptcp_out_options to save 80 bytes (on x86_64).

The next five patches add partial support for the MP_FAIL option as
defined in RFC 8684. MP_FAIL is an option header used to cleanly handle
MPTCP checksum failures. When the MPTCP checksum detects an error in the
MPTCP DSS header or the data mapped by that header, the receiver uses a
TCP RST with MP_FAIL to close the subflow that experienced the error and
provide associated MPTCP sequence number information to the peer. RFC
8684 also describes how a single-subflow connection can discard corrupt
data and remain connected under certain conditions using MP_FAIL, but
that feature is not implemented here.

Patches 3-5 implement MP_FAIL transmit and receive, and integrate them
with checksum validation.

Patches 6 & 7 add MP_FAIL selftests and the MIBs required for those
tests.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d484dc2b 6bb3ab49
......@@ -58,10 +58,6 @@ struct mptcp_addr_info {
struct mptcp_out_options {
#if IS_ENABLED(CONFIG_MPTCP)
u16 suboptions;
u64 sndr_key;
u64 rcvr_key;
u64 ahmac;
struct mptcp_addr_info addr;
struct mptcp_rm_list rm_list;
u8 join_id;
u8 backup;
......@@ -69,11 +65,26 @@ struct mptcp_out_options {
reset_transient:1,
csum_reqd:1,
allow_join_id0:1;
u32 nonce;
u64 thmac;
u32 token;
u8 hmac[20];
struct mptcp_ext ext_copy;
union {
struct {
u64 sndr_key;
u64 rcvr_key;
};
struct {
struct mptcp_addr_info addr;
u64 ahmac;
};
struct {
struct mptcp_ext ext_copy;
u64 fail_seq;
};
struct {
u32 nonce;
u32 token;
u64 thmac;
u8 hmac[20];
};
};
#endif
};
......
......@@ -44,6 +44,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
......
......@@ -37,6 +37,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
......
This diff is collapsed.
......@@ -249,6 +249,11 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}
/* Path-manager hook for a received MP_FAIL option.
 *
 * @sk:       socket the option arrived on (not used by this function).
 * @fail_seq: sequence number carried by the MP_FAIL option.
 *
 * Only emits a debug trace of @fail_seq; no other state is touched here.
 */
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
	/* Terminate the record with '\n' so it is emitted as a complete
	 * line instead of being held back or merged with a later message.
	 */
	pr_debug("fail_seq=%llu\n", fail_seq);
}
/* path manager helpers */
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
......
......@@ -26,6 +26,8 @@
#define OPTION_MPTCP_FASTCLOSE BIT(8)
#define OPTION_MPTCP_PRIO BIT(9)
#define OPTION_MPTCP_RST BIT(10)
#define OPTION_MPTCP_DSS BIT(11)
#define OPTION_MPTCP_FAIL BIT(12)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
......@@ -67,6 +69,7 @@
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
#define TCPOLEN_MPTCP_FAIL 12
#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
......@@ -137,6 +140,7 @@ struct mptcp_options_received {
add_addr : 1,
rm_addr : 1,
mp_prio : 1,
mp_fail : 1,
echo : 1,
csum_reqd : 1,
backup : 1,
......@@ -158,6 +162,7 @@ struct mptcp_options_received {
u64 ahmac;
u8 reset_reason:4;
u8 reset_transient:1;
u64 fail_seq;
};
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
......@@ -428,6 +433,7 @@ struct mptcp_subflow_context {
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
send_mp_fail : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
......@@ -608,6 +614,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
static inline bool mptcp_has_another_subflow(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
mptcp_for_each_subflow(msk, tmp) {
if (tmp != subflow)
return true;
}
return false;
}
void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
......@@ -722,6 +741,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
......
......@@ -910,6 +910,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
if (unlikely(csum_fold(csum))) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
subflow->send_mp_fail = 1;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
}
......@@ -1157,6 +1159,20 @@ static bool subflow_check_data_avail(struct sock *ssk)
fallback:
/* RFC 8684 section 3.7. */
if (subflow->send_mp_fail) {
if (mptcp_has_another_subflow(ssk)) {
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
}
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, 0);
return true;
}
if (subflow->mp_join || subflow->fully_established) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
......
......@@ -578,6 +578,43 @@ chk_csum_nr()
fi
}
# Compare the MP_FAIL counters reported by nstat with the expected values.
#   $1: expected MPTcpExtMPFailTx count, read in $ns1
#   $2: expected MPTcpExtMPFailRx count, read in $ns2
# On any mismatch, sets the global 'ret' to 1 and dumps the MPTcp counters
# of both namespaces.
chk_fail_nr()
{
	local mp_fail_nr_tx=$1
	local mp_fail_nr_rx=$2
	local count
	local dump_stats

	# TX side
	printf "%-39s %s" " " "ftx"
	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
	count=${count:-0}	# counter line absent: treat as zero
	if [ "$count" = "$mp_fail_nr_tx" ]; then
		echo -n "[ ok ]"
	else
		echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
		ret=1
		dump_stats=1
	fi

	# RX side
	echo -n " - frx "
	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
	count=${count:-0}	# counter line absent: treat as zero
	if [ "$count" = "$mp_fail_nr_rx" ]; then
		echo "[ ok ]"
	else
		echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
		ret=1
		dump_stats=1
	fi

	# Full counter dump to help diagnose a mismatch
	if [ "${dump_stats}" = 1 ]; then
		echo "Server ns stats"
		ip netns exec $ns1 nstat -as | grep MPTcp
		echo "Client ns stats"
		ip netns exec $ns2 nstat -as | grep MPTcp
	fi
}
chk_join_nr()
{
local msg="$1"
......@@ -627,6 +664,7 @@ chk_join_nr()
fi
if [ $checksum -eq 1 ]; then
chk_csum_nr
chk_fail_nr 0 0
fi
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment