Commit 0cd55ef9 authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'mptcp-fix-inconsistent-backup-usage'

Matthieu Baerts says:

====================
mptcp: fix inconsistent backup usage

In all the MPTCP backup related tests, the backup flag was set on one
side, and the expected behaviour is to have both sides respecting this
decision. That's also the "natural" way, and what the users seem to
expect.

On the scheduler side, only the 'backup' field was checked, which is
supposed to be set only if the other peer flagged a subflow as backup.
But in various places, this flag was also set when the local host
flagged the subflow as backup, certainly to have the expected behaviour
mentioned above.

Patch 1 modifies the packet scheduler to check if the backup flag has
been set on both directions, not to change its behaviour after having
applied the following patches. That's what the default packet scheduler
should have done since the beginning in v5.7.

Patch 2 fixes the backup flag being mirrored on the MPJ+SYN+ACK by
accident since its introduction in v5.7. Instead, the received and sent
backup flags are properly distinguished in requests.

Patch 3 stops setting the received backup flag as well when sending an
MP_PRIO, something that was done since the MP_PRIO support in v5.12.

Patch 4 adds related and missing MIB counters to be able to easily check
if MP_JOIN are sent with a backup flag. Certainly because these counters
were not there, the behaviour that is fixed by patches here was not
properly verified.

Patch 5 validates the previous patch by extending the MPTCP Join
selftest.

Patch 6 fixes the backup support in signal endpoints: if a signal
endpoint had the backup flag, it was not set in the MPJ+SYN+ACK as
expected. It was only set for ongoing connections, but not future ones
as expected, since the introduction of the backup flag in endpoints in
v5.10.

Patch 7 validates the previous patch by extending the MPTCP Join
selftest as well.
Signed-off-by: default avatarMatthieu Baerts (NGI0) <matttbe@kernel.org>
---
Matthieu Baerts (NGI0) (7):
      mptcp: sched: check both directions for backup
      mptcp: distinguish rcv vs sent backup flag in requests
      mptcp: pm: only set request_bkup flag when sending MP_PRIO
      mptcp: mib: count MPJ with backup flag
      selftests: mptcp: join: validate backup in MPJ
      mptcp: pm: fix backup support in signal endpoints
      selftests: mptcp: join: check backup support in signal endp

 include/trace/events/mptcp.h                    |  2 +-
 net/mptcp/mib.c                                 |  2 +
 net/mptcp/mib.h                                 |  2 +
 net/mptcp/options.c                             |  2 +-
 net/mptcp/pm.c                                  | 12 +++++
 net/mptcp/pm_netlink.c                          | 19 ++++++-
 net/mptcp/pm_userspace.c                        | 18 +++++++
 net/mptcp/protocol.c                            | 10 ++--
 net/mptcp/protocol.h                            |  4 ++
 net/mptcp/subflow.c                             | 10 ++++
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 72 ++++++++++++++++++++-----
 11 files changed, 132 insertions(+), 21 deletions(-)
====================

Link: https://patch.msgid.link/20240727-upstream-net-20240727-mptcp-backup-signal-v1-0-f50b31604cf1@kernel.orgSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 039564d2 f833470c
......@@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send,
struct sock *ssk;
__entry->active = mptcp_subflow_active(subflow);
__entry->backup = subflow->backup;
__entry->backup = subflow->backup || subflow->request_bkup;
if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock))
__entry->free = sk_stream_memory_free(subflow->tcp_sock);
......
......@@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX),
SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
......
......@@ -14,7 +14,9 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */
MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
......
......@@ -909,7 +909,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
return true;
} else if (subflow_req->mp_join) {
opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
opts->backup = subflow_req->backup;
opts->backup = subflow_req->request_bkup;
opts->join_id = subflow_req->local_id;
opts->thmac = subflow_req->thmac;
opts->nonce = subflow_req->local_nonce;
......
......@@ -426,6 +426,18 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_get_local_id(msk, &skc_local);
}
bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
{
struct mptcp_addr_info skc_local;
mptcp_local_address((struct sock_common *)skc, &skc_local);
if (mptcp_pm_is_userspace(msk))
return mptcp_userspace_pm_is_backup(msk, &skc_local);
return mptcp_pm_nl_is_backup(msk, &skc_local);
}
int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
u8 *flags, int *ifindex)
{
......
......@@ -471,7 +471,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con
slow = lock_sock_fast(ssk);
if (prio) {
subflow->send_mp_prio = 1;
subflow->backup = backup;
subflow->request_bkup = backup;
}
......@@ -1102,6 +1101,24 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
return ret;
}
bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
struct mptcp_pm_addr_entry *entry;
bool backup = false;
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
break;
}
}
rcu_read_unlock();
return backup;
}
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_EV_GRP_OFFSET 1
......
......@@ -165,6 +165,24 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
struct mptcp_pm_addr_entry *entry;
bool backup = false;
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
if (mptcp_addresses_equal(&entry->addr, skc, false)) {
backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
break;
}
}
spin_unlock_bh(&msk->pm.lock);
return backup;
}
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
......
......@@ -1422,13 +1422,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
}
mptcp_for_each_subflow(msk, subflow) {
bool backup = subflow->backup || subflow->request_bkup;
trace_mptcp_subflow_get_send(subflow);
ssk = mptcp_subflow_tcp_sock(subflow);
if (!mptcp_subflow_active(subflow))
continue;
tout = max(tout, mptcp_timeout_from_subflow(subflow));
nr_active += !subflow->backup;
nr_active += !backup;
pace = subflow->avg_pacing_rate;
if (unlikely(!pace)) {
/* init pacing rate from socket */
......@@ -1439,9 +1441,9 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
}
linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace);
if (linger_time < send_info[subflow->backup].linger_time) {
send_info[subflow->backup].ssk = ssk;
send_info[subflow->backup].linger_time = linger_time;
if (linger_time < send_info[backup].linger_time) {
send_info[backup].ssk = ssk;
send_info[backup].linger_time = linger_time;
}
}
__mptcp_set_timeout(sk, tout);
......
......@@ -448,6 +448,7 @@ struct mptcp_subflow_request_sock {
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
request_bkup : 1,
csum_reqd : 1,
allow_join_id0 : 1;
u8 local_id;
......@@ -1108,6 +1109,9 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc);
bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb);
int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
struct netlink_callback *cb);
......
......@@ -100,6 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
return NULL;
}
subflow_req->local_id = local_id;
subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req);
return msk;
}
......@@ -168,6 +169,9 @@ static int subflow_check_req(struct request_sock *req,
return 0;
} else if (opt_mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
if (mp_opt.backup)
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
}
if (opt_mp_capable && listener->request_mptcp) {
......@@ -577,6 +581,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
if (subflow->backup)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX);
if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),
......@@ -614,6 +621,8 @@ static int subflow_chk_local_id(struct sock *sk)
return err;
subflow_set_local_id(subflow, err);
subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk);
return 0;
}
......@@ -2005,6 +2014,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->request_bkup = subflow_req->request_bkup;
WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
......
......@@ -1634,6 +1634,8 @@ chk_prio_nr()
{
local mp_prio_nr_tx=$1
local mp_prio_nr_rx=$2
local mpj_syn=$3
local mpj_syn_ack=$4
local count
print_check "ptx"
......@@ -1655,6 +1657,26 @@ chk_prio_nr()
else
print_ok
fi
print_check "syn backup"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mpj_syn" ]; then
fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn"
else
print_ok
fi
print_check "synack backup"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mpj_syn_ack" ]; then
fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack"
else
print_ok
fi
}
chk_subflow_nr()
......@@ -2612,11 +2634,24 @@ backup_tests()
sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_prio_nr 0 1
chk_prio_nr 0 1 1 0
fi
# single address, backup
if reset "single address, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 1
sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 0 0 1
fi
# single address, switch to backup
if reset "single address, switch to backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
......@@ -2625,20 +2660,20 @@ backup_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 1
chk_prio_nr 1 1 0 0
fi
# single address with port, backup
if reset "single address with port, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100
pm_nl_set_limits $ns2 1 1
sflags=backup speed=slow \
sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 1
chk_prio_nr 1 0 0 1
fi
if reset "mpc backup" &&
......@@ -2647,17 +2682,26 @@ backup_tests()
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 0 1
chk_prio_nr 0 1 0 0
fi
if reset "mpc backup both sides" &&
continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
# 10.0.2.2 (non-backup) -> 10.0.1.1 (backup)
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
# 10.0.1.2 (backup) -> 10.0.2.1 (non-backup)
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 1 1
chk_join_nr 2 2 2
chk_prio_nr 1 1 1 1
fi
if reset "mpc switch to backup" &&
......@@ -2666,7 +2710,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 0 1
chk_prio_nr 0 1 0 0
fi
if reset "mpc switch to backup both sides" &&
......@@ -2676,7 +2720,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_prio_nr 1 1
chk_prio_nr 1 1 0 0
fi
}
......@@ -3053,7 +3097,7 @@ fullmesh_tests()
addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_prio_nr 0 1
chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
......@@ -3066,7 +3110,7 @@ fullmesh_tests()
sflags=nobackup,nofullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_prio_nr 0 1
chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
}
......@@ -3318,7 +3362,7 @@ userspace_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 0
chk_prio_nr 0 0
chk_prio_nr 0 0 0 0
fi
# userspace pm type prevents rm_addr
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment