Commit 3980cf16 authored by Jakub Kicinski

Merge branch 'mptcp-more-misc-fixes-for-v6-8'

Matthieu Baerts says:

====================
mptcp: more misc. fixes for v6.8

This series includes 6 types of fixes:

- Patch 1 fixes support for v4-mapped-in-v6 addresses in the userspace PM
  when asking to delete a subflow. This was handled everywhere else, but
  not there. Patch 2 validates the modification, thanks to a subtest in
  mptcp_join.sh. These patches can be backported up to v5.19.

- Patch 3 is a small fix for a recent bug-fix patch, just to avoid
  printing an irrelevant warning (pr_warn()) once. It can be backported
  up to v5.6, alongside the bug-fix that was introduced in
  v6.8-rc5.

- Patches 4 to 6 are fixes for bugs found by Paolo while working on
  TCP_NOTSENT_LOWAT support for MPTCP. These fixes can improve
  performance in some cases. Patches can be backported up to v5.6,
  v5.11 and v6.7 respectively.

- Patch 7 makes sure 'ss -M' is available when starting MPTCP Join
  selftest as it is required for some subtests since v5.18.

- Patch 8 fixes a possible double-free on socket dismantle. The issue
  always existed, but was unnoticed because it was not causing any
  problem so far. This fix can be backported up to v5.6.

- Patch 9 is a fix for a very recent patch causing lockdep warnings in
  subflow diag. The patch causing the regression -- which fixes another
  issue present since v5.7 -- should be part of the future v6.8-rc6.
  Patch 10 validates the modification, thanks to a new subtest in
  diag.sh.
====================

Link: https://lore.kernel.org/r/20240223-upstream-net-20240223-misc-fixes-v1-0-162e87e48497@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 0d60d8df b4b51d36
...@@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) ...@@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
bool slow; bool slow;
int err; int err;
if (inet_sk_state_load(sk) == TCP_LISTEN)
return 0;
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start) if (!start)
return -EMSGSIZE; return -EMSGSIZE;
......
...@@ -981,10 +981,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, ...@@ -981,10 +981,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0) if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true); WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side))) if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk"); pr_warn_once("bogus mpc option on established client sk");
set_fully_established:
mptcp_data_lock((struct sock *)msk); mptcp_data_lock((struct sock *)msk);
__mptcp_subflow_fully_established(msk, subflow, mp_opt); __mptcp_subflow_fully_established(msk, subflow, mp_opt);
mptcp_data_unlock((struct sock *)msk); mptcp_data_unlock((struct sock *)msk);
......
...@@ -495,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info ...@@ -495,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
goto destroy_err; goto destroy_err;
} }
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
addr_l.family = AF_INET6;
}
if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
addr_r.family = AF_INET6;
}
#endif
if (addr_l.family != addr_r.family) { if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match"); GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL; err = -EINVAL;
......
...@@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, ...@@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb); mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1; TCP_SKB_CB(skb)->eor = 1;
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb; goto alloc_skb;
} }
...@@ -3177,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) ...@@ -3177,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset); return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
} }
static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
{
const struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_txoptions *opt;
struct ipv6_pinfo *newnp;
newnp = inet6_sk(newsk);
rcu_read_lock();
opt = rcu_dereference(np->opt);
if (opt) {
opt = ipv6_dup_options(newsk, opt);
if (!opt)
net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
}
RCU_INIT_POINTER(newnp->opt, opt);
rcu_read_unlock();
}
#endif #endif
/* Give @newsk its own copy of the IPv4 options attached to @sk.
 *
 * The source options are read under rcu_read_lock(); the copy is allocated
 * with sock_kmalloc() on behalf of @newsk using GFP_ATOMIC. If @sk has no
 * options, or the allocation fails (a rate-limited warning is logged in that
 * case), @newsk's option pointer is set to NULL.
 */
static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
{
	const struct inet_sock *src_inet = inet_sk(sk);
	struct inet_sock *dst_inet = inet_sk(newsk);
	struct ip_options_rcu *src_opt, *copy = NULL;

	rcu_read_lock();

	src_opt = rcu_dereference(src_inet->inet_opt);
	if (src_opt) {
		/* fixed header plus the variable-length option bytes */
		size_t len = sizeof(*src_opt) + src_opt->opt.optlen;

		copy = sock_kmalloc(newsk, len, GFP_ATOMIC);
		if (!copy)
			net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
		else
			memcpy(copy, src_opt, len);
	}

	/* copy is NULL both when there was nothing to copy and on failure */
	RCU_INIT_POINTER(dst_inet->inet_opt, copy);

	rcu_read_unlock();
}
struct sock *mptcp_sk_clone_init(const struct sock *sk, struct sock *mptcp_sk_clone_init(const struct sock *sk,
const struct mptcp_options_received *mp_opt, const struct mptcp_options_received *mp_opt,
struct sock *ssk, struct sock *ssk,
...@@ -3199,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, ...@@ -3199,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_init_sock(nsk); __mptcp_init_sock(nsk);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (nsk->sk_family == AF_INET6)
mptcp_copy_ip6_options(nsk, sk);
else
#endif
mptcp_copy_ip_options(nsk, sk);
msk = mptcp_sk(nsk); msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key; msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token; msk->token = subflow_req->token;
...@@ -3210,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, ...@@ -3210,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1; msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq; msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq; msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched); mptcp_init_sched(msk, mptcp_sk(sk)->sched);
......
...@@ -790,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) ...@@ -790,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
} }
/* Notify writers waiting on @sk once the stream is writeable again.
 *
 * Nothing happens unless sk_stream_is_writeable() reports true and the
 * MPTCP_NOSPACE flag was set; in that case the flag is cleared and
 * sk_stream_write_space() is invoked.
 */
static inline void mptcp_write_space(struct sock *sk)
{
	if (!sk_stream_is_writeable(sk))
		return;

	/* pairs with memory barrier in mptcp_poll */
	smp_mb();
	if (!test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
		return;

	sk_stream_write_space(sk);
}
static inline void __mptcp_sync_sndbuf(struct sock *sk) static inline void __mptcp_sync_sndbuf(struct sock *sk)
{ {
struct mptcp_subflow_context *subflow; struct mptcp_subflow_context *subflow;
...@@ -808,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk) ...@@ -808,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */ /* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
WRITE_ONCE(sk->sk_sndbuf, new_sndbuf); WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
mptcp_write_space(sk);
} }
/* The caller held both the msk socket and the subflow socket locks, /* The caller held both the msk socket and the subflow socket locks,
...@@ -838,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) ...@@ -838,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable(); local_bh_enable();
} }
/* Notify writers waiting on @sk once the stream is writeable again.
 *
 * Nothing happens unless sk_stream_is_writeable() reports true and the
 * MPTCP_NOSPACE flag was set; in that case the flag is cleared and
 * sk_stream_write_space() is invoked.
 */
static inline void mptcp_write_space(struct sock *sk)
{
	if (!sk_stream_is_writeable(sk))
		return;

	/* pairs with memory barrier in mptcp_poll */
	smp_mb();
	if (!test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
		return;

	sk_stream_write_space(sk);
}
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags); void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
#define MPTCP_TOKEN_MAX_RETRIES 4 #define MPTCP_TOKEN_MAX_RETRIES 4
......
...@@ -20,7 +20,7 @@ flush_pids() ...@@ -20,7 +20,7 @@ flush_pids()
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
for _ in $(seq 10); do for _ in $(seq $((timeout_poll * 10))); do
[ -z "$(ip netns pids "${ns}")" ] && break [ -z "$(ip netns pids "${ns}")" ] && break
sleep 0.1 sleep 0.1
done done
...@@ -91,6 +91,15 @@ chk_msk_nr() ...@@ -91,6 +91,15 @@ chk_msk_nr()
__chk_msk_nr "grep -c token:" "$@" __chk_msk_nr "grep -c token:" "$@"
} }
# Check the number of listener sockets reported by 'ss' in netns $ns.
# $1: expected number of sockets ; $2: message prefix for the test result
# Two checks are run via __chk_nr: one on the 'ss -M' output (reported as
# "<msg> - mptcp") and one on the 'ss -t' output ("<msg> - subflows").
chk_listener_nr()
{
	local want=$1
	local label="$2"

	__chk_nr "ss -inmlHMON $ns | wc -l" "${want}" "${label} - mptcp" 0
	__chk_nr "ss -inmlHtON $ns | wc -l" "${want}" "${label} - subflows"
}
wait_msk_nr() wait_msk_nr()
{ {
local condition="grep -c token:" local condition="grep -c token:"
...@@ -289,5 +298,24 @@ flush_pids ...@@ -289,5 +298,24 @@ flush_pids
chk_msk_inuse 0 "many->0" chk_msk_inuse 0 "many->0"
chk_msk_cestab 0 "many->0" chk_msk_cestab 0 "many->0"
# Listener subtest: start from a state where no listener socket is reported.
chk_listener_nr 0 "no listener sockets"
# Number of mptcp_connect instances started in the background below.
NR_SERVERS=100
# One background instance per port, using ports 20002..(20001 + NR_SERVERS).
for I in $(seq 1 $NR_SERVERS); do
ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \
-t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 &
done
# Wait for every port before counting (presumably until it is in listen
# state, going by the helper's name -- confirm against mptcp_lib.sh).
for I in $(seq 1 $NR_SERVERS); do
mptcp_lib_wait_local_port_listen $ns $((I + 20001))
done
# Expect exactly one listener per started instance.
chk_listener_nr $NR_SERVERS "many listener sockets"
# graceful termination
# A background client sends a single "a" to each port over 127.0.0.1.
for I in $(seq 1 $NR_SERVERS); do
echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 &
done
# Signal and wait for any process still running in the netns.
flush_pids
mptcp_lib_result_print_all_tap mptcp_lib_result_print_all_tap
exit $ret exit $ret
...@@ -161,6 +161,11 @@ check_tools() ...@@ -161,6 +161,11 @@ check_tools()
exit $ksft_skip exit $ksft_skip
fi fi
if ! ss -h | grep -q MPTCP; then
echo "SKIP: ss tool does not support MPTCP"
exit $ksft_skip
fi
# Use the legacy version if available to support old kernel versions # Use the legacy version if available to support old kernel versions
if iptables-legacy -V &> /dev/null; then if iptables-legacy -V &> /dev/null; then
iptables="iptables-legacy" iptables="iptables-legacy"
...@@ -3333,16 +3338,17 @@ userspace_pm_rm_sf() ...@@ -3333,16 +3338,17 @@ userspace_pm_rm_sf()
{ {
local evts=$evts_ns1 local evts=$evts_ns1
local t=${3:-1} local t=${3:-1}
local ip=4 local ip
local tk da dp sp local tk da dp sp
local cnt local cnt
[ "$1" == "$ns2" ] && evts=$evts_ns2 [ "$1" == "$ns2" ] && evts=$evts_ns2
if mptcp_lib_is_v6 $2; then ip=6; fi [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4
[ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6
tk=$(mptcp_lib_evts_get_info token "$evts") tk=$(mptcp_lib_evts_get_info token "$evts")
da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2)
dp=$(mptcp_lib_evts_get_info dport "$evts" $t) dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2)
sp=$(mptcp_lib_evts_get_info sport "$evts" $t) sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2)
cnt=$(rm_sf_count ${1}) cnt=$(rm_sf_count ${1})
ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
...@@ -3429,20 +3435,23 @@ userspace_tests() ...@@ -3429,20 +3435,23 @@ userspace_tests()
if reset_with_events "userspace pm add & remove address" && if reset_with_events "userspace pm add & remove address" &&
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1 set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1 pm_nl_set_limits $ns2 2 2
speed=5 \ speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$! local tests_pid=$!
wait_mpj $ns1 wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10 userspace_pm_add_addr $ns1 10.0.2.1 10
chk_join_nr 1 1 1 userspace_pm_add_addr $ns1 10.0.3.1 20
chk_add_nr 1 1 chk_join_nr 2 2 2
chk_mptcp_info subflows 1 subflows 1 chk_add_nr 2 2
chk_subflows_total 2 2 chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 chk_subflows_total 3 3
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
userspace_pm_rm_addr $ns1 10 userspace_pm_rm_addr $ns1 10
userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED
chk_rm_nr 1 1 invert userspace_pm_rm_addr $ns1 20
userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED
chk_rm_nr 2 2 invert
chk_mptcp_info subflows 0 subflows 0 chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1 chk_subflows_total 1 1
kill_events_pids kill_events_pids
......
...@@ -213,9 +213,9 @@ mptcp_lib_get_info_value() { ...@@ -213,9 +213,9 @@ mptcp_lib_get_info_value() {
grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
} }
# $1: info name ; $2: evts_ns ; $3: event type # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
mptcp_lib_evts_get_info() { mptcp_lib_evts_get_info() {
mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
} }
# $1: PID # $1: PID
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment