Commit 965bffd2 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-fixes'

Matthieu Baerts says:

====================
mptcp: fixes for v6.2

Patch 1 clears resources earlier if there is no more reasons to keep
MPTCP sockets alive.

Patches 2 and 3 fix some locking issues visible in some rare corner
cases: the linked issues should be quite hard to reproduce.

Patch 4 makes sure subflows are correctly cleaned after the end of a
connection.

Patch 5 and 6 improve the selftests stability when running in a slow
environment by transfering data for a longer period on one hand and by
stopping the tests when all expected events have been observed on the
other hand.

All these patches fix issues introduced before v6.2.
====================
Signed-off-by: default avatarMatthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 1a3245fe 070d6daf
...@@ -998,8 +998,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, ...@@ -998,8 +998,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
{ {
int addrlen = sizeof(struct sockaddr_in); int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr; struct sockaddr_storage addr;
struct mptcp_sock *msk;
struct socket *ssock; struct socket *ssock;
struct sock *newsk;
int backlog = 1024; int backlog = 1024;
int err; int err;
...@@ -1008,11 +1008,13 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, ...@@ -1008,11 +1008,13 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err) if (err)
return err; return err;
msk = mptcp_sk(entry->lsk->sk); newsk = entry->lsk->sk;
if (!msk) if (!newsk)
return -EINVAL; return -EINVAL;
ssock = __mptcp_nmpc_socket(msk); lock_sock(newsk);
ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
release_sock(newsk);
if (!ssock) if (!ssock)
return -EINVAL; return -EINVAL;
......
...@@ -2897,6 +2897,7 @@ bool __mptcp_close(struct sock *sk, long timeout) ...@@ -2897,6 +2897,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
struct mptcp_subflow_context *subflow; struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false; bool do_cancel_work = false;
int subflows_alive = 0;
sk->sk_shutdown = SHUTDOWN_MASK; sk->sk_shutdown = SHUTDOWN_MASK;
...@@ -2922,6 +2923,8 @@ bool __mptcp_close(struct sock *sk, long timeout) ...@@ -2922,6 +2923,8 @@ bool __mptcp_close(struct sock *sk, long timeout)
struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk); bool slow = lock_sock_fast_nested(ssk);
subflows_alive += ssk->sk_state != TCP_CLOSE;
/* since the close timeout takes precedence on the fail one, /* since the close timeout takes precedence on the fail one,
* cancel the latter * cancel the latter
*/ */
...@@ -2937,6 +2940,12 @@ bool __mptcp_close(struct sock *sk, long timeout) ...@@ -2937,6 +2940,12 @@ bool __mptcp_close(struct sock *sk, long timeout)
} }
sock_orphan(sk); sock_orphan(sk);
/* all the subflows are closed, only timeout can change the msk
* state, let's not keep resources busy for no reasons
*/
if (subflows_alive == 0)
inet_sk_state_store(sk, TCP_CLOSE);
sock_hold(sk); sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state); pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (msk->token) if (msk->token)
......
...@@ -760,14 +760,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, ...@@ -760,14 +760,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
sockptr_t optval, unsigned int optlen) sockptr_t optval, unsigned int optlen)
{ {
struct sock *sk = (struct sock *)msk;
struct socket *sock; struct socket *sock;
int ret = -EINVAL;
/* Limit to first subflow, before the connection establishment */ /* Limit to first subflow, before the connection establishment */
lock_sock(sk);
sock = __mptcp_nmpc_socket(msk); sock = __mptcp_nmpc_socket(msk);
if (!sock) if (!sock)
return -EINVAL; goto unlock;
return tcp_setsockopt(sock->sk, level, optname, optval, optlen); ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
unlock:
release_sock(sk);
return ret;
} }
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
......
...@@ -1399,6 +1399,7 @@ void __mptcp_error_report(struct sock *sk) ...@@ -1399,6 +1399,7 @@ void __mptcp_error_report(struct sock *sk)
mptcp_for_each_subflow(msk, subflow) { mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int err = sock_error(ssk); int err = sock_error(ssk);
int ssk_state;
if (!err) if (!err)
continue; continue;
...@@ -1409,7 +1410,14 @@ void __mptcp_error_report(struct sock *sk) ...@@ -1409,7 +1410,14 @@ void __mptcp_error_report(struct sock *sk)
if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
continue; continue;
inet_sk_state_store(sk, inet_sk_state_load(ssk)); /* We need to propagate only transition to CLOSE state.
* Orphaned socket will see such state change via
* subflow_sched_work_if_closed() and that path will properly
* destroy the msk as needed.
*/
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
inet_sk_state_store(sk, ssk_state);
sk->sk_err = -err; sk->sk_err = -err;
/* This barrier is coupled with smp_rmb() in mptcp_poll() */ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
...@@ -1679,7 +1687,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, ...@@ -1679,7 +1687,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
if (err) if (err)
return err; return err;
lock_sock(sf->sk); lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
/* the newly created socket has to be in the same cgroup as its parent */ /* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk); mptcp_attach_cgroup(sk, sf->sk);
......
...@@ -498,6 +498,12 @@ kill_events_pids() ...@@ -498,6 +498,12 @@ kill_events_pids()
kill_wait $evts_ns2_pid kill_wait $evts_ns2_pid
} }
kill_tests_wait()
{
kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1)
wait
}
pm_nl_set_limits() pm_nl_set_limits()
{ {
local ns=$1 local ns=$1
...@@ -1694,6 +1700,7 @@ chk_subflow_nr() ...@@ -1694,6 +1700,7 @@ chk_subflow_nr()
local subflow_nr=$3 local subflow_nr=$3
local cnt1 local cnt1
local cnt2 local cnt2
local dump_stats
if [ -n "${need_title}" ]; then if [ -n "${need_title}" ]; then
printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}" printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
...@@ -1711,7 +1718,12 @@ chk_subflow_nr() ...@@ -1711,7 +1718,12 @@ chk_subflow_nr()
echo "[ ok ]" echo "[ ok ]"
fi fi
[ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint ) if [ "${dump_stats}" = 1 ]; then
ss -N $ns1 -tOni
ss -N $ns1 -tOni | grep token
ip -n $ns1 mptcp endpoint
dump_stats
fi
} }
chk_link_usage() chk_link_usage()
...@@ -3049,7 +3061,7 @@ endpoint_tests() ...@@ -3049,7 +3061,7 @@ endpoint_tests()
pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2 pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null &
wait_mpj $ns1 wait_mpj $ns1
pm_nl_check_endpoint 1 "creation" \ pm_nl_check_endpoint 1 "creation" \
...@@ -3062,14 +3074,14 @@ endpoint_tests() ...@@ -3062,14 +3074,14 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
pm_nl_check_endpoint 0 "modif is allowed" \ pm_nl_check_endpoint 0 "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal $ns2 10.0.2.2 id 1 flags signal
wait kill_tests_wait
fi fi
if reset "delete and re-add"; then if reset "delete and re-add"; then
pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1 pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow & run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null &
wait_mpj $ns2 wait_mpj $ns2
pm_nl_del_endpoint $ns2 2 10.0.2.2 pm_nl_del_endpoint $ns2 2 10.0.2.2
...@@ -3079,7 +3091,7 @@ endpoint_tests() ...@@ -3079,7 +3091,7 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
wait_mpj $ns2 wait_mpj $ns2
chk_subflow_nr "" "after re-add" 2 chk_subflow_nr "" "after re-add" 2
wait kill_tests_wait
fi fi
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment