Commit e16163b6 authored by Paolo Abeni's avatar Paolo Abeni Committed by Jakub Kicinski

mptcp: refactor shutdown and close

We must not close the subflows before all the MPTCP level
data, comprising the DATA_FIN has been acked at the MPTCP
level, otherwise we could be unable to retransmit as needed.

__mptcp_wr_shutdown() shutdown is responsible to check for the
correct status and close all subflows. Is called by the output
path after spooling any data and at shutdown/close time.

In a similar way, __mptcp_destroy_sock() is responsible to clean-up
the MPTCP level status, and is called when the msk transition
to TCP_CLOSE.

The protocol level close() does not force anymore the TCP_CLOSE
status, but orphan the msk socket and all the subflows.
Orphaned msk sockets are forciby closed after a timeout or
when all MPTCP-level data is acked.

There is a caveat about keeping the orphaned subflows around:
the TCP stack can asynchronusly call tcp_cleanup_ulp() on them via
tcp_close(). To prevent accessing freed memory on later MPTCP
level operations, the msk acquires a reference to each subflow
socket and prevent subflow_ulp_release() from releasing the
subflow context before __mptcp_destroy_sock().

The additional subflow references are released by __mptcp_done()
and the async ULP release is detected checking ULP ops. If such
field has been already cleared by the ULP release path, the
dangling context is freed directly by __mptcp_done().
Co-developed-by: default avatarDavide Caratti <dcaratti@redhat.com>
Signed-off-by: default avatarDavide Caratti <dcaratti@redhat.com>
Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent eaa2ffab
......@@ -492,7 +492,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false;
mpext = skb ? mptcp_get_ext(skb) : NULL;
snd_data_fin_enable = READ_ONCE(msk->snd_data_fin_enable);
snd_data_fin_enable = mptcp_data_fin_enabled(msk);
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
unsigned int map_size;
......
......@@ -416,14 +416,13 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
long timeout = 0;
if (msk->pm.rm_id != subflow->remote_id)
continue;
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
__mptcp_close_ssk(sk, ssk, subflow, timeout);
__mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.add_addr_accepted--;
......@@ -452,14 +451,13 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
long timeout = 0;
if (rm_id != subflow->local_id)
continue;
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
__mptcp_close_ssk(sk, ssk, subflow, timeout);
__mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.local_addr_used--;
......
This diff is collapsed.
......@@ -91,6 +91,7 @@
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
#define MPTCP_WORKER_RUNNING 6
static inline bool before64(__u64 seq1, __u64 seq2)
{
......@@ -352,7 +353,8 @@ struct mptcp_subflow_context {
mpc_map : 1,
backup : 1,
rx_eof : 1,
can_ack : 1; /* only after processing the remote a key */
can_ack : 1, /* only after processing the remote a key */
disposable : 1; /* ctx can be free at ulp release time */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
......@@ -409,8 +411,7 @@ bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
long timeout);
struct mptcp_subflow_context *subflow);
void mptcp_subflow_reset(struct sock *ssk);
/* called with sk socket lock held */
......@@ -452,6 +453,12 @@ bool mptcp_schedule_work(struct sock *sk);
void mptcp_data_acked(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
return READ_ONCE(msk->snd_data_fin_enable) &&
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
void mptcp_destroy_common(struct mptcp_sock *msk);
void __init mptcp_token_init(void);
......
......@@ -1125,6 +1125,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (err && err != -EINPROGRESS)
goto failed;
sock_hold(ssk);
spin_lock_bh(&msk->join_list_lock);
list_add_tail(&subflow->node, &msk->join_list);
spin_unlock_bh(&msk->join_list_lock);
......@@ -1132,6 +1133,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
return err;
failed:
subflow->disposable = 1;
sock_release(sf);
return err;
}
......@@ -1254,7 +1256,6 @@ static void subflow_state_change(struct sock *sk)
mptcp_data_ready(parent, sk);
if (__mptcp_check_fallback(mptcp_sk(parent)) &&
!(parent->sk_shutdown & RCV_SHUTDOWN) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;
mptcp_subflow_eof(parent);
......@@ -1297,17 +1298,26 @@ static int subflow_ulp_init(struct sock *sk)
return err;
}
static void subflow_ulp_release(struct sock *sk)
static void subflow_ulp_release(struct sock *ssk)
{
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
bool release = true;
struct sock *sk;
if (!ctx)
return;
if (ctx->conn)
sock_put(ctx->conn);
sk = ctx->conn;
if (sk) {
/* if the msk has been orphaned, keep the ctx
* alive, will be freed by mptcp_done()
*/
release = ctx->disposable;
sock_put(sk);
}
kfree_rcu(ctx, rcu);
if (release)
kfree_rcu(ctx, rcu);
}
static void subflow_ulp_clone(const struct request_sock *req,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment