Commit 2201124d authored by Jakub Kicinski

Merge branch 'mptcp-userspace-path-manager-prerequisites'

Mat Martineau says:

====================
mptcp: Userspace path manager prerequisites

This series builds upon the path manager mode selection changes merged
in 4994d4fa ("Merge branch 'mptcp-path-manager-mode-selection'") to
further modify the path manager code in preparation for adding the new
netlink commands to announce/remove advertised addresses and
create/destroy subflows of an MPTCP connection. The third and final
patch series for the userspace path manager will implement those
commands as discussed in
https://lore.kernel.org/netdev/23ff3b49-2563-1874-fa35-3af55d3088e7@linux.intel.com/#r

Patches 1, 5, and 7 remove some internal constraints on path managers
(in general) without changing in-kernel PM behavior.

Patch 2 adds a self test to validate MPTCP address advertisement ack
behavior.

Patches 3, 4, and 6 add new attributes to existing MPTCP netlink events
and track internal state for populating those attributes.
====================

Link: https://lore.kernel.org/r/20220502205237.129297-1-mathew.j.martineau@linux.intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 2b68abf9 304ab97f
...@@ -188,6 +188,7 @@ enum mptcp_event_attr { ...@@ -188,6 +188,7 @@ enum mptcp_event_attr {
MPTCP_ATTR_IF_IDX, /* s32 */ MPTCP_ATTR_IF_IDX, /* s32 */
MPTCP_ATTR_RESET_REASON,/* u32 */ MPTCP_ATTR_RESET_REASON,/* u32 */
MPTCP_ATTR_RESET_FLAGS, /* u32 */ MPTCP_ATTR_RESET_FLAGS, /* u32 */
MPTCP_ATTR_SERVER_SIDE, /* u8 */
__MPTCP_ATTR_AFTER_LAST __MPTCP_ATTR_AFTER_LAST
}; };
......
...@@ -931,7 +931,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, ...@@ -931,7 +931,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
READ_ONCE(msk->pm.server_side)) !subflow->request_join)
tcp_send_ack(ssk); tcp_send_ack(ssk);
goto fully_established; goto fully_established;
} }
...@@ -1133,7 +1133,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1133,7 +1133,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
add_addr_hmac_valid(msk, &mp_opt)) { add_addr_hmac_valid(msk, &mp_opt)) {
if (!mp_opt.echo) { if (!mp_opt.echo) {
mptcp_pm_add_addr_received(msk, &mp_opt.addr); mptcp_pm_add_addr_received(sk, &mp_opt.addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
} else { } else {
mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
......
...@@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) ...@@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
unsigned int subflows_max; unsigned int subflows_max;
int ret = 0; int ret = 0;
if (mptcp_pm_is_userspace(msk))
return mptcp_userspace_pm_active(msk);
subflows_max = mptcp_pm_get_subflows_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk);
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
...@@ -179,7 +182,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, ...@@ -179,7 +182,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
bool update_subflows; bool update_subflows;
update_subflows = (ssk->sk_state == TCP_CLOSE) && update_subflows = (ssk->sk_state == TCP_CLOSE) &&
(subflow->request_join || subflow->mp_join); (subflow->request_join || subflow->mp_join) &&
mptcp_pm_is_kernel(msk);
if (!READ_ONCE(pm->work_pending) && !update_subflows) if (!READ_ONCE(pm->work_pending) && !update_subflows)
return; return;
...@@ -196,19 +200,28 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, ...@@ -196,19 +200,28 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock); spin_unlock_bh(&pm->lock);
} }
void mptcp_pm_add_addr_received(struct mptcp_sock *msk, void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr) const struct mptcp_addr_info *addr)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_pm_data *pm = &msk->pm; struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr)); READ_ONCE(pm->accept_addr));
mptcp_event_addr_announced(msk, addr); mptcp_event_addr_announced(ssk, addr);
spin_lock_bh(&pm->lock); spin_lock_bh(&pm->lock);
if (!READ_ONCE(pm->accept_addr) || mptcp_pm_is_userspace(msk)) { if (mptcp_pm_is_userspace(msk)) {
if (mptcp_userspace_pm_active(msk)) {
mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk);
} else {
__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
}
} else if (!READ_ONCE(pm->accept_addr)) {
mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk); mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
......
...@@ -369,8 +369,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, ...@@ -369,8 +369,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock); lockdep_assert_held(&msk->pm.lock);
if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
return false;
if (add_entry) {
if (mptcp_pm_is_kernel(msk))
return false;
sk_reset_timer(sk, &add_entry->add_timer,
jiffies + mptcp_get_add_addr_timeout(net));
return true;
}
add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
if (!add_entry) if (!add_entry)
...@@ -805,6 +813,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, ...@@ -805,6 +813,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
if (!removed) if (!removed)
continue; continue;
if (!mptcp_pm_is_kernel(msk))
continue;
if (rm_type == MPTCP_MIB_RMADDR) { if (rm_type == MPTCP_MIB_RMADDR) {
msk->pm.add_addr_accepted--; msk->pm.add_addr_accepted--;
WRITE_ONCE(msk->pm.accept_addr, true); WRITE_ONCE(msk->pm.accept_addr, true);
...@@ -1855,6 +1866,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf ...@@ -1855,6 +1866,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf
nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
} }
/* Report whether the MPTCP generic netlink family has any listener on the
 * MPTCP_PM_EV_GRP_OFFSET multicast group in the network namespace of @msk.
 *
 * NOTE(review): callers appear to treat a listener as "a userspace path
 * manager daemon is running" -- confirm against the callers.
 */
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk)
{
	const struct sock *sk = (const struct sock *)msk;
	struct net *net = sock_net(sk);

	return genl_has_listeners(&mptcp_genl_family, net,
				  MPTCP_PM_EV_GRP_OFFSET);
}
static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
{ {
const struct inet_sock *issk = inet_sk(ssk); const struct inet_sock *issk = inet_sk(ssk);
...@@ -1975,6 +1993,9 @@ static int mptcp_event_created(struct sk_buff *skb, ...@@ -1975,6 +1993,9 @@ static int mptcp_event_created(struct sk_buff *skb,
if (err) if (err)
return err; return err;
if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
return -EMSGSIZE;
return mptcp_event_add_subflow(skb, ssk); return mptcp_event_add_subflow(skb, ssk);
} }
...@@ -2009,10 +2030,12 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) ...@@ -2009,10 +2030,12 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
kfree_skb(skb); kfree_skb(skb);
} }
void mptcp_event_addr_announced(const struct mptcp_sock *msk, void mptcp_event_addr_announced(const struct sock *ssk,
const struct mptcp_addr_info *info) const struct mptcp_addr_info *info)
{ {
struct net *net = sock_net((const struct sock *)msk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct net *net = sock_net(ssk);
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
struct sk_buff *skb; struct sk_buff *skb;
...@@ -2034,7 +2057,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk, ...@@ -2034,7 +2057,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk,
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
goto nla_put_failure; goto nla_put_failure;
if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) if (nla_put_be16(skb, MPTCP_ATTR_DPORT,
info->port == 0 ?
inet_sk(ssk)->inet_dport :
info->port))
goto nla_put_failure; goto nla_put_failure;
switch (info->family) { switch (info->family) {
......
...@@ -3321,15 +3321,12 @@ bool mptcp_finish_join(struct sock *ssk) ...@@ -3321,15 +3321,12 @@ bool mptcp_finish_join(struct sock *ssk)
return false; return false;
} }
if (!msk->pm.server_side) if (!list_empty(&subflow->node))
goto out; goto out;
if (!mptcp_pm_allow_new_subflow(msk)) if (!mptcp_pm_allow_new_subflow(msk))
goto err_prohibited; goto err_prohibited;
if (WARN_ON_ONCE(!list_empty(&subflow->node)))
goto err_prohibited;
/* active connections are already on conn_list. /* active connections are already on conn_list.
* If we can't acquire msk socket lock here, let the release callback * If we can't acquire msk socket lock here, let the release callback
* handle it * handle it
......
...@@ -753,7 +753,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk); ...@@ -753,7 +753,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
const struct mptcp_subflow_context *subflow); const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk, void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr); const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr); const struct mptcp_addr_info *addr);
...@@ -782,8 +782,9 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list * ...@@ -782,8 +782,9 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp); const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{ {
...@@ -811,6 +812,11 @@ static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk) ...@@ -811,6 +812,11 @@ static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE; return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
} }
/* Return true when the path manager type recorded in @msk->pm.pm_type is
 * MPTCP_PM_TYPE_KERNEL. The field is sampled once with READ_ONCE().
 */
static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL;
}
static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{ {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
...@@ -905,13 +911,17 @@ static inline bool mptcp_check_infinite_map(struct sk_buff *skb) ...@@ -905,13 +911,17 @@ static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
return false; return false;
} }
static inline bool is_active_ssk(struct mptcp_subflow_context *subflow)
{
return (subflow->request_mptcp || subflow->request_join);
}
static inline bool subflow_simultaneous_connect(struct sock *sk) static inline bool subflow_simultaneous_connect(struct sock *sk)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
return sk->sk_state == TCP_ESTABLISHED && return sk->sk_state == TCP_ESTABLISHED &&
!mptcp_sk(parent)->pm.server_side && is_active_ssk(subflow) &&
!subflow->conn_finished; !subflow->conn_finished;
} }
......
...@@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, ...@@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{ {
return mptcp_is_fully_established((void *)msk) && return mptcp_is_fully_established((void *)msk) &&
READ_ONCE(msk->pm.accept_subflow); ((mptcp_pm_is_userspace(msk) &&
mptcp_userspace_pm_active(msk)) ||
READ_ONCE(msk->pm.accept_subflow));
} }
/* validate received token and create truncated hmac and nonce for SYN-ACK */ /* validate received token and create truncated hmac and nonce for SYN-ACK */
...@@ -441,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -441,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup; subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac; subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce; subflow->remote_nonce = mp_opt.nonce;
subflow->remote_id = mp_opt.join_id;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce, subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup); subflow->backup);
......
...@@ -2719,6 +2719,17 @@ userspace_tests() ...@@ -2719,6 +2719,17 @@ userspace_tests()
chk_add_nr 0 0 chk_add_nr 0 0
fi fi
# userspace pm type does not echo add_addr without daemon
if reset "userspace pm no echo w/o daemon"; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_add_nr 1 0
fi
# userspace pm type rejects join # userspace pm type rejects join
if reset "userspace pm type rejects join"; then if reset "userspace pm type rejects join"; then
set_userspace_pm $ns1 set_userspace_pm $ns1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment