Commit 0384dd9d authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-refactor'

Mat Martineau says:

====================
mptcp: Refactor ADD_ADDR/RM_ADDR handling

This patch set changes the way MPTCP ADD_ADDR and RM_ADDR options are
handled to improve the reliability of sending and updating address
advertisements. The information used to populate outgoing advertisement
option headers is now stored separately to avoid rare cases where a more
recent request would overwrite something that had not been sent
yet. While the peers would recover from this, it's better to avoid the
problem in the first place.

Patch 1 moves an advertisement option check under a lock so the changes
made in the next several patches will not introduce a race.

Patches 2-4 make sure ADD_ADDR, ADD_ADDR echo, and RM_ADDR options use
separate flags and data.

Patch 5 removes some now-redundant flags.

Patch 6 adds a selftest that confirms the advertisement reliability
improvements.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents faf482ca 33c563ad
...@@ -667,29 +667,29 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * ...@@ -667,29 +667,29 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
bool port; bool port;
int len; int len;
if ((mptcp_pm_should_add_signal_ipv6(msk) || /* add addr will strip the existing options, be sure to avoid breaking
mptcp_pm_should_add_signal_port(msk) || * MPC/MPJ handshakes
mptcp_pm_should_add_signal_echo(msk)) && */
skb && skb_is_tcp_pure_ack(skb)) {
pr_debug("drop other suboptions");
opts->suboptions = 0;
opts->ext_copy.use_ack = 0;
opts->ext_copy.use_map = 0;
remaining += opt_size;
drop_other_suboptions = true;
}
if (!mptcp_pm_should_add_signal(msk) || if (!mptcp_pm_should_add_signal(msk) ||
!(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port))) (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
!mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
&echo, &port, &drop_other_suboptions))
return false; return false;
if (drop_other_suboptions)
remaining += opt_size;
len = mptcp_add_addr_len(opts->addr.family, echo, port); len = mptcp_add_addr_len(opts->addr.family, echo, port);
if (remaining < len) if (remaining < len)
return false; return false;
*size = len; *size = len;
if (drop_other_suboptions) if (drop_other_suboptions) {
pr_debug("drop other suboptions");
opts->suboptions = 0;
opts->ext_copy.use_ack = 0;
opts->ext_copy.use_map = 0;
*size -= opt_size; *size -= opt_size;
}
opts->suboptions |= OPTION_MPTCP_ADD_ADDR; opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) { if (!echo) {
opts->ahmac = add_addr_generate_hmac(msk->local_key, opts->ahmac = add_addr_generate_hmac(msk->local_key,
......
...@@ -20,23 +20,23 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, ...@@ -20,23 +20,23 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
{ {
u8 add_addr = READ_ONCE(msk->pm.addr_signal); u8 add_addr = READ_ONCE(msk->pm.addr_signal);
pr_debug("msk=%p, local_id=%d", msk, addr->id); pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);
lockdep_assert_held(&msk->pm.lock); lockdep_assert_held(&msk->pm.lock);
if (add_addr) { if (add_addr &
pr_warn("addr_signal error, add_addr=%d", add_addr); (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
return -EINVAL; return -EINVAL;
} }
if (echo) {
msk->pm.remote = *addr;
add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
} else {
msk->pm.local = *addr; msk->pm.local = *addr;
add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
if (echo) }
add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
if (addr->family == AF_INET6)
add_addr |= BIT(MPTCP_ADD_ADDR_IPV6);
if (addr->port)
add_addr |= BIT(MPTCP_ADD_ADDR_PORT);
WRITE_ONCE(msk->pm.addr_signal, add_addr); WRITE_ONCE(msk->pm.addr_signal, add_addr);
return 0; return 0;
} }
...@@ -251,10 +251,14 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) ...@@ -251,10 +251,14 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
/* path manager helpers */ /* path manager helpers */
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
struct mptcp_addr_info *saddr, bool *echo, bool *port) unsigned int opt_size, unsigned int remaining,
struct mptcp_addr_info *addr, bool *echo,
bool *port, bool *drop_other_suboptions)
{ {
int ret = false; int ret = false;
u8 add_addr;
u8 family;
spin_lock_bh(&msk->pm.lock); spin_lock_bh(&msk->pm.lock);
...@@ -262,14 +266,30 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, ...@@ -262,14 +266,30 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
if (!mptcp_pm_should_add_signal(msk)) if (!mptcp_pm_should_add_signal(msk))
goto out_unlock; goto out_unlock;
/* always drop every other options for pure ack ADD_ADDR; this is a
* plain dup-ack from TCP perspective. The other MPTCP-relevant info,
* if any, will be carried by the 'original' TCP ack
*/
if (skb && skb_is_tcp_pure_ack(skb)) {
remaining += opt_size;
*drop_other_suboptions = true;
}
*echo = mptcp_pm_should_add_signal_echo(msk); *echo = mptcp_pm_should_add_signal_echo(msk);
*port = mptcp_pm_should_add_signal_port(msk); *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo, *port)) family = *echo ? msk->pm.remote.family : msk->pm.local.family;
if (remaining < mptcp_add_addr_len(family, *echo, *port))
goto out_unlock; goto out_unlock;
*saddr = msk->pm.local; if (*echo) {
WRITE_ONCE(msk->pm.addr_signal, 0); *addr = msk->pm.remote;
add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
} else {
*addr = msk->pm.local;
add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
}
WRITE_ONCE(msk->pm.addr_signal, add_addr);
ret = true; ret = true;
out_unlock: out_unlock:
...@@ -281,6 +301,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, ...@@ -281,6 +301,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list) struct mptcp_rm_list *rm_list)
{ {
int ret = false, len; int ret = false, len;
u8 rm_addr;
spin_lock_bh(&msk->pm.lock); spin_lock_bh(&msk->pm.lock);
...@@ -288,16 +309,17 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, ...@@ -288,16 +309,17 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
if (!mptcp_pm_should_rm_signal(msk)) if (!mptcp_pm_should_rm_signal(msk))
goto out_unlock; goto out_unlock;
rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
if (len < 0) { if (len < 0) {
WRITE_ONCE(msk->pm.addr_signal, 0); WRITE_ONCE(msk->pm.addr_signal, rm_addr);
goto out_unlock; goto out_unlock;
} }
if (remaining < len) if (remaining < len)
goto out_unlock; goto out_unlock;
*rm_list = msk->pm.rm_list_tx; *rm_list = msk->pm.rm_list_tx;
WRITE_ONCE(msk->pm.addr_signal, 0); WRITE_ONCE(msk->pm.addr_signal, rm_addr);
ret = true; ret = true;
out_unlock: out_unlock:
......
...@@ -317,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer) ...@@ -317,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
if (!entry->addr.id) if (!entry->addr.id)
return; return;
if (mptcp_pm_should_add_signal(msk)) { if (mptcp_pm_should_add_signal_addr(msk)) {
sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
goto out; goto out;
} }
spin_lock_bh(&msk->pm.lock); spin_lock_bh(&msk->pm.lock);
if (!mptcp_pm_should_add_signal(msk)) { if (!mptcp_pm_should_add_signal_addr(msk)) {
pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk); mptcp_pm_add_addr_send_ack(msk);
...@@ -647,10 +647,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) ...@@ -647,10 +647,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
bool slow; bool slow;
spin_unlock_bh(&msk->pm.lock); spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for %s%s%s", pr_debug("send ack for %s",
mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr", mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
slow = lock_sock_fast(ssk); slow = lock_sock_fast(ssk);
tcp_send_ack(ssk); tcp_send_ack(ssk);
......
...@@ -178,8 +178,6 @@ enum mptcp_pm_status { ...@@ -178,8 +178,6 @@ enum mptcp_pm_status {
enum mptcp_addr_signal_status { enum mptcp_addr_signal_status {
MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_SIGNAL,
MPTCP_ADD_ADDR_ECHO, MPTCP_ADD_ADDR_ECHO,
MPTCP_ADD_ADDR_IPV6,
MPTCP_ADD_ADDR_PORT,
MPTCP_RM_ADDR_SIGNAL, MPTCP_RM_ADDR_SIGNAL,
}; };
...@@ -748,22 +746,18 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); ...@@ -748,22 +746,18 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{ {
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); return READ_ONCE(msk->pm.addr_signal) &
(BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO));
} }
static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk)
{ {
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
}
static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6);
} }
static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk) static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
{ {
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT); return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
} }
static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
...@@ -794,8 +788,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) ...@@ -794,8 +788,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
} }
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
struct mptcp_addr_info *saddr, bool *echo, bool *port); unsigned int opt_size, unsigned int remaining,
struct mptcp_addr_info *addr, bool *echo,
bool *port, bool *drop_other_suboptions);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list); struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
......
...@@ -1016,6 +1016,21 @@ signal_address_tests() ...@@ -1016,6 +1016,21 @@ signal_address_tests()
run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal invalid addresses" 1 1 1 chk_join_nr "signal invalid addresses" 1 1 1
chk_add_nr 3 3 chk_add_nr 3 3
# signal addresses race test
reset
ip netns exec $ns1 ./pm_nl_ctl limits 4 4
ip netns exec $ns2 ./pm_nl_ctl limits 4 4
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_add_nr 4 4
} }
link_failure_tests() link_failure_tests()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment