Commit fb64bb56 authored by Michal Ostrowski's avatar Michal Ostrowski Committed by David S. Miller

PPPoE: Fix flush/close races.

Be more careful about the state of pointers during tear-down.
The "pppoe_dev" field can only be looked at safely while holding socket locks.
This subsequently allows for the flush_lock to be killed.

We depend on the PPPOX_CONNECTED state to tell us that that those fields are
valid, so whoever clears that state (pppox_unbind_sock()) is responsible for
the dev_put() call.

We also have to ensure that we delete_item() on all sockets before they are
cleaned up.

The need for these changes has been exposed by scenarios wherein namespace
bindings of ethernet devices change while there are ongoing PPPoE sessions,
which resulted in oopses due to unusual socket connection termination paths,
exposing these issues.
Signed-off-by: default avatarMichal Ostrowski <mostrows@gmail.com>
Reviewed-by: default avatarCyril Gorcunov <gorcunov@gmail.com>
Reported-by: default avatarDenys Fedoryschenko <denys@visp.net.lb>
Tested-by: default avatarDenys Fedoryschenko <denys@visp.net.lb>
parent 5ccdcecb
...@@ -111,9 +111,6 @@ struct pppoe_net { ...@@ -111,9 +111,6 @@ struct pppoe_net {
rwlock_t hash_lock; rwlock_t hash_lock;
}; };
/* to eliminate a race btw pppoe_flush_dev and pppoe_release */
static DEFINE_SPINLOCK(flush_lock);
/* /*
* PPPoE could be in the following stages: * PPPoE could be in the following stages:
* 1) Discovery stage (to obtain remote MAC and Session ID) * 1) Discovery stage (to obtain remote MAC and Session ID)
...@@ -303,45 +300,48 @@ static void pppoe_flush_dev(struct net_device *dev) ...@@ -303,45 +300,48 @@ static void pppoe_flush_dev(struct net_device *dev)
write_lock_bh(&pn->hash_lock); write_lock_bh(&pn->hash_lock);
for (i = 0; i < PPPOE_HASH_SIZE; i++) { for (i = 0; i < PPPOE_HASH_SIZE; i++) {
struct pppox_sock *po = pn->hash_table[i]; struct pppox_sock *po = pn->hash_table[i];
struct sock *sk;
while (po != NULL) { while (po) {
struct sock *sk; while (po && po->pppoe_dev != dev) {
if (po->pppoe_dev != dev) {
po = po->next; po = po->next;
continue;
} }
if (!po)
break;
sk = sk_pppox(po); sk = sk_pppox(po);
spin_lock(&flush_lock);
po->pppoe_dev = NULL;
spin_unlock(&flush_lock);
dev_put(dev);
/* We always grab the socket lock, followed by the /* We always grab the socket lock, followed by the
* hash_lock, in that order. Since we should * hash_lock, in that order. Since we should hold the
* hold the sock lock while doing any unbinding, * sock lock while doing any unbinding, we need to
* we need to release the lock we're holding. * release the lock we're holding. Hold a reference to
* Hold a reference to the sock so it doesn't disappear * the sock so it doesn't disappear as we're jumping
* as we're jumping between locks. * between locks.
*/ */
sock_hold(sk); sock_hold(sk);
write_unlock_bh(&pn->hash_lock); write_unlock_bh(&pn->hash_lock);
lock_sock(sk); lock_sock(sk);
if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { if (po->pppoe_dev == dev
&& sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
pppox_unbind_sock(sk); pppox_unbind_sock(sk);
sk->sk_state = PPPOX_ZOMBIE; sk->sk_state = PPPOX_ZOMBIE;
sk->sk_state_change(sk); sk->sk_state_change(sk);
po->pppoe_dev = NULL;
dev_put(dev);
} }
release_sock(sk); release_sock(sk);
sock_put(sk); sock_put(sk);
/* Restart scan at the beginning of this hash chain. /* Restart the process from the start of the current
* While the lock was dropped the chain contents may * hash chain. We dropped locks so the world may have
* have changed. * change from underneath us.
*/ */
BUG_ON(pppoe_pernet(dev_net(dev)) == NULL);
write_lock_bh(&pn->hash_lock); write_lock_bh(&pn->hash_lock);
po = pn->hash_table[i]; po = pn->hash_table[i];
} }
...@@ -388,11 +388,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) ...@@ -388,11 +388,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
struct pppox_sock *po = pppox_sk(sk); struct pppox_sock *po = pppox_sk(sk);
struct pppox_sock *relay_po; struct pppox_sock *relay_po;
/* Backlog receive. Semantics of backlog rcv preclude any code from
* executing in lock_sock()/release_sock() bounds; meaning sk->sk_state
* can't change.
*/
if (sk->sk_state & PPPOX_BOUND) { if (sk->sk_state & PPPOX_BOUND) {
ppp_input(&po->chan, skb); ppp_input(&po->chan, skb);
} else if (sk->sk_state & PPPOX_RELAY) { } else if (sk->sk_state & PPPOX_RELAY) {
relay_po = get_item_by_addr(dev_net(po->pppoe_dev), relay_po = get_item_by_addr(sock_net(sk),
&po->pppoe_relay); &po->pppoe_relay);
if (relay_po == NULL) if (relay_po == NULL)
goto abort_kfree; goto abort_kfree;
...@@ -447,6 +452,10 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, ...@@ -447,6 +452,10 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop; goto drop;
pn = pppoe_pernet(dev_net(dev)); pn = pppoe_pernet(dev_net(dev));
/* Note that get_item does a sock_hold(), so sk_pppox(po)
* is known to be safe.
*/
po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
if (!po) if (!po)
goto drop; goto drop;
...@@ -561,6 +570,7 @@ static int pppoe_release(struct socket *sock) ...@@ -561,6 +570,7 @@ static int pppoe_release(struct socket *sock)
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct pppox_sock *po; struct pppox_sock *po;
struct pppoe_net *pn; struct pppoe_net *pn;
struct net *net = NULL;
if (!sk) if (!sk)
return 0; return 0;
...@@ -571,44 +581,28 @@ static int pppoe_release(struct socket *sock) ...@@ -571,44 +581,28 @@ static int pppoe_release(struct socket *sock)
return -EBADF; return -EBADF;
} }
po = pppox_sk(sk);
if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
dev_put(po->pppoe_dev);
po->pppoe_dev = NULL;
}
pppox_unbind_sock(sk); pppox_unbind_sock(sk);
/* Signal the death of the socket. */ /* Signal the death of the socket. */
sk->sk_state = PPPOX_DEAD; sk->sk_state = PPPOX_DEAD;
/* net = sock_net(sk);
* pppoe_flush_dev could lead to a race with pn = pppoe_pernet(net);
* this routine so we use flush_lock to eliminate
* such a case (we only need per-net specific data)
*/
spin_lock(&flush_lock);
po = pppox_sk(sk);
if (!po->pppoe_dev) {
spin_unlock(&flush_lock);
goto out;
}
pn = pppoe_pernet(dev_net(po->pppoe_dev));
spin_unlock(&flush_lock);
/* /*
* protect "po" from concurrent updates * protect "po" from concurrent updates
* on pppoe_flush_dev * on pppoe_flush_dev
*/ */
write_lock_bh(&pn->hash_lock); delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
po->pppoe_ifindex);
po = pppox_sk(sk);
if (stage_session(po->pppoe_pa.sid))
__delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
po->pppoe_ifindex);
if (po->pppoe_dev) {
dev_put(po->pppoe_dev);
po->pppoe_dev = NULL;
}
write_unlock_bh(&pn->hash_lock);
out:
sock_orphan(sk); sock_orphan(sk);
sock->sk = NULL; sock->sk = NULL;
...@@ -625,8 +619,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, ...@@ -625,8 +619,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr;
struct pppox_sock *po = pppox_sk(sk); struct pppox_sock *po = pppox_sk(sk);
struct net_device *dev; struct net_device *dev = NULL;
struct pppoe_net *pn; struct pppoe_net *pn;
struct net *net = NULL;
int error; int error;
lock_sock(sk); lock_sock(sk);
...@@ -652,12 +647,14 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, ...@@ -652,12 +647,14 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Delete the old binding */ /* Delete the old binding */
if (stage_session(po->pppoe_pa.sid)) { if (stage_session(po->pppoe_pa.sid)) {
pppox_unbind_sock(sk); pppox_unbind_sock(sk);
pn = pppoe_pernet(sock_net(sk));
delete_item(pn, po->pppoe_pa.sid,
po->pppoe_pa.remote, po->pppoe_ifindex);
if (po->pppoe_dev) { if (po->pppoe_dev) {
pn = pppoe_pernet(dev_net(po->pppoe_dev));
delete_item(pn, po->pppoe_pa.sid,
po->pppoe_pa.remote, po->pppoe_ifindex);
dev_put(po->pppoe_dev); dev_put(po->pppoe_dev);
po->pppoe_dev = NULL;
} }
memset(sk_pppox(po) + 1, 0, memset(sk_pppox(po) + 1, 0,
sizeof(struct pppox_sock) - sizeof(struct sock)); sizeof(struct pppox_sock) - sizeof(struct sock));
sk->sk_state = PPPOX_NONE; sk->sk_state = PPPOX_NONE;
...@@ -666,16 +663,15 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, ...@@ -666,16 +663,15 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Re-bind in session stage only */ /* Re-bind in session stage only */
if (stage_session(sp->sa_addr.pppoe.sid)) { if (stage_session(sp->sa_addr.pppoe.sid)) {
error = -ENODEV; error = -ENODEV;
dev = dev_get_by_name(sock_net(sk), sp->sa_addr.pppoe.dev); net = sock_net(sk);
dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev);
if (!dev) if (!dev)
goto end; goto err_put;
po->pppoe_dev = dev; po->pppoe_dev = dev;
po->pppoe_ifindex = dev->ifindex; po->pppoe_ifindex = dev->ifindex;
pn = pppoe_pernet(dev_net(dev)); pn = pppoe_pernet(net);
write_lock_bh(&pn->hash_lock);
if (!(dev->flags & IFF_UP)) { if (!(dev->flags & IFF_UP)) {
write_unlock_bh(&pn->hash_lock);
goto err_put; goto err_put;
} }
...@@ -683,6 +679,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, ...@@ -683,6 +679,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
&sp->sa_addr.pppoe, &sp->sa_addr.pppoe,
sizeof(struct pppoe_addr)); sizeof(struct pppoe_addr));
write_lock_bh(&pn->hash_lock);
error = __set_item(pn, po); error = __set_item(pn, po);
write_unlock_bh(&pn->hash_lock); write_unlock_bh(&pn->hash_lock);
if (error < 0) if (error < 0)
...@@ -696,8 +693,11 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, ...@@ -696,8 +693,11 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
po->chan.ops = &pppoe_chan_ops; po->chan.ops = &pppoe_chan_ops;
error = ppp_register_net_channel(dev_net(dev), &po->chan); error = ppp_register_net_channel(dev_net(dev), &po->chan);
if (error) if (error) {
delete_item(pn, po->pppoe_pa.sid,
po->pppoe_pa.remote, po->pppoe_ifindex);
goto err_put; goto err_put;
}
sk->sk_state = PPPOX_CONNECTED; sk->sk_state = PPPOX_CONNECTED;
} }
...@@ -915,6 +915,14 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) ...@@ -915,6 +915,14 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
struct pppoe_hdr *ph; struct pppoe_hdr *ph;
int data_len = skb->len; int data_len = skb->len;
/* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP
* xmit operations conclude prior to an unregistration call. Thus
* sk->sk_state cannot change, so we don't need to do lock_sock().
* But, we also can't do a lock_sock since that introduces a potential
* deadlock as we'd reverse the lock ordering used when calling
* ppp_unregister_channel().
*/
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
goto abort; goto abort;
...@@ -944,7 +952,6 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) ...@@ -944,7 +952,6 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
po->pppoe_pa.remote, NULL, data_len); po->pppoe_pa.remote, NULL, data_len);
dev_queue_xmit(skb); dev_queue_xmit(skb);
return 1; return 1;
abort: abort:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment