Commit f214fc40 authored by Parthasarathy Bhuvaragan, committed by David S. Miller

tipc: Revert "tipc: use existing sk_write_queue for outgoing packet chain"

This reverts commit 94153e36 ("tipc: use existing sk_write_queue for
outgoing packet chain").

In commit 94153e36, we assume that we fill and empty the socket's
sk_write_queue within the same lock_sock() session.

This does not hold when the link is congested: during congestion, the
socket lock is released while we wait for the congestion to cease. With
that implementation, a user-space program with several threads sending
on the same socket descriptor can trigger a NULL pointer dereference.
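
For reference, the reverted pattern, condensed from the old side of the
diff below (congestion waiting and error handling elided): every sender
on the socket builds into the shared sk_write_queue, so the chain only
survives as long as no other thread transmits and frees it first.

	/* chain lives in the socket, shared by all senders */
	struct sk_buff_head *pktchain = &sk->sk_write_queue;

	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
	...
	do {
		/* NULL if another thread already drained the queue */
		skb = skb_peek(pktchain);
		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; /* oops */
		rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
		...
	} while (!rc);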

Consider two threads of the same program performing the following:
     Thread1                                  Thread2
--------------------                    ----------------------
Enter tipc_sendmsg()                    Enter tipc_sendmsg()
lock_sock()                             lock_sock()
Enter tipc_link_xmit(), ret=ELINKCONG   spin on socket lock..
sk_wait_event()                             :
release_sock()                          grab socket lock
    :                                   Enter tipc_link_xmit(), ret=0
    :                                   release_sock()
Wakeup after congestion
lock_sock()
skb = skb_peek(pktchain);
!! TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;

In this case, the second thread successfully transmits the buffers
belonging to both thread1 and thread2. When the first thread wakes up
after the congestion, it assumes that the pktchain is intact and
operates on the skbs in it, which leads to the following exception:

[2102.439969] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0
[2102.440074] IP: [<ffffffffa005f330>] __tipc_link_xmit+0x2b0/0x4d0 [tipc]
[2102.440074] PGD 3fa3f067 PUD 3fa6b067 PMD 0
[2102.440074] Oops: 0000 [#1] SMP
[2102.440074] CPU: 2 PID: 244 Comm: sender Not tainted 3.12.28 #1
[2102.440074] RIP: 0010:[<ffffffffa005f330>]  [<ffffffffa005f330>] __tipc_link_xmit+0x2b0/0x4d0 [tipc]
[...]
[2102.440074] Call Trace:
[2102.440074]  [<ffffffff8163f0b9>] ? schedule+0x29/0x70
[2102.440074]  [<ffffffffa006a756>] ? tipc_node_unlock+0x46/0x170 [tipc]
[2102.440074]  [<ffffffffa005f761>] tipc_link_xmit+0x51/0xf0 [tipc]
[2102.440074]  [<ffffffffa006d8ae>] tipc_send_stream+0x11e/0x4f0 [tipc]
[2102.440074]  [<ffffffff8106b150>] ? __wake_up_sync+0x20/0x20
[2102.440074]  [<ffffffffa006dc9c>] tipc_send_packet+0x1c/0x20 [tipc]
[2102.440074]  [<ffffffff81502478>] sock_sendmsg+0xa8/0xd0
[2102.440074]  [<ffffffff81507895>] ? release_sock+0x145/0x170
[2102.440074]  [<ffffffff815030d8>] ___sys_sendmsg+0x3d8/0x3e0
[2102.440074]  [<ffffffff816426ae>] ? _raw_spin_unlock+0xe/0x10
[2102.440074]  [<ffffffff81115c2a>] ? handle_mm_fault+0x6ca/0x9d0
[2102.440074]  [<ffffffff8107dd65>] ? set_next_entity+0x85/0xa0
[2102.440074]  [<ffffffff816426de>] ? _raw_spin_unlock_irq+0xe/0x20
[2102.440074]  [<ffffffff8107463c>] ? finish_task_switch+0x5c/0xc0
[2102.440074]  [<ffffffff8163ea8c>] ? __schedule+0x34c/0x950
[2102.440074]  [<ffffffff81504e12>] __sys_sendmsg+0x42/0x80
[2102.440074]  [<ffffffff81504e62>] SyS_sendmsg+0x12/0x20
[2102.440074]  [<ffffffff8164aed2>] system_call_fastpath+0x16/0x1b

In this commit, we always keep the skb list on the stack of the sending
function, private to each call.
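
Condensed from the diff below, the per-call pattern after the revert:
each sender builds and transmits its own chain on the stack, which a
concurrent sender on the same socket can neither see nor purge.

	struct sk_buff_head pktchain;		/* private to this call */

	skb_queue_head_init(&pktchain);
	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
	...
	do {
		skb = skb_peek(&pktchain);	/* always this call's own skbs */
		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
		...
	} while (!rc);
	/* on failure, only this call's chain is purged */
	__skb_queue_purge(&pktchain);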
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1837b2e2
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -673,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct iov_iter save = msg->msg_iter;
 	uint mtu;
 	int rc;
@@ -687,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 	msg_set_nameupper(mhdr, seq->upper);
 	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
 
+	skb_queue_head_init(&pktchain);
+
 new_mtu:
 	mtu = tipc_bcast_get_mtu(net);
-	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
 
 	do {
-		rc = tipc_bcast_xmit(net, pktchain);
+		rc = tipc_bcast_xmit(net, &pktchain);
 		if (likely(!rc))
 			return dsz;
 
@@ -704,7 +706,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			msg->msg_iter = save;
 			goto new_mtu;
@@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
 	u32 dnode, dport;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct sk_buff *skb;
 	struct tipc_name_seq *seq;
 	struct iov_iter save;
@@ -924,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
 	}
 
+	skb_queue_head_init(&pktchain);
 	save = m->msg_iter;
 new_mtu:
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
 	if (rc < 0)
 		return rc;
 
 	do {
-		skb = skb_peek(pktchain);
+		skb = skb_peek(&pktchain);
 		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-		rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
 		if (likely(!rc)) {
 			if (sock->state != SS_READY)
 				sock->state = SS_CONNECTING;
@@ -946,7 +949,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			m->msg_iter = save;
 			goto new_mtu;
@@ -1016,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	u32 portid = tsk->portid;
 	int rc = -EINVAL;
@@ -1044,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	dnode = tsk_peer_node(tsk);
 
+	skb_queue_head_init(&pktchain);
+
 next:
 	save = m->msg_iter;
 	mtu = tsk->max_pkt;
 	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-	rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
 
 	do {
 		if (likely(!tsk_conn_cong(tsk))) {
-			rc = tipc_node_xmit(net, pktchain, dnode, portid);
+			rc = tipc_node_xmit(net, &pktchain, dnode, portid);
 			if (likely(!rc)) {
 				tsk->sent_unacked++;
 				sent += send;
@@ -1063,7 +1068,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 			goto next;
 		}
 		if (rc == -EMSGSIZE) {
-			__skb_queue_purge(pktchain);
+			__skb_queue_purge(&pktchain);
 			tsk->max_pkt = tipc_node_get_mtu(net, dnode,
 							 portid);
 			m->msg_iter = save;
@@ -1077,7 +1082,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 		rc = tipc_wait_for_sndpkt(sock, &timeo);
 	} while (!rc);
 
-	__skb_queue_purge(pktchain);
+	__skb_queue_purge(&pktchain);
 	return sent ? sent : rc;
 }