Commit 7c4a54b9, authored by Jon Paul Maloy, committed by David S. Miller

tipc: rate limit broadcast retransmissions

As cluster sizes grow, so does the amount of identical or overlapping
broadcast NACKs generated by the packet receivers. This often leads to
'NACK crunches' resulting in huge numbers of redundant retransmissions
of the same packet ranges.

In this commit, we introduce rate control of broadcast retransmissions,
so that a retransmitted range cannot be retransmitted again until after
at least 10 ms. This reduces the frequency of duplicate, redundant
retransmissions by an order of magnitude, while having a significant
positive impact on overall throughput and scalability.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 02d11ca2
...@@ -181,7 +181,10 @@ struct tipc_link { ...@@ -181,7 +181,10 @@ struct tipc_link {
u16 acked; u16 acked;
struct tipc_link *bc_rcvlink; struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink; struct tipc_link *bc_sndlink;
int nack_state; unsigned long prev_retr;
u16 prev_from;
u16 prev_to;
u8 nack_state;
bool bc_peer_is_up; bool bc_peer_is_up;
/* Statistics */ /* Statistics */
...@@ -202,6 +205,8 @@ enum { ...@@ -202,6 +205,8 @@ enum {
BC_NACK_SND_SUPPRESS, BC_NACK_SND_SUPPRESS,
}; };
/* Minimum age of the previous broadcast retransmission before a range
 * overlapping it may be retransmitted again
 */
#define TIPC_BC_RETR_LIMIT 10 /* [ms] */
/* /*
* Interval between NACKs when packets arrive out of order * Interval between NACKs when packets arrive out of order
*/ */
...@@ -1590,11 +1595,48 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) ...@@ -1590,11 +1595,48 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
l->rcv_nxt = peers_snd_nxt; l->rcv_nxt = peers_snd_nxt;
} }
/* link_bc_retr_eval() - rate limit retransmission of a requested range
 * @l: link whose prev_from/prev_to/prev_retr describe the last retransmission
 * @from: first sequence number requested; may be raised to skip overlap
 * @to: last sequence number requested; may be lowered to skip overlap
 *
 * A request overlapping the previously retransmitted range is held back
 * unless more than TIPC_BC_RETR_LIMIT ms have passed since that
 * retransmission. Before that, only a part lying outside the previous
 * range is let through, and the recorded range is widened accordingly.
 *
 * Returns true if [*from, *to] should be retransmitted now, false otherwise.
 */
static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
{
	unsigned long age_ms = jiffies_to_msecs(jiffies - l->prev_retr);

	/* Empty range: nothing to retransmit */
	if (less(*to, *from))
		return false;

	if (age_ms <= TIPC_BC_RETR_LIMIT &&
	    !less(*to, l->prev_from) && !more(*from, l->prev_to)) {
		/* Recent retransmission overlapping this request */

		/* Entirely covered by the previous range => suppress */
		if (!(less(*from, l->prev_from) || more(*to, l->prev_to)))
			return false;

		/* Starts below the previous range: keep only the lower part.
		 * NOTE(review): when the request also ends above prev_to, the
		 * lowered *to is what the next check sees, so only the lower
		 * part appears to be let through - confirm this is intended.
		 */
		if (less(*from, l->prev_from)) {
			*to = l->prev_from - 1;
			l->prev_from = *from;
		}

		/* Ends above the previous range: keep only the upper part */
		if (more(*to, l->prev_to)) {
			*from = l->prev_to + 1;
			l->prev_to = *to;
		}
	} else {
		/* Limit expired or no overlap: record as a new request */
		l->prev_from = *from;
		l->prev_to = *to;
	}

	l->prev_retr = jiffies;
	return true;
}
/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
*/ */
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq) struct sk_buff_head *xmitq)
{ {
struct tipc_link *snd_l = l->bc_sndlink;
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
u16 from = msg_bcast_ack(hdr) + 1; u16 from = msg_bcast_ack(hdr) + 1;
u16 to = from + msg_bc_gap(hdr) - 1; u16 to = from + msg_bc_gap(hdr) - 1;
...@@ -1613,14 +1655,14 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, ...@@ -1613,14 +1655,14 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
if (!l->bc_peer_is_up) if (!l->bc_peer_is_up)
return rc; return rc;
l->stats.recv_nacks++;
/* Ignore if peers_snd_nxt goes beyond receive window */ /* Ignore if peers_snd_nxt goes beyond receive window */
if (more(peers_snd_nxt, l->rcv_nxt + l->window)) if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc; return rc;
if (!less(to, from)) { if (link_bc_retr_eval(snd_l, &from, &to))
rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); rc = tipc_link_retrans(snd_l, from, to, xmitq);
l->stats.recv_nacks++;
}
l->snd_nxt = peers_snd_nxt; l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l)) if (link_bc_rcv_gap(l))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment