Commit f21e897e authored by Jon Paul Maloy, committed by David S. Miller

tipc: improve link congestion algorithm

The link congestion algorithm used until now suffers from two problems.

- It is too generous towards lower-level messages in situations of high
  load by giving "absolute" bandwidth guarantees to the different
  priority levels. LOW traffic is guaranteed 10%, MEDIUM is guaranteed
  20%, HIGH is guaranteed 30%, and CRITICAL is guaranteed 40% of the
  available bandwidth. But, in the absence of higher level traffic, the
  ratio between two distinct levels becomes unreasonable. E.g. if there
  is only LOW and MEDIUM traffic on a system, the former is guaranteed
  1/3 of the bandwidth, and the latter 2/3. This again means that if
  there is e.g. one LOW user and 10 MEDIUM users, the former will have
  33.3% of the bandwidth, and the others will have to compete for the
  remainder, i.e. each will end up with 6.7% of the capacity.

- Packets of type MSG_BUNDLER are created at SYSTEM importance level,
  but only after the packets bundled into it have passed the congestion
  test for their own respective levels. Since bundled packets don't
  result in incrementing the level counter for their own importance,
  only occasionally for the SYSTEM level counter, they do in practice
  obtain SYSTEM level importance. Hence, the current implementation
  provides a gap in the congestion algorithm that in the worst case
  may lead to a link reset.

We now refine the congestion algorithm as follows:

- A message is accepted to the link backlog only if its own level
  counter, and all superior level counters, permit it.

- The importance of a created bundle packet is set according to its
  contents. A bundle packet created from messages at levels LOW to
  CRITICAL is given importance level CRITICAL, while a bundle created
  from a SYSTEM level message is given importance SYSTEM. In the latter
  case only subsequent SYSTEM level messages are allowed to be bundled
  into it.

This solves the first problem described above, by making the bandwidth
guarantee relative to the total number of users at all levels; only
the upper limit for each level remains absolute. In the example
described above, the single LOW user would use 1/11th of the bandwidth,
the same as each of the ten MEDIUM users, but he still has the same
guarantee against starvation as the latter ones.

The fix also solves the second problem. If the CRITICAL level is filled
up by bundle packets of that level, no lower level packets will be
accepted any more.

Suggested-by: Gergely Kiss <gergely.kiss@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent cd4eee3c
...@@ -645,7 +645,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, ...@@ -645,7 +645,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
{ {
struct tipc_msg *msg = buf_msg(skb_peek(list)); struct tipc_msg *msg = buf_msg(skb_peek(list));
unsigned int maxwin = link->window; unsigned int maxwin = link->window;
unsigned int imp = msg_importance(msg); unsigned int i, imp = msg_importance(msg);
uint mtu = link->mtu; uint mtu = link->mtu;
u16 ack = mod(link->rcv_nxt - 1); u16 ack = mod(link->rcv_nxt - 1);
u16 seqno = link->snd_nxt; u16 seqno = link->snd_nxt;
...@@ -655,10 +655,11 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, ...@@ -655,10 +655,11 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff_head *backlogq = &link->backlogq;
struct sk_buff *skb, *tmp; struct sk_buff *skb, *tmp;
/* Match backlog limit against msg importance: */ /* Match msg importance against this and all higher backlog limits: */
if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
return link_schedule_user(link, list); return link_schedule_user(link, list);
}
if (unlikely(msg_size(msg) > mtu)) { if (unlikely(msg_size(msg) > mtu)) {
__skb_queue_purge(list); __skb_queue_purge(list);
return -EMSGSIZE; return -EMSGSIZE;
......
...@@ -365,6 +365,9 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) ...@@ -365,6 +365,9 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu)
return false; return false;
if (unlikely(max < (start + msz))) if (unlikely(max < (start + msz)))
return false; return false;
if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
(msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
return false;
skb_put(bskb, pad + msz); skb_put(bskb, pad + msz);
skb_copy_to_linear_data_offset(bskb, start, skb->data, msz); skb_copy_to_linear_data_offset(bskb, start, skb->data, msz);
...@@ -448,6 +451,10 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) ...@@ -448,6 +451,10 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode)
bmsg = buf_msg(bskb); bmsg = buf_msg(bskb);
tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
INT_H_SIZE, dnode); INT_H_SIZE, dnode);
if (msg_isdata(msg))
msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE);
else
msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE);
msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_seqno(bmsg, msg_seqno(msg));
msg_set_ack(bmsg, msg_ack(msg)); msg_set_ack(bmsg, msg_ack(msg));
msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
......
...@@ -352,18 +352,22 @@ static inline void msg_set_seqno(struct tipc_msg *m, u16 n) ...@@ -352,18 +352,22 @@ static inline void msg_set_seqno(struct tipc_msg *m, u16 n)
*/ */
static inline u32 msg_importance(struct tipc_msg *m) static inline u32 msg_importance(struct tipc_msg *m)
{ {
if (unlikely(msg_user(m) == MSG_FRAGMENTER)) int usr = msg_user(m);
if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m)))
return usr;
if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))
return msg_bits(m, 5, 13, 0x7); return msg_bits(m, 5, 13, 0x7);
if (likely(msg_isdata(m) && !msg_errcode(m)))
return msg_user(m);
return TIPC_SYSTEM_IMPORTANCE; return TIPC_SYSTEM_IMPORTANCE;
} }
static inline void msg_set_importance(struct tipc_msg *m, u32 i) static inline void msg_set_importance(struct tipc_msg *m, u32 i)
{ {
if (unlikely(msg_user(m) == MSG_FRAGMENTER)) int usr = msg_user(m);
if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)))
msg_set_bits(m, 5, 13, 0x7, i); msg_set_bits(m, 5, 13, 0x7, i);
else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) else if (i < TIPC_SYSTEM_IMPORTANCE)
msg_set_user(m, i); msg_set_user(m, i);
else else
pr_warn("Trying to set illegal importance in message\n"); pr_warn("Trying to set illegal importance in message\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment