Commit 02ec6caf, authored by Hoang Le, committed by David S. Miller

tipc: support broadcast/replicast configurable for bc-link

Currently, a multicast stream uses either broadcast or replicast as
transmission method, based on the ratio between the number of actual
destination nodes and the cluster size.

However, when an L2 interface (e.g., VXLAN) provides pseudo
broadcast support, this becomes very inefficient, as it blindly
replicates multicast packets to all cluster/subnet nodes,
irrespective of whether they host actual target sockets or not.

The TIPC multicast algorithm is able to distinguish real destination
nodes from other nodes, and hence provides a smarter and more
efficient method for transferring multicast messages than
pseudo broadcast can do.

Because of this, we now make it possible for users to force
the broadcast link to permanently switch to using replicast,
irrespective of which capabilities the bearer provides,
or pretends to provide.
Conversely, we also make it possible to force the broadcast link
to always use true broadcast. While maybe less useful in
deployed systems, this may at least be useful for testing the
broadcast algorithm in small clusters.

We retain the current AUTOSELECT ability, i.e., to let the broadcast link
automatically select which algorithm to use, and to switch back and forth
between broadcast and replicast as the ratio between destination
node number and cluster size changes. This remains the default method.

Furthermore, we make it possible to configure the threshold ratio for
such switches. The default ratio is now set to 10%, down from 25% in the
earlier implementation.

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 310974fa
...@@ -281,6 +281,8 @@ enum { ...@@ -281,6 +281,8 @@ enum {
TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_TOL, /* u32 */
TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */
TIPC_NLA_PROP_MTU, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */
TIPC_NLA_PROP_BROADCAST, /* u32 */
TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */
__TIPC_NLA_PROP_MAX, __TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
......
...@@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link"; ...@@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link";
* @dests: array keeping number of reachable destinations per bearer * @dests: array keeping number of reachable destinations per bearer
* @primary_bearer: a bearer having links to all broadcast destinations, if any * @primary_bearer: a bearer having links to all broadcast destinations, if any
* @bcast_support: indicates if primary bearer, if any, supports broadcast * @bcast_support: indicates if primary bearer, if any, supports broadcast
* @force_bcast: forces broadcast for multicast traffic
* @rcast_support: indicates if all peer nodes support replicast * @rcast_support: indicates if all peer nodes support replicast
* @force_rcast: forces replicast for multicast traffic
* @rc_ratio: dest count as percentage of cluster size where send method changes * @rc_ratio: dest count as percentage of cluster size where send method changes
* @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
*/ */
...@@ -64,7 +66,9 @@ struct tipc_bc_base { ...@@ -64,7 +66,9 @@ struct tipc_bc_base {
int dests[MAX_BEARERS]; int dests[MAX_BEARERS];
int primary_bearer; int primary_bearer;
bool bcast_support; bool bcast_support;
bool force_bcast;
bool rcast_support; bool rcast_support;
bool force_rcast;
int rc_ratio; int rc_ratio;
int bc_threshold; int bc_threshold;
}; };
...@@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) ...@@ -485,10 +489,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit)
return 0; return 0;
} }
/**
 * tipc_bc_link_set_broadcast_mode - choose the broadcast link's send method
 * @net: network namespace whose broadcast base is updated
 * @bc_mode: one of BCLINK_MODE_BCAST, BCLINK_MODE_RCAST or BCLINK_MODE_SEL
 *
 * BCLINK_MODE_BCAST sets force_bcast, BCLINK_MODE_RCAST sets force_rcast,
 * and BCLINK_MODE_SEL clears both flags.
 *
 * Returns 0 on success, -ENOPROTOOPT if the support flag(s) required by the
 * requested mode are not set, or -EINVAL for an unrecognised mode value.
 * The force flags are left untouched on any error.
 */
static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode)
{
	struct tipc_bc_base *base = tipc_bc_base(net);
	bool want_bcast = false;
	bool want_rcast = false;

	if (bc_mode == BCLINK_MODE_BCAST) {
		if (!base->bcast_support)
			return -ENOPROTOOPT;
		want_bcast = true;
	} else if (bc_mode == BCLINK_MODE_RCAST) {
		if (!base->rcast_support)
			return -ENOPROTOOPT;
		want_rcast = true;
	} else if (bc_mode == BCLINK_MODE_SEL) {
		/* Auto-selection needs both methods to be available */
		if (!(base->bcast_support && base->rcast_support))
			return -ENOPROTOOPT;
	} else {
		return -EINVAL;
	}

	base->force_bcast = want_bcast;
	base->force_rcast = want_rcast;

	return 0;
}
/**
 * tipc_bc_link_set_broadcast_ratio - set the broadcast/replicast switch ratio
 * @net: network namespace whose broadcast base is updated
 * @bc_ratio: new ratio as a percentage; accepted range is 1..100
 *
 * Stores the ratio in rc_ratio and recomputes the broadcast threshold.
 *
 * Returns 0 on success, -ENOPROTOOPT unless both bcast_support and
 * rcast_support are set, or -EINVAL if @bc_ratio is outside 1..100.
 */
static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio)
{
	struct tipc_bc_base *bb = tipc_bc_base(net);

	if (!bb->bcast_support || !bb->rcast_support)
		return -ENOPROTOOPT;

	/* bc_ratio is unsigned, so zero is the only value below the range */
	if (bc_ratio == 0 || bc_ratio > 100)
		return -EINVAL;

	/*
	 * Store the ratio inside the same critical section that recomputes
	 * the threshold derived from it, so the two are updated together.
	 */
	tipc_bcast_lock(net);
	bb->rc_ratio = bc_ratio;
	tipc_bcbase_calc_bc_threshold(net);
	tipc_bcast_unlock(net);

	return 0;
}
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
{ {
int err; int err;
u32 win; u32 win;
u32 bc_mode;
u32 bc_ratio;
struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
if (!attrs[TIPC_NLA_LINK_PROP]) if (!attrs[TIPC_NLA_LINK_PROP])
...@@ -498,12 +555,28 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) ...@@ -498,12 +555,28 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
if (err) if (err)
return err; return err;
if (!props[TIPC_NLA_PROP_WIN]) if (!props[TIPC_NLA_PROP_WIN] &&
!props[TIPC_NLA_PROP_BROADCAST] &&
!props[TIPC_NLA_PROP_BROADCAST_RATIO]) {
return -EOPNOTSUPP; return -EOPNOTSUPP;
}
if (props[TIPC_NLA_PROP_BROADCAST]) {
bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]);
err = tipc_bc_link_set_broadcast_mode(net, bc_mode);
}
win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) {
bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]);
err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio);
}
return tipc_bc_link_set_queue_limits(net, win); if (!err && props[TIPC_NLA_PROP_WIN]) {
win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
err = tipc_bc_link_set_queue_limits(net, win);
}
return err;
} }
int tipc_bcast_init(struct net *net) int tipc_bcast_init(struct net *net)
...@@ -529,7 +602,7 @@ int tipc_bcast_init(struct net *net) ...@@ -529,7 +602,7 @@ int tipc_bcast_init(struct net *net)
goto enomem; goto enomem;
bb->link = l; bb->link = l;
tn->bcl = l; tn->bcl = l;
bb->rc_ratio = 25; bb->rc_ratio = 10;
bb->rcast_support = true; bb->rcast_support = true;
return 0; return 0;
enomem: enomem:
...@@ -576,3 +649,26 @@ void tipc_nlist_purge(struct tipc_nlist *nl) ...@@ -576,3 +649,26 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
nl->remote = 0; nl->remote = 0;
nl->local = false; nl->local = false;
} }
/**
 * tipc_bcast_get_broadcast_mode - report the broadcast link's send mode
 * @net: network namespace whose broadcast base is queried
 *
 * Returns BCLINK_MODE_BCAST if force_bcast is set, otherwise
 * BCLINK_MODE_RCAST if force_rcast is set, otherwise BCLINK_MODE_SEL when
 * both bcast_support and rcast_support are set, and 0 in any other case.
 */
u32 tipc_bcast_get_broadcast_mode(struct net *net)
{
	struct tipc_bc_base *base = tipc_bc_base(net);
	u32 mode = 0;

	if (base->force_bcast)
		mode = BCLINK_MODE_BCAST;
	else if (base->force_rcast)
		mode = BCLINK_MODE_RCAST;
	else if (base->bcast_support && base->rcast_support)
		mode = BCLINK_MODE_SEL;

	return mode;
}
u32 tipc_bcast_get_broadcast_ratio(struct net *net)
{
struct tipc_bc_base *bb = tipc_bc_base(net);
return bb->rc_ratio;
}
...@@ -48,6 +48,10 @@ extern const char tipc_bclink_name[]; ...@@ -48,6 +48,10 @@ extern const char tipc_bclink_name[];
#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
#define BCLINK_MODE_BCAST 0x1
#define BCLINK_MODE_RCAST 0x2
#define BCLINK_MODE_SEL 0x4
struct tipc_nlist { struct tipc_nlist {
struct list_head list; struct list_head list;
u32 self; u32 self;
...@@ -92,6 +96,9 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); ...@@ -92,6 +96,9 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
int tipc_bclink_reset_stats(struct net *net); int tipc_bclink_reset_stats(struct net *net);
u32 tipc_bcast_get_broadcast_mode(struct net *net);
u32 tipc_bcast_get_broadcast_ratio(struct net *net);
static inline void tipc_bcast_lock(struct net *net) static inline void tipc_bcast_lock(struct net *net)
{ {
spin_lock_bh(&tipc_net(net)->bclock); spin_lock_bh(&tipc_net(net)->bclock);
......
...@@ -2197,6 +2197,8 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) ...@@ -2197,6 +2197,8 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
struct nlattr *attrs; struct nlattr *attrs;
struct nlattr *prop; struct nlattr *prop;
struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
struct tipc_link *bcl = tn->bcl; struct tipc_link *bcl = tn->bcl;
if (!bcl) if (!bcl)
...@@ -2233,6 +2235,12 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) ...@@ -2233,6 +2235,12 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
goto attr_msg_full; goto attr_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
goto prop_msg_full; goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode))
goto prop_msg_full;
if (bc_mode & BCLINK_MODE_SEL)
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO,
bc_ratio))
goto prop_msg_full;
nla_nest_end(msg->skb, prop); nla_nest_end(msg->skb, prop);
err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats);
......
...@@ -110,7 +110,9 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { ...@@ -110,7 +110,9 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
[TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
[TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
[TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
[TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 },
[TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 }
}; };
const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment