Commit 7d231e3f authored by David S. Miller's avatar David S. Miller

Merge branch 'tipc-next'

Jon Maloy says:

====================
tipc: resolve message disordering problem

When TIPC receives messages from multi-threaded device drivers it may
occasionally deliver messages to their destination sockets in the wrong
order. This happens despite correct resequencing at the link layer,
because the upcall path from link to socket is not protected by any
locks.

These commits solve this problem by introducing an 'input' message
queue in each link, through which messages must be delivered to the
upper layers.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4134069f cb1b7280
/*
* net/tipc/bcast.c: TIPC broadcast code
*
* Copyright (c) 2004-2006, 2014, Ericsson AB
* Copyright (c) 2004-2006, 2014-2015, Ericsson AB
* Copyright (c) 2004, Intel Corporation.
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
......@@ -79,6 +79,13 @@ static void tipc_bclink_unlock(struct net *net)
tipc_link_reset_all(node);
}
void tipc_bclink_input(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq);
}
uint tipc_bclink_get_mtu(void)
{
return MAX_PKT_DEFAULT_MCAST;
......@@ -189,10 +196,8 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to)
void tipc_bclink_wakeup_users(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct sk_buff *skb;
while ((skb = skb_dequeue(&tn->bclink->link.waiting_sks)))
tipc_sk_rcv(net, skb);
tipc_sk_rcv(net, &tn->bclink->link.wakeupq);
}
/**
......@@ -271,9 +276,8 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
tipc_link_push_packets(tn->bcl);
bclink_set_last_sent(net);
}
if (unlikely(released && !skb_queue_empty(&tn->bcl->waiting_sks)))
if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq)))
n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS;
exit:
tipc_bclink_unlock(net);
}
......@@ -283,10 +287,11 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
*
* RCU and node lock set
*/
void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr,
void tipc_bclink_update_link_state(struct tipc_node *n_ptr,
u32 last_sent)
{
struct sk_buff *buf;
struct net *net = n_ptr->net;
struct tipc_net *tn = net_generic(net, tipc_net_id);
/* Ignore "stale" link state info */
......@@ -317,7 +322,7 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr,
struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue);
u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent;
tipc_msg_init(net, msg, BCAST_PROTOCOL, STATE_MSG,
tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG,
INT_H_SIZE, n_ptr->addr);
msg_set_non_seq(msg, 1);
msg_set_mc_netid(msg, tn->net_id);
......@@ -358,7 +363,7 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg)
tipc_node_unlock(n_ptr);
}
/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster
/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster
* and to identified node local sockets
* @net: the applicable net namespace
* @list: chain of buffers containing message
......@@ -373,6 +378,8 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list)
int rc = 0;
int bc = 0;
struct sk_buff *skb;
struct sk_buff_head arrvq;
struct sk_buff_head inputq;
/* Prepare clone of message for local node */
skb = tipc_msg_reassemble(list);
......@@ -381,7 +388,7 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list)
return -EHOSTUNREACH;
}
/* Broadcast to all other nodes */
/* Broadcast to all nodes */
if (likely(bclink)) {
tipc_bclink_lock(net);
if (likely(bclink->bcast_nodes.count)) {
......@@ -401,12 +408,15 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list)
if (unlikely(!bc))
__skb_queue_purge(list);
/* Deliver message clone */
if (likely(!rc))
tipc_sk_mcast_rcv(net, skb);
else
if (unlikely(rc)) {
kfree_skb(skb);
return rc;
}
/* Deliver message clone */
__skb_queue_head_init(&arrvq);
skb_queue_head_init(&inputq);
__skb_queue_tail(&arrvq, skb);
tipc_sk_mcast_rcv(net, &arrvq, &inputq);
return rc;
}
......@@ -449,6 +459,9 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf)
u32 next_in;
u32 seqno;
int deferred = 0;
int pos = 0;
struct sk_buff *iskb;
struct sk_buff_head *arrvq, *inputq;
/* Screen out unwanted broadcast messages */
if (msg_mc_netid(msg) != tn->net_id)
......@@ -485,6 +498,8 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf)
/* Handle in-sequence broadcast message */
seqno = msg_seqno(msg);
next_in = mod(node->bclink.last_in + 1);
arrvq = &tn->bclink->arrvq;
inputq = &tn->bclink->inputq;
if (likely(seqno == next_in)) {
receive:
......@@ -492,20 +507,26 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf)
if (likely(msg_isdata(msg))) {
tipc_bclink_lock(net);
bclink_accept_pkt(node, seqno);
spin_lock_bh(&inputq->lock);
__skb_queue_tail(arrvq, buf);
spin_unlock_bh(&inputq->lock);
node->action_flags |= TIPC_BCAST_MSG_EVT;
tipc_bclink_unlock(net);
tipc_node_unlock(node);
if (likely(msg_mcast(msg)))
tipc_sk_mcast_rcv(net, buf);
else
kfree_skb(buf);
} else if (msg_user(msg) == MSG_BUNDLER) {
tipc_bclink_lock(net);
bclink_accept_pkt(node, seqno);
bcl->stats.recv_bundles++;
bcl->stats.recv_bundled += msg_msgcnt(msg);
pos = 0;
while (tipc_msg_extract(buf, &iskb, &pos)) {
spin_lock_bh(&inputq->lock);
__skb_queue_tail(arrvq, iskb);
spin_unlock_bh(&inputq->lock);
}
node->action_flags |= TIPC_BCAST_MSG_EVT;
tipc_bclink_unlock(net);
tipc_node_unlock(node);
tipc_link_bundle_rcv(net, buf);
} else if (msg_user(msg) == MSG_FRAGMENTER) {
tipc_buf_append(&node->bclink.reasm_buf, &buf);
if (unlikely(!buf && !node->bclink.reasm_buf))
......@@ -521,12 +542,6 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf)
}
tipc_bclink_unlock(net);
tipc_node_unlock(node);
} else if (msg_user(msg) == NAME_DISTRIBUTOR) {
tipc_bclink_lock(net);
bclink_accept_pkt(node, seqno);
tipc_bclink_unlock(net);
tipc_node_unlock(node);
tipc_named_rcv(net, buf);
} else {
tipc_bclink_lock(net);
bclink_accept_pkt(node, seqno);
......@@ -943,10 +958,11 @@ int tipc_bclink_init(struct net *net)
spin_lock_init(&bclink->lock);
__skb_queue_head_init(&bcl->outqueue);
__skb_queue_head_init(&bcl->deferred_queue);
skb_queue_head_init(&bcl->waiting_sks);
skb_queue_head_init(&bcl->wakeupq);
bcl->next_out_no = 1;
spin_lock_init(&bclink->node.lock);
__skb_queue_head_init(&bclink->node.waiting_sks);
__skb_queue_head_init(&bclink->arrvq);
skb_queue_head_init(&bclink->inputq);
bcl->owner = &bclink->node;
bcl->owner->net = net;
bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
......@@ -954,6 +970,8 @@ int tipc_bclink_init(struct net *net)
bcl->bearer_id = MAX_BEARERS;
rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer);
bcl->state = WORKING_WORKING;
bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg;
msg_set_prevnode(bcl->pmsg, tn->own_addr);
strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
tn->bcbearer = bcbearer;
tn->bclink = bclink;
......@@ -1032,50 +1050,3 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a,
}
}
}
/**
* tipc_port_list_add - add a port to a port list, ensuring no duplicates
*/
void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port)
{
struct tipc_port_list *item = pl_ptr;
int i;
int item_sz = PLSIZE;
int cnt = pl_ptr->count;
for (; ; cnt -= item_sz, item = item->next) {
if (cnt < PLSIZE)
item_sz = cnt;
for (i = 0; i < item_sz; i++)
if (item->ports[i] == port)
return;
if (i < PLSIZE) {
item->ports[i] = port;
pl_ptr->count++;
return;
}
if (!item->next) {
item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
if (!item->next) {
pr_warn("Incomplete multicast delivery, no memory\n");
return;
}
item->next->next = NULL;
}
}
}
/**
* tipc_port_list_free - free dynamically created entries in port_list chain
*
*/
void tipc_port_list_free(struct tipc_port_list *pl_ptr)
{
struct tipc_port_list *item;
struct tipc_port_list *next;
for (item = pl_ptr->next; item; item = next) {
next = item->next;
kfree(item);
}
}
/*
* net/tipc/bcast.h: Include file for TIPC broadcast code
*
* Copyright (c) 2003-2006, 2014, Ericsson AB
* Copyright (c) 2003-2006, 2014-2015, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
......@@ -41,22 +41,6 @@
#include "link.h"
#include "node.h"
#define TIPC_BCLINK_RESET 1
#define PLSIZE 32
#define BCBEARER MAX_BEARERS
/**
* struct tipc_port_list - set of node local destination ports
* @count: # of ports in set (only valid for first entry in list)
* @next: pointer to next entry in list
* @ports: array of port references
*/
struct tipc_port_list {
int count;
struct tipc_port_list *next;
u32 ports[PLSIZE];
};
/**
* struct tipc_bcbearer_pair - a pair of bearers used by broadcast link
* @primary: pointer to primary bearer
......@@ -71,6 +55,9 @@ struct tipc_bcbearer_pair {
struct tipc_bearer *secondary;
};
#define TIPC_BCLINK_RESET 1
#define BCBEARER MAX_BEARERS
/**
* struct tipc_bcbearer - bearer used by broadcast link
* @bearer: (non-standard) broadcast bearer structure
......@@ -110,6 +97,8 @@ struct tipc_bclink {
struct tipc_link link;
struct tipc_node node;
unsigned int flags;
struct sk_buff_head arrvq;
struct sk_buff_head inputq;
struct tipc_node_map bcast_nodes;
struct tipc_node *retransmit_to;
};
......@@ -126,9 +115,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
return !memcmp(nm_a, nm_b, sizeof(*nm_a));
}
void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port);
void tipc_port_list_free(struct tipc_port_list *pl_ptr);
int tipc_bclink_init(struct net *net);
void tipc_bclink_stop(struct net *net);
void tipc_bclink_set_flags(struct net *tn, unsigned int flags);
......@@ -139,7 +125,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
void tipc_bclink_rcv(struct net *net, struct sk_buff *buf);
u32 tipc_bclink_get_last_sent(struct net *net);
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr,
void tipc_bclink_update_link_state(struct tipc_node *node,
u32 last_sent);
int tipc_bclink_stats(struct net *net, char *stats_buf, const u32 buf_size);
int tipc_bclink_reset_stats(struct net *net);
......@@ -150,5 +136,6 @@ uint tipc_bclink_get_mtu(void);
int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list);
void tipc_bclink_wakeup_users(struct net *net);
int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
void tipc_bclink_input(struct net *net);
#endif
......@@ -85,7 +85,8 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
u32 dest_domain = b_ptr->domain;
msg = buf_msg(buf);
tipc_msg_init(net, msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
INT_H_SIZE, dest_domain);
msg_set_non_seq(msg, 1);
msg_set_node_sig(msg, tn->random);
msg_set_dest_domain(msg, dest_domain);
......
This diff is collapsed.
......@@ -131,8 +131,10 @@ struct tipc_stats {
* @next_in_no: next sequence number to expect for inbound messages
* @deferred_queue: deferred queue saved OOS b'cast message received from node
* @unacked_window: # of inbound messages rx'd without ack'ing back to peer
* @inputq: buffer queue for messages to be delivered upwards
* @namedq: buffer queue for name table messages to be delivered upwards
* @next_out: ptr to first unsent outbound message in queue
* @waiting_sks: linked list of sockets waiting for link congestion to abate
* @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
* @reasm_buf: head of partially reassembled inbound message fragments
* @stats: collects statistics regarding link activity
......@@ -184,10 +186,12 @@ struct tipc_link {
u32 next_in_no;
struct sk_buff_head deferred_queue;
u32 unacked_window;
struct sk_buff_head inputq;
struct sk_buff_head namedq;
/* Congestion handling */
struct sk_buff *next_out;
struct sk_buff_head waiting_sks;
struct sk_buff_head wakeupq;
/* Fragmentation/reassembly */
u32 long_msg_seq_no;
......@@ -228,7 +232,6 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest,
u32 selector);
int __tipc_link_xmit(struct net *net, struct tipc_link *link,
struct sk_buff_head *list);
void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf);
void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
u32 gap, u32 tolerance, u32 priority, u32 acked_mtu);
void tipc_link_push_packets(struct tipc_link *l_ptr);
......@@ -244,6 +247,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
void link_prepare_wakeup(struct tipc_link *l);
/*
* Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
......@@ -278,6 +282,10 @@ static inline u32 lesser(u32 left, u32 right)
return less_eq(left, right) ? left : right;
}
static inline u32 link_own_addr(struct tipc_link *l)
{
return msg_prevnode(l->pmsg);
}
/*
* Link status checking routines
......
......@@ -70,25 +70,23 @@ struct sk_buff *tipc_buf_acquire(u32 size)
return skb;
}
void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type,
u32 hsize, u32 destnode)
void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type,
u32 hsize, u32 dnode)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
memset(m, 0, hsize);
msg_set_version(m);
msg_set_user(m, user);
msg_set_hdr_sz(m, hsize);
msg_set_size(m, hsize);
msg_set_prevnode(m, tn->own_addr);
msg_set_prevnode(m, own_node);
msg_set_type(m, type);
if (hsize > SHORT_H_SIZE) {
msg_set_orignode(m, tn->own_addr);
msg_set_destnode(m, destnode);
msg_set_orignode(m, own_node);
msg_set_destnode(m, dnode);
}
}
struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type,
struct sk_buff *tipc_msg_create(uint user, uint type,
uint hdr_sz, uint data_sz, u32 dnode,
u32 onode, u32 dport, u32 oport, int errcode)
{
......@@ -100,9 +98,8 @@ struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type,
return NULL;
msg = buf_msg(buf);
tipc_msg_init(net, msg, user, type, hdr_sz, dnode);
tipc_msg_init(onode, msg, user, type, hdr_sz, dnode);
msg_set_size(msg, hdr_sz + data_sz);
msg_set_prevnode(msg, onode);
msg_set_origport(msg, oport);
msg_set_destport(msg, dport);
msg_set_errcode(msg, errcode);
......@@ -195,7 +192,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
*
* Returns message data size or errno: -ENOMEM, -EFAULT
*/
int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m,
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int pktmax, struct sk_buff_head *list)
{
int mhsz = msg_hdr_sz(mhdr);
......@@ -227,8 +224,8 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m,
}
/* Prepare reusable fragment header */
tipc_msg_init(net, &pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE,
msg_destnode(mhdr));
tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER,
FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr));
msg_set_size(&pkthdr, pktmax);
msg_set_fragm_no(&pkthdr, pktno);
......@@ -329,6 +326,40 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu)
return true;
}
/**
* tipc_msg_extract(): extract bundled inner packet from buffer
* @skb: linear outer buffer, to be extracted from.
* @iskb: extracted inner buffer, to be returned
* @pos: position of msg to be extracted. Returns with pointer of next msg
* Consumes outer buffer when last packet extracted
* Returns true when when there is an extracted buffer, otherwise false
*/
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
{
struct tipc_msg *msg = buf_msg(skb);
int imsz;
struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos);
/* Is there space left for shortest possible message? */
if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE))
goto none;
imsz = msg_size(imsg);
/* Is there space left for current message ? */
if ((*pos + imsz) > msg_data_sz(msg))
goto none;
*iskb = tipc_buf_acquire(imsz);
if (!*iskb)
goto none;
skb_copy_to_linear_data(*iskb, imsg, imsz);
*pos += align(imsz);
return true;
none:
kfree_skb(skb);
*iskb = NULL;
return false;
}
/**
* tipc_msg_make_bundle(): Create bundle buf and append message to its tail
* @list: the buffer chain
......@@ -338,7 +369,7 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu)
* Replaces buffer if successful
* Returns true if success, otherwise false
*/
bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list,
bool tipc_msg_make_bundle(struct sk_buff_head *list,
struct sk_buff *skb, u32 mtu, u32 dnode)
{
struct sk_buff *bskb;
......@@ -362,7 +393,8 @@ bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list,
skb_trim(bskb, INT_H_SIZE);
bmsg = buf_msg(bskb);
tipc_msg_init(net, bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode);
tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
INT_H_SIZE, dnode);
msg_set_seqno(bmsg, msg_seqno(msg));
msg_set_ack(bmsg, msg_ack(msg));
msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
......@@ -379,10 +411,9 @@ bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list,
* Consumes buffer if failure
* Returns true if success, otherwise false
*/
bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode,
bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,
int err)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_msg *msg = buf_msg(buf);
uint imp = msg_importance(msg);
struct tipc_msg ohdr;
......@@ -402,7 +433,7 @@ bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode,
msg_set_errcode(msg, err);
msg_set_origport(msg, msg_destport(&ohdr));
msg_set_destport(msg, msg_origport(&ohdr));
msg_set_prevnode(msg, tn->own_addr);
msg_set_prevnode(msg, own_addr);
if (!msg_short(msg)) {
msg_set_orignode(msg, msg_destnode(&ohdr));
msg_set_destnode(msg, msg_orignode(&ohdr));
......@@ -414,43 +445,43 @@ bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode,
return true;
exit:
kfree_skb(buf);
*dnode = 0;
return false;
}
/**
* tipc_msg_eval: determine fate of message that found no destination
* @buf: the buffer containing the message.
* @dnode: return value: next-hop node, if message to be forwarded
* @err: error code to use, if message to be rejected
*
* tipc_msg_lookup_dest(): try to find new destination for named message
* @skb: the buffer containing the message.
* @dnode: return value: next-hop node, if destination found
* @err: return value: error code to use, if message to be rejected
* Does not consume buffer
* Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error
* code if message to be rejected
* Returns true if a destination is found, false otherwise
*/
int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode)
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb,
u32 *dnode, int *err)
{
struct tipc_msg *msg = buf_msg(buf);
struct tipc_msg *msg = buf_msg(skb);
u32 dport;
if (msg_type(msg) != TIPC_NAMED_MSG)
return -TIPC_ERR_NO_PORT;
if (skb_linearize(buf))
return -TIPC_ERR_NO_NAME;
if (msg_data_sz(msg) > MAX_FORWARD_SIZE)
return -TIPC_ERR_NO_NAME;
if (!msg_isdata(msg))
return false;
if (!msg_named(msg))
return false;
*err = -TIPC_ERR_NO_NAME;
if (skb_linearize(skb))
return false;
if (msg_reroute_cnt(msg) > 0)
return -TIPC_ERR_NO_NAME;
return false;
*dnode = addr_domain(net, msg_lookup_scope(msg));
dport = tipc_nametbl_translate(net, msg_nametype(msg),
msg_nameinst(msg),
dnode);
msg_nameinst(msg), dnode);
if (!dport)
return -TIPC_ERR_NO_NAME;
return false;
msg_incr_reroute_cnt(msg);
msg_set_destnode(msg, *dnode);
msg_set_destport(msg, dport);
return TIPC_OK;
*err = TIPC_OK;
return true;
}
/* tipc_msg_reassemble() - clone a buffer chain of fragments and
......
......@@ -45,6 +45,7 @@
* Note: Some items are also used with TIPC internal message headers
*/
#define TIPC_VERSION 2
struct plist;
/*
* Payload message users are defined in TIPC's public API:
......@@ -748,20 +749,110 @@ static inline u32 msg_tot_origport(struct tipc_msg *m)
}
struct sk_buff *tipc_buf_acquire(u32 size);
bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode,
bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,
int err);
int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode);
void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type,
void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
u32 hsize, u32 destnode);
struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type,
uint hdr_sz, uint data_sz, u32 dnode,
u32 onode, u32 dport, u32 oport, int errcode);
struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
uint data_sz, u32 dnode, u32 onode,
u32 dport, u32 oport, int errcode);
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu);
bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list,
bool tipc_msg_make_bundle(struct sk_buff_head *list,
struct sk_buff *skb, u32 mtu, u32 dnode);
int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m,
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode,
int *err);
struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list);
/* tipc_skb_peek(): peek and reserve first buffer in list
* @list: list to be peeked in
* Returns pointer to first buffer in list, if any
*/
static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list,
spinlock_t *lock)
{
struct sk_buff *skb;
spin_lock_bh(lock);
skb = skb_peek(list);
if (skb)
skb_get(skb);
spin_unlock_bh(lock);
return skb;
}
/* tipc_skb_peek_port(): find a destination port, ignoring all destinations
* up to and including 'filter'.
* Note: ignoring previously tried destinations minimizes the risk of
* contention on the socket lock
* @list: list to be peeked in
* @filter: last destination to be ignored from search
* Returns a destination port number, of applicable.
*/
static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter)
{
struct sk_buff *skb;
u32 dport = 0;
bool ignore = true;
spin_lock_bh(&list->lock);
skb_queue_walk(list, skb) {
dport = msg_destport(buf_msg(skb));
if (!filter || skb_queue_is_last(list, skb))
break;
if (dport == filter)
ignore = false;
else if (!ignore)
break;
}
spin_unlock_bh(&list->lock);
return dport;
}
/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list
* @list: list to be unlinked from
* @dport: selection criteria for buffer to unlink
*/
static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list,
u32 dport)
{
struct sk_buff *_skb, *tmp, *skb = NULL;
spin_lock_bh(&list->lock);
skb_queue_walk_safe(list, _skb, tmp) {
if (msg_destport(buf_msg(_skb)) == dport) {
__skb_unlink(_skb, list);
skb = _skb;
break;
}
}
spin_unlock_bh(&list->lock);
return skb;
}
/* tipc_skb_queue_tail(): add buffer to tail of list;
* @list: list to be appended to
* @skb: buffer to append. Always appended
* @dport: the destination port of the buffer
* returns true if dport differs from previous destination
*/
static inline bool tipc_skb_queue_tail(struct sk_buff_head *list,
struct sk_buff *skb, u32 dport)
{
struct sk_buff *_skb = NULL;
bool rv = false;
spin_lock_bh(&list->lock);
_skb = skb_peek_tail(list);
if (!_skb || (msg_destport(buf_msg(_skb)) != dport) ||
(skb_queue_len(list) > 32))
rv = true;
__skb_queue_tail(list, skb);
spin_unlock_bh(&list->lock);
return rv;
}
#endif
......@@ -71,13 +71,14 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
u32 dest)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size);
struct tipc_msg *msg;
if (buf != NULL) {
msg = buf_msg(buf);
tipc_msg_init(net, msg, NAME_DISTRIBUTOR, type, INT_H_SIZE,
dest);
tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type,
INT_H_SIZE, dest);
msg_set_size(msg, INT_H_SIZE + size);
}
return buf;
......@@ -380,25 +381,34 @@ void tipc_named_process_backlog(struct net *net)
}
/**
* tipc_named_rcv - process name table update message sent by another node
* tipc_named_rcv - process name table update messages sent by another node
*/
void tipc_named_rcv(struct net *net, struct sk_buff *buf)
void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_msg *msg = buf_msg(buf);
struct distr_item *item = (struct distr_item *)msg_data(msg);
u32 count = msg_data_sz(msg) / ITEM_SIZE;
u32 node = msg_orignode(msg);
struct tipc_msg *msg;
struct distr_item *item;
uint count;
u32 node;
struct sk_buff *skb;
int mtype;
spin_lock_bh(&tn->nametbl_lock);
while (count--) {
if (!tipc_update_nametbl(net, item, node, msg_type(msg)))
tipc_named_add_backlog(item, msg_type(msg), node);
item++;
for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) {
msg = buf_msg(skb);
mtype = msg_type(msg);
item = (struct distr_item *)msg_data(msg);
count = msg_data_sz(msg) / ITEM_SIZE;
node = msg_orignode(msg);
while (count--) {
if (!tipc_update_nametbl(net, item, node, mtype))
tipc_named_add_backlog(item, mtype, node);
item++;
}
kfree_skb(skb);
tipc_named_process_backlog(net);
}
tipc_named_process_backlog(net);
spin_unlock_bh(&tn->nametbl_lock);
kfree_skb(buf);
}
/**
......
......@@ -71,7 +71,7 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
void named_cluster_distribute(struct net *net, struct sk_buff *buf);
void tipc_named_node_up(struct net *net, u32 dnode);
void tipc_named_rcv(struct net *net, struct sk_buff *buf);
void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
void tipc_named_reinit(struct net *net);
void tipc_named_process_backlog(struct net *net);
void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);
......
/*
* net/tipc/name_table.c: TIPC name table code
*
* Copyright (c) 2000-2006, 2014, Ericsson AB
* Copyright (c) 2000-2006, 2014-2015, Ericsson AB
* Copyright (c) 2004-2008, 2010-2014, Wind River Systems
* All rights reserved.
*
......@@ -618,7 +618,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
* Returns non-zero if any off-node ports overlap
*/
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct tipc_port_list *dports)
u32 limit, struct tipc_plist *dports)
{
struct name_seq *seq;
struct sub_seq *sseq;
......@@ -643,7 +643,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
info = sseq->info;
list_for_each_entry(publ, &info->node_list, node_list) {
if (publ->scope <= limit)
tipc_port_list_add(dports, publ->ref);
tipc_plist_push(dports, publ->ref);
}
if (info->cluster_list_size != info->node_list_size)
......@@ -1212,3 +1212,41 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
void tipc_plist_push(struct tipc_plist *pl, u32 port)
{
struct tipc_plist *nl;
if (likely(!pl->port)) {
pl->port = port;
return;
}
if (pl->port == port)
return;
list_for_each_entry(nl, &pl->list, list) {
if (nl->port == port)
return;
}
nl = kmalloc(sizeof(*nl), GFP_ATOMIC);
if (nl) {
nl->port = port;
list_add(&nl->list, &pl->list);
}
}
u32 tipc_plist_pop(struct tipc_plist *pl)
{
struct tipc_plist *nl;
u32 port = 0;
if (likely(list_empty(&pl->list))) {
port = pl->port;
pl->port = 0;
return port;
}
nl = list_first_entry(&pl->list, typeof(*nl), list);
port = nl->port;
list_del(&nl->list);
kfree(nl);
return port;
}
/*
* net/tipc/name_table.h: Include file for TIPC name table code
*
* Copyright (c) 2000-2006, 2014, Ericsson AB
* Copyright (c) 2000-2006, 2014-2015, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
......@@ -38,7 +38,7 @@
#define _TIPC_NAME_TABLE_H
struct tipc_subscription;
struct tipc_port_list;
struct tipc_plist;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
......@@ -101,7 +101,7 @@ struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area,
int req_tlv_space);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct tipc_port_list *dports);
u32 limit, struct tipc_plist *dports);
struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
u32 upper, u32 scope, u32 port_ref,
u32 key);
......@@ -118,4 +118,18 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
struct tipc_plist {
struct list_head list;
u32 port;
};
static inline void tipc_plist_init(struct tipc_plist *pl)
{
INIT_LIST_HEAD(&pl->list);
pl->port = 0;
}
void tipc_plist_push(struct tipc_plist *pl, u32 port);
u32 tipc_plist_pop(struct tipc_plist *pl);
#endif
......@@ -111,11 +111,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
INIT_LIST_HEAD(&n_ptr->list);
INIT_LIST_HEAD(&n_ptr->publ_list);
INIT_LIST_HEAD(&n_ptr->conn_sks);
skb_queue_head_init(&n_ptr->waiting_sks);
__skb_queue_head_init(&n_ptr->bclink.deferred_queue);
hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
if (n_ptr->addr < temp_node->addr)
break;
......@@ -197,25 +194,6 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
tipc_node_unlock(node);
}
void tipc_node_abort_sock_conns(struct net *net, struct list_head *conns)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_sock_conn *conn, *safe;
struct sk_buff *buf;
list_for_each_entry_safe(conn, safe, conns, list) {
buf = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE,
TIPC_CONN_MSG, SHORT_H_SIZE, 0,
tn->own_addr, conn->peer_node,
conn->port, conn->peer_port,
TIPC_ERR_NO_NODE);
if (likely(buf))
tipc_sk_rcv(net, buf);
list_del(&conn->list);
kfree(conn);
}
}
/**
* tipc_node_link_up - handle addition of link
*
......@@ -377,7 +355,11 @@ static void node_established_contact(struct tipc_node *n_ptr)
static void node_lost_contact(struct tipc_node *n_ptr)
{
char addr_string[16];
u32 i;
struct tipc_sock_conn *conn, *safe;
struct list_head *conns = &n_ptr->conn_sks;
struct sk_buff *skb;
struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
uint i;
pr_debug("Lost contact with %s\n",
tipc_addr_string_fill(addr_string, n_ptr->addr));
......@@ -413,11 +395,25 @@ static void node_lost_contact(struct tipc_node *n_ptr)
n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
/* Notify subscribers and prevent re-contact with node until
* cleanup is done.
*/
n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN |
TIPC_NOTIFY_NODE_DOWN;
/* Prevent re-contact with node until cleanup is done */
n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN;
/* Notify publications from this node */
n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
/* Notify sockets connected to node */
list_for_each_entry_safe(conn, safe, conns, list) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
SHORT_H_SIZE, 0, tn->own_addr,
conn->peer_node, conn->port,
conn->peer_port, TIPC_ERR_NO_NODE);
if (likely(skb)) {
skb_queue_tail(n_ptr->inputq, skb);
n_ptr->action_flags |= TIPC_MSG_EVT;
}
list_del(&conn->list);
kfree(conn);
}
}
struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area,
......@@ -566,44 +562,36 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
void tipc_node_unlock(struct tipc_node *node)
{
struct net *net = node->net;
LIST_HEAD(nsub_list);
LIST_HEAD(conn_sks);
struct sk_buff_head waiting_sks;
u32 addr = 0;
int flags = node->action_flags;
u32 flags = node->action_flags;
u32 link_id = 0;
struct list_head *publ_list;
struct sk_buff_head *inputq = node->inputq;
struct sk_buff_head *namedq;
if (likely(!flags)) {
if (likely(!flags || (flags == TIPC_MSG_EVT))) {
node->action_flags = 0;
spin_unlock_bh(&node->lock);
if (flags == TIPC_MSG_EVT)
tipc_sk_rcv(net, inputq);
return;
}
addr = node->addr;
link_id = node->link_id;
__skb_queue_head_init(&waiting_sks);
namedq = node->namedq;
publ_list = &node->publ_list;
if (flags & TIPC_WAKEUP_USERS)
skb_queue_splice_init(&node->waiting_sks, &waiting_sks);
if (flags & TIPC_NOTIFY_NODE_DOWN) {
list_replace_init(&node->publ_list, &nsub_list);
list_replace_init(&node->conn_sks, &conn_sks);
}
node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN |
TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP |
TIPC_NOTIFY_LINK_DOWN |
TIPC_WAKEUP_BCAST_USERS);
node->action_flags &= ~(TIPC_MSG_EVT |
TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP |
TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
TIPC_NAMED_MSG_EVT);
spin_unlock_bh(&node->lock);
while (!skb_queue_empty(&waiting_sks))
tipc_sk_rcv(net, __skb_dequeue(&waiting_sks));
if (!list_empty(&conn_sks))
tipc_node_abort_sock_conns(net, &conn_sks);
if (!list_empty(&nsub_list))
tipc_publ_notify(net, &nsub_list, addr);
if (flags & TIPC_NOTIFY_NODE_DOWN)
tipc_publ_notify(net, publ_list, addr);
if (flags & TIPC_WAKEUP_BCAST_USERS)
tipc_bclink_wakeup_users(net);
......@@ -618,6 +606,15 @@ void tipc_node_unlock(struct tipc_node *node)
if (flags & TIPC_NOTIFY_LINK_DOWN)
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
link_id, addr);
if (flags & TIPC_MSG_EVT)
tipc_sk_rcv(net, inputq);
if (flags & TIPC_NAMED_MSG_EVT)
tipc_named_rcv(net, namedq);
if (flags & TIPC_BCAST_MSG_EVT)
tipc_bclink_input(net);
}
/* Caller should hold node lock for the passed node */
......
/*
* net/tipc/node.h: Include file for TIPC node management routines
*
* Copyright (c) 2000-2006, 2014, Ericsson AB
* Copyright (c) 2000-2006, 2014-2015, Ericsson AB
* Copyright (c) 2005, 2010-2014, Wind River Systems
* All rights reserved.
*
......@@ -55,14 +55,16 @@
* TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type
*/
enum {
TIPC_MSG_EVT = 1,
TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2),
TIPC_NOTIFY_NODE_DOWN = (1 << 3),
TIPC_NOTIFY_NODE_UP = (1 << 4),
TIPC_WAKEUP_USERS = (1 << 5),
TIPC_WAKEUP_BCAST_USERS = (1 << 6),
TIPC_NOTIFY_LINK_UP = (1 << 7),
TIPC_NOTIFY_LINK_DOWN = (1 << 8)
TIPC_WAKEUP_BCAST_USERS = (1 << 5),
TIPC_NOTIFY_LINK_UP = (1 << 6),
TIPC_NOTIFY_LINK_DOWN = (1 << 7),
TIPC_NAMED_MSG_EVT = (1 << 8),
TIPC_BCAST_MSG_EVT = (1 << 9)
};
/**
......@@ -73,6 +75,7 @@ enum {
* @oos_state: state tracker for handling OOS b'cast messages
* @deferred_queue: deferred queue saved OOS b'cast message received from node
* @reasm_buf: broadcast reassembly queue head from node
* @inputq_map: bitmap indicating which inqueues should be kicked
* @recv_permitted: true if node is allowed to receive b'cast messages
*/
struct tipc_node_bclink {
......@@ -83,6 +86,7 @@ struct tipc_node_bclink {
u32 deferred_size;
struct sk_buff_head deferred_queue;
struct sk_buff *reasm_buf;
int inputq_map;
bool recv_permitted;
};
......@@ -92,6 +96,9 @@ struct tipc_node_bclink {
* @lock: spinlock governing access to structure
* @net: the applicable net namespace
* @hash: links to adjacent nodes in unsorted hash chain
* @inputq: pointer to input queue containing messages for msg event
* @namedq: pointer to name table input queue with name table messages
* @curr_link: the link holding the node lock, if any
* @active_links: pointers to active links to node
* @links: pointers to all links to node
* @action_flags: bit mask of different types of node actions
......@@ -109,10 +116,12 @@ struct tipc_node {
spinlock_t lock;
struct net *net;
struct hlist_node hash;
struct sk_buff_head *inputq;
struct sk_buff_head *namedq;
struct tipc_link *active_links[2];
u32 act_mtus[2];
struct tipc_link *links[MAX_BEARERS];
unsigned int action_flags;
int action_flags;
struct tipc_node_bclink bclink;
struct list_head list;
int link_cnt;
......@@ -120,7 +129,6 @@ struct tipc_node {
u32 signature;
u32 link_id;
struct list_head publ_list;
struct sk_buff_head waiting_sks;
struct list_head conn_sks;
struct rcu_head rcu;
};
......
This diff is collapsed.
/* net/tipc/socket.h: Include file for TIPC socket code
*
* Copyright (c) 2014, Ericsson AB
* Copyright (c) 2014-2015, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
......@@ -42,16 +42,16 @@
#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2)
#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
int tipc_socket_init(void);
void tipc_socket_stop(void);
int tipc_sock_create_local(struct net *net, int type, struct socket **res);
void tipc_sock_release_local(struct socket *sock);
int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
int flags);
int tipc_sk_rcv(struct net *net, struct sk_buff *buf);
int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
struct sk_buff *tipc_sk_socks_show(struct net *net);
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf);
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq);
void tipc_sk_reinit(struct net *net);
int tipc_sk_rht_init(struct net *net);
void tipc_sk_rht_destroy(struct net *net);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment