Commit 0c73ffc7 authored by David S. Miller's avatar David S. Miller

Merge branch 'QRTR-flow-control-improvements'

Bjorn Andersson says:

====================
QRTR flow control improvements

In order to prevent overconsumption of resources on the remote side QRTR
implements a flow control mechanism.

Move the handling of the incoming confirm_rx to the receiving process to
ensure incoming flow is controlled. Then implement outgoing flow
control, using the recommended algorithm of counting outstanding
non-confirmed messages and blocking when hitting a limit. The last three
patches refactors the node assignment and port lookup, in order to
remove the worker in the receive path.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 7786a1af e04df98a
......@@ -8,6 +8,8 @@
#include <linux/qrtr.h>
#include <linux/termios.h> /* For TIOCINQ/OUTQ */
#include <linux/numa.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <net/sock.h>
......@@ -97,10 +99,11 @@ static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
static unsigned int qrtr_local_nid = NUMA_NO_NODE;
/* for node ids */
static RADIX_TREE(qrtr_nodes, GFP_KERNEL);
static RADIX_TREE(qrtr_nodes, GFP_ATOMIC);
static DEFINE_SPINLOCK(qrtr_nodes_lock);
/* broadcast list */
static LIST_HEAD(qrtr_all_nodes);
/* lock for qrtr_nodes, qrtr_all_nodes and node reference */
/* lock for qrtr_all_nodes and node reference */
static DEFINE_MUTEX(qrtr_node_lock);
/* local port allocation management */
......@@ -113,8 +116,9 @@ static DEFINE_MUTEX(qrtr_port_lock);
* @ep: endpoint
* @ref: reference count for node
* @nid: node id
* @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port
* @qrtr_tx_lock: lock for qrtr_tx_flow inserts
* @rx_queue: receive queue
* @work: scheduled work struct for recv work
* @item: list item for broadcast list
*/
struct qrtr_node {
......@@ -123,17 +127,36 @@ struct qrtr_node {
struct kref ref;
unsigned int nid;
struct radix_tree_root qrtr_tx_flow;
struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */
struct sk_buff_head rx_queue;
struct work_struct work;
struct list_head item;
};
/**
* struct qrtr_tx_flow - tx flow control
* @resume_tx: waiters for a resume tx from the remote
* @pending: number of waiting senders
* @tx_failed: indicates that a message with confirm_rx flag was lost
*/
struct qrtr_tx_flow {
struct wait_queue_head resume_tx;
int pending;
int tx_failed;
};
#define QRTR_TX_FLOW_HIGH 10
#define QRTR_TX_FLOW_LOW 5
static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
int type, struct sockaddr_qrtr *from,
struct sockaddr_qrtr *to);
static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
int type, struct sockaddr_qrtr *from,
struct sockaddr_qrtr *to);
static struct qrtr_sock *qrtr_port_lookup(int port);
static void qrtr_port_put(struct qrtr_sock *ipc);
/* Release node resources and free the node.
*
......@@ -143,15 +166,25 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
static void __qrtr_node_release(struct kref *kref)
{
struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
struct radix_tree_iter iter;
unsigned long flags;
void __rcu **slot;
spin_lock_irqsave(&qrtr_nodes_lock, flags);
if (node->nid != QRTR_EP_NID_AUTO)
radix_tree_delete(&qrtr_nodes, node->nid);
spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
list_del(&node->item);
mutex_unlock(&qrtr_node_lock);
cancel_work_sync(&node->work);
skb_queue_purge(&node->rx_queue);
/* Free tx flow counters */
radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot);
kfree(*slot);
}
kfree(node);
}
......@@ -171,6 +204,126 @@ static void qrtr_node_release(struct qrtr_node *node)
kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock);
}
/**
* qrtr_tx_resume() - reset flow control counter
* @node: qrtr_node that the QRTR_TYPE_RESUME_TX packet arrived on
* @skb: resume_tx packet
*/
static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb)
{
struct qrtr_ctrl_pkt *pkt = (struct qrtr_ctrl_pkt *)skb->data;
u64 remote_node = le32_to_cpu(pkt->client.node);
u32 remote_port = le32_to_cpu(pkt->client.port);
struct qrtr_tx_flow *flow;
unsigned long key;
key = remote_node << 32 | remote_port;
rcu_read_lock();
flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
rcu_read_unlock();
if (flow) {
spin_lock(&flow->resume_tx.lock);
flow->pending = 0;
spin_unlock(&flow->resume_tx.lock);
wake_up_interruptible_all(&flow->resume_tx);
}
consume_skb(skb);
}
/**
* qrtr_tx_wait() - flow control for outgoing packets
* @node: qrtr_node that the packet is to be send to
* @dest_node: node id of the destination
* @dest_port: port number of the destination
* @type: type of message
*
* The flow control scheme is based around the low and high "watermarks". When
* the low watermark is passed the confirm_rx flag is set on the outgoing
* message, which will trigger the remote to send a control message of the type
* QRTR_TYPE_RESUME_TX to reset the counter. If the high watermark is hit
* further transmision should be paused.
*
* Return: 1 if confirm_rx should be set, 0 otherwise or errno failure
*/
static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port,
int type)
{
unsigned long key = (u64)dest_node << 32 | dest_port;
struct qrtr_tx_flow *flow;
int confirm_rx = 0;
int ret;
/* Never set confirm_rx on non-data packets */
if (type != QRTR_TYPE_DATA)
return 0;
mutex_lock(&node->qrtr_tx_lock);
flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
if (!flow) {
flow = kzalloc(sizeof(*flow), GFP_KERNEL);
if (flow) {
init_waitqueue_head(&flow->resume_tx);
radix_tree_insert(&node->qrtr_tx_flow, key, flow);
}
}
mutex_unlock(&node->qrtr_tx_lock);
/* Set confirm_rx if we where unable to find and allocate a flow */
if (!flow)
return 1;
spin_lock_irq(&flow->resume_tx.lock);
ret = wait_event_interruptible_locked_irq(flow->resume_tx,
flow->pending < QRTR_TX_FLOW_HIGH ||
flow->tx_failed ||
!node->ep);
if (ret < 0) {
confirm_rx = ret;
} else if (!node->ep) {
confirm_rx = -EPIPE;
} else if (flow->tx_failed) {
flow->tx_failed = 0;
confirm_rx = 1;
} else {
flow->pending++;
confirm_rx = flow->pending == QRTR_TX_FLOW_LOW;
}
spin_unlock_irq(&flow->resume_tx.lock);
return confirm_rx;
}
/**
* qrtr_tx_flow_failed() - flag that tx of confirm_rx flagged messages failed
* @node: qrtr_node that the packet is to be send to
* @dest_node: node id of the destination
* @dest_port: port number of the destination
*
* Signal that the transmission of a message with confirm_rx flag failed. The
* flow's "pending" counter will keep incrementing towards QRTR_TX_FLOW_HIGH,
* at which point transmission would stall forever waiting for the resume TX
* message associated with the dropped confirm_rx message.
* Work around this by marking the flow as having a failed transmission and
* cause the next transmission attempt to be sent with the confirm_rx.
*/
static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node,
int dest_port)
{
unsigned long key = (u64)dest_node << 32 | dest_port;
struct qrtr_tx_flow *flow;
rcu_read_lock();
flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
rcu_read_unlock();
if (flow) {
spin_lock_irq(&flow->resume_tx.lock);
flow->tx_failed = 1;
spin_unlock_irq(&flow->resume_tx.lock);
}
}
/* Pass an outgoing packet socket buffer to the endpoint driver. */
static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
int type, struct sockaddr_qrtr *from,
......@@ -179,6 +332,13 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
struct qrtr_hdr_v1 *hdr;
size_t len = skb->len;
int rc = -ENODEV;
int confirm_rx;
confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type);
if (confirm_rx < 0) {
kfree_skb(skb);
return confirm_rx;
}
hdr = skb_push(skb, sizeof(*hdr));
hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
......@@ -194,7 +354,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
}
hdr->size = cpu_to_le32(len);
hdr->confirm_rx = 0;
hdr->confirm_rx = !!confirm_rx;
skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
......@@ -205,6 +365,11 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
kfree_skb(skb);
mutex_unlock(&node->ep_lock);
/* Need to ensure that a subsequent message carries the otherwise lost
* confirm_rx flag if we dropped this one */
if (rc && confirm_rx)
qrtr_tx_flow_failed(node, to->sq_node, to->sq_port);
return rc;
}
......@@ -215,11 +380,12 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
{
struct qrtr_node *node;
unsigned long flags;
mutex_lock(&qrtr_node_lock);
spin_lock_irqsave(&qrtr_nodes_lock, flags);
node = radix_tree_lookup(&qrtr_nodes, nid);
node = qrtr_node_acquire(node);
mutex_unlock(&qrtr_node_lock);
spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
return node;
}
......@@ -231,13 +397,15 @@ static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
*/
static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
{
unsigned long flags;
if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO)
return;
mutex_lock(&qrtr_node_lock);
spin_lock_irqsave(&qrtr_nodes_lock, flags);
radix_tree_insert(&qrtr_nodes, nid, node);
node->nid = nid;
mutex_unlock(&qrtr_node_lock);
spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
}
/**
......@@ -253,6 +421,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
struct qrtr_node *node = ep->node;
const struct qrtr_hdr_v1 *v1;
const struct qrtr_hdr_v2 *v2;
struct qrtr_sock *ipc;
struct sk_buff *skb;
struct qrtr_cb *cb;
unsigned int size;
......@@ -311,13 +480,26 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
if (len != ALIGN(size, 4) + hdrlen)
goto err;
if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
cb->type != QRTR_TYPE_RESUME_TX)
goto err;
skb_put_data(skb, data + hdrlen, size);
skb_queue_tail(&node->rx_queue, skb);
schedule_work(&node->work);
qrtr_node_assign(node, cb->src_node);
if (cb->type == QRTR_TYPE_RESUME_TX) {
qrtr_tx_resume(node, skb);
} else {
ipc = qrtr_port_lookup(cb->dst_port);
if (!ipc)
goto err;
if (sock_queue_rcv_skb(&ipc->sk, skb))
goto err;
qrtr_port_put(ipc);
}
return 0;
......@@ -352,61 +534,6 @@ static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
return skb;
}
static struct qrtr_sock *qrtr_port_lookup(int port);
static void qrtr_port_put(struct qrtr_sock *ipc);
/* Handle and route a received packet.
*
* This will auto-reply with resume-tx packet as necessary.
*/
static void qrtr_node_rx_work(struct work_struct *work)
{
struct qrtr_node *node = container_of(work, struct qrtr_node, work);
struct qrtr_ctrl_pkt *pkt;
struct sockaddr_qrtr dst;
struct sockaddr_qrtr src;
struct sk_buff *skb;
while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
struct qrtr_sock *ipc;
struct qrtr_cb *cb;
int confirm;
cb = (struct qrtr_cb *)skb->cb;
src.sq_node = cb->src_node;
src.sq_port = cb->src_port;
dst.sq_node = cb->dst_node;
dst.sq_port = cb->dst_port;
confirm = !!cb->confirm_rx;
qrtr_node_assign(node, cb->src_node);
ipc = qrtr_port_lookup(cb->dst_port);
if (!ipc) {
kfree_skb(skb);
} else {
if (sock_queue_rcv_skb(&ipc->sk, skb))
kfree_skb(skb);
qrtr_port_put(ipc);
}
if (confirm) {
skb = qrtr_alloc_ctrl_packet(&pkt);
if (!skb)
break;
pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
pkt->client.node = cpu_to_le32(dst.sq_node);
pkt->client.port = cpu_to_le32(dst.sq_port);
if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
&dst, &src))
break;
}
}
}
/**
* qrtr_endpoint_register() - register a new endpoint
* @ep: endpoint to register
......@@ -426,13 +553,15 @@ int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid)
if (!node)
return -ENOMEM;
INIT_WORK(&node->work, qrtr_node_rx_work);
kref_init(&node->ref);
mutex_init(&node->ep_lock);
skb_queue_head_init(&node->rx_queue);
node->nid = QRTR_EP_NID_AUTO;
node->ep = ep;
INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL);
mutex_init(&node->qrtr_tx_lock);
qrtr_node_assign(node, nid);
mutex_lock(&qrtr_node_lock);
......@@ -453,8 +582,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
struct qrtr_node *node = ep->node;
struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
struct radix_tree_iter iter;
struct qrtr_ctrl_pkt *pkt;
struct qrtr_tx_flow *flow;
struct sk_buff *skb;
void __rcu **slot;
mutex_lock(&node->ep_lock);
node->ep = NULL;
......@@ -467,6 +599,14 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
}
/* Wake up any transmitters waiting for resume-tx from the node */
mutex_lock(&node->qrtr_tx_lock);
radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
flow = *slot;
wake_up_interruptible_all(&flow->resume_tx);
}
mutex_unlock(&node->qrtr_tx_lock);
qrtr_node_release(node);
ep->node = NULL;
}
......@@ -483,11 +623,11 @@ static struct qrtr_sock *qrtr_port_lookup(int port)
if (port == QRTR_PORT_CTRL)
port = 0;
mutex_lock(&qrtr_port_lock);
rcu_read_lock();
ipc = idr_find(&qrtr_ports, port);
if (ipc)
sock_hold(&ipc->sk);
mutex_unlock(&qrtr_port_lock);
rcu_read_unlock();
return ipc;
}
......@@ -529,6 +669,10 @@ static void qrtr_port_remove(struct qrtr_sock *ipc)
mutex_lock(&qrtr_port_lock);
idr_remove(&qrtr_ports, port);
mutex_unlock(&qrtr_port_lock);
/* Ensure that if qrtr_port_lookup() did enter the RCU read section we
* wait for it to up increment the refcount */
synchronize_rcu();
}
/* Assign port number to socket.
......@@ -816,6 +960,34 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
return rc;
}
static int qrtr_send_resume_tx(struct qrtr_cb *cb)
{
struct sockaddr_qrtr remote = { AF_QIPCRTR, cb->src_node, cb->src_port };
struct sockaddr_qrtr local = { AF_QIPCRTR, cb->dst_node, cb->dst_port };
struct qrtr_ctrl_pkt *pkt;
struct qrtr_node *node;
struct sk_buff *skb;
int ret;
node = qrtr_node_lookup(remote.sq_node);
if (!node)
return -EINVAL;
skb = qrtr_alloc_ctrl_packet(&pkt);
if (!skb)
return -ENOMEM;
pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
pkt->client.node = cpu_to_le32(cb->dst_node);
pkt->client.port = cpu_to_le32(cb->dst_port);
ret = qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, &local, &remote);
qrtr_node_release(node);
return ret;
}
static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
......@@ -838,6 +1010,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
release_sock(sk);
return rc;
}
cb = (struct qrtr_cb *)skb->cb;
copied = skb->len;
if (copied > size) {
......@@ -851,7 +1024,6 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
rc = copied;
if (addr) {
cb = (struct qrtr_cb *)skb->cb;
addr->sq_family = AF_QIPCRTR;
addr->sq_node = cb->src_node;
addr->sq_port = cb->src_port;
......@@ -859,6 +1031,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
}
out:
if (cb->confirm_rx)
qrtr_send_resume_tx(cb);
skb_free_datagram(sk, skb);
release_sock(sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment