Commit 61675fea authored by David S. Miller

Merge branch 'xen-netback-next'

Zoltan Kiss says:

====================
xen-netback: Changes around carrier handling

This series starts using carrier off as a way to purge packets when the guest is
not able (or willing) to receive them. It is a much faster way to get rid of
packets waiting for an overwhelmed guest.
The first patch changes the places where the current netback code relies on
netif_carrier_ok.
The second turns off the carrier if the guest times out on a queue, and only
turns it on again if that queue (or queues) resurrects.
====================
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents aef4f5b6 f34a4cf9
...@@ -176,9 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */ ...@@ -176,9 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xen_netif_rx_back_ring rx; struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue; struct sk_buff_head rx_queue;
RING_IDX rx_last_skb_slots; RING_IDX rx_last_skb_slots;
bool rx_queue_purge; unsigned long status;
struct timer_list wake_queue; struct timer_list rx_stalled;
struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
...@@ -198,6 +198,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */ ...@@ -198,6 +198,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xenvif_stats stats; struct xenvif_stats stats;
}; };
/* Bit numbers for the 'status' words of struct xenvif and struct
 * xenvif_queue; they are used with set_bit()/test_bit()/clear_bit().
 */
enum state_bit_shift {
	/* vif->status: set by xenvif_carrier_on() and cleared by
	 * xenvif_carrier_off(); while set, the vif counts as connected.
	 */
	VIF_STATUS_CONNECTED = 0,
	/* queue->status: raised for the RX thread once queuing has been
	 * stopped (in start_xmit) and then either the stall timer expired
	 * or an RX interrupt arrived.
	 */
	QUEUE_STATUS_RX_PURGE_EVENT = 1,
	/* queue->status: marks the queue as the reason the carrier went
	 * off, so the interrupt handler knows to kick the thread. Only a
	 * queue that brought the carrier down may turn it back on.
	 */
	QUEUE_STATUS_RX_STALLED = 2,
};
struct xenvif { struct xenvif {
/* Unique identifier for this interface. */ /* Unique identifier for this interface. */
domid_t domid; domid_t domid;
...@@ -220,6 +234,7 @@ struct xenvif { ...@@ -220,6 +234,7 @@ struct xenvif {
* frontend is rogue. * frontend is rogue.
*/ */
bool disabled; bool disabled;
unsigned long status;
/* Queues */ /* Queues */
struct xenvif_queue *queues; struct xenvif_queue *queues;
......
...@@ -55,7 +55,8 @@ static inline void xenvif_stop_queue(struct xenvif_queue *queue) ...@@ -55,7 +55,8 @@ static inline void xenvif_stop_queue(struct xenvif_queue *queue)
int xenvif_schedulable(struct xenvif *vif) int xenvif_schedulable(struct xenvif *vif)
{ {
return netif_running(vif->dev) && netif_carrier_ok(vif->dev); return netif_running(vif->dev) &&
test_bit(VIF_STATUS_CONNECTED, &vif->status);
} }
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
...@@ -77,8 +78,12 @@ int xenvif_poll(struct napi_struct *napi, int budget) ...@@ -77,8 +78,12 @@ int xenvif_poll(struct napi_struct *napi, int budget)
/* This vif is rogue, we pretend we've there is nothing to do /* This vif is rogue, we pretend we've there is nothing to do
* for this vif to deschedule it from NAPI. But this interface * for this vif to deschedule it from NAPI. But this interface
* will be turned off in thread context later. * will be turned off in thread context later.
* Also, if a guest doesn't post enough slots to receive data on one of
* its queues, the carrier goes down and NAPI is descheduled here so
* the guest can't send more packets until it's ready to receive.
*/ */
if (unlikely(queue->vif->disabled)) { if (unlikely(queue->vif->disabled ||
!netif_carrier_ok(queue->vif->dev))) {
napi_complete(napi); napi_complete(napi);
return 0; return 0;
} }
...@@ -96,7 +101,16 @@ int xenvif_poll(struct napi_struct *napi, int budget) ...@@ -96,7 +101,16 @@ int xenvif_poll(struct napi_struct *napi, int budget)
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{ {
struct xenvif_queue *queue = dev_id; struct xenvif_queue *queue = dev_id;
struct netdev_queue *net_queue =
netdev_get_tx_queue(queue->vif->dev, queue->id);
/* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
* the carrier went down and this queue was previously blocked
*/
if (unlikely(netif_tx_queue_stopped(net_queue) ||
(!netif_carrier_ok(queue->vif->dev) &&
test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue); xenvif_kick_thread(queue);
return IRQ_HANDLED; return IRQ_HANDLED;
...@@ -124,16 +138,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue) ...@@ -124,16 +138,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
} }
/* Callback to wake the queue and drain it on timeout */ /* Callback to wake the queue's thread and turn the carrier off on timeout */
static void xenvif_wake_queue_callback(unsigned long data) static void xenvif_rx_stalled(unsigned long data)
{ {
struct xenvif_queue *queue = (struct xenvif_queue *)data; struct xenvif_queue *queue = (struct xenvif_queue *)data;
if (xenvif_queue_stopped(queue)) { if (xenvif_queue_stopped(queue)) {
netdev_err(queue->vif->dev, "draining TX queue\n"); set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
queue->rx_queue_purge = true;
xenvif_kick_thread(queue); xenvif_kick_thread(queue);
xenvif_wake_queue(queue);
} }
} }
...@@ -182,11 +194,11 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -182,11 +194,11 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
* drain. * drain.
*/ */
if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) { if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
queue->wake_queue.function = xenvif_wake_queue_callback; queue->rx_stalled.function = xenvif_rx_stalled;
queue->wake_queue.data = (unsigned long)queue; queue->rx_stalled.data = (unsigned long)queue;
xenvif_stop_queue(queue); xenvif_stop_queue(queue);
mod_timer(&queue->wake_queue, mod_timer(&queue->rx_stalled,
jiffies + rx_drain_timeout_jiffies); jiffies + rx_drain_timeout_jiffies);
} }
skb_queue_tail(&queue->rx_queue, skb); skb_queue_tail(&queue->rx_queue, skb);
...@@ -267,7 +279,7 @@ static void xenvif_down(struct xenvif *vif) ...@@ -267,7 +279,7 @@ static void xenvif_down(struct xenvif *vif)
static int xenvif_open(struct net_device *dev) static int xenvif_open(struct net_device *dev)
{ {
struct xenvif *vif = netdev_priv(dev); struct xenvif *vif = netdev_priv(dev);
if (netif_carrier_ok(dev)) if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
xenvif_up(vif); xenvif_up(vif);
netif_tx_start_all_queues(dev); netif_tx_start_all_queues(dev);
return 0; return 0;
...@@ -276,7 +288,7 @@ static int xenvif_open(struct net_device *dev) ...@@ -276,7 +288,7 @@ static int xenvif_open(struct net_device *dev)
static int xenvif_close(struct net_device *dev) static int xenvif_close(struct net_device *dev)
{ {
struct xenvif *vif = netdev_priv(dev); struct xenvif *vif = netdev_priv(dev);
if (netif_carrier_ok(dev)) if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
xenvif_down(vif); xenvif_down(vif);
netif_tx_stop_all_queues(dev); netif_tx_stop_all_queues(dev);
return 0; return 0;
...@@ -514,7 +526,7 @@ int xenvif_init_queue(struct xenvif_queue *queue) ...@@ -514,7 +526,7 @@ int xenvif_init_queue(struct xenvif_queue *queue)
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
} }
init_timer(&queue->wake_queue); init_timer(&queue->rx_stalled);
netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll, netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
XENVIF_NAPI_WEIGHT); XENVIF_NAPI_WEIGHT);
...@@ -528,6 +540,7 @@ void xenvif_carrier_on(struct xenvif *vif) ...@@ -528,6 +540,7 @@ void xenvif_carrier_on(struct xenvif *vif)
if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
dev_set_mtu(vif->dev, ETH_DATA_LEN); dev_set_mtu(vif->dev, ETH_DATA_LEN);
netdev_update_features(vif->dev); netdev_update_features(vif->dev);
set_bit(VIF_STATUS_CONNECTED, &vif->status);
netif_carrier_on(vif->dev); netif_carrier_on(vif->dev);
if (netif_running(vif->dev)) if (netif_running(vif->dev))
xenvif_up(vif); xenvif_up(vif);
...@@ -625,9 +638,11 @@ void xenvif_carrier_off(struct xenvif *vif) ...@@ -625,9 +638,11 @@ void xenvif_carrier_off(struct xenvif *vif)
struct net_device *dev = vif->dev; struct net_device *dev = vif->dev;
rtnl_lock(); rtnl_lock();
netif_carrier_off(dev); /* discard queued packets */ if (test_and_clear_bit(VIF_STATUS_CONNECTED, &vif->status)) {
if (netif_running(dev)) netif_carrier_off(dev); /* discard queued packets */
xenvif_down(vif); if (netif_running(dev))
xenvif_down(vif);
}
rtnl_unlock(); rtnl_unlock();
} }
...@@ -656,14 +671,13 @@ void xenvif_disconnect(struct xenvif *vif) ...@@ -656,14 +671,13 @@ void xenvif_disconnect(struct xenvif *vif)
unsigned int num_queues = vif->num_queues; unsigned int num_queues = vif->num_queues;
unsigned int queue_index; unsigned int queue_index;
if (netif_carrier_ok(vif->dev)) xenvif_carrier_off(vif);
xenvif_carrier_off(vif);
for (queue_index = 0; queue_index < num_queues; ++queue_index) { for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index]; queue = &vif->queues[queue_index];
if (queue->task) { if (queue->task) {
del_timer_sync(&queue->wake_queue); del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task); kthread_stop(queue->task);
queue->task = NULL; queue->task = NULL;
} }
...@@ -705,16 +719,12 @@ void xenvif_free(struct xenvif *vif) ...@@ -705,16 +719,12 @@ void xenvif_free(struct xenvif *vif)
/* Here we want to avoid timeout messages if an skb can be legitimately /* Here we want to avoid timeout messages if an skb can be legitimately
* stuck somewhere else. Realistically this could be an another vif's * stuck somewhere else. Realistically this could be an another vif's
* internal or QDisc queue. That another vif also has this * internal or QDisc queue. That another vif also has this
* rx_drain_timeout_msecs timeout, but the timer only ditches the * rx_drain_timeout_msecs timeout, so give it time to drain out.
* internal queue. After that, the QDisc queue can put in worst case * Although if that other guest wakes up just before its timeout happens
* XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's * and takes only one skb from QDisc, it can hold onto other skbs for a
* internal queue, so we need several rounds of such timeouts until we * longer period.
* can be sure that no another vif should have skb's from us. We are
* not sending more skb's, so newly stuck packets are not interesting
* for us here.
*/ */
unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) * unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
unregister_netdev(vif->dev); unregister_netdev(vif->dev);
......
...@@ -1869,8 +1869,7 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) ...@@ -1869,8 +1869,7 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
static inline int rx_work_todo(struct xenvif_queue *queue) static inline int rx_work_todo(struct xenvif_queue *queue)
{ {
return (!skb_queue_empty(&queue->rx_queue) && return (!skb_queue_empty(&queue->rx_queue) &&
xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) || xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
queue->rx_queue_purge;
} }
static inline int tx_work_todo(struct xenvif_queue *queue) static inline int tx_work_todo(struct xenvif_queue *queue)
...@@ -1935,6 +1934,75 @@ static void xenvif_start_queue(struct xenvif_queue *queue) ...@@ -1935,6 +1934,75 @@ static void xenvif_start_queue(struct xenvif_queue *queue)
xenvif_wake_queue(queue); xenvif_wake_queue(queue);
} }
/* Runs in the queue's own RX thread and deals with a guest that has not
 * posted enough requests on the receive ring.
 *
 * xenvif_start_xmit stops the QDisc queue and arms the rx_stalled timer.
 * Afterwards either that timer expires or the guest posts new requests and
 * raises an interrupt; both paths end up here. Three outcomes are possible:
 *  - still not enough slots: take the carrier down (if it is still up) and
 *    drop everything waiting in the internal queue;
 *  - slots available, carrier up: cancel the timer and restart queuing;
 *  - slots available, carrier down: bring the carrier and all queues back.
 */
static void xenvif_rx_purge_event(struct xenvif_queue *queue)
{
	struct net_device *dev = queue->vif->dev;
	unsigned int num_queues;
	unsigned int idx;
	int carrier_was_up;
	/* Room for the last skb that did not fit, or at least one slot */
	int slots_wanted = queue->rx_last_skb_slots;

	if (!slots_wanted)
		slots_wanted = 1;

	/* If the guest posts new slots after this point, the RX interrupt is
	 * expected to set QUEUE_STATUS_RX_PURGE_EVENT and wake the thread
	 * again.
	 */
	set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);

	if (!xenvif_rx_ring_slots_available(queue, slots_wanted)) {
		rtnl_lock();
		carrier_was_up = netif_carrier_ok(dev);
		/* Carrier still up means the timer fired with no slots
		 * posted: switch off everything except the interrupts.
		 * Carrier already down means some other queue probably did
		 * that; either way nothing may stay in the internal queue.
		 */
		if (carrier_was_up)
			netif_carrier_off(dev);
		skb_queue_purge(&queue->rx_queue);
		queue->rx_last_skb_slots = 0;
		if (carrier_was_up && net_ratelimit())
			netdev_err(dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
		rtnl_unlock();
		return;
	}

	if (netif_carrier_ok(dev)) {
		/* Queuing was stopped, but the guest posted new requests and
		 * sent an interrupt.
		 */
		clear_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
		del_timer_sync(&queue->rx_stalled);
		xenvif_start_queue(queue);
		return;
	}

	/* The carrier was down, and an interrupt kicked the thread after new
	 * requests were posted.
	 */
	num_queues = queue->vif->num_queues;
	clear_bit(QUEUE_STATUS_RX_STALLED, &queue->status);

	rtnl_lock();
	netif_carrier_on(dev);
	netif_tx_wake_all_queues(dev);
	rtnl_unlock();

	for (idx = 0; idx < num_queues; idx++)
		xenvif_napi_schedule_or_enable_events(&queue->vif->queues[idx]);

	if (net_ratelimit())
		netdev_err(dev, "Carrier on again\n");
}
int xenvif_kthread_guest_rx(void *data) int xenvif_kthread_guest_rx(void *data)
{ {
struct xenvif_queue *queue = data; struct xenvif_queue *queue = data;
...@@ -1944,8 +2012,12 @@ int xenvif_kthread_guest_rx(void *data) ...@@ -1944,8 +2012,12 @@ int xenvif_kthread_guest_rx(void *data)
wait_event_interruptible(queue->wq, wait_event_interruptible(queue->wq,
rx_work_todo(queue) || rx_work_todo(queue) ||
queue->vif->disabled || queue->vif->disabled ||
test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
kthread_should_stop()); kthread_should_stop());
if (kthread_should_stop())
break;
/* This frontend is found to be rogue, disable it in /* This frontend is found to be rogue, disable it in
* kthread context. Currently this is only set when * kthread context. Currently this is only set when
* netback finds out frontend sends malformed packet, * netback finds out frontend sends malformed packet,
...@@ -1953,26 +2025,23 @@ int xenvif_kthread_guest_rx(void *data) ...@@ -1953,26 +2025,23 @@ int xenvif_kthread_guest_rx(void *data)
* context so we defer it here, if this thread is * context so we defer it here, if this thread is
* associated with queue 0. * associated with queue 0.
*/ */
if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0)) if (unlikely(queue->vif->disabled && queue->id == 0))
xenvif_carrier_off(queue->vif); xenvif_carrier_off(queue->vif);
else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
if (kthread_should_stop()) &queue->status))) {
break; xenvif_rx_purge_event(queue);
} else if (!netif_carrier_ok(queue->vif->dev)) {
if (queue->rx_queue_purge) { /* Another queue stalled and turned the carrier off, so
* purge the internal queue of queues which were not
* blocked
*/
skb_queue_purge(&queue->rx_queue); skb_queue_purge(&queue->rx_queue);
queue->rx_queue_purge = false; queue->rx_last_skb_slots = 0;
} }
if (!skb_queue_empty(&queue->rx_queue)) if (!skb_queue_empty(&queue->rx_queue))
xenvif_rx_action(queue); xenvif_rx_action(queue);
if (skb_queue_empty(&queue->rx_queue) &&
xenvif_queue_stopped(queue)) {
del_timer_sync(&queue->wake_queue);
xenvif_start_queue(queue);
}
cond_resched(); cond_resched();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment