Commit 4b9c7768 authored by David S. Miller's avatar David S. Miller

Merge branch 'sched-refactor-NOLOCK-qdiscs'

Paolo Abeni says:

====================
sched: refactor NOLOCK qdiscs

With the introduction of NOLOCK qdiscs, pfifo_fast performances in the
uncontended scenario degraded measurably, especially after the commit
eb82a994 ("net: sched, fix OOO packets with pfifo_fast").

This series restore the pfifo_fast performances in such scenario back the
previous level, mainly reducing the number of atomic operations required to
perform the qdisc_run() call. Even performances in the contended scenario
increase measurably.

Note: This series is on top of:

sched: manipulate __QDISC_STATE_RUNNING in qdisc_run_* helpers
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b9f672af 021a17ed
......@@ -97,6 +97,11 @@ static inline bool skb_array_empty_any(struct skb_array *a)
return ptr_ring_empty_any(&a->ring);
}
static inline struct sk_buff *__skb_array_consume(struct skb_array *a)
{
return __ptr_ring_consume(&a->ring);
}
static inline struct sk_buff *skb_array_consume(struct skb_array *a)
{
return ptr_ring_consume(&a->ring);
......
......@@ -30,7 +30,6 @@ struct qdisc_rate_table {
enum qdisc_state_t {
__QDISC_STATE_SCHED,
__QDISC_STATE_DEACTIVATED,
__QDISC_STATE_RUNNING,
};
struct qdisc_size_table {
......@@ -102,6 +101,7 @@ struct Qdisc {
refcount_t refcnt;
spinlock_t busylock ____cacheline_aligned_in_smp;
spinlock_t seqlock;
};
static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
......@@ -111,17 +111,17 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
refcount_inc(&qdisc->refcnt);
}
static inline bool qdisc_is_running(const struct Qdisc *qdisc)
static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK)
return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
return spin_is_locked(&qdisc->seqlock);
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK) {
if (test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state))
if (!spin_trylock(&qdisc->seqlock))
return false;
} else if (qdisc_is_running(qdisc)) {
return false;
......@@ -138,7 +138,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
{
write_seqcount_end(&qdisc->running);
if (qdisc->flags & TCQ_F_NOLOCK)
clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
spin_unlock(&qdisc->seqlock);
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
......
......@@ -656,7 +656,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (__skb_array_empty(q))
continue;
skb = skb_array_consume_bh(q);
skb = __skb_array_consume(q);
}
if (likely(skb)) {
qdisc_qstats_cpu_backlog_dec(qdisc, skb);
......@@ -697,7 +697,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
if (!q->ring.queue)
continue;
while ((skb = skb_array_consume_bh(q)) != NULL)
while ((skb = __skb_array_consume(q)) != NULL)
kfree_skb(skb);
}
......@@ -858,6 +858,11 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
seqcount_init(&sch->running);
lockdep_set_class(&sch->running,
dev->qdisc_running_key ?: &qdisc_running_key);
......@@ -1097,6 +1102,10 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
bool nolock = qdisc->flags & TCQ_F_NOLOCK;
if (nolock)
spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
if (!(qdisc->flags & TCQ_F_BUILTIN))
......@@ -1106,6 +1115,8 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment