Commit c7cba832 authored by David S. Miller

Merge branch 'net_sched-allow-use-of-hrtimer-slack'

Eric Dumazet says:

====================
net_sched: allow use of hrtimer slack

Packet schedulers have used hrtimers with exact expiry times.

Some of them can afford having a slack, in order to reduce
the number of timer interrupts and feed bigger batches
to increase efficiency.

FQ for example does not care if throttled packets are
sent with an additional (small) delay.

Original observation of having maybe too many interrupts
was made by Willem de Bruijn.

v2: added strict netlink checking (Jakub Kicinski)
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 24ee8651 583396f4
...@@ -75,7 +75,15 @@ struct qdisc_watchdog { ...@@ -75,7 +75,15 @@ struct qdisc_watchdog {
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
clockid_t clockid); clockid_t clockid);
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
u64 delta_ns);
/* Schedule the qdisc watchdog to fire at an exact time.
 *
 * @wd:      watchdog to (re)program
 * @expires: absolute expiry time in nanoseconds
 *
 * Thin wrapper around qdisc_watchdog_schedule_range_ns() with a
 * zero-length slack window, i.e. no tolerance past @expires.
 */
static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd,
					      u64 expires)
{
	/* Returning a void expression from a void function is a GCC
	 * extension (ISO C 6.8.6.4 forbids it); call it plainly instead.
	 */
	qdisc_watchdog_schedule_range_ns(wd, expires, 0ULL);
}
static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd,
psched_time_t expires) psched_time_t expires)
......
...@@ -911,6 +911,8 @@ enum { ...@@ -911,6 +911,8 @@ enum {
TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */
TCA_FQ_TIMER_SLACK, /* timer slack */
__TCA_FQ_MAX __TCA_FQ_MAX
}; };
......
...@@ -618,21 +618,28 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) ...@@ -618,21 +618,28 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
} }
EXPORT_SYMBOL(qdisc_watchdog_init); EXPORT_SYMBOL(qdisc_watchdog_init);
/* qdisc_watchdog_schedule_range_ns - arm the qdisc watchdog hrtimer
 * @wd:       watchdog to (re)program
 * @expires:  absolute expiry time, in nanoseconds
 * @delta_ns: acceptable slack past @expires; 0 requests exact expiry
 *
 * Allowing a slack window lets the hrtimer subsystem coalesce nearby
 * expirations, reducing the number of timer interrupts.
 *
 * NOTE(review): SOURCE is a side-by-side diff scrape with old/new
 * columns fused; this is the reconstructed post-patch body.
 */
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	/* A deactivated qdisc must not re-arm its timer. */
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (wd->last_expires - expires <= delta_ns)
			return;
	}

	wd->last_expires = expires;
	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{ {
......
...@@ -121,6 +121,8 @@ struct fq_sched_data { ...@@ -121,6 +121,8 @@ struct fq_sched_data {
u64 stat_flows_plimit; u64 stat_flows_plimit;
u64 stat_pkts_too_long; u64 stat_pkts_too_long;
u64 stat_allocation_errors; u64 stat_allocation_errors;
u32 timer_slack; /* hrtimer slack in ns */
struct qdisc_watchdog watchdog; struct qdisc_watchdog watchdog;
}; };
...@@ -504,8 +506,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) ...@@ -504,8 +506,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
head = &q->old_flows; head = &q->old_flows;
if (!head->first) { if (!head->first) {
if (q->time_next_delayed_flow != ~0ULL) if (q->time_next_delayed_flow != ~0ULL)
qdisc_watchdog_schedule_ns(&q->watchdog, qdisc_watchdog_schedule_range_ns(&q->watchdog,
q->time_next_delayed_flow); q->time_next_delayed_flow,
q->timer_slack);
return NULL; return NULL;
} }
} }
...@@ -735,6 +738,8 @@ static int fq_resize(struct Qdisc *sch, u32 log) ...@@ -735,6 +738,8 @@ static int fq_resize(struct Qdisc *sch, u32 log)
} }
static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_UNSPEC] = { .strict_start_type = TCA_FQ_TIMER_SLACK },
[TCA_FQ_PLIMIT] = { .type = NLA_U32 }, [TCA_FQ_PLIMIT] = { .type = NLA_U32 },
[TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 }, [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 },
[TCA_FQ_QUANTUM] = { .type = NLA_U32 }, [TCA_FQ_QUANTUM] = { .type = NLA_U32 },
...@@ -747,6 +752,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { ...@@ -747,6 +752,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 }, [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 },
}; };
static int fq_change(struct Qdisc *sch, struct nlattr *opt, static int fq_change(struct Qdisc *sch, struct nlattr *opt,
...@@ -833,6 +839,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, ...@@ -833,6 +839,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
q->ce_threshold = (u64)NSEC_PER_USEC * q->ce_threshold = (u64)NSEC_PER_USEC *
nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]); nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
if (tb[TCA_FQ_TIMER_SLACK])
q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
if (!err) { if (!err) {
sch_tree_unlock(sch); sch_tree_unlock(sch);
err = fq_resize(sch, fq_log); err = fq_resize(sch, fq_log);
...@@ -884,6 +893,8 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, ...@@ -884,6 +893,8 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->orphan_mask = 1024 - 1; q->orphan_mask = 1024 - 1;
q->low_rate_threshold = 550000 / 8; q->low_rate_threshold = 550000 / 8;
q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
/* Default ce_threshold of 4294 seconds */ /* Default ce_threshold of 4294 seconds */
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
...@@ -924,7 +935,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) ...@@ -924,7 +935,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
q->low_rate_threshold) || q->low_rate_threshold) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
goto nla_put_failure; goto nla_put_failure;
return nla_nest_end(skb, opts); return nla_nest_end(skb, opts);
...@@ -947,7 +959,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) ...@@ -947,7 +959,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.flows_plimit = q->stat_flows_plimit; st.flows_plimit = q->stat_flows_plimit;
st.pkts_too_long = q->stat_pkts_too_long; st.pkts_too_long = q->stat_pkts_too_long;
st.allocation_errors = q->stat_allocation_errors; st.allocation_errors = q->stat_allocation_errors;
st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns(); st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack -
ktime_get_ns();
st.flows = q->flows; st.flows = q->flows;
st.inactive_flows = q->inactive_flows; st.inactive_flows = q->inactive_flows;
st.throttled_flows = q->throttled_flows; st.throttled_flows = q->throttled_flows;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment