Commit 8596e589 authored by Linus Torvalds

Merge tag 'timers-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer updates from Thomas Gleixner:
 "Updates for timekeeping, timers and related drivers:

  Core code:

   - Cure a couple of correctness issues in the posix CPU timer code to
     prevent the tick dependency for NOHZ full from being kept alive for
     no reason.

   - Avoid expensive double reprogramming of the clockevent device in
     hrtimer_start_range_ns().

   - Avoid pointless SMP function calls when the clock is set, so that
     CPUs which do not have any affected timers queued are not disturbed.

   - Make the clocksource watchdog test work correctly when CONFIG_HZ is
     less than 100.

  Drivers:

   - Prefer the ARM architected timer over the Exynos timer which is way
     more expensive to access.

   - Add device tree bindings for new Ingenic SoCs.

   - The usual improvements and cleanups all over the place"

* tag 'timers-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits)
  clocksource: Make clocksource watchdog test safe for slow-HZ systems
  dt-bindings: timer: Add ABIs for new Ingenic SoCs
  clocksource/drivers/fttmr010: Pass around less pointers
  clocksource/drivers/mediatek: Optimize systimer irq clear flow on shutdown
  clocksource/drivers/ingenic: Use bitfield macro helpers
  clocksource/drivers/sh_cmt: Fix wrong setting if don't request IRQ for clock source channel
  dt-bindings: timer: convert rockchip,rk-timer.txt to YAML
  clocksource/drivers/exynos_mct: Mark MCT device as CLOCK_EVT_FEAT_PERCPU
  clocksource/drivers/exynos_mct: Prioritise Arm arch timer on arm64
  hrtimer: Unbreak hrtimer_force_reprogram()
  hrtimer: Use raw_cpu_ptr() in clock_was_set()
  hrtimer: Avoid more SMP function calls in clock_was_set()
  hrtimer: Avoid unnecessary SMP function calls in clock_was_set()
  hrtimer: Add bases argument to clock_was_set()
  time/timekeeping: Avoid invoking clock_was_set() twice
  timekeeping: Distangle resume and clock-was-set events
  timerfd: Provide timerfd_resume()
  hrtimer: Force clock_was_set() handling for the HIGHRES=n, NOHZ=y case
  hrtimer: Ensure timerfd notification for HIGHRES=n
  hrtimer: Consolidate reprogramming code
  ...
parents bed91667 d25a0252
Rockchip rk timer

Required properties:
- compatible: should be:
	"rockchip,rv1108-timer", "rockchip,rk3288-timer": for Rockchip RV1108
	"rockchip,rk3036-timer", "rockchip,rk3288-timer": for Rockchip RK3036
	"rockchip,rk3066-timer", "rockchip,rk3288-timer": for Rockchip RK3066
	"rockchip,rk3188-timer", "rockchip,rk3288-timer": for Rockchip RK3188
	"rockchip,rk3228-timer", "rockchip,rk3288-timer": for Rockchip RK3228
	"rockchip,rk3229-timer", "rockchip,rk3288-timer": for Rockchip RK3229
	"rockchip,rk3288-timer": for Rockchip RK3288
	"rockchip,rk3368-timer", "rockchip,rk3288-timer": for Rockchip RK3368
	"rockchip,rk3399-timer": for Rockchip RK3399
- reg: base address of the timer register starting with TIMERS CONTROL register
- interrupts: should contain the interrupts for Timer0
- clocks: must contain an entry for each entry in clock-names
- clock-names: must include the following entries:
	"timer", "pclk"

Example:
	timer: timer@ff810000 {
		compatible = "rockchip,rk3288-timer";
		reg = <0xff810000 0x20>;
		interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
		clocks = <&xin24m>, <&cru PCLK_TIMER>;
		clock-names = "timer", "pclk";
	};
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
$id: http://devicetree.org/schemas/timer/rockchip,rk-timer.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Rockchip Timer Device Tree Bindings

maintainers:
  - Daniel Lezcano <daniel.lezcano@linaro.org>

properties:
  compatible:
    oneOf:
      - const: rockchip,rk3288-timer
      - const: rockchip,rk3399-timer
      - items:
          - enum:
              - rockchip,rv1108-timer
              - rockchip,rk3036-timer
              - rockchip,rk3066-timer
              - rockchip,rk3188-timer
              - rockchip,rk3228-timer
              - rockchip,rk3229-timer
              - rockchip,rk3288-timer
              - rockchip,rk3368-timer
              - rockchip,px30-timer
          - const: rockchip,rk3288-timer

  reg:
    maxItems: 1

  interrupts:
    maxItems: 1

  clocks:
    minItems: 2
    maxItems: 2

  clock-names:
    items:
      - const: pclk
      - const: timer

required:
  - compatible
  - reg
  - interrupts
  - clocks
  - clock-names

additionalProperties: false

examples:
  - |
    #include <dt-bindings/interrupt-controller/arm-gic.h>
    #include <dt-bindings/clock/rk3288-cru.h>

    timer: timer@ff810000 {
        compatible = "rockchip,rk3288-timer";
        reg = <0xff810000 0x20>;
        interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
        clocks = <&cru PCLK_TIMER>, <&xin24m>;
        clock-names = "pclk", "timer";
    };
@@ -51,6 +51,15 @@
 #define TICK_BASE_CNT	1

+#ifdef CONFIG_ARM
+/* Use values higher than ARM arch timer. See 6282edb72bed. */
+#define MCT_CLKSOURCE_RATING		450
+#define MCT_CLKEVENTS_RATING		500
+#else
+#define MCT_CLKSOURCE_RATING		350
+#define MCT_CLKEVENTS_RATING		350
+#endif
+
 enum {
 	MCT_INT_SPI,
 	MCT_INT_PPI
@@ -206,7 +215,7 @@ static void exynos4_frc_resume(struct clocksource *cs)
 static struct clocksource mct_frc = {
 	.name		= "mct-frc",
-	.rating		= 450,	/* use value higher than ARM arch timer */
+	.rating		= MCT_CLKSOURCE_RATING,
 	.read		= exynos4_frc_read,
 	.mask		= CLOCKSOURCE_MASK(32),
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
@@ -456,8 +465,9 @@ static int exynos4_mct_starting_cpu(unsigned int cpu)
 	evt->set_state_oneshot = set_state_shutdown;
 	evt->set_state_oneshot_stopped = set_state_shutdown;
 	evt->tick_resume = set_state_shutdown;
-	evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
-	evt->rating = 500;	/* use value higher than ARM arch timer */
+	evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
+			CLOCK_EVT_FEAT_PERCPU;
+	evt->rating = MCT_CLKEVENTS_RATING;

 	exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
...
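Note: the clocksource core always selects the highest-rated registered clocksource, so lowering the MCT ratings below the arm64 architected timer (rating 400) is enough to deprioritise it without any special-casing. A minimal sketch of a rated clocksource, not part of this series; the demo_* names are illustrative:

	#include <linux/clocksource.h>

	static u64 demo_read(struct clocksource *cs)
	{
		return 0;	/* a real driver reads its counter register here */
	}

	static struct clocksource demo_cs = {
		.name	= "demo",
		.rating	= 350,	/* below the arch timer (400), so it loses */
		.read	= demo_read,
		.mask	= CLOCKSOURCE_MASK(32),
		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
	};

	/* registration: clocksource_register_hz(&demo_cs, 24000000); */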
@@ -4,6 +4,7 @@
  * Copyright (c) 2020 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
  */

+#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
@@ -34,8 +35,6 @@
 /* bits within the OSTCCR register */
 #define OSTCCR_PRESCALE1_MASK	0x3
 #define OSTCCR_PRESCALE2_MASK	0xc
-#define OSTCCR_PRESCALE1_LSB	0
-#define OSTCCR_PRESCALE2_LSB	2

 /* bits within the OSTCR register */
 #define OSTCR_OST1CLR	BIT(0)
@@ -98,7 +97,7 @@ static unsigned long ingenic_ost_percpu_timer_recalc_rate(struct clk_hw *hw,
 	prescale = readl(ost_clk->ost->base + info->ostccr_reg);

-	prescale = (prescale & OSTCCR_PRESCALE1_MASK) >> OSTCCR_PRESCALE1_LSB;
+	prescale = FIELD_GET(OSTCCR_PRESCALE1_MASK, prescale);

 	return parent_rate >> (prescale * 2);
 }
@@ -112,7 +111,7 @@ static unsigned long ingenic_ost_global_timer_recalc_rate(struct clk_hw *hw,
 	prescale = readl(ost_clk->ost->base + info->ostccr_reg);

-	prescale = (prescale & OSTCCR_PRESCALE2_MASK) >> OSTCCR_PRESCALE2_LSB;
+	prescale = FIELD_GET(OSTCCR_PRESCALE2_MASK, prescale);

 	return parent_rate >> (prescale * 2);
 }
@@ -151,7 +150,8 @@ static int ingenic_ost_percpu_timer_set_rate(struct clk_hw *hw, unsigned long re
 	int val;

 	val = readl(ost_clk->ost->base + info->ostccr_reg);
-	val = (val & ~OSTCCR_PRESCALE1_MASK) | (prescale << OSTCCR_PRESCALE1_LSB);
+	val &= ~OSTCCR_PRESCALE1_MASK;
+	val |= FIELD_PREP(OSTCCR_PRESCALE1_MASK, prescale);
 	writel(val, ost_clk->ost->base + info->ostccr_reg);

 	return 0;
@@ -166,7 +166,8 @@ static int ingenic_ost_global_timer_set_rate(struct clk_hw *hw, unsigned long re
 	int val;

 	val = readl(ost_clk->ost->base + info->ostccr_reg);
-	val = (val & ~OSTCCR_PRESCALE2_MASK) | (prescale << OSTCCR_PRESCALE2_LSB);
+	val &= ~OSTCCR_PRESCALE2_MASK;
+	val |= FIELD_PREP(OSTCCR_PRESCALE2_MASK, prescale);
 	writel(val, ost_clk->ost->base + info->ostccr_reg);

 	return 0;
...
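The conversion above relies on the <linux/bitfield.h> helpers, which derive the shift from the mask at compile time, so the separate *_LSB defines become redundant. A standalone sketch of the pattern (demo_* names are illustrative):

	#include <linux/bitfield.h>

	#define DEMO_PRESCALE_MASK	0xc	/* bits [3:2] */

	static unsigned int demo_get(u32 reg)
	{
		/* FIELD_GET() masks and right-shifts the field in one step */
		return FIELD_GET(DEMO_PRESCALE_MASK, reg);
	}

	static u32 demo_set(u32 reg, unsigned int prescale)
	{
		reg &= ~DEMO_PRESCALE_MASK;
		/* FIELD_PREP() shifts the value into the field position */
		reg |= FIELD_PREP(DEMO_PRESCALE_MASK, prescale);
		return reg;
	}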
@@ -579,7 +579,8 @@ static int sh_cmt_start(struct sh_cmt_channel *ch, unsigned long flag)
 	ch->flags |= flag;

 	/* setup timeout if no clockevent */
-	if ((flag == FLAG_CLOCKSOURCE) && (!(ch->flags & FLAG_CLOCKEVENT)))
+	if (ch->cmt->num_channels == 1 &&
+	    flag == FLAG_CLOCKSOURCE && (!(ch->flags & FLAG_CLOCKEVENT)))
 		__sh_cmt_set_next(ch, ch->max_match_value);
 out:
 	raw_spin_unlock_irqrestore(&ch->lock, flags);
@@ -621,8 +622,10 @@ static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs)
 static u64 sh_cmt_clocksource_read(struct clocksource *cs)
 {
 	struct sh_cmt_channel *ch = cs_to_sh_cmt(cs);
-	unsigned long flags;
 	u32 has_wrapped;
-	u64 value;
-	u32 raw;
+
+	if (ch->cmt->num_channels == 1) {
+		unsigned long flags;
+		u64 value;
+		u32 raw;
@@ -635,6 +638,9 @@ static u64 sh_cmt_clocksource_read(struct clocksource *cs)
 		raw_spin_unlock_irqrestore(&ch->lock, flags);

-	return value + raw;
+		return value + raw;
+	}
+
+	return sh_cmt_get_counter(ch, &has_wrapped);
 }

 static int sh_cmt_clocksource_enable(struct clocksource *cs)
@@ -697,7 +703,7 @@ static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch,
 	cs->disable = sh_cmt_clocksource_disable;
 	cs->suspend = sh_cmt_clocksource_suspend;
 	cs->resume = sh_cmt_clocksource_resume;
-	cs->mask = CLOCKSOURCE_MASK(sizeof(u64) * 8);
+	cs->mask = CLOCKSOURCE_MASK(ch->cmt->info->width);
 	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;

 	dev_info(&ch->cmt->pdev->dev, "ch%u: used as clock source\n",
...
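The mask change matters because CLOCKSOURCE_MASK(bits) defines the counter's wrap boundary, and the core computes deltas as (now - last) & mask. Roughly, with illustrative values:

	/*
	 * For a 32-bit CMT counter:
	 *   CLOCKSOURCE_MASK(32)              == 0x00000000ffffffff
	 *   CLOCKSOURCE_MASK(sizeof(u64) * 8) == ~0ULL (the old, wrong value)
	 * With an oversized mask, a hardware counter wrap turns the
	 * (now - last) & mask delta into a huge bogus value instead of a
	 * small one.
	 */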
@@ -271,9 +271,7 @@ static irqreturn_t ast2600_timer_interrupt(int irq, void *dev_id)
 }

 static int __init fttmr010_common_init(struct device_node *np,
-				       bool is_aspeed,
-				       int (*timer_shutdown)(struct clock_event_device *),
-				       irq_handler_t irq_handler)
+				       bool is_aspeed, bool is_ast2600)
 {
 	struct fttmr010 *fttmr010;
 	int irq;
@@ -374,8 +372,6 @@ static int __init fttmr010_common_init(struct device_node *np,
 			fttmr010->tick_rate);
 	}

-	fttmr010->timer_shutdown = timer_shutdown;
-
 	/*
 	 * Setup clockevent timer (interrupt-driven) on timer 1.
 	 */
@@ -383,8 +379,18 @@ static int __init fttmr010_common_init(struct device_node *np,
 	writel(0, fttmr010->base + TIMER1_LOAD);
 	writel(0, fttmr010->base + TIMER1_MATCH1);
 	writel(0, fttmr010->base + TIMER1_MATCH2);
-	ret = request_irq(irq, irq_handler, IRQF_TIMER,
-			  "FTTMR010-TIMER1", &fttmr010->clkevt);
+
+	if (is_ast2600) {
+		fttmr010->timer_shutdown = ast2600_timer_shutdown;
+		ret = request_irq(irq, ast2600_timer_interrupt,
+				  IRQF_TIMER, "FTTMR010-TIMER1",
+				  &fttmr010->clkevt);
+	} else {
+		fttmr010->timer_shutdown = fttmr010_timer_shutdown;
+		ret = request_irq(irq, fttmr010_timer_interrupt,
+				  IRQF_TIMER, "FTTMR010-TIMER1",
+				  &fttmr010->clkevt);
+	}
 	if (ret) {
 		pr_err("FTTMR010-TIMER1 no IRQ\n");
 		goto out_unmap;
@@ -432,23 +438,17 @@ static int __init fttmr010_common_init(struct device_node *np,
 static __init int ast2600_timer_init(struct device_node *np)
 {
-	return fttmr010_common_init(np, true,
-				    ast2600_timer_shutdown,
-				    ast2600_timer_interrupt);
+	return fttmr010_common_init(np, true, true);
 }

 static __init int aspeed_timer_init(struct device_node *np)
 {
-	return fttmr010_common_init(np, true,
-				    fttmr010_timer_shutdown,
-				    fttmr010_timer_interrupt);
+	return fttmr010_common_init(np, true, false);
 }

 static __init int fttmr010_timer_init(struct device_node *np)
 {
-	return fttmr010_common_init(np, false,
-				    fttmr010_timer_shutdown,
-				    fttmr010_timer_interrupt);
+	return fttmr010_common_init(np, false, false);
 }

 TIMER_OF_DECLARE(fttmr010, "faraday,fttmr010", fttmr010_timer_init);
...
@@ -60,9 +60,9 @@
  * SYST_CON_EN: Clock enable. Shall be set to
  *   - Start timer countdown.
  *   - Allow timeout ticks being updated.
- *   - Allow changing interrupt functions.
+ *   - Allow changing interrupt status, like clearing a pending irq.
  *
- * SYST_CON_IRQ_EN: Set to allow interrupt.
+ * SYST_CON_IRQ_EN: Set to enable interrupt.
  *
  * SYST_CON_IRQ_CLR: Set to clear interrupt.
  */
@@ -75,6 +75,7 @@ static void __iomem *gpt_sched_reg __read_mostly;
 static void mtk_syst_ack_irq(struct timer_of *to)
 {
 	/* Clear and disable interrupt */
+	writel(SYST_CON_EN, SYST_CON_REG(to));
 	writel(SYST_CON_IRQ_CLR | SYST_CON_EN, SYST_CON_REG(to));
 }
@@ -111,6 +112,9 @@ static int mtk_syst_clkevt_next_event(unsigned long ticks,
 static int mtk_syst_clkevt_shutdown(struct clock_event_device *clkevt)
 {
+	/* Clear any irq */
+	mtk_syst_ack_irq(to_timer_of(clkevt));
+
 	/* Disable timer */
 	writel(0, SYST_CON_REG(to_timer_of(clkevt)));
...
@@ -115,6 +115,22 @@ void timerfd_clock_was_set(void)
 	rcu_read_unlock();
 }

+static void timerfd_resume_work(struct work_struct *work)
+{
+	timerfd_clock_was_set();
+}
+
+static DECLARE_WORK(timerfd_work, timerfd_resume_work);
+
+/*
+ * Invoked from timekeeping_resume(). Defer the actual update to work so
+ * timerfd_clock_was_set() runs in task context.
+ */
+void timerfd_resume(void)
+{
+	schedule_work(&timerfd_work);
+}
+
 static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
 {
 	if (ctx->might_cancel) {
...
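For context, the userspace contract this preserves: a timerfd armed with TFD_TIMER_CANCEL_ON_SET must see read() fail with ECANCELED when CLOCK_REALTIME changes, and sleep-time injection on resume counts as such a change. A rough userspace sketch (illustrative, minimal error handling; it blocks until the clock is set):

	#include <sys/timerfd.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct itimerspec its = { .it_value = { .tv_sec = 1 << 30 } };
		uint64_t ticks;
		int fd = timerfd_create(CLOCK_REALTIME, 0);

		if (fd < 0)
			return 1;
		timerfd_settime(fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET,
				&its, NULL);
		if (read(fd, &ticks, sizeof(ticks)) < 0)
			perror("read");	/* ECANCELED after settimeofday()/resume */
		return 0;
	}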
@@ -13,4 +13,23 @@
 #define OST_CLK_PERCPU_TIMER2	3
 #define OST_CLK_PERCPU_TIMER3	4

+#define OST_CLK_EVENT_TIMER	1
+
+#define OST_CLK_EVENT_TIMER0	0
+#define OST_CLK_EVENT_TIMER1	1
+#define OST_CLK_EVENT_TIMER2	2
+#define OST_CLK_EVENT_TIMER3	3
+#define OST_CLK_EVENT_TIMER4	4
+#define OST_CLK_EVENT_TIMER5	5
+#define OST_CLK_EVENT_TIMER6	6
+#define OST_CLK_EVENT_TIMER7	7
+#define OST_CLK_EVENT_TIMER8	8
+#define OST_CLK_EVENT_TIMER9	9
+#define OST_CLK_EVENT_TIMER10	10
+#define OST_CLK_EVENT_TIMER11	11
+#define OST_CLK_EVENT_TIMER12	12
+#define OST_CLK_EVENT_TIMER13	13
+#define OST_CLK_EVENT_TIMER14	14
+#define OST_CLK_EVENT_TIMER15	15
+
 #endif /* __DT_BINDINGS_CLOCK_INGENIC_OST_H__ */
@@ -318,16 +318,12 @@ struct clock_event_device;
 extern void hrtimer_interrupt(struct clock_event_device *dev);

-extern void clock_was_set_delayed(void);
-
 extern unsigned int hrtimer_resolution;

 #else

 #define hrtimer_resolution	(unsigned int)LOW_RES_NSEC

-static inline void clock_was_set_delayed(void) { }
-
 #endif

 static inline ktime_t
@@ -351,13 +347,13 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
 			    timer->base->get_time());
 }

-extern void clock_was_set(void);
-
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
+extern void timerfd_resume(void);
 #else
 static inline void timerfd_clock_was_set(void) { }
+static inline void timerfd_resume(void) { }
 #endif
-extern void hrtimers_resume(void);

 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
...
@@ -82,12 +82,19 @@ static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
 	return timerqueue_add(head, &ctmr->node);
 }

-static inline void cpu_timer_dequeue(struct cpu_timer *ctmr)
+static inline bool cpu_timer_queued(struct cpu_timer *ctmr)
 {
-	if (ctmr->head) {
+	return !!ctmr->head;
+}
+
+static inline bool cpu_timer_dequeue(struct cpu_timer *ctmr)
+{
+	if (cpu_timer_queued(ctmr)) {
 		timerqueue_del(ctmr->head, &ctmr->node);
 		ctmr->head = NULL;
+		return true;
 	}
+	return false;
 }

 static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
...
@@ -714,6 +714,12 @@ static inline void unlock_task_sighand(struct task_struct *task,
 	spin_unlock_irqrestore(&task->sighand->siglock, *flags);
 }

+#ifdef CONFIG_LOCKDEP
+extern void lockdep_assert_task_sighand_held(struct task_struct *task);
+#else
+static inline void lockdep_assert_task_sighand_held(struct task_struct *task) { }
+#endif
+
 static inline unsigned long task_rlimit(const struct task_struct *task,
 					unsigned int limit)
 {
...
@@ -1413,6 +1413,21 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 	return sighand;
 }

+#ifdef CONFIG_LOCKDEP
+void lockdep_assert_task_sighand_held(struct task_struct *task)
+{
+	struct sighand_struct *sighand;
+
+	rcu_read_lock();
+	sighand = rcu_dereference(task->sighand);
+	if (sighand)
+		lockdep_assert_held(&sighand->siglock);
+	else
+		WARN_ON_ONCE(1);
+	rcu_read_unlock();
+}
+#endif
+
 /*
  * send signal info to all the members of a group
  */
...
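The new helper wraps the standard lockdep assertion primitive: under CONFIG_LOCKDEP it warns when the calling context does not hold the lock, and compiles away otherwise. An illustrative sketch of that primitive with made-up demo_* types:

	#include <linux/lockdep.h>
	#include <linux/spinlock.h>

	struct demo {
		spinlock_t lock;
		int counter;
	};

	static void demo_update_counter(struct demo *d)
	{
		lockdep_assert_held(&d->lock);	/* WARNs if d->lock is not held */
		d->counter++;
	}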
@@ -19,6 +19,8 @@
 #include <linux/prandom.h>
 #include <linux/cpu.h>

+#include "tick-internal.h"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
@@ -34,9 +36,6 @@ static u64 wdtest_jiffies_read(struct clocksource *cs)
 	return (u64)jiffies;
 }

-/* Assume HZ > 100. */
-#define JIFFIES_SHIFT	8
-
 static struct clocksource clocksource_wdtest_jiffies = {
 	.name			= "wdtest-jiffies",
 	.rating			= 1, /* lowest valid rating*/
...
@@ -306,12 +306,12 @@ void clocksource_verify_percpu(struct clocksource *cs)
 		return;
 	cpumask_clear(&cpus_ahead);
 	cpumask_clear(&cpus_behind);
-	get_online_cpus();
+	cpus_read_lock();
 	preempt_disable();
 	clocksource_verify_choose_cpus();
 	if (cpumask_weight(&cpus_chosen) == 0) {
 		preempt_enable();
-		put_online_cpus();
+		cpus_read_unlock();
 		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
 		return;
 	}
@@ -337,7 +337,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
 			cs_nsec_min = cs_nsec;
 	}
 	preempt_enable();
-	put_online_cpus();
+	cpus_read_unlock();
 	if (!cpumask_empty(&cpus_ahead))
 		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
 			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
...
@@ -652,21 +652,10 @@ static inline int hrtimer_hres_active(void)
 	return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
 }

-/*
- * Reprogram the event source with checking both queues for the
- * next event
- * Called with interrupts disabled and base->lock held
- */
-static void
-hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
+				struct hrtimer *next_timer,
+				ktime_t expires_next)
 {
-	ktime_t expires_next;
-
-	expires_next = hrtimer_update_next_event(cpu_base);
-
-	if (skip_equal && expires_next == cpu_base->expires_next)
-		return;
-
 	cpu_base->expires_next = expires_next;

 	/*
@@ -689,7 +678,25 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 	if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
 		return;

-	tick_program_event(cpu_base->expires_next, 1);
+	tick_program_event(expires_next, 1);
+}
+
+/*
+ * Reprogram the event source with checking both queues for the
+ * next event.
+ * Called with interrupts disabled and base->lock held.
+ */
+static void
+hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+{
+	ktime_t expires_next;
+
+	expires_next = hrtimer_update_next_event(cpu_base);
+
+	if (skip_equal && expires_next == cpu_base->expires_next)
+		return;
+
+	__hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
 }

 /* High resolution timer related functions */
@@ -720,23 +727,7 @@ static inline int hrtimer_is_hres_enabled(void)
 	return hrtimer_hres_enabled;
 }

-/*
- * Retrigger next event is called after clock was set
- *
- * Called with interrupts disabled via on_each_cpu()
- */
-static void retrigger_next_event(void *arg)
-{
-	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
-
-	if (!__hrtimer_hres_active(base))
-		return;
-
-	raw_spin_lock(&base->lock);
-	hrtimer_update_base(base);
-	hrtimer_force_reprogram(base, 0);
-	raw_spin_unlock(&base->lock);
-}
+static void retrigger_next_event(void *arg);

 /*
  * Switch to high resolution mode
@@ -758,29 +749,54 @@ static void hrtimer_switch_to_hres(void)
 	retrigger_next_event(NULL);
 }

-static void clock_was_set_work(struct work_struct *work)
-{
-	clock_was_set();
-}
-
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-
-/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
- */
-void clock_was_set_delayed(void)
-{
-	schedule_work(&hrtimer_work);
-}
-
 #else

 static inline int hrtimer_is_hres_enabled(void) { return 0; }
 static inline void hrtimer_switch_to_hres(void) { }
-static inline void retrigger_next_event(void *arg) { }

 #endif /* CONFIG_HIGH_RES_TIMERS */

+/*
+ * Retrigger next event is called after clock was set with interrupts
+ * disabled through an SMP function call or directly from low level
+ * resume code.
+ *
+ * This is only invoked when:
+ *	- CONFIG_HIGH_RES_TIMERS is enabled.
+ *	- CONFIG_NOHZ_COMMON is enabled
+ *
+ * For the other cases this function is empty and because the call sites
+ * are optimized out it vanishes as well, i.e. no need for lots of
+ * #ifdeffery.
+ */
+static void retrigger_next_event(void *arg)
+{
+	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
+
+	/*
+	 * When high resolution mode or nohz is active, then the offsets of
+	 * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
+	 * next tick will take care of that.
+	 *
+	 * If high resolution mode is active then the next expiring timer
+	 * must be reevaluated and the clock event device reprogrammed if
+	 * necessary.
+	 *
+	 * In the NOHZ case the update of the offset and the reevaluation
+	 * of the next expiring timer is enough. The return from the SMP
+	 * function call will take care of the reprogramming in case the
+	 * CPU was in a NOHZ idle sleep.
+	 */
+	if (!__hrtimer_hres_active(base) && !tick_nohz_active)
+		return;
+
+	raw_spin_lock(&base->lock);
+	hrtimer_update_base(base);
+	if (__hrtimer_hres_active(base))
+		hrtimer_force_reprogram(base, 0);
+	else
+		hrtimer_update_next_event(base);
+	raw_spin_unlock(&base->lock);
+}

 /*
  * When a timer is enqueued and expires earlier than the already enqueued
@@ -835,75 +851,161 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
 	if (base->cpu_base != cpu_base)
 		return;

-	if (expires >= cpu_base->expires_next)
-		return;
-
 	/*
-	 * If the hrtimer interrupt is running, then it will
-	 * reevaluate the clock bases and reprogram the clock event
-	 * device. The callbacks are always executed in hard interrupt
-	 * context so we don't need an extra check for a running
-	 * callback.
+	 * If the hrtimer interrupt is running, then it will reevaluate the
+	 * clock bases and reprogram the clock event device.
 	 */
 	if (cpu_base->in_hrtirq)
 		return;

+	if (expires >= cpu_base->expires_next)
+		return;
+
+	/* Update the pointer to the next expiring timer */
 	cpu_base->next_timer = timer;
-	cpu_base->expires_next = expires;

-	/*
-	 * If hres is not active, hardware does not have to be
-	 * programmed yet.
-	 *
-	 * If a hang was detected in the last timer interrupt then we
-	 * do not schedule a timer which is earlier than the expiry
-	 * which we enforced in the hang detection. We want the system
-	 * to make progress.
-	 */
-	if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
-		return;
-
-	/*
-	 * Program the timer hardware. We enforce the expiry for
-	 * events which are already in the past.
-	 */
-	tick_program_event(expires, 1);
+	__hrtimer_reprogram(cpu_base, timer, expires);
+}
+
+static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
+			     unsigned int active)
+{
+	struct hrtimer_clock_base *base;
+	unsigned int seq;
+	ktime_t expires;
+
+	/*
+	 * Update the base offsets unconditionally so the following
+	 * checks whether the SMP function call is required works.
+	 *
+	 * The update is safe even when the remote CPU is in the hrtimer
+	 * interrupt or the hrtimer soft interrupt and expiring affected
+	 * bases. Either it will see the update before handling a base or
+	 * it will see it when it finishes the processing and reevaluates
+	 * the next expiring timer.
+	 */
+	seq = cpu_base->clock_was_set_seq;
+	hrtimer_update_base(cpu_base);
+
+	/*
+	 * If the sequence did not change over the update then the
+	 * remote CPU already handled it.
+	 */
+	if (seq == cpu_base->clock_was_set_seq)
+		return false;
+
+	/*
+	 * If the remote CPU is currently handling an hrtimer interrupt, it
+	 * will reevaluate the first expiring timer of all clock bases
+	 * before reprogramming. Nothing to do here.
+	 */
+	if (cpu_base->in_hrtirq)
+		return false;
+
+	/*
+	 * Walk the affected clock bases and check whether the first expiring
+	 * timer in a clock base is moving ahead of the first expiring timer of
+	 * @cpu_base. If so, the IPI must be invoked because per CPU clock
+	 * event devices cannot be remotely reprogrammed.
+	 */
+	active &= cpu_base->active_bases;
+
+	for_each_active_base(base, cpu_base, active) {
+		struct timerqueue_node *next;
+
+		next = timerqueue_getnext(&base->active);
+		expires = ktime_sub(next->expires, base->offset);
+		if (expires < cpu_base->expires_next)
+			return true;
+
+		/* Extra check for softirq clock bases */
+		if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
+			continue;
+		if (cpu_base->softirq_activated)
+			continue;
+		if (expires < cpu_base->softirq_expires_next)
+			return true;
+	}
+	return false;
+}

 /*
- * Clock realtime was set
- *
- * Change the offset of the realtime clock vs. the monotonic
- * clock.
+ * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
+ * CLOCK_BOOTTIME (for late sleep time injection).
  *
- * We might have to reprogram the high resolution timer interrupt. On
- * SMP we call the architecture specific code to retrigger _all_ high
- * resolution timer interrupts. On UP we just disable interrupts and
- * call the high resolution interrupt code.
+ * This requires updating the offsets of these clocks
+ * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, it may
+ * also require reprogramming the per CPU clock event devices when the
+ * change moves an affected timer ahead of the first expiring timer on
+ * that CPU. Obviously remote per CPU clock event devices cannot be
+ * reprogrammed. The other reason why an IPI has to be sent is when the
+ * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets
+ * in the tick, which obviously might be stopped, so this has to bring out
+ * the remote CPU which might sleep in idle to get this sorted.
  */
-void clock_was_set(void)
+void clock_was_set(unsigned int bases)
 {
-#ifdef CONFIG_HIGH_RES_TIMERS
-	/* Retrigger the CPU local events everywhere */
+	struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
+	cpumask_var_t mask;
+	int cpu;
+
+	if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
+		goto out_timerfd;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
 		on_each_cpu(retrigger_next_event, NULL, 1);
-#endif
+		goto out_timerfd;
+	}
+
+	/* Avoid interrupting CPUs if possible */
+	cpus_read_lock();
+	for_each_online_cpu(cpu) {
+		unsigned long flags;
+
+		cpu_base = &per_cpu(hrtimer_bases, cpu);
+		raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+		if (update_needs_ipi(cpu_base, bases))
+			cpumask_set_cpu(cpu, mask);
+
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	}
+
+	preempt_disable();
+	smp_call_function_many(mask, retrigger_next_event, NULL, 1);
+	preempt_enable();
+	cpus_read_unlock();
+	free_cpumask_var(mask);
+
+out_timerfd:
 	timerfd_clock_was_set();
 }

+static void clock_was_set_work(struct work_struct *work)
+{
+	clock_was_set(CLOCK_SET_WALL);
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping code to reprogram the hrtimer interrupt device
+ * on all cpus and to notify timerfd.
+ */
+void clock_was_set_delayed(void)
+{
+	schedule_work(&hrtimer_work);
+}
+
 /*
- * During resume we might have to reprogram the high resolution timer
- * interrupt on all online CPUs. However, all other CPUs will be
- * stopped with interrupts disabled so the clock_was_set() call
- * must be deferred.
+ * Called during resume, either directly via timekeeping_resume() or,
+ * in the case of s2idle, from tick_unfreeze(), to ensure that the
+ * hrtimers are up to date.
  */
-void hrtimers_resume(void)
+void hrtimers_resume_local(void)
 {
 	lockdep_assert_irqs_disabled();
 	/* Retrigger on the local CPU */
 	retrigger_next_event(NULL);
-	/* And schedule a retrigger for all others */
-	clock_was_set_delayed();
 }

@@ -1030,12 +1132,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
  * remove hrtimer, called with base lock held
  */
 static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
+	       bool restart, bool keep_local)
 {
 	u8 state = timer->state;

 	if (state & HRTIMER_STATE_ENQUEUED) {
-		int reprogram;
+		bool reprogram;

 		/*
 		 * Remove the timer and force reprogramming when high
@@ -1048,8 +1151,16 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 		debug_deactivate(timer);
 		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);

+		/*
+		 * If the timer is not restarted then reprogramming is
+		 * required if the timer is local. If it is local and about
+		 * to be restarted, avoid programming it twice (on removal
+		 * and a moment later when it's requeued).
+		 */
 		if (!restart)
 			state = HRTIMER_STATE_INACTIVE;
+		else
+			reprogram &= !keep_local;

 		__remove_hrtimer(timer, base, state, reprogram);
 		return 1;
@@ -1103,9 +1214,31 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 				    struct hrtimer_clock_base *base)
 {
 	struct hrtimer_clock_base *new_base;
+	bool force_local, first;
+
+	/*
+	 * If the timer is on the local cpu base and is the first expiring
+	 * timer then this might end up reprogramming the hardware twice
+	 * (on removal and on enqueue). To avoid that, prevent the
+	 * reprogram on removal: keep the timer local to the current CPU
+	 * and enforce reprogramming after it is queued no matter whether
+	 * it is the new first expiring timer again or not.
+	 */
+	force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+	force_local &= base->cpu_base->next_timer == timer;

-	/* Remove an active timer from the queue: */
-	remove_hrtimer(timer, base, true);
+	/*
+	 * Remove an active timer from the queue. In case it is not queued
+	 * on the current CPU, make sure that remove_hrtimer() updates the
+	 * remote data correctly.
+	 *
+	 * If it's on the current CPU and the first expiring timer, then
+	 * skip reprogramming, keep the timer local and enforce
+	 * reprogramming later if it was the first expiring timer. This
+	 * avoids programming the underlying clock event twice (once at
+	 * removal and once after enqueue).
+	 */
+	remove_hrtimer(timer, base, true, force_local);

 	if (mode & HRTIMER_MODE_REL)
 		tim = ktime_add_safe(tim, base->get_time());
@@ -1115,9 +1248,24 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

 	/* Switch the timer base, if necessary: */
-	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
+	if (!force_local) {
+		new_base = switch_hrtimer_base(timer, base,
+					       mode & HRTIMER_MODE_PINNED);
+	} else {
+		new_base = base;
+	}
+
+	first = enqueue_hrtimer(timer, new_base, mode);
+	if (!force_local)
+		return first;

-	return enqueue_hrtimer(timer, new_base, mode);
+	/*
+	 * Timer was forced to stay on the current CPU to avoid
+	 * reprogramming on removal and enqueue. Force reprogram the
+	 * hardware by evaluating the new first expiring timer.
+	 */
+	hrtimer_force_reprogram(new_base->cpu_base, 1);
+	return 0;
 }

@@ -1183,7 +1331,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 	base = lock_hrtimer_base(timer, &flags);

 	if (!hrtimer_callback_running(timer))
-		ret = remove_hrtimer(timer, base, false);
+		ret = remove_hrtimer(timer, base, false, false);

 	unlock_hrtimer_base(timer, &flags);
...
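The clock_was_set() rework above is an instance of a common pattern: compute the set of CPUs that actually need an IPI under their per-CPU locks, then use smp_call_function_many() instead of interrupting everyone with on_each_cpu(). A generic sketch under that assumption (the demo_* names are illustrative, not from this series):

	#include <linux/cpu.h>
	#include <linux/cpumask.h>
	#include <linux/gfp.h>
	#include <linux/smp.h>

	static bool demo_cpu_needs_update(int cpu)
	{
		return true;	/* hypothetical per-CPU state check */
	}

	static void demo_update(void *arg)
	{
		/* runs on each targeted CPU in IPI context */
	}

	static void demo_update_all(void)
	{
		cpumask_var_t mask;
		int cpu;

		if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
			/* Fallback: interrupt every CPU */
			on_each_cpu(demo_update, NULL, 1);
			return;
		}

		cpus_read_lock();
		for_each_online_cpu(cpu) {
			if (demo_cpu_needs_update(cpu))
				cpumask_set_cpu(cpu, mask);
		}
		/* Note: smp_call_function_many() skips the calling CPU */
		preempt_disable();
		smp_call_function_many(mask, demo_update, NULL, 1);
		if (cpumask_test_cpu(smp_processor_id(), mask))
			demo_update(NULL);
		preempt_enable();
		cpus_read_unlock();
		free_cpumask_var(mask);
	}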
@@ -10,28 +10,9 @@
 #include <linux/init.h>

 #include "timekeeping.h"
+#include "tick-internal.h"

-/* Since jiffies uses a simple TICK_NSEC multiplier
- * conversion, the .shift value could be zero. However
- * this would make NTP adjustments impossible as they are
- * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
- * shift both the nominator and denominator the same
- * amount, and give ntp adjustments in units of 1/2^8
- *
- * The value 8 is somewhat carefully chosen, as anything
- * larger can result in overflows. TICK_NSEC grows as HZ
- * shrinks, so values greater than 8 overflow 32bits when
- * HZ=100.
- */
-#if HZ < 34
-#define JIFFIES_SHIFT	6
-#elif HZ < 67
-#define JIFFIES_SHIFT	7
-#else
-#define JIFFIES_SHIFT	8
-#endif

 static u64 jiffies_read(struct clocksource *cs)
 {
 	return (u64) jiffies;
...
@@ -291,6 +291,8 @@ static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;

+	lockdep_assert_task_sighand_held(tsk);
+
 	/* Check if cputimer isn't running. This is accessed without locking. */
 	if (!READ_ONCE(pct->timers_active)) {
 		struct task_cputime sum;
@@ -405,6 +407,55 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 	return 0;
 }

+static struct posix_cputimer_base *timer_base(struct k_itimer *timer,
+					      struct task_struct *tsk)
+{
+	int clkidx = CPUCLOCK_WHICH(timer->it_clock);
+
+	if (CPUCLOCK_PERTHREAD(timer->it_clock))
+		return tsk->posix_cputimers.bases + clkidx;
+	else
+		return tsk->signal->posix_cputimers.bases + clkidx;
+}
+
+/*
+ * Force recalculating the base earliest expiration on the next tick.
+ * This will also re-evaluate the need to keep around the process wide
+ * cputime counter and tick dependency and eventually shut these down
+ * if necessary.
+ */
+static void trigger_base_recalc_expires(struct k_itimer *timer,
+					struct task_struct *tsk)
+{
+	struct posix_cputimer_base *base = timer_base(timer, tsk);
+
+	base->nextevt = 0;
+}
+
+/*
+ * Dequeue the timer and reset the base if it was its earliest expiration.
+ * It makes sure the next tick recalculates the base next expiration so we
+ * don't keep the costly process wide cputime counter around for a random
+ * amount of time, along with the tick dependency.
+ *
+ * If another timer gets queued between this and the next tick, its
+ * expiration will update the base next event if necessary on the next
+ * tick.
+ */
+static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
+{
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	struct posix_cputimer_base *base;
+
+	if (!cpu_timer_dequeue(ctmr))
+		return;
+
+	base = timer_base(timer, p);
+	if (cpu_timer_getexpires(ctmr) == base->nextevt)
+		trigger_base_recalc_expires(timer, p);
+}
+
 /*
  * Clean up a CPU-clock timer that is about to be destroyed.
  * This is called from timer deletion with the timer already locked.
@@ -439,7 +490,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 		if (timer->it.cpu.firing)
 			ret = TIMER_RETRY;
 		else
-			cpu_timer_dequeue(ctmr);
+			disarm_timer(timer, p);

 		unlock_task_sighand(p, &flags);
 	}
@@ -498,15 +549,9 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
  */
 static void arm_timer(struct k_itimer *timer, struct task_struct *p)
 {
-	int clkidx = CPUCLOCK_WHICH(timer->it_clock);
+	struct posix_cputimer_base *base = timer_base(timer, p);
 	struct cpu_timer *ctmr = &timer->it.cpu;
 	u64 newexp = cpu_timer_getexpires(ctmr);
-	struct posix_cputimer_base *base;
-
-	if (CPUCLOCK_PERTHREAD(timer->it_clock))
-		base = p->posix_cputimers.bases + clkidx;
-	else
-		base = p->signal->posix_cputimers.bases + clkidx;

 	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
 		return;
@@ -703,7 +748,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	timer->it_overrun_last = 0;
 	timer->it_overrun = -1;

-	if (new_expires != 0 && !(val < new_expires)) {
+	if (val >= new_expires) {
+		if (new_expires != 0) {
 			/*
 			 * The designated time already passed, so we notify
 			 * immediately, even if the thread never runs to
@@ -712,7 +758,19 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 			cpu_timer_fire(timer);
 		}

-	ret = 0;
+		/*
+		 * Make sure we don't keep around the process wide cputime
+		 * counter or the tick dependency if they are not necessary.
+		 */
+		sighand = lock_task_sighand(p, &flags);
+		if (!sighand)
+			goto out;
+
+		if (!cpu_timer_queued(ctmr))
+			trigger_base_recalc_expires(timer, p);
+
+		unlock_task_sighand(p, &flags);
+	}
 out:
 	rcu_read_unlock();
 	if (old)
@@ -1346,8 +1404,6 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
 		}
 	}

-	if (!*newval)
-		return;
 	*newval += now;
 }
...
@@ -336,7 +336,7 @@ void posixtimer_rearm(struct kernel_siginfo *info)
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
 	enum pid_type type;
-	int ret = -1;
+	int ret;

 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->posixtimer_rearm().
...
@@ -470,6 +470,13 @@ void tick_resume_local(void)
 		else
 			tick_resume_oneshot();
 	}
+
+	/*
+	 * Ensure that hrtimers are up to date and the clockevents device
+	 * is reprogrammed correctly when high resolution timers are
+	 * enabled.
+	 */
+	hrtimers_resume_local();
 }

 /**
...
@@ -165,3 +165,35 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
 void timer_clear_idle(void);

+#define CLOCK_SET_WALL							\
+	(BIT(HRTIMER_BASE_REALTIME) | BIT(HRTIMER_BASE_REALTIME_SOFT) |	\
+	 BIT(HRTIMER_BASE_TAI) | BIT(HRTIMER_BASE_TAI_SOFT))
+
+#define CLOCK_SET_BOOT							\
+	(BIT(HRTIMER_BASE_BOOTTIME) | BIT(HRTIMER_BASE_BOOTTIME_SOFT))
+
+void clock_was_set(unsigned int bases);
+void clock_was_set_delayed(void);
+
+void hrtimers_resume_local(void);
+
+/* Since jiffies uses a simple TICK_NSEC multiplier
+ * conversion, the .shift value could be zero. However
+ * this would make NTP adjustments impossible as they are
+ * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
+ * shift both the numerator and denominator the same
+ * amount, and give ntp adjustments in units of 1/2^8.
+ *
+ * The value 8 is somewhat carefully chosen, as anything
+ * larger can result in overflows. TICK_NSEC grows as HZ
+ * shrinks, so values greater than 8 overflow 32bits when
+ * HZ=100.
+ */
+#if HZ < 34
+#define JIFFIES_SHIFT	6
+#elif HZ < 67
+#define JIFFIES_SHIFT	7
+#else
+#define JIFFIES_SHIFT	8
+#endif
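A worked example of the overflow bound described in the comment above:

	/*
	 * HZ = 100  =>  TICK_NSEC = 10^9 / 100 = 10,000,000 ns
	 *   10,000,000 << 8 = 2,560,000,000   (< 2^32, fits in 32 bits)
	 *   10,000,000 << 9 = 5,120,000,000   (> 2^32, overflows)
	 * Hence JIFFIES_SHIFT caps at 8, dropping to 7 below HZ=67 and
	 * 6 below HZ=34 as TICK_NSEC grows.
	 */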
@@ -1323,8 +1323,8 @@ int do_settimeofday64(const struct timespec64 *ts)
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);

-	/* signal hrtimers about time change */
-	clock_was_set();
+	/* Signal hrtimers about time change */
+	clock_was_set(CLOCK_SET_WALL);

 	if (!ret)
 		audit_tk_injoffset(ts_delta);
@@ -1371,8 +1371,8 @@ static int timekeeping_inject_offset(const struct timespec64 *ts)
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);

-	/* signal hrtimers about time change */
-	clock_was_set();
+	/* Signal hrtimers about time change */
+	clock_was_set(CLOCK_SET_WALL);

 	return ret;
 }
@@ -1746,8 +1746,8 @@ void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);

-	/* signal hrtimers about time change */
-	clock_was_set();
+	/* Signal hrtimers about time change */
+	clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
 }
 #endif
@@ -1810,8 +1810,10 @@ void timekeeping_resume(void)
 	touch_softlockup_watchdog();

+	/* Resume the clockevent device(s) and hrtimers */
 	tick_resume();
-	hrtimers_resume();
+	/* Notify timerfd as resume is equivalent to clock_was_set() */
+	timerfd_resume();
 }

 int timekeeping_suspend(void)
@@ -2125,7 +2127,7 @@
  * timekeeping_advance - Updates the timekeeper to the current time and
  * current NTP tick length
  */
-static void timekeeping_advance(enum timekeeping_adv_mode mode)
+static bool timekeeping_advance(enum timekeeping_adv_mode mode)
 {
 	struct timekeeper *real_tk = &tk_core.timekeeper;
 	struct timekeeper *tk = &shadow_timekeeper;
@@ -2196,9 +2198,8 @@ static void timekeeping_advance(enum timekeeping_adv_mode mode)
 	write_seqcount_end(&tk_core.seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
-	if (clock_set)
-		/* Have to call _delayed version, since in irq context*/
-		clock_was_set_delayed();
+
+	return !!clock_set;
 }

 /**
@@ -2207,7 +2208,8 @@ static void timekeeping_advance(enum timekeeping_adv_mode mode)
  */
 void update_wall_time(void)
 {
-	timekeeping_advance(TK_ADV_TICK);
+	if (timekeeping_advance(TK_ADV_TICK))
+		clock_was_set_delayed();
 }

 /**
@@ -2387,8 +2389,9 @@ int do_adjtimex(struct __kernel_timex *txc)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct audit_ntp_data ad;
-	unsigned long flags;
+	bool clock_set = false;
 	struct timespec64 ts;
+	unsigned long flags;
 	s32 orig_tai, tai;
 	int ret;
@@ -2423,6 +2426,7 @@ int do_adjtimex(struct __kernel_timex *txc)
 		if (tai != orig_tai) {
 			__timekeeping_set_tai_offset(tk, tai);
 			timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
+			clock_set = true;
 		}
 		tk_update_leap_state(tk);
@@ -2433,10 +2437,10 @@ int do_adjtimex(struct __kernel_timex *txc)
 	/* Update the multiplier immediately if frequency was set directly */
 	if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
-		timekeeping_advance(TK_ADV_FREQ);
+		clock_set |= timekeeping_advance(TK_ADV_FREQ);

-	if (tai != orig_tai)
-		clock_was_set();
+	if (clock_set)
+		clock_was_set(CLOCK_REALTIME);

 	ntp_notify_cmos_timer();
...