Commit fe5ac724 authored by Paul E. McKenney's avatar Paul E. McKenney

rcu: Remove nohz_full full-system-idle state machine

The NO_HZ_FULL_SYSIDLE full-system-idle capability was added in 2013
by commit 0edd1b17 ("nohz_full: Add full-system-idle state machine"),
but has not been used.  This commit therefore removes it.

If it turns out to be needed later, this commit can always be reverted.
Signed-off-by: default avatarPaul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Acked-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent f7a10a97
......@@ -2520,11 +2520,7 @@ It is similarly socially unacceptable to interrupt an
<tt>nohz_full</tt> CPU running in userspace.
RCU must therefore track <tt>nohz_full</tt> userspace
execution.
And in
<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
kernels, RCU must separately track idle CPUs on the one hand and
CPUs that are either idle or executing in userspace on the other.
In both cases, RCU must be able to sample state at two points in
RCU must therefore be able to sample state at two points in
time, and be able to determine whether or not some other CPU spent
any time idle and/or executing in userspace.
......
......@@ -854,15 +854,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define kfree_rcu(ptr, rcu_head) \
__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
/* Only for use by adaptive-ticks code. */
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
bool rcu_sys_is_idle(void);
void rcu_sysidle_force_exit(void);
#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
static inline bool rcu_sys_is_idle(void) { return false; }
static inline void rcu_sysidle_force_exit(void) { }
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
/*
* Place this after a lock-acquisition primitive to guarantee that
......
......@@ -270,10 +270,6 @@ void rcu_bh_qs(void)
static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
.dynticks_idle = ATOMIC_INIT(1),
#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
};
/*
......@@ -546,10 +542,7 @@ module_param(jiffies_till_sched_qs, ulong, 0644);
static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp,
int (*f)(struct rcu_data *rsp, bool *isidle,
unsigned long *maxj),
bool *isidle, unsigned long *maxj);
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(void);
......@@ -854,7 +847,6 @@ void rcu_idle_enter(void)
local_irq_save(flags);
rcu_eqs_enter(false);
rcu_sysidle_enter(0);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
......@@ -904,7 +896,6 @@ void rcu_irq_exit(void)
trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
rdtp->dynticks_nesting--;
}
rcu_sysidle_enter(1);
}
/*
......@@ -986,7 +977,6 @@ void rcu_idle_exit(void)
local_irq_save(flags);
rcu_eqs_exit(false);
rcu_sysidle_exit(0);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);
......@@ -1038,7 +1028,6 @@ void rcu_irq_enter(void)
trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
else
rcu_eqs_exit_common(oldval, true);
rcu_sysidle_exit(1);
}
/*
......@@ -1217,11 +1206,9 @@ static int rcu_is_cpu_rrupt_from_idle(void)
* credit them with an implicit quiescent state. Return 1 if this CPU
* is in dynticks idle mode, which is an extended quiescent state.
*/
static int dyntick_save_progress_counter(struct rcu_data *rdp,
bool *isidle, unsigned long *maxj)
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
rcu_sysidle_check_cpu(rdp, isidle, maxj);
if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
......@@ -1238,8 +1225,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
* idle state since the last call to dyntick_save_progress_counter()
* for this same CPU, or by virtue of having been offline.
*/
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
bool *isidle, unsigned long *maxj)
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{
unsigned long jtsq;
bool *rnhqp;
......@@ -2105,25 +2091,16 @@ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
*/
static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
{
bool isidle = false;
unsigned long maxj;
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
rsp->n_force_qs++;
if (first_time) {
/* Collect dyntick-idle snapshots. */
if (is_sysidle_rcu_state(rsp)) {
isidle = true;
maxj = jiffies - ULONG_MAX / 4;
}
force_qs_rnp(rsp, dyntick_save_progress_counter,
&isidle, &maxj);
rcu_sysidle_report_gp(rsp, isidle, maxj);
force_qs_rnp(rsp, dyntick_save_progress_counter);
} else {
/* Handle dyntick-idle and offline CPUs. */
isidle = true;
force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
}
/* Clear flag to prevent immediate re-entry. */
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
......@@ -2895,10 +2872,7 @@ void rcu_check_callbacks(int user)
*
* The caller must have suppressed start of new grace periods.
*/
static void force_qs_rnp(struct rcu_state *rsp,
int (*f)(struct rcu_data *rsp, bool *isidle,
unsigned long *maxj),
bool *isidle, unsigned long *maxj)
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
{
int cpu;
unsigned long flags;
......@@ -2937,7 +2911,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
for_each_leaf_node_possible_cpu(rnp, cpu) {
unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
if ((rnp->qsmask & bit) != 0) {
if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
if (f(per_cpu_ptr(rsp->rda, cpu)))
mask |= bit;
}
}
......@@ -3793,7 +3767,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
!init_nocb_callback_list(rdp))
rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
rcu_sysidle_init_percpu_data(rdp->dynticks);
rcu_dynticks_eqs_online();
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
......
......@@ -45,14 +45,6 @@ struct rcu_dynticks {
bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */
unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */
bool rcu_urgent_qs; /* GP old need light quiescent state. */
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
long long dynticks_idle_nesting;
/* irq/process nesting level from idle. */
atomic_t dynticks_idle; /* Even value for idle, else odd. */
/* "Idle" excludes userspace execution. */
unsigned long dynticks_idle_jiffies;
/* End of last non-NMI non-idle period. */
#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
#ifdef CONFIG_RCU_FAST_NO_HZ
bool all_lazy; /* Are all CPU's CBs lazy? */
unsigned long nonlazy_posted;
......@@ -529,15 +521,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
static bool init_nocb_callback_list(struct rcu_data *rdp);
static void rcu_sysidle_enter(int irq);
static void rcu_sysidle_exit(int irq);
static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
unsigned long *maxj);
static bool is_sysidle_rcu_state(struct rcu_state *rsp);
static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
unsigned long maxj);
static void rcu_bind_gp_kthread(void);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
static void rcu_dynticks_task_enter(void);
static void rcu_dynticks_task_exit(void);
......
This diff is collapsed.
......@@ -126,56 +126,6 @@ config NO_HZ_FULL_ALL
Note the boot CPU will still be kept outside the range to
handle the timekeeping duty.
config NO_HZ_FULL_SYSIDLE
bool "Detect full-system idle state for full dynticks system"
depends on NO_HZ_FULL
default n
help
At least one CPU must keep the scheduling-clock tick running for
timekeeping purposes whenever there is a non-idle CPU, where
"non-idle" also includes dynticks CPUs as long as they are
running non-idle tasks. Because the underlying adaptive-tick
support cannot distinguish between all CPUs being idle and
all CPUs each running a single task in dynticks mode, the
underlying support simply ensures that there is always a CPU
handling the scheduling-clock tick, whether or not all CPUs
are idle. This Kconfig option enables scalable detection of
the all-CPUs-idle state, thus allowing the scheduling-clock
tick to be disabled when all CPUs are idle. Note that scalable
detection of the all-CPUs-idle state means that larger systems
will be slower to declare the all-CPUs-idle state.
Say Y if you would like to help debug all-CPUs-idle detection.
Say N if you are unsure.
config NO_HZ_FULL_SYSIDLE_SMALL
int "Number of CPUs above which large-system approach is used"
depends on NO_HZ_FULL_SYSIDLE
range 1 NR_CPUS
default 8
help
The full-system idle detection mechanism takes a lazy approach
on large systems, as is required to attain decent scalability.
However, on smaller systems, scalability is not anywhere near as
large a concern as is energy efficiency. The sysidle subsystem
therefore uses a fast but non-scalable algorithm for small
systems and a lazier but scalable algorithm for large systems.
This Kconfig parameter defines the number of CPUs in the largest
system that will be considered to be "small".
The default value will be fine in most cases. Battery-powered
systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
numbers of CPUs, and (3) are suffering from battery-lifetime
problems due to long sysidle latencies might wish to experiment
with larger values for this Kconfig parameter. On the other
hand, they might be even better served by disabling NO_HZ_FULL
entirely, given that NO_HZ_FULL is intended for HPC and
real-time workloads that at present do not tend to be run on
battery-powered systems.
Take the default if you are unsure.
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
......
......@@ -8,7 +8,6 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ_FULL_ALL=n
CONFIG_NO_HZ_FULL_SYSIDLE=y
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
......
......@@ -18,7 +18,6 @@ CONFIG_PROVE_RCU
In common code tested by TREE_RCU test cases.
CONFIG_NO_HZ_FULL_SYSIDLE
CONFIG_RCU_NOCB_CPU
Meaningless for TINY_RCU.
......
......@@ -9,8 +9,7 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
CONFIG_HOTPLUG_CPU -- Do half. (Every second.)
CONFIG_HZ_PERIODIC -- Do one.
CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
CONFIG_PREEMPT -- Do half. (First three and #8.)
CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
......@@ -48,10 +47,6 @@ CONFIG_64BIT
Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
CONFIG_NO_HZ_FULL_SYSIDLE_SMALL
Defer until Frederic uses this.
CONFIG_PREEMPT_COUNT
CONFIG_PREEMPT_RCU
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment