Commit 8747a291 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU updates from Ingo Molnar:
 "The main RCU subsystem changes in this cycle were:

  - Miscellaneous fixes, perhaps most notably removing obsolete code
    whose only purpose in life was to gather information for the
    now-removed RCU debugfs facility. Other notable changes include
    removing NO_HZ_FULL_ALL in favor of the nohz_full kernel boot
    parameter, minor optimizations for expedited grace periods, some
    added tracing, creating an RCU-specific workqueue using Tejun's new
    WQ_MEM_RECLAIM flag, and several cleanups to code and comments.

  - SRCU cleanups and optimizations.

  - Torture-test updates, perhaps most notably the adding of ARMv8
    support, but also including numerous cleanups and usability fixes"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  rcu: Create RCU-specific workqueues with rescuers
  torture: Provide more sensible nreader/nwriter defaults for rcuperf
  torture: Grace periods do not piggyback off of themselves
  torture: Adjust rcuperf trace processing to allow for workqueues
  torture: Default jitter off when running rcuperf
  torture: Specify qemu memory size with --memory argument
  rcutorture: Add basic ARM64 support to run scripts
  rcutorture: Update kvm.sh header comment
  rcutorture: Record which grace-period primitives are tested
  rcutorture: Re-enable testing of dynamic expediting
  rcutorture: Avoid fake-writer use of undefined primitives
  rcutorture: Abstract function and module names
  rcutorture: Replace multi-instance kzalloc() with kcalloc()
  rcu: Remove SRCU throttling
  srcu: Remove dead code in srcu_gp_end()
  srcu: Reduce scans of srcu_data in counter wrap check
  srcu: Prevent sdp->srcu_gp_seq_needed_exp counter wrap
  srcu: Abstract function name
  rcu: Make expedited RCU CPU selection avoid unnecessary stores
  rcu: Trace expedited GP delays due to transitioning CPUs
  ...
parents cc67ccec c4fb5f37
......@@ -131,13 +131,6 @@ error message, and the boot CPU will be removed from the mask. Note that
this means that your system must have at least two CPUs in order for
CONFIG_NO_HZ_FULL=y to do anything for you.
Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies
that all CPUs other than the boot CPU are adaptive-ticks CPUs. This
Kconfig parameter will be overridden by the "nohz_full=" boot parameter,
so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and
the "nohz_full=1" boot parameter is specified, the boot parameter will
prevail so that only CPU 1 will be an adaptive-ticks CPU.
Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
This is covered in the "RCU IMPLICATIONS" section below.
......
......@@ -214,10 +214,12 @@ do { \
#endif
/*
* init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
* initialization and destruction of rcu_head on the stack. rcu_head structures
* allocated dynamically in the heap or defined statically don't need any
* initialization.
* The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls
* are needed for dynamic initialization and destruction of rcu_head
* on the stack, and init_rcu_head()/destroy_rcu_head() are needed for
* dynamic initialization and destruction of statically allocated rcu_head
* structures. However, rcu_head structures allocated dynamically in the
* heap don't need any initialization.
*/
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
void init_rcu_head(struct rcu_head *head);
......
......@@ -217,7 +217,7 @@ struct ustat {
*
* This guarantee is important for few reasons:
* - future call_rcu_lazy() will make use of lower bits in the pointer;
* - the structure shares storage spacer in struct page with @compound_head,
* - the structure shares storage space in struct page with @compound_head,
* which encode PageTail() in bit 0. The guarantee is needed to avoid
* false-positive PageTail().
*/
......
......@@ -179,6 +179,10 @@ TRACE_EVENT(rcu_grace_period_init,
*
* "snap": Captured snapshot of expedited grace period sequence number.
* "start": Started a real expedited grace period.
* "reset": Started resetting the tree
* "select": Started selecting the CPUs to wait on.
* "selectofl": Selected CPU partially offline.
* "startwait": Started waiting on selected CPUs.
* "end": Ended a real expedited grace period.
* "endwake": Woke piggybackers up.
* "done": Someone else did the expedited grace period for us.
......
......@@ -77,12 +77,18 @@ static inline void rcu_seq_start(unsigned long *sp)
WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
}
/* Compute the end-of-grace-period value for the specified sequence number. */
static inline unsigned long rcu_seq_endval(unsigned long *sp)
{
return (*sp | RCU_SEQ_STATE_MASK) + 1;
}
/* Adjust sequence number for end of update-side operation. */
static inline void rcu_seq_end(unsigned long *sp)
{
smp_mb(); /* Ensure update-side operation before counter increment. */
WARN_ON_ONCE(!rcu_seq_state(*sp));
WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1);
WRITE_ONCE(*sp, rcu_seq_endval(sp));
}
/* Take a snapshot of the update side's sequence number. */
......@@ -295,9 +301,19 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* Iterate over all possible CPUs in a leaf RCU node.
*/
#define for_each_leaf_node_possible_cpu(rnp, cpu) \
for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
cpu <= rnp->grphi; \
cpu = cpumask_next((cpu), cpu_possible_mask))
for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
(cpu) <= rnp->grphi; \
(cpu) = cpumask_next((cpu), cpu_possible_mask))
/*
* Iterate over all CPUs in a leaf RCU node's specified mask.
*/
#define rcu_find_next_bit(rnp, cpu, mask) \
((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
(cpu) <= rnp->grphi; \
(cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask)))
/*
* Wrappers for the rcu_node::lock acquire and release.
......@@ -337,7 +353,7 @@ do { \
} while (0)
#define raw_spin_unlock_irqrestore_rcu_node(p, flags) \
raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
#define raw_spin_trylock_rcu_node(p) \
({ \
......@@ -348,6 +364,9 @@ do { \
___locked; \
})
#define raw_lockdep_assert_held_rcu_node(p) \
lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
#ifdef CONFIG_TINY_RCU
......@@ -356,24 +375,20 @@ static inline bool rcu_gp_is_normal(void) { return true; }
static inline bool rcu_gp_is_expedited(void) { return false; }
static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcupdate_announce_bootup_oddness(void);
void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */
#define RCU_SCHEDULER_INACTIVE 0
#define RCU_SCHEDULER_INIT 1
#define RCU_SCHEDULER_RUNNING 2
#ifdef CONFIG_TINY_RCU
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */
enum rcutorture_type {
RCU_FLAVOR,
RCU_BH_FLAVOR,
......@@ -470,6 +485,7 @@ void show_rcu_gp_kthreads(void);
void rcu_force_quiescent_state(void);
void rcu_bh_force_quiescent_state(void);
void rcu_sched_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
#endif /* #else #ifdef CONFIG_TINY_RCU */
#ifdef CONFIG_RCU_NOCB_CPU
......
......@@ -61,11 +61,30 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
#define VERBOSE_PERFOUT_ERRSTRING(s) \
do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
/*
* The intended use cases for the nreaders and nwriters module parameters
* are as follows:
*
* 1. Specify only the nr_cpus kernel boot parameter. This will
* set both nreaders and nwriters to the value specified by
* nr_cpus for a mixed reader/writer test.
*
* 2. Specify the nr_cpus kernel boot parameter, but set
* rcuperf.nreaders to zero. This will set nwriters to the
* value specified by nr_cpus for an update-only test.
*
* 3. Specify the nr_cpus kernel boot parameter, but set
* rcuperf.nwriters to zero. This will set nreaders to the
* value specified by nr_cpus for a read-only test.
*
* Various other use cases may of course be specified.
*/
torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
torture_param(int, nreaders, 0, "Number of RCU reader threads");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, nwriters, -1, "Number of RCU updater threads");
torture_param(bool, shutdown, !IS_ENABLED(MODULE),
"Shutdown at end of performance tests.");
......
......@@ -909,34 +909,38 @@ rcu_torture_writer(void *arg)
int nsynctypes = 0;
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite) {
if (!can_expedite)
pr_alert("%s" TORTURE_FLAG
" GP expediting controlled from boot/sysfs for %s,\n",
" GP expediting controlled from boot/sysfs for %s.\n",
torture_type, cur_ops->name);
pr_alert("%s" TORTURE_FLAG
" Disabled dynamic grace-period expediting.\n",
torture_type);
}
/* Initialize synctype[] array. If none set, take default. */
if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) {
synctype[nsynctypes++] = RTWS_COND_GET;
else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync))
pr_alert("rcu_torture_writer: gp_cond without primitives.\n");
if (gp_exp1 && cur_ops->exp_sync)
pr_info("%s: Testing conditional GPs.\n", __func__);
} else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) {
pr_alert("%s: gp_cond without primitives.\n", __func__);
}
if (gp_exp1 && cur_ops->exp_sync) {
synctype[nsynctypes++] = RTWS_EXP_SYNC;
else if (gp_exp && !cur_ops->exp_sync)
pr_alert("rcu_torture_writer: gp_exp without primitives.\n");
if (gp_normal1 && cur_ops->deferred_free)
pr_info("%s: Testing expedited GPs.\n", __func__);
} else if (gp_exp && !cur_ops->exp_sync) {
pr_alert("%s: gp_exp without primitives.\n", __func__);
}
if (gp_normal1 && cur_ops->deferred_free) {
synctype[nsynctypes++] = RTWS_DEF_FREE;
else if (gp_normal && !cur_ops->deferred_free)
pr_alert("rcu_torture_writer: gp_normal without primitives.\n");
if (gp_sync1 && cur_ops->sync)
pr_info("%s: Testing asynchronous GPs.\n", __func__);
} else if (gp_normal && !cur_ops->deferred_free) {
pr_alert("%s: gp_normal without primitives.\n", __func__);
}
if (gp_sync1 && cur_ops->sync) {
synctype[nsynctypes++] = RTWS_SYNC;
else if (gp_sync && !cur_ops->sync)
pr_alert("rcu_torture_writer: gp_sync without primitives.\n");
pr_info("%s: Testing normal GPs.\n", __func__);
} else if (gp_sync && !cur_ops->sync) {
pr_alert("%s: gp_sync without primitives.\n", __func__);
}
if (WARN_ONCE(nsynctypes == 0,
"rcu_torture_writer: No update-side primitives.\n")) {
/*
......@@ -1011,6 +1015,9 @@ rcu_torture_writer(void *arg)
rcu_unexpedite_gp();
if (++expediting > 3)
expediting = -expediting;
} else if (!can_expedite) { /* Disabled during boot, recheck. */
can_expedite = !rcu_gp_is_expedited() &&
!rcu_gp_is_normal();
}
rcu_torture_writer_state = RTWS_STUTTER;
stutter_wait("rcu_torture_writer");
......@@ -1021,6 +1028,10 @@ rcu_torture_writer(void *arg)
while (can_expedite && expediting++ < 0)
rcu_unexpedite_gp();
WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited());
if (!can_expedite)
pr_alert("%s" TORTURE_FLAG
" Dynamic grace-period expediting was disabled.\n",
torture_type);
rcu_torture_writer_state = RTWS_STOPPING;
torture_kthread_stopping("rcu_torture_writer");
return 0;
......@@ -1045,13 +1056,13 @@ rcu_torture_fakewriter(void *arg)
torture_random(&rand) % (nfakewriters * 8) == 0) {
cur_ops->cb_barrier();
} else if (gp_normal == gp_exp) {
if (torture_random(&rand) & 0x80)
if (cur_ops->sync && torture_random(&rand) & 0x80)
cur_ops->sync();
else
else if (cur_ops->exp_sync)
cur_ops->exp_sync();
} else if (gp_normal) {
} else if (gp_normal && cur_ops->sync) {
cur_ops->sync();
} else {
} else if (cur_ops->exp_sync) {
cur_ops->exp_sync();
}
stutter_wait("rcu_torture_fakewriter");
......@@ -1557,11 +1568,10 @@ static int rcu_torture_barrier_init(void)
atomic_set(&barrier_cbs_count, 0);
atomic_set(&barrier_cbs_invoked, 0);
barrier_cbs_tasks =
kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
kcalloc(n_barrier_cbs, sizeof(barrier_cbs_tasks[0]),
GFP_KERNEL);
barrier_cbs_wq =
kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
GFP_KERNEL);
kcalloc(n_barrier_cbs, sizeof(barrier_cbs_wq[0]), GFP_KERNEL);
if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
return -ENOMEM;
for (i = 0; i < n_barrier_cbs; i++) {
......@@ -1674,7 +1684,7 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
* next grace period. Unlikely, but can happen. If it
* does happen, the debug-objects subsystem won't have splatted.
*/
pr_alert("rcutorture: duplicated callback was invoked.\n");
pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME);
}
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
......@@ -1691,7 +1701,7 @@ static void rcu_test_debug_objects(void)
init_rcu_head_on_stack(&rh1);
init_rcu_head_on_stack(&rh2);
pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n");
pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME);
/* Try to queue the rh2 pair of callbacks for the same grace period. */
preempt_disable(); /* Prevent preemption from interrupting test. */
......@@ -1706,11 +1716,11 @@ static void rcu_test_debug_objects(void)
/* Wait for them all to get done so we can safely return. */
rcu_barrier();
pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n");
pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME);
destroy_rcu_head_on_stack(&rh1);
destroy_rcu_head_on_stack(&rh2);
#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n");
pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME);
#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
}
......@@ -1799,7 +1809,7 @@ rcu_torture_init(void)
if (firsterr)
goto unwind;
if (nfakewriters > 0) {
fakewriter_tasks = kzalloc(nfakewriters *
fakewriter_tasks = kcalloc(nfakewriters,
sizeof(fakewriter_tasks[0]),
GFP_KERNEL);
if (fakewriter_tasks == NULL) {
......@@ -1814,7 +1824,7 @@ rcu_torture_init(void)
if (firsterr)
goto unwind;
}
reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("out of memory");
......
......@@ -386,7 +386,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
WARN_ON(srcu_readers_active(sp))) {
pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
return; /* Caller forgot to stop doing call_srcu()? */
}
free_percpu(sp->sda);
......@@ -439,7 +439,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
struct srcu_data *sdp = this_cpu_ptr(sp->sda);
int state;
lockdep_assert_held(&sp->lock);
lockdep_assert_held(&ACCESS_PRIVATE(sp, lock));
WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&sp->srcu_gp_seq));
......@@ -492,8 +492,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
*/
static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
{
srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
&sdp->work, delay);
srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay);
}
/*
......@@ -527,11 +526,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
{
unsigned long cbdelay;
bool cbs;
bool last_lvl;
int cpu;
unsigned long flags;
unsigned long gpseq;
int idx;
int idxnext;
unsigned long mask;
struct srcu_data *sdp;
struct srcu_node *snp;
......@@ -555,11 +554,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
/* Initiate callback invocation as needed. */
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
rcu_for_each_node_breadth_first(sp, snp) {
spin_lock_irq_rcu_node(snp);
cbs = false;
if (snp >= sp->level[rcu_num_lvls - 1])
last_lvl = snp >= sp->level[rcu_num_lvls - 1];
if (last_lvl)
cbs = snp->srcu_have_cbs[idx] == gpseq;
snp->srcu_have_cbs[idx] = gpseq;
rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
......@@ -572,13 +571,16 @@ static void srcu_gp_end(struct srcu_struct *sp)
srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
/* Occasionally prevent srcu_data counter wrap. */
if (!(gpseq & counter_wrap_check))
if (!(gpseq & counter_wrap_check) && last_lvl)
for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
sdp = per_cpu_ptr(sp->sda, cpu);
spin_lock_irqsave_rcu_node(sdp, flags);
if (ULONG_CMP_GE(gpseq,
sdp->srcu_gp_seq_needed + 100))
sdp->srcu_gp_seq_needed = gpseq;
if (ULONG_CMP_GE(gpseq,
sdp->srcu_gp_seq_needed_exp + 100))
sdp->srcu_gp_seq_needed_exp = gpseq;
spin_unlock_irqrestore_rcu_node(sdp, flags);
}
}
......@@ -593,9 +595,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
srcu_gp_start(sp);
spin_unlock_irq_rcu_node(sp);
/* Throttle expedited grace periods: Should be rare! */
srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
? 0 : SRCU_INTERVAL);
srcu_reschedule(sp, 0);
} else {
spin_unlock_irq_rcu_node(sp);
}
......@@ -626,7 +626,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
spin_unlock_irqrestore_rcu_node(snp, flags);
}
spin_lock_irqsave_rcu_node(sp, flags);
if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
sp->srcu_gp_seq_needed_exp = s;
spin_unlock_irqrestore_rcu_node(sp, flags);
}
......@@ -691,8 +691,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
srcu_gp_start(sp);
queue_delayed_work(system_power_efficient_wq, &sp->work,
srcu_get_delay(sp));
queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp));
}
spin_unlock_irqrestore_rcu_node(sp, flags);
}
......@@ -1225,7 +1224,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
spin_unlock_irq_rcu_node(sp);
if (pushgp)
queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
queue_delayed_work(rcu_gp_wq, &sp->work, delay);
}
/*
......
......@@ -1161,7 +1161,7 @@ static int rcu_is_cpu_rrupt_from_idle(void)
*/
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
{
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum))
WRITE_ONCE(rdp->gpwrap, true);
if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum))
......@@ -1350,6 +1350,7 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
if (rsp->gp_kthread) {
pr_err("RCU grace-period kthread stack dump:\n");
sched_show_task(rsp->gp_kthread);
wake_up_process(rsp->gp_kthread);
}
......@@ -1628,7 +1629,7 @@ void rcu_cpu_stall_reset(void)
static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
struct rcu_node *rnp)
{
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
/*
* If RCU is idle, we just wait for the next grace period.
......@@ -1675,7 +1676,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
bool ret = false;
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
/*
* Pick up grace-period number for new callbacks. If this
......@@ -1803,7 +1804,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
{
bool ret = false;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
if (!rcu_segcblist_pend_cbs(&rdp->cblist))
......@@ -1843,7 +1844,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
if (!rcu_segcblist_pend_cbs(&rdp->cblist))
......@@ -1871,7 +1872,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
bool ret;
bool need_gp;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
/* Handle the ends of any preceding grace periods first. */
if (rdp->completed == rnp->completed &&
......@@ -2296,7 +2297,7 @@ static bool
rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
/*
* Either we have not yet spawned the grace-period
......@@ -2358,7 +2359,7 @@ static bool rcu_start_gp(struct rcu_state *rsp)
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
{
lockdep_assert_held(&rcu_get_root(rsp)->lock);
raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp));
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
......@@ -2383,7 +2384,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
unsigned long oldmask = 0;
struct rcu_node *rnp_c;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
/* Walk up the rcu_node hierarchy. */
for (;;) {
......@@ -2447,7 +2448,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
unsigned long mask;
struct rcu_node *rnp_p;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
......@@ -2592,7 +2593,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
long mask;
struct rcu_node *rnp = rnp_leaf;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
return;
......@@ -2691,7 +2692,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
/* Update counts and requeue any remaining callbacks. */
rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
smp_mb(); /* List handling before counting for rcu_barrier(). */
rdp->n_cbs_invoked += count;
rcu_segcblist_insert_count(&rdp->cblist, &rcl);
/* Reinstate batch limit if we have worked down the excess. */
......@@ -2845,10 +2845,8 @@ static void force_quiescent_state(struct rcu_state *rsp)
!raw_spin_trylock(&rnp->fqslock);
if (rnp_old != NULL)
raw_spin_unlock(&rnp_old->fqslock);
if (ret) {
rsp->n_force_qs_lh++;
if (ret)
return;
}
rnp_old = rnp;
}
/* rnp_old == rcu_get_root(rsp), rnp == NULL. */
......@@ -2857,7 +2855,6 @@ static void force_quiescent_state(struct rcu_state *rsp)
raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
raw_spin_unlock(&rnp_old->fqslock);
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
rsp->n_force_qs_lh++;
raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
return; /* Someone beat us to it. */
}
......@@ -3355,8 +3352,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
{
struct rcu_node *rnp = rdp->mynode;
rdp->n_rcu_pending++;
/* Check for CPU stalls, if enabled. */
check_cpu_stall(rsp, rdp);
......@@ -3365,48 +3360,31 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
return 0;
/* Is the RCU core waiting for a quiescent state from this CPU? */
if (rcu_scheduler_fully_active &&
rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
rdp->n_rp_core_needs_qs++;
} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
rdp->n_rp_report_qs++;
if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
return 1;
}
/* Does this CPU have callbacks ready to invoke? */
if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
rdp->n_rp_cb_ready++;
if (rcu_segcblist_ready_cbs(&rdp->cblist))
return 1;
}
/* Has RCU gone idle with this CPU needing another grace period? */
if (cpu_needs_another_gp(rsp, rdp)) {
rdp->n_rp_cpu_needs_gp++;
if (cpu_needs_another_gp(rsp, rdp))
return 1;
}
/* Has another RCU grace period completed? */
if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
rdp->n_rp_gp_completed++;
if (READ_ONCE(rnp->completed) != rdp->completed) /* outside lock */
return 1;
}
/* Has a new RCU grace period started? */
if (READ_ONCE(rnp->gpnum) != rdp->gpnum ||
unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */
rdp->n_rp_gp_started++;
unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */
return 1;
}
/* Does this CPU need a deferred NOCB wakeup? */
if (rcu_nocb_need_deferred_wakeup(rdp)) {
rdp->n_rp_nocb_defer_wakeup++;
if (rcu_nocb_need_deferred_wakeup(rdp))
return 1;
}
/* nothing to do */
rdp->n_rp_need_nothing++;
return 0;
}
......@@ -3618,7 +3596,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
long mask;
struct rcu_node *rnp = rnp_leaf;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
for (;;) {
mask = rnp->grpmask;
rnp = rnp->parent;
......@@ -3636,12 +3614,9 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
static void __init
rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
{
unsigned long flags;
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
......@@ -3649,7 +3624,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->cpu = cpu;
rdp->rsp = rsp;
rcu_boot_init_nocb_percpu_data(rdp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/*
......@@ -4193,6 +4167,8 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
pr_cont("\n");
}
struct workqueue_struct *rcu_gp_wq;
void __init rcu_init(void)
{
int cpu;
......@@ -4219,6 +4195,10 @@ void __init rcu_init(void)
rcu_cpu_starting(cpu);
rcutree_online_cpu(cpu);
}
/* Create workqueue for expedited GPs and for Tree SRCU. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
}
#include "tree_exp.h"
......
......@@ -146,12 +146,6 @@ struct rcu_node {
/* boosting for this rcu_node structure. */
unsigned int boost_kthread_status;
/* State of boost_kthread_task for tracing. */
unsigned long n_tasks_boosted;
/* Total number of tasks boosted. */
unsigned long n_exp_boosts;
/* Number of tasks boosted for expedited GP. */
unsigned long n_normal_boosts;
/* Number of tasks boosted for normal GP. */
#ifdef CONFIG_RCU_NOCB_CPU
struct swait_queue_head nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
......@@ -184,13 +178,6 @@ union rcu_noqs {
u16 s; /* Set of bits, aggregate OR here. */
};
/* Index values for nxttail array in struct rcu_data. */
#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
#define RCU_NEXT_TAIL 3
#define RCU_NEXT_SIZE 4
/* Per-CPU data for read-copy update. */
struct rcu_data {
/* 1) quiescent-state and grace-period handling : */
......@@ -217,8 +204,6 @@ struct rcu_data {
/* different grace periods. */
long qlen_last_fqs_check;
/* qlen at last check for QS forcing */
unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
unsigned long n_force_qs_snap;
/* did other CPU force QS recently? */
long blimit; /* Upper limit on a processed batch */
......@@ -234,18 +219,7 @@ struct rcu_data {
/* Grace period that needs help */
/* from cond_resched(). */
/* 5) __rcu_pending() statistics. */
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
unsigned long n_rp_core_needs_qs;
unsigned long n_rp_report_qs;
unsigned long n_rp_cb_ready;
unsigned long n_rp_cpu_needs_gp;
unsigned long n_rp_gp_completed;
unsigned long n_rp_gp_started;
unsigned long n_rp_nocb_defer_wakeup;
unsigned long n_rp_need_nothing;
/* 6) _rcu_barrier(), OOM callbacks, and expediting. */
/* 5) _rcu_barrier(), OOM callbacks, and expediting. */
struct rcu_head barrier_head;
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
......@@ -256,7 +230,7 @@ struct rcu_data {
atomic_long_t exp_workdone3; /* # done by others #3. */
int exp_dynticks_snap; /* Double-check need for IPI. */
/* 7) Callback offloading. */
/* 6) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
struct rcu_head *nocb_head; /* CBs waiting for kthread. */
struct rcu_head **nocb_tail;
......@@ -283,7 +257,7 @@ struct rcu_data {
/* Leader CPU takes GP-end wakeups. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* 8) RCU CPU stall data. */
/* 7) RCU CPU stall data. */
unsigned int softirq_snap; /* Snapshot of softirq activity. */
/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
struct irq_work rcu_iw; /* Check for non-irq activity. */
......@@ -374,10 +348,6 @@ struct rcu_state {
/* kthreads, if configured. */
unsigned long n_force_qs; /* Number of calls to */
/* force_quiescent_state(). */
unsigned long n_force_qs_lh; /* ~Number of calls leaving */
/* due to lock unavailable. */
unsigned long n_force_qs_ngp; /* Number of calls leaving */
/* due to no GP active. */
unsigned long gp_start; /* Time at which GP started, */
/* but in jiffies. */
unsigned long gp_activity; /* Time of last GP kthread */
......
......@@ -28,6 +28,15 @@ static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
rcu_seq_start(&rsp->expedited_sequence);
}
/*
* Return then value that expedited-grace-period counter will have
* at the end of the current grace period.
*/
static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp)
{
return rcu_seq_endval(&rsp->expedited_sequence);
}
/*
* Record the end of an expedited grace period.
*/
......@@ -366,21 +375,30 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
int ret;
struct rcu_node *rnp;
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
sync_exp_reset_tree(rsp);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
for_each_leaf_node_possible_cpu(rnp, cpu) {
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
int snap;
rdp->exp_dynticks_snap =
rcu_dynticks_snap(rdp->dynticks);
if (raw_smp_processor_id() == cpu ||
rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) ||
!(rnp->qsmaskinitnext & rdp->grpmask))
mask_ofl_test |= rdp->grpmask;
!(rnp->qsmaskinitnext & mask)) {
mask_ofl_test |= mask;
} else {
snap = rcu_dynticks_snap(rdtp);
if (rcu_dynticks_in_eqs(snap))
mask_ofl_test |= mask;
else
rdp->exp_dynticks_snap = snap;
}
}
mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
......@@ -394,7 +412,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* IPI the remaining CPUs for expedited quiescent state. */
for_each_leaf_node_possible_cpu(rnp, cpu) {
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
......@@ -417,6 +435,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
(rnp->expmask & mask)) {
/* Online, so delay for a bit and try again. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
schedule_timeout_uninterruptible(1);
goto retry_ipi;
}
......@@ -443,6 +462,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
struct rcu_node *rnp_root = rcu_get_root(rsp);
int ret;
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait"));
jiffies_stall = rcu_jiffies_till_stall_check();
jiffies_start = jiffies;
......@@ -606,7 +626,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
rew.rew_rsp = rsp;
rew.rew_s = s;
INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
schedule_work(&rew.rew_work);
queue_work(rcu_gp_wq, &rew.rew_work);
}
/* Wait for expedited grace period to complete. */
......
......@@ -180,7 +180,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
(rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
struct task_struct *t = current;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
WARN_ON_ONCE(rdp->mynode != rnp);
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
......@@ -560,8 +560,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
}
t = list_entry(rnp->gp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
/*
* We could be printing a lot while holding a spinlock.
* Avoid triggering hard lockup.
*/
touch_nmi_watchdog();
sched_show_task(t);
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
......@@ -957,14 +963,10 @@ static int rcu_boost(struct rcu_node *rnp)
* expedited grace period must boost all blocked tasks, including
* those blocking the pre-existing normal grace period.
*/
if (rnp->exp_tasks != NULL) {
if (rnp->exp_tasks != NULL)
tb = rnp->exp_tasks;
rnp->n_exp_boosts++;
} else {
else
tb = rnp->boost_tasks;
rnp->n_normal_boosts++;
}
rnp->n_tasks_boosted++;
/*
* We boost task t by manufacturing an rt_mutex that appears to
......@@ -1042,7 +1044,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
{
struct task_struct *t;
lockdep_assert_held(&rnp->lock);
raw_lockdep_assert_held_rcu_node(rnp);
if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
......@@ -1677,6 +1679,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
char *ticks_title;
unsigned long ticks_value;
/*
* We could be printing a lot while holding a spinlock. Avoid
* triggering hard lockup.
*/
touch_nmi_watchdog();
if (rsp->gpnum == rdp->gpnum) {
ticks_title = "ticks this GP";
ticks_value = rdp->ticks_this_gp;
......@@ -2235,7 +2243,6 @@ static int rcu_nocb_kthread(void *arg)
smp_mb__before_atomic(); /* _add after CB invocation. */
atomic_long_add(-c, &rdp->nocb_q_count);
atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
rdp->n_nocbs_invoked += c;
}
return 0;
}
......@@ -2312,8 +2319,11 @@ void __init rcu_init_nohz(void)
cpumask_and(rcu_nocb_mask, cpu_possible_mask,
rcu_nocb_mask);
}
pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
cpumask_pr_args(rcu_nocb_mask));
if (cpumask_empty(rcu_nocb_mask))
pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
else
pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
cpumask_pr_args(rcu_nocb_mask));
if (rcu_nocb_poll)
pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
......
......@@ -113,16 +113,6 @@ config NO_HZ_FULL
endchoice
config NO_HZ_FULL_ALL
bool "Full dynticks system on all CPUs by default (except CPU 0)"
depends on NO_HZ_FULL
help
If the user doesn't pass the nohz_full boot option to
define the range of full dynticks CPUs, consider that all
CPUs in the system are full dynticks by default.
Note the boot CPU will still be kept outside the range to
handle the timekeeping duty.
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
......
......@@ -405,30 +405,12 @@ static int tick_nohz_cpu_down(unsigned int cpu)
return 0;
}
static int tick_nohz_init_all(void)
{
int err = -1;
#ifdef CONFIG_NO_HZ_FULL_ALL
if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
WARN(1, "NO_HZ: Can't allocate full dynticks cpumask\n");
return err;
}
err = 0;
cpumask_setall(tick_nohz_full_mask);
tick_nohz_full_running = true;
#endif
return err;
}
void __init tick_nohz_init(void)
{
int cpu, ret;
if (!tick_nohz_full_running) {
if (tick_nohz_init_all() < 0)
return;
}
if (!tick_nohz_full_running)
return;
/*
* Full dynticks uses irq work to drive the tick rescheduling on safe
......
......@@ -136,6 +136,9 @@ identify_boot_image () {
qemu-system-x86_64|qemu-system-i386)
echo arch/x86/boot/bzImage
;;
qemu-system-aarch64)
echo arch/arm64/boot/Image
;;
*)
echo vmlinux
;;
......@@ -158,6 +161,9 @@ identify_qemu () {
elif echo $u | grep -q "Intel 80386"
then
echo qemu-system-i386
elif echo $u | grep -q aarch64
then
echo qemu-system-aarch64
elif uname -a | grep -q ppc64
then
echo qemu-system-ppc64
......@@ -176,16 +182,20 @@ identify_qemu () {
# Output arguments for the qemu "-append" string based on CPU type
# and the TORTURE_QEMU_INTERACTIVE environment variable.
identify_qemu_append () {
local console=ttyS0
case "$1" in
qemu-system-x86_64|qemu-system-i386)
echo noapic selinux=0 initcall_debug debug
;;
qemu-system-aarch64)
console=ttyAMA0
;;
esac
if test -n "$TORTURE_QEMU_INTERACTIVE"
then
echo root=/dev/sda
else
echo console=ttyS0
echo console=$console
fi
}
......@@ -197,6 +207,9 @@ identify_qemu_args () {
case "$1" in
qemu-system-x86_64|qemu-system-i386)
;;
qemu-system-aarch64)
echo -machine virt,gic-version=host -cpu host
;;
qemu-system-ppc64)
echo -enable-kvm -M pseries -nodefaults
echo -device spapr-vscsi
......@@ -254,7 +267,7 @@ specify_qemu_cpus () {
echo $2
else
case "$1" in
qemu-system-x86_64|qemu-system-i386)
qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64)
echo $2 -smp $3
;;
qemu-system-ppc64)
......
......@@ -39,30 +39,31 @@ sed -e 's/us : / : /' |
tr -d '\015' |
awk '
$8 == "start" {
if (starttask != "")
if (startseq != "")
nlost++;
starttask = $1;
starttime = $3;
startseq = $7;
seqtask[startseq] = starttask;
}
$8 == "end" {
if (starttask == $1 && startseq == $7) {
if (startseq == $7) {
curgpdur = $3 - starttime;
gptimes[++n] = curgpdur;
gptaskcnt[starttask]++;
sum += curgpdur;
if (curgpdur > 1000)
print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
starttask = "";
startseq = "";
} else {
# Lost a message or some such, reset.
starttask = "";
startseq = "";
nlost++;
}
}
$8 == "done" {
$8 == "done" && seqtask[$7] != $1 {
piggybackcnt[$1]++;
}
......
......@@ -177,8 +177,8 @@ then
exit 0
fi
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
sleep 10 # Give qemu's pid a chance to reach the file
if test -s "$resdir/qemu_pid"
......
#!/bin/bash
#
# Run a series of 14 tests under KVM. These are not particularly
# well-selected or well-tuned, but are the current set.
#
# Edit the definitions below to set the locations of the various directories,
# as well as the test duration.
# Run a series of tests under KVM. By default, this series is specified
# by the relevant CFLIST file, but can be overridden by the --configs
# command-line argument.
#
# Usage: kvm.sh [ options ]
#
......@@ -44,6 +42,7 @@ TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
resdir=""
......@@ -70,6 +69,7 @@ usage () {
echo " --kconfig Kconfig-options"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
echo " --memory megabytes | nnnG"
echo " --no-initrd"
echo " --qemu-args qemu-arguments"
echo " --qemu-cmd qemu-system-..."
......@@ -147,6 +147,11 @@ do
TORTURE_QEMU_MAC=$2
shift
;;
--memory)
checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
TORTURE_QEMU_MEM=$2
shift
;;
--no-initrd)
TORTURE_INITRD=""; export TORTURE_INITRD
;;
......@@ -174,6 +179,12 @@ do
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
TORTURE_SUITE=$2
shift
if test "$TORTURE_SUITE" = rcuperf
then
# If you really want jitter for rcuperf, specify
# it after specifying rcuperf. (But why?)
jitter=0
fi
;;
*)
echo Unknown argument $1
......@@ -288,6 +299,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
if ! test -e $resdir
......
......@@ -9,5 +9,4 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ_FULL_ALL=y
#CHECK#CONFIG_RCU_EXPERT=n
rcutorture.torture_type=tasks
rcutorture.torture_type=tasks nohz_full=1
......@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ_FULL_ALL=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=n
......
rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4
rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
......@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ_FULL_ALL=n
CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
......
......@@ -20,32 +20,10 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
# rcuperf_param_nreaders bootparam-string
#
# Adds nreaders rcuperf module parameter if not already specified.
rcuperf_param_nreaders () {
if ! echo "$1" | grep -q "rcuperf.nreaders"
then
echo rcuperf.nreaders=-1
fi
}
# rcuperf_param_nwriters bootparam-string
#
# Adds nwriters rcuperf module parameter if not already specified.
rcuperf_param_nwriters () {
if ! echo "$1" | grep -q "rcuperf.nwriters"
then
echo rcuperf.nwriters=-1
fi
}
# per_version_boot_params bootparam-string config-file seconds
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
echo $1 `rcuperf_param_nreaders "$1"` \
`rcuperf_param_nwriters "$1"` \
rcuperf.shutdown=1 \
echo $1 rcuperf.shutdown=1 \
rcuperf.verbose=1
}
This document describes one way to created the rcu-test-image file
This document describes one way to create the rcu-test-image file
that contains the filesystem used by the guest-OS kernel. There are
probably much better ways of doing this, and this filesystem could no
doubt be smaller. It is probably also possible to simply download
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment