Commit 31945aa9 authored by Paul E. McKenney


Merge branches 'doc.2017.01.15b', 'dyntick.2017.01.23a', 'fixes.2017.01.23a', 'srcu.2017.01.25a' and 'torture.2017.01.15b' into HEAD

doc.2017.01.15b: Documentation updates
dyntick.2017.01.23a: Dyntick tracking consolidation
fixes.2017.01.23a: Miscellaneous fixes
srcu.2017.01.25a: SRCU rewrite, fixes, and verification
torture.2017.01.15b: Torture-test updates
@@ -237,7 +237,7 @@ o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
-s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 enq=0 sc=21872
 These fields are as follows:
@@ -249,9 +249,6 @@ o	"wd1", "wd2", and "wd3" are the number of times that an attempt
	completed an expedited grace period that satisfies the attempted
	request.  "Our work is done."
-o	"n" is number of times that a concurrent CPU-hotplug operation
-	forced a fallback to a normal grace period.
 o	"enq" is the number of quiescent states still outstanding.
 o	"sc" is the number of times that the attempt to start a
......
@@ -3,28 +3,33 @@
 /*
  * Lock-less NULL terminated single linked list
  *
- * If there are multiple producers and multiple consumers, llist_add
- * can be used in producers and llist_del_all can be used in
- * consumers.  They can work simultaneously without lock.  But
- * llist_del_first can not be used here.  Because llist_del_first
- * depends on list->first->next does not changed if list->first is not
- * changed during its operation, but llist_del_first, llist_add,
- * llist_add (or llist_del_all, llist_add, llist_add) sequence in
- * another consumer may violate that.
- *
- * If there are multiple producers and one consumer, llist_add can be
- * used in producers and llist_del_all or llist_del_first can be used
- * in the consumer.
- *
- * This can be summarized as follow:
+ * Cases where locking is not needed:
+ * If there are multiple producers and multiple consumers, llist_add can be
+ * used in producers and llist_del_all can be used in consumers simultaneously
+ * without locking. Also a single consumer can use llist_del_first while
+ * multiple producers simultaneously use llist_add, without any locking.
+ *
+ * Cases where locking is needed:
+ * If we have multiple consumers with llist_del_first used in one consumer, and
+ * llist_del_first or llist_del_all used in other consumers, then a lock is
+ * needed.  This is because llist_del_first depends on list->first->next not
+ * changing, but without lock protection, there's no way to be sure about that
+ * if a preemption happens in the middle of the delete operation and on being
+ * preempted back, the list->first is the same as before causing the cmpxchg in
+ * llist_del_first to succeed. For example, while a llist_del_first operation
+ * is in progress in one consumer, then a llist_del_first, llist_add,
+ * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
+ * consumer may cause violations.
+ *
+ * This can be summarized as follows:
  *
  *           |   add    | del_first |  del_all
  * add       |    -     |     -     |     -
  * del_first |          |     L     |     L
  * del_all   |          |           |     -
  *
- * Where "-" stands for no lock is needed, while "L" stands for lock
- * is needed.
+ * Where, a particular row's operation can happen concurrently with a column's
+ * operation, with "-" being no lock needed, while "L" being lock is needed.
  *
  * The list entries deleted via llist_del_all can be traversed with
  * traversing function such as llist_for_each etc.  But the list
......
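To illustrate the "no locking needed" case described above (many producers calling llist_add, one consumer calling llist_del_all), here is a minimal single-threaded userspace model using C11 atomics. The names are invented for the illustration; this is not the kernel's llist implementation, it only mirrors the cmpxchg-based add and the exchange-based del_all.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int val;
};

/* Head of the lock-free list; NULL means empty. */
static _Atomic(struct node *) list_head = NULL;

/* Model of llist_add(): push one node, safe for concurrent producers. */
static void model_add(struct node *n)
{
	struct node *first = atomic_load(&list_head);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&list_head, &first, n));
}

/* Model of llist_del_all(): detach the whole list in one atomic exchange. */
static struct node *model_del_all(void)
{
	return atomic_exchange(&list_head, NULL);
}

int main(void)
{
	struct node a = { .val = 1 }, b = { .val = 2 };

	model_add(&a);
	model_add(&b);

	/* Consumer detaches everything at once; no lock needed. */
	for (struct node *n = model_del_all(); n; n = n->next)
		printf("%d\n", n->val);
	return 0;
}

del_first is the delicate case: it must dereference list->first->next, which is only safe if no other consumer can remove and re-add that node in the meantime, hence the "L" entries in the table above.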
@@ -1161,5 +1161,17 @@ do { \
		ftrace_dump(oops_dump_mode); \
	} while (0)
+
+/*
+ * Place this after a lock-acquisition primitive to guarantee that
+ * an UNLOCK+LOCK pair acts as a full barrier.  This guarantee applies
+ * if the UNLOCK and LOCK are executed by the same CPU or if the
+ * UNLOCK and LOCK operate on the same lock variable.
+ */
+#ifdef CONFIG_PPC
+#define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
+#else /* #ifdef CONFIG_PPC */
+#define smp_mb__after_unlock_lock()	do { } while (0)
+#endif /* #else #ifdef CONFIG_PPC */
+
 #endif /* __LINUX_RCUPDATE_H */
@@ -27,6 +27,12 @@
 #include <linux/cache.h>
+
+struct rcu_dynticks;
+static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
+{
+	return 0;
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
	return 0;
......
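The macro moved into rcupdate.h above is meant to be dropped in immediately after a lock acquisition when the caller needs the UNLOCK+LOCK sequence to behave as a full memory barrier; the srcu.c hunks later in this merge use it exactly that way. A hedged kernel-style sketch of that placement (the enqueue function itself is hypothetical, only the primitives are real):

/* Hypothetical enqueue path; mirrors the call_srcu() change later in this merge. */
static void example_enqueue(struct srcu_struct *sp, struct rcu_head *head)
{
	unsigned long flags;

	spin_lock_irqsave(&sp->queue_lock, flags);
	/* Order the caller's prior accesses before the grace-period machinery. */
	smp_mb__after_unlock_lock();
	/* ... queue the callback and kick the SRCU state machine ... */
	spin_unlock_irqrestore(&sp->queue_lock, flags);
}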
@@ -33,9 +33,9 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
-struct srcu_struct_array {
-	unsigned long c[2];
-	unsigned long seq[2];
+struct srcu_array {
+	unsigned long lock_count[2];
+	unsigned long unlock_count[2];
 };
 
 struct rcu_batch {
@@ -46,7 +46,7 @@ struct rcu_batch {
 struct srcu_struct {
	unsigned long completed;
-	struct srcu_struct_array __percpu *per_cpu_ref;
+	struct srcu_array __percpu *per_cpu_ref;
	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
	bool running;
	/* callbacks just queued */
@@ -118,7 +118,7 @@ void process_srcu(struct work_struct *work);
 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
 */
 #define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
 #define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
 #define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
......
@@ -385,11 +385,11 @@ TRACE_EVENT(rcu_quiescent_state_report,
 /*
  * Tracepoint for quiescent states detected by force_quiescent_state().
- * These trace events include the type of RCU, the grace-period number
- * that was blocked by the CPU, the CPU itself, and the type of quiescent
- * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline,
- * or "kick" when kicking a CPU that has been in dyntick-idle mode for
- * too long.
+ * These trace events include the type of RCU, the grace-period number that
+ * was blocked by the CPU, the CPU itself, and the type of quiescent state,
+ * which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, "kick"
+ * when kicking a CPU that has been in dyntick-idle mode for too long, or
+ * "rqc" if the CPU got a quiescent state via its rcu_qs_ctr.
  */
 TRACE_EVENT(rcu_fqs,
......
@@ -529,7 +529,6 @@ config SRCU
 config TASKS_RCU
	bool
	default n
-	depends on !UML
	select SRCU
	help
	  This option enables a task-based RCU implementation that uses
@@ -781,19 +780,6 @@ config RCU_NOCB_CPU_ALL
 endchoice
 
-config RCU_EXPEDITE_BOOT
-	bool
-	default n
-	help
-	  This option enables expedited grace periods at boot time,
-	  as if rcu_expedite_gp() had been invoked early in boot.
-	  The corresponding rcu_unexpedite_gp() is invoked from
-	  rcu_end_inkernel_boot(), which is intended to be invoked
-	  at the end of the kernel-only boot sequence, just before
-	  init is exec'ed.
-
-	  Accept the default if unsure.
-
 endmenu # "RCU Subsystem"
 
 config BUILD_BIN2C
......
@@ -4412,13 +4412,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
	/* Note: the following can be executed concurrently, so be careful. */
	printk("\n");
-	printk("===============================\n");
-	printk("[ INFO: suspicious RCU usage. ]\n");
+	pr_err("===============================\n");
+	pr_err("[ ERR: suspicious RCU usage. ]\n");
	print_kernel_ident();
-	printk("-------------------------------\n");
-	printk("%s:%d %s!\n", file, line, s);
-	printk("\nother info that might help us debug this:\n\n");
-	printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+	pr_err("-------------------------------\n");
+	pr_err("%s:%d %s!\n", file, line, s);
+	pr_err("\nother info that might help us debug this:\n\n");
+	pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
	       !rcu_lockdep_current_cpu_online()
	       ? "RCU used illegally from offline CPU!\n"
	       : !rcu_is_watching()
......
@@ -780,6 +780,10 @@ static void lock_torture_cleanup(void)
	else
		lock_torture_print_module_parms(cxt.cur_ops,
						"End of test: SUCCESS");
+
+	kfree(cxt.lwsa);
+	kfree(cxt.lrsa);
+
 end:
	torture_cleanup_end();
 }
@@ -924,6 +928,8 @@ static int __init lock_torture_init(void)
				       GFP_KERNEL);
		if (reader_tasks == NULL) {
			VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
+			kfree(writer_tasks);
+			writer_tasks = NULL;
			firsterr = -ENOMEM;
			goto unwind;
		}
......
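The two locktorture hunks above fix the same kind of leak: when a later allocation fails, the earlier one must be freed and its pointer cleared so that the common cleanup path cannot double-free it. A tiny standalone C sketch of that pattern (the array names here are invented stand-ins, not the module's actual fields):

#include <stdio.h>
#include <stdlib.h>

static long *writer_stats;	/* stand-in for the writer stats array */
static long *reader_stats;	/* stand-in for the reader stats array */

/* Allocate both arrays or neither, mirroring the error handling above. */
static int alloc_stats(size_t nwriters, size_t nreaders)
{
	writer_stats = calloc(nwriters, sizeof(*writer_stats));
	if (!writer_stats)
		return -1;

	reader_stats = calloc(nreaders, sizeof(*reader_stats));
	if (!reader_stats) {
		/* Undo the first allocation and leave no dangling pointer. */
		free(writer_stats);
		writer_stats = NULL;
		return -1;
	}
	return 0;
}

/* Cleanup stays simple because each pointer is either valid or NULL. */
static void free_stats(void)
{
	free(writer_stats);
	free(reader_stats);
	writer_stats = NULL;
	reader_stats = NULL;
}

int main(void)
{
	if (alloc_stats(4, 4) == 0)
		puts("allocated");
	free_stats();
	return 0;
}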
@@ -16,6 +16,7 @@
 #include <linux/syscalls.h>
 #include <linux/membarrier.h>
+#include <linux/tick.h>
 
 /*
  * Bitmask made from a "or" of all commands within enum membarrier_cmd,
@@ -51,6 +52,9 @@
  */
 SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 {
+	/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
+	if (tick_nohz_full_enabled())
+		return -ENOSYS;
	if (unlikely(flags))
		return -EINVAL;
	switch (cmd) {
......
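Because the change above makes the membarrier() system call report -ENOSYS on nohz_full kernels, userspace callers need a fallback path. A small, hedged userspace sketch of defensive use (the command constant and syscall number come from the UAPI headers; the wrapper function is ours because glibc may not provide one):

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Thin wrapper around the raw system call. */
static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/*
	 * With the patch above, MEMBARRIER_CMD_SHARED is refused with
	 * -ENOSYS when nohz_full is enabled, so be ready to fall back.
	 */
	if (membarrier(MEMBARRIER_CMD_SHARED, 0) == 0)
		puts("expedited memory barrier completed");
	else
		printf("membarrier unavailable: %s\n", strerror(errno));
	return 0;
}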
@@ -564,10 +564,25 @@ static void srcu_torture_stats(void)
	pr_alert("%s%s per-CPU(idx=%d):",
		 torture_type, TORTURE_FLAG, idx);
	for_each_possible_cpu(cpu) {
+		unsigned long l0, l1;
+		unsigned long u0, u1;
		long c0, c1;
+		struct srcu_array *counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
 
-		c0 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[!idx];
-		c1 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[idx];
+		u0 = counts->unlock_count[!idx];
+		u1 = counts->unlock_count[idx];
+
+		/*
+		 * Make sure that a lock is always counted if the corresponding
+		 * unlock is counted.
+		 */
+		smp_rmb();
+
+		l0 = counts->lock_count[!idx];
+		l1 = counts->lock_count[idx];
+
+		c0 = l0 - u0;
+		c1 = l1 - u1;
		pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
	}
	pr_cont("\n");
......
@@ -106,7 +106,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
	rcu_batch_init(&sp->batch_check1);
	rcu_batch_init(&sp->batch_done);
	INIT_DELAYED_WORK(&sp->work, process_srcu);
-	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
	return sp->per_cpu_ref ? 0 : -ENOMEM;
 }
@@ -141,114 +141,77 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /*
- * Returns approximate total of the readers' ->seq[] values for the
+ * Returns approximate total of the readers' ->lock_count[] values for the
  * rank of per-CPU counters specified by idx.
  */
-static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
+static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
 {
	int cpu;
	unsigned long sum = 0;
-	unsigned long t;
 
	for_each_possible_cpu(cpu) {
-		t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
-		sum += t;
+		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+
+		sum += READ_ONCE(cpuc->lock_count[idx]);
	}
	return sum;
 }
 
 /*
- * Returns approximate number of readers active on the specified rank
- * of the per-CPU ->c[] counters.
+ * Returns approximate total of the readers' ->unlock_count[] values for the
+ * rank of per-CPU counters specified by idx.
  */
-static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
+static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
 {
	int cpu;
	unsigned long sum = 0;
-	unsigned long t;
 
	for_each_possible_cpu(cpu) {
-		t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
-		sum += t;
+		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+
+		sum += READ_ONCE(cpuc->unlock_count[idx]);
	}
	return sum;
 }
 
 /*
  * Return true if the number of pre-existing readers is determined to
- * be stably zero.  An example unstable zero can occur if the call
- * to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
- * but due to task migration, sees the corresponding __srcu_read_unlock()
- * decrement.  This can happen because srcu_readers_active_idx() takes
- * time to sum the array, and might in fact be interrupted or preempted
- * partway through the summation.
+ * be zero.
  */
 static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
 {
-	unsigned long seq;
+	unsigned long unlocks;
 
-	seq = srcu_readers_seq_idx(sp, idx);
+	unlocks = srcu_readers_unlock_idx(sp, idx);
 
	/*
-	 * The following smp_mb() A pairs with the smp_mb() B located in
-	 * __srcu_read_lock().  This pairing ensures that if an
-	 * __srcu_read_lock() increments its counter after the summation
-	 * in srcu_readers_active_idx(), then the corresponding SRCU read-side
-	 * critical section will see any changes made prior to the start
-	 * of the current SRCU grace period.
+	 * Make sure that a lock is always counted if the corresponding unlock
+	 * is counted. Needs to be a smp_mb() as the read side may contain a
+	 * read from a variable that is written to before the synchronize_srcu()
+	 * in the write side. In this case smp_mb()s A and B act like the store
+	 * buffering pattern.
	 *
-	 * Also, if the above call to srcu_readers_seq_idx() saw the
-	 * increment of ->seq[], then the call to srcu_readers_active_idx()
-	 * must see the increment of ->c[].
+	 * This smp_mb() also pairs with smp_mb() C to prevent accesses after the
+	 * synchronize_srcu() from being executed before the grace period ends.
	 */
	smp_mb(); /* A */
 
	/*
-	 * Note that srcu_readers_active_idx() can incorrectly return
-	 * zero even though there is a pre-existing reader throughout.
-	 * To see this, suppose that task A is in a very long SRCU
-	 * read-side critical section that started on CPU 0, and that
-	 * no other reader exists, so that the sum of the counters
-	 * is equal to one.  Then suppose that task B starts executing
-	 * srcu_readers_active_idx(), summing up to CPU 1, and then that
-	 * task C starts reading on CPU 0, so that its increment is not
-	 * summed, but finishes reading on CPU 2, so that its decrement
-	 * -is- summed.  Then when task B completes its sum, it will
-	 * incorrectly get zero, despite the fact that task A has been
-	 * in its SRCU read-side critical section the whole time.
-	 *
-	 * We therefore do a validation step should srcu_readers_active_idx()
-	 * return zero.
-	 */
-	if (srcu_readers_active_idx(sp, idx) != 0)
-		return false;
-
-	/*
-	 * The remainder of this function is the validation step.
-	 * The following smp_mb() D pairs with the smp_mb() C in
-	 * __srcu_read_unlock().  If the __srcu_read_unlock() was seen
-	 * by srcu_readers_active_idx() above, then any destructive
-	 * operation performed after the grace period will happen after
-	 * the corresponding SRCU read-side critical section.
+	 * If the locks are the same as the unlocks, then there must have
+	 * been no readers on this index at some time in between. This does not
+	 * mean that there are no more readers, as one could have read the
+	 * current index but not have incremented the lock counter yet.
	 *
-	 * Note that there can be at most NR_CPUS worth of readers using
-	 * the old index, which is not enough to overflow even a 32-bit
-	 * integer.  (Yes, this does mean that systems having more than
-	 * a billion or so CPUs need to be 64-bit systems.)  Therefore,
-	 * the sum of the ->seq[] counters cannot possibly overflow.
-	 * Therefore, the only way that the return values of the two
-	 * calls to srcu_readers_seq_idx() can be equal is if there were
-	 * no increments of the corresponding rank of ->seq[] counts
-	 * in the interim.  But the missed-increment scenario laid out
-	 * above includes an increment of the ->seq[] counter by
-	 * the corresponding __srcu_read_lock().  Therefore, if this
-	 * scenario occurs, the return values from the two calls to
-	 * srcu_readers_seq_idx() will differ, and thus the validation
-	 * step below suffices.
+	 * Possible bug: There is no guarantee that there haven't been ULONG_MAX
+	 * increments of ->lock_count[] since the unlocks were counted, meaning
+	 * that this could return true even if there are still active readers.
+	 * Since there are no memory barriers around srcu_flip(), the CPU is not
+	 * required to increment ->completed before running
+	 * srcu_readers_unlock_idx(), which means that there could be an
+	 * arbitrarily large number of critical sections that execute after
+	 * srcu_readers_unlock_idx() but use the old value of ->completed.
	 */
-	smp_mb(); /* D */
-	return srcu_readers_seq_idx(sp, idx) == seq;
+	return srcu_readers_lock_idx(sp, idx) == unlocks;
 }
 
 /**
@@ -266,8 +229,12 @@ static bool srcu_readers_active(struct srcu_struct *sp)
	unsigned long sum = 0;
 
	for_each_possible_cpu(cpu) {
-		sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
-		sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
+		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
+
+		sum += READ_ONCE(cpuc->lock_count[0]);
+		sum += READ_ONCE(cpuc->lock_count[1]);
+		sum -= READ_ONCE(cpuc->unlock_count[0]);
+		sum -= READ_ONCE(cpuc->unlock_count[1]);
	}
	return sum;
 }
@@ -298,9 +265,8 @@ int __srcu_read_lock(struct srcu_struct *sp)
	int idx;
 
	idx = READ_ONCE(sp->completed) & 0x1;
-	__this_cpu_inc(sp->per_cpu_ref->c[idx]);
+	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
	smp_mb(); /* B */  /* Avoid leaking the critical section. */
-	__this_cpu_inc(sp->per_cpu_ref->seq[idx]);
	return idx;
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -314,7 +280,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
	smp_mb(); /* C */  /* Avoid leaking the critical section. */
-	this_cpu_dec(sp->per_cpu_ref->c[idx]);
+	this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -349,12 +315,21 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
 /*
  * Increment the ->completed counter so that future SRCU readers will
- * use the other rank of the ->c[] and ->seq[] arrays.  This allows
+ * use the other rank of the ->(un)lock_count[] arrays.  This allows
  * us to wait for pre-existing readers in a starvation-free manner.
  */
 static void srcu_flip(struct srcu_struct *sp)
 {
-	sp->completed++;
+	WRITE_ONCE(sp->completed, sp->completed + 1);
+
+	/*
+	 * Ensure that if the updater misses an __srcu_read_unlock()
+	 * increment, that task's next __srcu_read_lock() will see the
+	 * above counter update.  Note that both this memory barrier
+	 * and the one in srcu_readers_active_idx_check() provide the
+	 * guarantee for __srcu_read_lock().
+	 */
+	smp_mb(); /* D */  /* Pairs with C. */
 }
 
 /*
@@ -392,6 +367,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
	head->next = NULL;
	head->func = func;
	spin_lock_irqsave(&sp->queue_lock, flags);
+	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
	rcu_batch_queue(&sp->batch_queue, head);
	if (!sp->running) {
		sp->running = true;
@@ -425,6 +401,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
	head->next = NULL;
	head->func = wakeme_after_rcu;
	spin_lock_irq(&sp->queue_lock);
+	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
	if (!sp->running) {
		/* steal the processing owner */
		sp->running = true;
@@ -444,8 +421,11 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
		spin_unlock_irq(&sp->queue_lock);
	}
 
-	if (!done)
+	if (!done) {
		wait_for_completion(&rcu.completion);
+		smp_mb(); /* Caller's later accesses after GP. */
+	}
 }
 
 /**
@@ -613,7 +593,8 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
 /*
  * Invoke a limited number of SRCU callbacks that have passed through
  * their grace period.  If there are more to do, SRCU will reschedule
- * the workqueue.
+ * the workqueue.  Note that needed memory barriers have been executed
+ * in this task's context by srcu_readers_active_idx_check().
  */
 static void srcu_invoke_callbacks(struct srcu_struct *sp)
 {
......
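To make the rewritten counting scheme above concrete: each index keeps separate lock and unlock counts, and an index is deemed free of pre-existing readers when the two sums are equal, after reading the unlocks first. The following standalone C model collapses the per-CPU arrays to a single "CPU" and uses invented names; it is an illustration of the idea, not the kernel implementation.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* One "CPU" worth of the two-index counter pairs from struct srcu_array. */
static atomic_ulong lock_count[2];
static atomic_ulong unlock_count[2];
static atomic_ulong completed;	/* low bit selects the index new readers use */

/* Model of __srcu_read_lock(): count the entry on the current index. */
static int model_read_lock(void)
{
	int idx = atomic_load(&completed) & 0x1;

	atomic_fetch_add(&lock_count[idx], 1);	/* kernel: smp_mb() B follows */
	return idx;
}

/* Model of __srcu_read_unlock(): count the exit on the same index. */
static void model_read_unlock(int idx)
{
	atomic_fetch_add(&unlock_count[idx], 1);	/* kernel: smp_mb() C precedes */
}

/* Model of srcu_readers_active_idx_check(): unlocks read before locks. */
static bool model_idx_clean(int idx)
{
	unsigned long unlocks = atomic_load(&unlock_count[idx]);
	unsigned long locks = atomic_load(&lock_count[idx]);	/* kernel: smp_mb() A between */

	return locks == unlocks;
}

int main(void)
{
	int idx = model_read_lock();

	printf("idx %d clean? %d\n", idx, model_idx_clean(idx));	/* 0: reader active */
	model_read_unlock(idx);
	printf("idx %d clean? %d\n", idx, model_idx_clean(idx));	/* 1: reader done */

	/* Model of srcu_flip(): future readers use the other index. */
	atomic_fetch_add(&completed, 1);
	return 0;
}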
@@ -41,8 +41,6 @@
 /* Forward declarations for tiny_plugin.h. */
 struct rcu_ctrlblk;
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
-static void rcu_process_callbacks(struct softirq_action *unused);
 static void __call_rcu(struct rcu_head *head,
		       rcu_callback_t func,
		       struct rcu_ctrlblk *rcp);
......
@@ -281,6 +281,116 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
/*
* Record entry into an extended quiescent state. This is only to be
* called when not already in an extended quiescent state.
*/
static void rcu_dynticks_eqs_enter(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
int special;
/*
* CPUs seeing atomic_inc_return() must see prior RCU read-side
* critical sections, and we also must force ordering with the
* next idle sojourn.
*/
special = atomic_inc_return(&rdtp->dynticks);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
}
/*
* Record exit from an extended quiescent state. This is only to be
* called from an extended quiescent state.
*/
static void rcu_dynticks_eqs_exit(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
int special;
/*
* CPUs seeing atomic_inc_return() must see prior idle sojourns,
* and we also must force ordering with the next RCU read-side
* critical section.
*/
special = atomic_inc_return(&rdtp->dynticks);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
}
/*
* Reset the current CPU's ->dynticks counter to indicate that the
* newly onlined CPU is no longer in an extended quiescent state.
* This will either leave the counter unchanged, or increment it
* to the next non-quiescent value.
*
* The non-atomic test/increment sequence works because the upper bits
* of the ->dynticks counter are manipulated only by the corresponding CPU,
* or when the corresponding CPU is offline.
*/
static void rcu_dynticks_eqs_online(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
if (atomic_read(&rdtp->dynticks) & 0x1)
return;
atomic_add(0x1, &rdtp->dynticks);
}
/*
* Is the current CPU in an extended quiescent state?
*
* No ordering, as we are sampling CPU-local information.
*/
bool rcu_dynticks_curr_cpu_in_eqs(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
return !(atomic_read(&rdtp->dynticks) & 0x1);
}
/*
* Snapshot the ->dynticks counter with full ordering so as to allow
* stable comparison of this counter with past and future snapshots.
*/
int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
{
int snap = atomic_add_return(0, &rdtp->dynticks);
return snap;
}
/*
* Return true if the snapshot returned from rcu_dynticks_snap()
* indicates that RCU is in an extended quiescent state.
*/
static bool rcu_dynticks_in_eqs(int snap)
{
return !(snap & 0x1);
}
/*
* Return true if the CPU corresponding to the specified rcu_dynticks
* structure has spent some time in an extended quiescent state since
* rcu_dynticks_snap() returned the specified snapshot.
*/
static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
{
return snap != rcu_dynticks_snap(rdtp);
}
/*
* Do a double-increment of the ->dynticks counter to emulate a
* momentary idle-CPU quiescent state.
*/
static void rcu_dynticks_momentary_idle(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
int special = atomic_add_return(2, &rdtp->dynticks);
/* It is illegal to call this from idle state. */
WARN_ON_ONCE(!(special & 0x1));
}
 DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
 EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
@@ -300,7 +410,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
 static void rcu_momentary_dyntick_idle(void)
 {
	struct rcu_data *rdp;
-	struct rcu_dynticks *rdtp;
	int resched_mask;
	struct rcu_state *rsp;
@@ -327,10 +436,7 @@ static void rcu_momentary_dyntick_idle(void)
		 * quiescent state, with no need for this CPU to do anything
		 * further.
		 */
-		rdtp = this_cpu_ptr(&rcu_dynticks);
-		smp_mb__before_atomic(); /* Earlier stuff before QS. */
-		atomic_add(2, &rdtp->dynticks);  /* QS. */
-		smp_mb__after_atomic(); /* Later stuff after QS. */
+		rcu_dynticks_momentary_idle();
		break;
	}
 }
@@ -611,7 +717,7 @@ static int
 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 {
	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
-	       rdp->nxttail[RCU_DONE_TAIL] != NULL;
+	       rdp->nxttail[RCU_NEXT_TAIL] != NULL;
 }
 
 /*
@@ -673,7 +779,7 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
 {
	struct rcu_state *rsp;
	struct rcu_data *rdp;
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+	RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
 
	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
@@ -692,12 +798,7 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
		do_nocb_deferred_wakeup(rdp);
	}
	rcu_prepare_for_idle();
-	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic();  /* See above. */
-	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic();  /* Force ordering with next sojourn. */
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     atomic_read(&rdtp->dynticks) & 0x1);
+	rcu_dynticks_eqs_enter();
	rcu_dynticks_task_enter();
 
	/*
@@ -826,15 +927,10 @@ void rcu_irq_exit_irqson(void)
  */
 static void rcu_eqs_exit_common(long long oldval, int user)
 {
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+	RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
 
	rcu_dynticks_task_exit();
-	smp_mb__before_atomic();  /* Force ordering w/previous sojourn. */
-	atomic_inc(&rdtp->dynticks);
-	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-	smp_mb__after_atomic();  /* See above. */
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     !(atomic_read(&rdtp->dynticks) & 0x1));
+	rcu_dynticks_eqs_exit();
	rcu_cleanup_after_idle();
	trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
@@ -980,12 +1076,8 @@ void rcu_nmi_enter(void)
	 * to be in the outermost NMI handler that interrupted an RCU-idle
	 * period (observation due to Andy Lutomirski).
	 */
-	if (!(atomic_read(&rdtp->dynticks) & 0x1)) {
-		smp_mb__before_atomic();  /* Force delay from prior write. */
-		atomic_inc(&rdtp->dynticks);
-		/* atomic_inc() before later RCU read-side crit sects */
-		smp_mb__after_atomic();  /* See above. */
-		WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+	if (rcu_dynticks_curr_cpu_in_eqs()) {
+		rcu_dynticks_eqs_exit();
		incby = 1;
	}
	rdtp->dynticks_nmi_nesting += incby;
@@ -1010,7 +1102,7 @@ void rcu_nmi_exit(void)
	 * to us!)
	 */
	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
-	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+	WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
 
	/*
	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
@@ -1023,11 +1115,7 @@ void rcu_nmi_exit(void)
	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
	rdtp->dynticks_nmi_nesting = 0;
-	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic();  /* See above. */
-	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic();  /* Force delay to next write. */
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+	rcu_dynticks_eqs_enter();
 }
 
 /**
@@ -1040,7 +1128,7 @@ void rcu_nmi_exit(void)
  */
 bool notrace __rcu_is_watching(void)
 {
-	return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
+	return !rcu_dynticks_curr_cpu_in_eqs();
 }
 
 /**
@@ -1123,9 +1211,9 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 static int dyntick_save_progress_counter(struct rcu_data *rdp,
					 bool *isidle, unsigned long *maxj)
 {
-	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+	rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
	rcu_sysidle_check_cpu(rdp, isidle, maxj);
-	if ((rdp->dynticks_snap & 0x1) == 0) {
+	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
				 rdp->mynode->gpnum))
@@ -1144,12 +1232,10 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
				    bool *isidle, unsigned long *maxj)
 {
-	unsigned int curr;
+	unsigned long jtsq;
	int *rcrmp;
-	unsigned int snap;
-
-	curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
-	snap = (unsigned int)rdp->dynticks_snap;
+	unsigned long rjtsc;
+	struct rcu_node *rnp;
 
	/*
	 * If the CPU passed through or entered a dynticks idle phase with
@@ -1159,27 +1245,39 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
	 * read-side critical section that started before the beginning
	 * of the current RCU grace period.
	 */
-	if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
+	if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) {
		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
		rdp->dynticks_fqs++;
		return 1;
	}
 
+	/* Compute and saturate jiffies_till_sched_qs. */
+	jtsq = jiffies_till_sched_qs;
+	rjtsc = rcu_jiffies_till_stall_check();
+	if (jtsq > rjtsc / 2) {
+		WRITE_ONCE(jiffies_till_sched_qs, rjtsc);
+		jtsq = rjtsc / 2;
+	} else if (jtsq < 1) {
+		WRITE_ONCE(jiffies_till_sched_qs, 1);
+		jtsq = 1;
+	}
+
	/*
-	 * Check for the CPU being offline, but only if the grace period
-	 * is old enough.  We don't need to worry about the CPU changing
-	 * state: If we see it offline even once, it has been through a
-	 * quiescent state.
-	 *
-	 * The reason for insisting that the grace period be at least
-	 * one jiffy old is that CPUs that are not quite online and that
-	 * have just gone offline can still execute RCU read-side critical
-	 * sections.
+	 * Has this CPU encountered a cond_resched_rcu_qs() since the
+	 * beginning of the grace period?  For this to be the case,
+	 * the CPU has to have noticed the current grace period.  This
+	 * might not be the case for nohz_full CPUs looping in the kernel.
	 */
-	if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
-		return 0;  /* Grace period is not old enough. */
-	barrier();
-	if (cpu_is_offline(rdp->cpu)) {
+	rnp = rdp->mynode;
+	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
+	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) &&
+	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
+		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
+		return 1;
+	}
+
+	/* Check for the CPU being offline. */
+	if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) {
		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
		rdp->offline_fqs++;
		return 1;
@@ -1207,9 +1305,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
	 * warning delay.
	 */
	rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
-	if (ULONG_CMP_GE(jiffies,
-			 rdp->rsp->gp_start + jiffies_till_sched_qs) ||
-	    ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
+	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
+	    time_after(jiffies, rdp->rsp->jiffies_resched)) {
		if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
			WRITE_ONCE(rdp->cond_resched_completed,
				   READ_ONCE(rdp->mynode->completed));
@@ -1220,11 +1317,12 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
		rdp->rsp->jiffies_resched += 5;	/* Re-enable beating. */
	}
 
-	/* And if it has been a really long time, kick the CPU as well. */
-	if (ULONG_CMP_GE(jiffies,
-			 rdp->rsp->gp_start + 2 * jiffies_till_sched_qs) ||
-	    ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs))
-		resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
+	/*
+	 * If more than halfway to RCU CPU stall-warning time, do
+	 * a resched_cpu() to try to loosen things up a bit.
+	 */
+	if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2)
+		resched_cpu(rdp->cpu);
 
	return 0;
 }
@@ -1277,7 +1375,10 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
 }
 
 /*
- * Dump stacks of all tasks running on stalled CPUs.
+ * Dump stacks of all tasks running on stalled CPUs.  First try using
+ * NMIs, but fall back to manual remote stack tracing on architectures
+ * that don't support NMI-based stack dumps.  The NMI-triggered stack
+ * traces are more accurate because they are printed by the target CPU.
  */
 static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
 {
@@ -1287,11 +1388,10 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
	rcu_for_each_leaf_node(rsp, rnp) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
-		if (rnp->qsmask != 0) {
-			for_each_leaf_node_possible_cpu(rnp, cpu)
-				if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
-					dump_cpu_task(cpu);
-		}
+		for_each_leaf_node_possible_cpu(rnp, cpu)
+			if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
+				if (!trigger_single_cpu_backtrace(cpu))
+					dump_cpu_task(cpu);
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
 }
@@ -1379,6 +1479,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
	       (long)rsp->gpnum, (long)rsp->completed, totqlen);
	if (ndetected) {
		rcu_dump_cpu_stacks(rsp);
+
+		/* Complain about tasks blocking the grace period. */
+		rcu_print_detail_task_stall(rsp);
	} else {
		if (READ_ONCE(rsp->gpnum) != gpnum ||
		    READ_ONCE(rsp->completed) == gpnum) {
@@ -1395,9 +1498,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
		}
	}
 
-	/* Complain about tasks blocking the grace period. */
-	rcu_print_detail_task_stall(rsp);
-
	rcu_check_gp_kthread_starvation(rsp);
 
	panic_on_rcu_stall();
@@ -2467,10 +2567,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
	rnp = rdp->mynode;
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-	if ((rdp->cpu_no_qs.b.norm &&
-	     rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
-	    rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
-	    rdp->gpwrap) {
+	if (rdp->cpu_no_qs.b.norm || rdp->gpnum != rnp->gpnum ||
+	    rnp->completed == rnp->gpnum || rdp->gpwrap) {
 
		/*
		 * The grace period in which this quiescent state was
@@ -2525,8 +2623,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
	 * Was there a quiescent state since the beginning of the grace
	 * period? If no, then exit and wait for the next call.
	 */
-	if (rdp->cpu_no_qs.b.norm &&
-	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
+	if (rdp->cpu_no_qs.b.norm)
		return;
 
	/*
@@ -3480,9 +3577,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
	    rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
		rdp->n_rp_core_needs_qs++;
-	} else if (rdp->core_needs_qs &&
-		   (!rdp->cpu_no_qs.b.norm ||
-		    rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
+	} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
		rdp->n_rp_report_qs++;
		return 1;
	}
@@ -3748,7 +3843,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
-	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));
	rdp->cpu = cpu;
	rdp->rsp = rsp;
	rcu_boot_init_nocb_percpu_data(rdp);
@@ -3765,7 +3860,6 @@ static void
 rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
	unsigned long flags;
-	unsigned long mask;
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	struct rcu_node *rnp = rcu_get_root(rsp);
@@ -3778,8 +3872,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
	rcu_sysidle_init_percpu_data(rdp->dynticks);
-	atomic_set(&rdp->dynticks->dynticks,
-		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
+	rcu_dynticks_eqs_online();
	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
	/*
@@ -3788,7 +3881,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
	 * of the next grace period.
	 */
	rnp = rdp->mynode;
-	mask = rdp->grpmask;
	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */
	if (!rdp->beenonline)
		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
@@ -3872,7 +3964,7 @@ void rcu_cpu_starting(unsigned int cpu)
	struct rcu_state *rsp;
 
	for_each_rcu_flavor(rsp) {
-		rdp = this_cpu_ptr(rsp->rda);
+		rdp = per_cpu_ptr(rsp->rda, cpu);
		rnp = rdp->mynode;
		mask = rdp->grpmask;
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
......
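The new rcu_dynticks_eqs_enter()/exit()/snap() helpers introduced above all rely on one convention: the low bit of ->dynticks is 1 while the CPU is outside an extended quiescent state (EQS) and 0 while it is inside one, and any change in the full counter value means an EQS was passed. The following tiny userspace C model (invented names, not the kernel code) shows how the snapshot/compare logic used by the force-quiescent-state path works under that convention.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Model of one CPU's ->dynticks counter: odd = not idle, even = idle (EQS). */
static atomic_int dynticks = 1;	/* boot: not in an extended quiescent state */

static void eqs_enter(void) { atomic_fetch_add(&dynticks, 1); }	/* odd -> even */
static void eqs_exit(void)  { atomic_fetch_add(&dynticks, 1); }	/* even -> odd */

/* Model of rcu_dynticks_snap(): read the counter for later comparison. */
static int snap(void) { return atomic_load(&dynticks); }

/* Model of rcu_dynticks_in_eqs(): an even value means EQS. */
static bool in_eqs(int s) { return !(s & 0x1); }

/* Model of rcu_dynticks_in_eqs_since(): any change implies an EQS happened. */
static bool in_eqs_since(int old) { return snap() != old; }

int main(void)
{
	int s = snap();

	printf("in EQS now? %d\n", in_eqs(s));			/* 0: CPU is busy */
	eqs_enter();						/* CPU goes idle */
	eqs_exit();						/* CPU wakes up */
	printf("EQS since snapshot? %d\n", in_eqs_since(s));	/* 1: quiescent state seen */
	return 0;
}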
@@ -521,7 +521,6 @@ struct rcu_state {
	struct mutex exp_mutex;			/* Serialize expedited GP. */
	struct mutex exp_wake_mutex;		/* Serialize wakeup. */
	unsigned long expedited_sequence;	/* Take a ticket. */
-	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
	struct swait_queue_head expedited_wq;	/* Wait for check-ins. */
	int ncpus_snap;				/* # CPUs seen last time. */
@@ -595,6 +594,8 @@ extern struct rcu_state rcu_bh_state;
 extern struct rcu_state rcu_preempt_state;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
+int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
+
 #ifdef CONFIG_RCU_BOOST
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
@@ -687,18 +688,6 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
 }
 #endif /* #ifdef CONFIG_RCU_TRACE */
 
-/*
- * Place this after a lock-acquisition primitive to guarantee that
- * an UNLOCK+LOCK pair act as a full barrier.  This guarantee applies
- * if the UNLOCK and LOCK are executed by the same CPU or if the
- * UNLOCK and LOCK operate on the same lock variable.
- */
-#ifdef CONFIG_PPC
-#define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
-#else /* #ifdef CONFIG_PPC */
-#define smp_mb__after_unlock_lock()	do { } while (0)
-#endif /* #else #ifdef CONFIG_PPC */
-
 /*
  * Wrappers for the rcu_node::lock acquire and release.
  *
......
@@ -20,16 +20,26 @@
  * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
-/* Wrapper functions for expedited grace periods.  */
+/*
+ * Record the start of an expedited grace period.
+ */
 static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
 {
	rcu_seq_start(&rsp->expedited_sequence);
 }
+
+/*
+ * Record the end of an expedited grace period.
+ */
 static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
 {
	rcu_seq_end(&rsp->expedited_sequence);
	smp_mb(); /* Ensure that consecutive grace periods serialize. */
 }
+
+/*
+ * Take a snapshot of the expedited-grace-period counter.
+ */
 static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
 {
	unsigned long s;
@@ -39,6 +49,12 @@ static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
	trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
	return s;
 }
+
+/*
+ * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
+ * if a full expedited grace period has elapsed since that snapshot
+ * was taken.
+ */
 static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 {
	return rcu_seq_done(&rsp->expedited_sequence, s);
@@ -356,12 +372,11 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
		mask_ofl_test = 0;
		for_each_leaf_node_possible_cpu(rnp, cpu) {
			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-			struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
			rdp->exp_dynticks_snap =
-				atomic_add_return(0, &rdtp->dynticks);
+				rcu_dynticks_snap(rdp->dynticks);
			if (raw_smp_processor_id() == cpu ||
-			    !(rdp->exp_dynticks_snap & 0x1) ||
+			    rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) ||
			    !(rnp->qsmaskinitnext & rdp->grpmask))
				mask_ofl_test |= rdp->grpmask;
		}
@@ -380,13 +395,12 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
		for_each_leaf_node_possible_cpu(rnp, cpu) {
			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-			struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
			if (!(mask_ofl_ipi & mask))
				continue;
 retry_ipi:
-			if (atomic_add_return(0, &rdtp->dynticks) !=
-			    rdp->exp_dynticks_snap) {
+			if (rcu_dynticks_in_eqs_since(rdp->dynticks,
						      rdp->exp_dynticks_snap)) {
				mask_ofl_test |= mask;
				continue;
			}
@@ -623,6 +637,11 @@ void synchronize_sched_expedited(void)
 {
	struct rcu_state *rsp = &rcu_sched_state;
 
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_sched_expedited() in RCU read-side critical section");
+
	/* If only one CPU, this is automatically a grace period. */
	if (rcu_blocking_is_gp())
		return;
@@ -692,6 +711,11 @@ void synchronize_rcu_expedited(void)
 {
	struct rcu_state *rsp = rcu_state_p;
 
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_rcu_expedited() in RCU read-side critical section");
+
	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
		return;
	_synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
......
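The rcu_exp_gp_seq_* wrappers documented above are thin layers over RCU's generic sequence counter, whose low bit means "a grace period is in progress" and whose remaining bits count completed periods. The standalone C sketch below models that snapshot/done protocol; the helper names mirror the kernel's rcu_seq_* functions but this is an illustration, not the kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Bit 0 set while a grace period is in progress; upper bits count periods. */
static unsigned long exp_seq;

static void seq_start(unsigned long *sp) { *sp += 1; }	/* now odd: GP running */
static void seq_end(unsigned long *sp)   { *sp += 1; }	/* now even: GP done */

/* Snapshot: the counter value that must be reached before we may return. */
static unsigned long seq_snap(unsigned long *sp)
{
	/* Round up past any grace period already in progress. */
	return (*sp + 3) & ~0x1UL;
}

/* Has a full grace period elapsed since the snapshot was taken? */
static bool seq_done(unsigned long *sp, unsigned long snap)
{
	return (long)(*sp - snap) >= 0;
}

int main(void)
{
	unsigned long snap = seq_snap(&exp_seq);

	printf("done before any GP? %d\n", seq_done(&exp_seq, snap));	/* 0 */
	seq_start(&exp_seq);
	seq_end(&exp_seq);
	printf("done after one GP?  %d\n", seq_done(&exp_seq, snap));	/* 1 */
	return 0;
}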
@@ -1643,7 +1643,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
	       "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
	       ticks_value, ticks_title,
-	       atomic_read(&rdtp->dynticks) & 0xfff,
+	       rcu_dynticks_snap(rdtp) & 0xfff,
	       rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
	       READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
@@ -2366,8 +2366,9 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
	}
 
	/*
-	 * Each pass through this loop sets up one rcu_data structure and
-	 * spawns one rcu_nocb_kthread().
+	 * Each pass through this loop sets up one rcu_data structure.
+	 * Should the corresponding CPU come online in the future, then
+	 * we will spawn the needed set of rcu_nocb_kthread() kthreads.
	 */
	for_each_cpu(cpu, rcu_nocb_mask) {
		rdp = per_cpu_ptr(rsp->rda, cpu);
......
@@ -124,7 +124,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
		   rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
		   rdp->core_needs_qs);
	seq_printf(m, " dt=%d/%llx/%d df=%lu",
-		   atomic_read(&rdp->dynticks->dynticks),
+		   rcu_dynticks_snap(rdp->dynticks),
		   rdp->dynticks->dynticks_nesting,
		   rdp->dynticks->dynticks_nmi_nesting,
		   rdp->dynticks_fqs);
@@ -194,9 +194,8 @@ static int show_rcuexp(struct seq_file *m, void *v)
		s2 += atomic_long_read(&rdp->exp_workdone2);
		s3 += atomic_long_read(&rdp->exp_workdone3);
	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu enq=%d sc=%lu\n",
		   rsp->expedited_sequence, s0, s1, s2, s3,
-		   atomic_long_read(&rsp->expedited_normal),
		   atomic_read(&rsp->expedited_need_qs),
		   rsp->expedited_sequence / 2);
	return 0;
......
...@@ -132,8 +132,7 @@ bool rcu_gp_is_normal(void) ...@@ -132,8 +132,7 @@ bool rcu_gp_is_normal(void)
} }
EXPORT_SYMBOL_GPL(rcu_gp_is_normal); EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
static atomic_t rcu_expedited_nesting = static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
/* /*
* Should normal grace-period primitives be expedited? Intended for * Should normal grace-period primitives be expedited? Intended for
...@@ -182,7 +181,6 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); ...@@ -182,7 +181,6 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
*/ */
void rcu_end_inkernel_boot(void) void rcu_end_inkernel_boot(void)
{ {
if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
rcu_unexpedite_gp(); rcu_unexpedite_gp();
if (rcu_normal_after_boot) if (rcu_normal_after_boot)
WRITE_ONCE(rcu_normal, 1); WRITE_ONCE(rcu_normal, 1);
......
...@@ -1450,6 +1450,7 @@ config RCU_CPU_STALL_TIMEOUT ...@@ -1450,6 +1450,7 @@ config RCU_CPU_STALL_TIMEOUT
config RCU_TRACE config RCU_TRACE
bool "Enable tracing for RCU" bool "Enable tracing for RCU"
depends on DEBUG_KERNEL depends on DEBUG_KERNEL
default y if TREE_RCU
select TRACE_CLOCK select TRACE_CLOCK
help help
This option provides tracing in RCU which presents stats This option provides tracing in RCU which presents stats
......
CONFIG_RCU_TORTURE_TEST=y CONFIG_RCU_TORTURE_TEST=y
CONFIG_PRINTK_TIME=y CONFIG_PRINTK_TIME=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
...@@ -7,6 +7,7 @@ CONFIG_HZ_PERIODIC=n ...@@ -7,6 +7,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=n CONFIG_RCU_TRACE=n
#CHECK#CONFIG_RCU_STALL_COMMON=n
CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_PREEMPT_COUNT=n CONFIG_PREEMPT_COUNT=n
...@@ -8,7 +8,8 @@ CONFIG_NO_HZ_IDLE=n ...@@ -8,7 +8,8 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=n CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y CONFIG_RCU_TRACE=y
CONFIG_PROVE_LOCKING=y CONFIG_PROVE_LOCKING=y
CONFIG_PROVE_RCU_REPEATEDLY=y
#CHECK#CONFIG_PROVE_RCU=y #CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
CONFIG_PREEMPT_COUNT=y CONFIG_PREEMPT_COUNT=y
...@@ -16,3 +16,6 @@ CONFIG_DEBUG_LOCK_ALLOC=n ...@@ -16,3 +16,6 @@ CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
...@@ -20,3 +20,7 @@ CONFIG_PROVE_LOCKING=n ...@@ -20,3 +20,7 @@ CONFIG_PROVE_LOCKING=n
CONFIG_RCU_BOOST=n CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
...@@ -17,3 +17,6 @@ CONFIG_RCU_BOOST=y ...@@ -17,3 +17,6 @@ CONFIG_RCU_BOOST=y
CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_RCU_KTHREAD_PRIO=2
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
...@@ -19,3 +19,7 @@ CONFIG_RCU_NOCB_CPU=n ...@@ -19,3 +19,7 @@ CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
CONFIG_RCU_EQS_DEBUG=y
...@@ -19,3 +19,6 @@ CONFIG_PROVE_LOCKING=y ...@@ -19,3 +19,6 @@ CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y #CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
...@@ -20,3 +20,6 @@ CONFIG_PROVE_LOCKING=y ...@@ -20,3 +20,6 @@ CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y #CHECK#CONFIG_PROVE_RCU=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
...@@ -19,3 +19,6 @@ CONFIG_RCU_NOCB_CPU=n ...@@ -19,3 +19,6 @@ CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
...@@ -17,8 +17,8 @@ CONFIG_RCU_FANOUT_LEAF=2 ...@@ -17,8 +17,8 @@ CONFIG_RCU_FANOUT_LEAF=2
CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_RCU_NOCB_CPU_ALL=y
CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=y CONFIG_PROVE_LOCKING=n
#CHECK#CONFIG_PROVE_RCU=y
CONFIG_RCU_BOOST=n CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y CONFIG_RCU_EXPERT=y
CONFIG_RCU_EQS_DEBUG=y
...@@ -14,6 +14,7 @@ CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. ...@@ -14,6 +14,7 @@ CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
CONFIG_PREEMPT -- Do half. (First three and #8.) CONFIG_PREEMPT -- Do half. (First three and #8.)
CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
CONFIG_PROVE_RCU_REPEATEDLY -- Do one.
CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
...@@ -25,7 +26,12 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one. ...@@ -25,7 +26,12 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one.
CONFIG_RCU_NOCB_CPU_ZERO -- Do one. CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
CONFIG_RCU_TRACE -- Do half. CONFIG_RCU_TRACE -- Do half.
CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
!RCU_EXPERT -- Do a few, but these have to be vanilla configurations. CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios.
CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios.
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios.
RCU-bh: Do one with PREEMPT and one with !PREEMPT. RCU-bh: Do one with PREEMPT and one with !PREEMPT.
RCU-sched: Do one with PREEMPT but not BOOST. RCU-sched: Do one with PREEMPT but not BOOST.
...@@ -72,7 +78,30 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE ...@@ -72,7 +78,30 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
Always used in KVM testing. Always used in KVM testing.
CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
Inspection suffices, ignore.
CONFIG_PREEMPT_RCU CONFIG_PREEMPT_RCU
CONFIG_TREE_RCU CONFIG_TREE_RCU
CONFIG_TINY_RCU
These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
CONFIG_SPARSE_RCU_POINTER
Makes sense only for sparse runs, not for kernel builds.
CONFIG_SRCU
CONFIG_TASKS_RCU
Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
CONFIG_RCU_TRACE
Implied by CONFIG_RCU_TRACE for Tree RCU.
These are controlled by CONFIG_PREEMPT. boot parameters ignored: TBD
all: srcu.c store_buffering
LINUX_SOURCE = ../../../../../..
modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
$(LINUX_SOURCE)/kernel/rcu/srcu.c
modified_srcu_output = include/linux/srcu.h srcu.c
include/linux/srcu.h: srcu.c
srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
store_buffering:
@cd tests/store_buffering; make
/*
* This header has been modified to remove definitions of types that
* are defined in standard userspace headers or are problematic for some
* other reason.
*/
#ifndef _LINUX_TYPES_H
#define _LINUX_TYPES_H
#define __EXPORTED_HEADERS__
#include <uapi/linux/types.h>
#ifndef __ASSEMBLY__
#define DECLARE_BITMAP(name, bits) \
unsigned long name[BITS_TO_LONGS(bits)]
typedef __u32 __kernel_dev_t;
/* bsd */
typedef unsigned char u_char;
typedef unsigned short u_short;
typedef unsigned int u_int;
typedef unsigned long u_long;
/* sysv */
typedef unsigned char unchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
#ifndef __BIT_TYPES_DEFINED__
#define __BIT_TYPES_DEFINED__
typedef __u8 u_int8_t;
typedef __s8 int8_t;
typedef __u16 u_int16_t;
typedef __s16 int16_t;
typedef __u32 u_int32_t;
typedef __s32 int32_t;
#endif /* !(__BIT_TYPES_DEFINED__) */
typedef __u8 uint8_t;
typedef __u16 uint16_t;
typedef __u32 uint32_t;
/* this is a special 64bit data type that is 8-byte aligned */
#define aligned_u64 __u64 __attribute__((aligned(8)))
#define aligned_be64 __be64 __attribute__((aligned(8)))
#define aligned_le64 __le64 __attribute__((aligned(8)))
/**
* The type used for indexing onto a disc or disc partition.
*
* Linux always considers sectors to be 512 bytes long independently
* of the device's real block size.
*
* blkcnt_t is the type of the inode's block count.
*/
#ifdef CONFIG_LBDAF
typedef u64 sector_t;
#else
typedef unsigned long sector_t;
#endif
/*
* The type of an index into the pagecache.
*/
#define pgoff_t unsigned long
/*
* A dma_addr_t can hold any valid DMA address, i.e., any address returned
* by the DMA API.
*
* If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
* bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
* but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
* so they don't care about the size of the actual bus addresses.
*/
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
typedef u64 dma_addr_t;
#else
typedef u32 dma_addr_t;
#endif
#ifdef CONFIG_PHYS_ADDR_T_64BIT
typedef u64 phys_addr_t;
#else
typedef u32 phys_addr_t;
#endif
typedef phys_addr_t resource_size_t;
/*
* This type is the placeholder for a hardware interrupt number. It has to be
* big enough to enclose whatever representation is used by a given platform.
*/
typedef unsigned long irq_hw_number_t;
typedef struct {
int counter;
} atomic_t;
#ifdef CONFIG_64BIT
typedef struct {
long counter;
} atomic64_t;
#endif
struct list_head {
struct list_head *next, *prev;
};
struct hlist_head {
struct hlist_node *first;
};
struct hlist_node {
struct hlist_node *next, **pprev;
};
/**
* struct callback_head - callback structure for use with RCU and task_work
* @next: next update requests in a list
* @func: actual update function to call after the grace period.
*
* The struct is aligned to size of pointer. On most architectures it happens
* naturally due to ABI requirements, but some architectures (like CRIS) have
* a weird ABI and we need to ask for it explicitly.
*
* The alignment is required to guarantee that bits 0 and 1 of @next will be
* clear under normal conditions -- as long as we use call_rcu(),
* call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
*
* This guarantee is important for a few reasons:
* - future call_rcu_lazy() will make use of lower bits in the pointer;
* - the structure shares storage space in struct page with @compound_head,
* which encodes PageTail() in bit 0. The guarantee is needed to avoid
* false-positive PageTail().
*/
struct callback_head {
struct callback_head *next;
void (*func)(struct callback_head *head);
} __attribute__((aligned(sizeof(void *))));
#define rcu_head callback_head
typedef void (*rcu_callback_t)(struct rcu_head *head);
typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
/* clocksource cycle base type */
typedef u64 cycle_t;
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_TYPES_H */
#!/bin/awk -f
# Modify SRCU for formal verification. The first argument should be srcu.h and
# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
# current directory.
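# A hypothetical invocation mirroring the Makefile rule above (the paths are
# illustrative only):
#
#	awk -f modify_srcu.awk $LINUX_SOURCE/include/linux/srcu.h \
#		$LINUX_SOURCE/kernel/rcu/srcu.c include/linux/srcu.h srcu.c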
BEGIN {
if (ARGC != 5) {
print "Usange: input.h input.c output.h output.c" > "/dev/stderr";
exit 1;
}
h_output = ARGV[3];
c_output = ARGV[4];
ARGC = 3;
# Tokenize using FS and not RS as FS supports regular expressions. Each
# record is one line of source, except that backslashed lines are
# combined. Comments are treated as field separators, as are quotes.
quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
inside_srcu_struct = 0;
inside_srcu_init_def = 0;
srcu_init_param_name = "";
in_macro = 0;
brace_nesting = 0;
paren_nesting = 0;
# Allow the manipulation of the last field separator after it has been
# seen.
last_fs = "";
# Whether the last field separator was intended to be output.
last_fs_print = 0;
# rcu_batches stores the initialization for each instance of struct
# rcu_batch
in_comment = 0;
outputfile = "";
}
{
prev_outputfile = outputfile;
if (FILENAME ~ /\.h$/) {
outputfile = h_output;
if (FNR != NR) {
print "Incorrect file order" > "/dev/stderr";
exit 1;
}
}
else
outputfile = c_output;
if (prev_outputfile && outputfile != prev_outputfile) {
new_outputfile = outputfile;
outputfile = prev_outputfile;
update_fieldsep("", 0);
outputfile = new_outputfile;
}
}
# Combine the next line into $0.
function combine_line() {
ret = getline next_line;
if (ret == 0) {
# Don't allow two consecutive getlines at the end of the file
if (eof_found) {
print "Error: expected more input." > "/dev/stderr";
exit 1;
} else {
eof_found = 1;
}
} else if (ret == -1) {
print "Error reading next line of file" FILENAME > "/dev/stderr";
exit 1;
}
$0 = $0 "\n" next_line;
}
# Combine backslashed lines and multiline comments.
function combine_backslashes() {
while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
combine_line();
}
}
function read_line() {
combine_line();
combine_backslashes();
}
# Print out field separators and update variables that depend on them. Only
# print if p is true. Call with sep="" and p=0 to print out the last field
# separator.
function update_fieldsep(sep, p) {
# Count braces
sep_tmp = sep;
gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
while (1)
{
if (sub("[^{}()]*\\{", "", sep_tmp)) {
brace_nesting++;
continue;
}
if (sub("[^{}()]*\\}", "", sep_tmp)) {
brace_nesting--;
if (brace_nesting < 0) {
print "Unbalanced braces!" > "/dev/stderr";
exit 1;
}
continue;
}
if (sub("[^{}()]*\\(", "", sep_tmp)) {
paren_nesting++;
continue;
}
if (sub("[^{}()]*\\)", "", sep_tmp)) {
paren_nesting--;
if (paren_nesting < 0) {
print "Unbalanced parenthesis!" > "/dev/stderr";
exit 1;
}
continue;
}
break;
}
if (last_fs_print)
printf("%s", last_fs) > outputfile;
last_fs = sep;
last_fs_print = p;
}
# Shifts the fields down by n positions. Calls next if there are no more. If p
# is true then print out field separators.
function shift_fields(n, p) {
do {
if (match($0, FS) > 0) {
update_fieldsep(substr($0, RSTART, RLENGTH), p);
if (RSTART + RLENGTH <= length())
$0 = substr($0, RSTART + RLENGTH);
else
$0 = "";
} else {
update_fieldsep("", 0);
print "" > outputfile;
next;
}
} while (--n > 0);
}
# Shifts and prints the first n fields.
function print_fields(n) {
do {
update_fieldsep("", 0);
printf("%s", $1) > outputfile;
shift_fields(1, 1);
} while (--n > 0);
}
{
combine_backslashes();
}
# Print leading FS
{
if (match($0, "^(" FS ")+") > 0) {
update_fieldsep(substr($0, RSTART, RLENGTH), 1);
if (RSTART + RLENGTH <= length())
$0 = substr($0, RSTART + RLENGTH);
else
$0 = "";
}
}
# Parse the line.
{
while (NF > 0) {
if ($1 == "struct" && NF < 3) {
read_line();
continue;
}
if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
brace_nesting == 0 && paren_nesting == 0 &&
$1 == "struct" && $2 == "srcu_struct" &&
$0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
inside_srcu_struct = 1;
print_fields(2);
continue;
}
if (inside_srcu_struct && brace_nesting == 0 &&
paren_nesting == 0) {
inside_srcu_struct = 0;
update_fieldsep("", 0);
for (name in rcu_batches)
print "extern struct rcu_batch " name ";" > outputfile;
}
if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
# Move rcu_batches outside of the struct.
rcu_batches[$3] = "";
shift_fields(3, 1);
sub(/;[[:space:]]*$/, "", last_fs);
continue;
}
if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
$1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
inside_srcu_init_def = 1;
srcu_init_param_name = $3;
in_macro = 1;
print_fields(3);
continue;
}
if (inside_srcu_init_def && brace_nesting == 0 &&
paren_nesting == 0) {
inside_srcu_init_def = 0;
in_macro = 0;
continue;
}
if (inside_srcu_init_def && brace_nesting == 1 &&
paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
$1 ~ /^[[:alnum:]_]+$/) {
name = $1;
if (name in rcu_batches) {
# Remove the dot.
sub(/\.[[:space:]]*$/, "", last_fs);
old_record = $0;
do
shift_fields(1, 0);
while (last_fs !~ /,/ || paren_nesting > 0);
end_loc = length(old_record) - length($0);
end_loc += index(last_fs, ",") - length(last_fs);
last_fs = substr(last_fs, index(last_fs, ",") + 1);
last_fs_print = 1;
match(old_record, "^"name"("FS")+=");
start_loc = RSTART + RLENGTH;
len = end_loc - start_loc;
initializer = substr(old_record, start_loc, len);
gsub(srcu_init_param_name "\\.", "", initializer);
rcu_batches[name] = initializer;
continue;
}
}
# Don't include a nonexistent file
if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
update_fieldsep("", 0);
next;
}
# Ignore most preprocessor stuff.
if (!in_macro && $1 ~ /#/) {
break;
}
if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
read_line();
continue;
}
if (brace_nesting > 0 &&
$0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
$2 in rcu_batches) {
# Make uses of rcu_batches global. Somewhat unreliable.
shift_fields(1, 0);
print_fields(1);
continue;
}
if ($1 == "static" && NF < 3) {
read_line();
continue;
}
if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
$2 == "void" && $3 == "srcu_flip")) {
shift_fields(1, 1);
print_fields(2);
continue;
}
# Distinguish between read-side and write-side memory barriers.
if ($1 == "smp_mb" && NF < 2) {
read_line();
continue;
}
if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
barrier_letter = substr($0, RLENGTH, 1);
if (barrier_letter ~ /A|D/)
new_barrier_name = "sync_smp_mb";
else if (barrier_letter ~ /B|C/)
new_barrier_name = "rs_smp_mb";
else {
print "Unrecognized memory barrier." > "/dev/null";
exit 1;
}
shift_fields(1, 1);
printf("%s", new_barrier_name) > outputfile;
continue;
}
# Skip definition of rcu_synchronize, since it is already
# defined in misc.h. Only present in old versions of srcu.
if (brace_nesting == 0 && paren_nesting == 0 &&
$1 == "struct" && $2 == "rcu_synchronize" &&
$0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
shift_fields(2, 0);
while (brace_nesting) {
if (NF < 2)
read_line();
shift_fields(1, 0);
}
}
# Skip definition of wakeme_after_rcu for the same reason
if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
$3 == "wakeme_after_rcu") {
while (NF < 5)
read_line();
shift_fields(3, 0);
do {
while (NF < 3)
read_line();
shift_fields(1, 0);
} while (paren_nesting || brace_nesting);
}
if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
read_line();
continue;
}
# Give srcu_batches_completed the correct type for old SRCU.
if (brace_nesting == 0 && $1 == "long" &&
$2 == "srcu_batches_completed") {
update_fieldsep("", 0);
printf("unsigned ") > outputfile;
print_fields(2);
continue;
}
if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
$3 == "srcu_batches_completed") {
print_fields(3);
continue;
}
# Just print out the input code by default.
print_fields(1);
}
update_fieldsep("", 0);
print > outputfile;
next;
}
END {
update_fieldsep("", 0);
if (brace_nesting != 0) {
print "Unbalanced braces!" > "/dev/stderr";
exit 1;
}
# Define the rcu_batches
for (name in rcu_batches)
print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
}
#ifndef ASSUME_H
#define ASSUME_H
/* Provide an assumption macro that can be disabled for gcc. */
#ifdef RUN
#define assume(x) \
do { \
/* Evaluate x to suppress warnings. */ \
(void) (x); \
} while (0)
#else
#define assume(x) __CPROVER_assume(x)
#endif
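/*
 * Illustrative use (mirroring preempt.c below): constrain a nondeterministic
 * value so that CBMC only explores valid CPU numbers:
 *
 *	thread_cpu_id = nondet_int();
 *	assume(thread_cpu_id >= 0);
 *	assume(thread_cpu_id < NR_CPUS);
 */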
#endif
#ifndef BARRIERS_H
#define BARRIERS_H
#define barrier() __asm__ __volatile__("" : : : "memory")
#ifdef RUN
#define smp_mb() __sync_synchronize()
#define smp_mb__after_unlock_lock() __sync_synchronize()
#else
/*
* Copied from CBMC's implementation of __sync_synchronize(), which
* seems to be disabled by default.
*/
#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
"WWcumul", "RRcumul", "RWcumul", "WRcumul")
#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
"WWcumul", "RRcumul", "RWcumul", "WRcumul")
#endif
/*
* Allow memory barriers to be disabled in either the read or write side
* of SRCU individually.
*/
#ifndef NO_SYNC_SMP_MB
#define sync_smp_mb() smp_mb()
#else
#define sync_smp_mb() do {} while (0)
#endif
#ifndef NO_READ_SIDE_SMP_MB
#define rs_smp_mb() smp_mb()
#else
#define rs_smp_mb() do {} while (0)
#endif
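/*
 * The modify_srcu.awk script above renames each smp_mb() call according to
 * the letter in its accompanying comment: barriers A and D become
 * sync_smp_mb(), barriers B and C become rs_smp_mb(), so either group can be
 * compiled out independently via NO_SYNC_SMP_MB or NO_READ_SIDE_SMP_MB.
 */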
#define ACCESS_ONCE(x) (*(volatile typeof(x) *) &(x))
#define READ_ONCE(x) ACCESS_ONCE(x)
#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
#endif
#ifndef BUG_ON_H
#define BUG_ON_H
#include <assert.h>
#define BUG() assert(0)
#define BUG_ON(x) assert(!(x))
/* Does it make sense to treat warnings as errors? */
#define WARN() BUG()
#define WARN_ON(x) (BUG_ON(x), false)
#endif
#include <config.h>
/* Include all source files. */
#include "include_srcu.c"
#include "preempt.c"
#include "misc.c"
/* Used by test.c files */
#include <pthread.h>
#include <stdlib.h>
#include <linux/srcu.h>
/* "Cheater" definitions based on restricted Kconfig choices. */
#undef CONFIG_TINY_RCU
#undef __CHECKER__
#undef CONFIG_DEBUG_LOCK_ALLOC
#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
#undef CONFIG_HOTPLUG_CPU
#undef CONFIG_MODULES
#undef CONFIG_NO_HZ_FULL_SYSIDLE
#undef CONFIG_PREEMPT_COUNT
#undef CONFIG_PREEMPT_RCU
#undef CONFIG_PROVE_RCU
#undef CONFIG_RCU_NOCB_CPU
#undef CONFIG_RCU_NOCB_CPU_ALL
#undef CONFIG_RCU_STALL_COMMON
#undef CONFIG_RCU_TRACE
#undef CONFIG_RCU_USER_QS
#undef CONFIG_TASKS_RCU
#define CONFIG_TREE_RCU
#define CONFIG_GENERIC_ATOMIC64
#if NR_CPUS > 1
#define CONFIG_SMP
#else
#undef CONFIG_SMP
#endif
#include <config.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <stddef.h>
#include <string.h>
#include <sys/types.h>
#include "int_typedefs.h"
#include "barriers.h"
#include "bug_on.h"
#include "locks.h"
#include "misc.h"
#include "preempt.h"
#include "percpu.h"
#include "workqueues.h"
#ifdef USE_SIMPLE_SYNC_SRCU
#define synchronize_srcu(sp) synchronize_srcu_original(sp)
#endif
#include <srcu.c>
#ifdef USE_SIMPLE_SYNC_SRCU
#undef synchronize_srcu
#include "simple_sync_srcu.c"
#endif
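/*
 * With USE_SIMPLE_SYNC_SRCU defined, the kernel's synchronize_srcu() is
 * renamed to synchronize_srcu_original() before srcu.c is included, leaving
 * the synchronize_srcu name free for the simplified version in
 * simple_sync_srcu.c.
 */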
#ifndef INT_TYPEDEFS_H
#define INT_TYPEDEFS_H
#include <inttypes.h>
typedef int8_t s8;
typedef uint8_t u8;
typedef int16_t s16;
typedef uint16_t u16;
typedef int32_t s32;
typedef uint32_t u32;
typedef int64_t s64;
typedef uint64_t u64;
typedef int8_t __s8;
typedef uint8_t __u8;
typedef int16_t __s16;
typedef uint16_t __u16;
typedef int32_t __s32;
typedef uint32_t __u32;
typedef int64_t __s64;
typedef uint64_t __u64;
#define S8_C(x) INT8_C(x)
#define U8_C(x) UINT8_C(x)
#define S16_C(x) INT16_C(x)
#define U16_C(x) UINT16_C(x)
#define S32_C(x) INT32_C(x)
#define U32_C(x) UINT32_C(x)
#define S64_C(x) INT64_C(x)
#define U64_C(x) UINT64_C(x)
#endif
#ifndef LOCKS_H
#define LOCKS_H
#include <limits.h>
#include <pthread.h>
#include <stdbool.h>
#include "assume.h"
#include "bug_on.h"
#include "preempt.h"
int nondet_int(void);
#define __acquire(x)
#define __acquires(x)
#define __release(x)
#define __releases(x)
/* Only use one lock mechanism. Select which one. */
#ifdef PTHREAD_LOCK
struct lock_impl {
pthread_mutex_t mutex;
};
static inline void lock_impl_lock(struct lock_impl *lock)
{
BUG_ON(pthread_mutex_lock(&lock->mutex));
}
static inline void lock_impl_unlock(struct lock_impl *lock)
{
BUG_ON(pthread_mutex_unlock(&lock->mutex));
}
static inline bool lock_impl_trylock(struct lock_impl *lock)
{
int err = pthread_mutex_trylock(&lock->mutex);
if (!err)
return true;
else if (err == EBUSY)
return false;
BUG();
}
static inline void lock_impl_init(struct lock_impl *lock)
{
pthread_mutex_init(&lock->mutex, NULL);
}
#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
#else /* !defined(PTHREAD_LOCK) */
/* Spinlock that assumes that it always gets the lock immediately. */
struct lock_impl {
bool locked;
};
static inline bool lock_impl_trylock(struct lock_impl *lock)
{
#ifdef RUN
/* TODO: Should this be a test and set? */
return __sync_bool_compare_and_swap(&lock->locked, false, true);
#else
__CPROVER_atomic_begin();
bool old_locked = lock->locked;
lock->locked = true;
__CPROVER_atomic_end();
/* Minimal barrier to prevent accesses leaking out of lock. */
__CPROVER_fence("RRfence", "RWfence");
return !old_locked;
#endif
}
static inline void lock_impl_lock(struct lock_impl *lock)
{
/*
* CBMC doesn't support busy waiting, so just assume that the
* lock is available.
*/
assume(lock_impl_trylock(lock));
/*
* If the lock was already held by this thread then the assumption
* is unsatisfiable (deadlock).
*/
}
static inline void lock_impl_unlock(struct lock_impl *lock)
{
#ifdef RUN
BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
#else
/* Minimal barrier to prevent accesses leaking out of lock. */
__CPROVER_fence("RWfence", "WWfence");
__CPROVER_atomic_begin();
bool old_locked = lock->locked;
lock->locked = false;
__CPROVER_atomic_end();
BUG_ON(!old_locked);
#endif
}
static inline void lock_impl_init(struct lock_impl *lock)
{
lock->locked = false;
}
#define LOCK_IMPL_INITIALIZER {.locked = false}
#endif /* !defined(PTHREAD_LOCK) */
/*
* Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
* locks of different types.
*/
typedef struct {
struct lock_impl internal_lock;
} spinlock_t;
#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
static inline void spin_lock_init(spinlock_t *lock)
{
lock_impl_init(&lock->internal_lock);
}
static inline void spin_lock(spinlock_t *lock)
{
/*
* Spin locks also need to be removed in order to eliminate all
* memory barriers. They are only used by the write side anyway.
*/
#ifndef NO_SYNC_SMP_MB
preempt_disable();
lock_impl_lock(&lock->internal_lock);
#endif
}
static inline void spin_unlock(spinlock_t *lock)
{
#ifndef NO_SYNC_SMP_MB
lock_impl_unlock(&lock->internal_lock);
preempt_enable();
#endif
}
/* Don't bother with interrupts */
#define spin_lock_irq(lock) spin_lock(lock)
#define spin_unlock_irq(lock) spin_unlock(lock)
#define spin_lock_irqsave(lock, flags) spin_lock(lock)
#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
/*
* This is supposed to return an int, but I think that a bool should work as
* well.
*/
static inline bool spin_trylock(spinlock_t *lock)
{
#ifndef NO_SYNC_SMP_MB
preempt_disable();
return lock_impl_trylock(&lock->internal_lock);
#else
return true;
#endif
}
struct completion {
/* Hopefully this won't overflow. */
unsigned int count;
};
#define COMPLETION_INITIALIZER(x) {.count = 0}
#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
static inline void init_completion(struct completion *c)
{
c->count = 0;
}
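/*
 * The emulation below leans on assume(): wait_for_completion() decrements the
 * count and assumes the pre-decrement value was nonzero, so CBMC simply
 * discards executions in which the wait would have had to block, while
 * complete() just increments the count.
 */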
static inline void wait_for_completion(struct completion *c)
{
unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
assume(prev_count);
}
static inline void complete(struct completion *c)
{
unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
BUG_ON(prev_count == UINT_MAX);
}
/* This function probably isn't very useful for CBMC. */
static inline bool try_wait_for_completion(struct completion *c)
{
BUG();
}
static inline bool completion_done(struct completion *c)
{
return c->count;
}
/* TODO: Implement complete_all */
static inline void complete_all(struct completion *c)
{
BUG();
}
#endif
#include <config.h>
#include "misc.h"
#include "bug_on.h"
struct rcu_head;
void wakeme_after_rcu(struct rcu_head *head)
{
BUG();
}
#ifndef MISC_H
#define MISC_H
#include "assume.h"
#include "int_typedefs.h"
#include "locks.h"
#include <linux/types.h>
/* Probably won't need to deal with bottom halves. */
static inline void local_bh_disable(void) {}
static inline void local_bh_enable(void) {}
#define MODULE_ALIAS(X)
#define module_param(...)
#define EXPORT_SYMBOL_GPL(x)
#define container_of(ptr, type, member) ({ \
const typeof(((type *)0)->member) *__mptr = (ptr); \
(type *)((char *)__mptr - offsetof(type, member)); \
})
#ifndef USE_SIMPLE_SYNC_SRCU
/* Abuse udelay to make sure that busy loops terminate. */
#define udelay(x) assume(0)
#else
/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
#define udelay(x) do { } while (0)
#endif
#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
do { } while (0)
#define notrace
/* Avoid including rcupdate.h */
struct rcu_synchronize {
struct rcu_head head;
struct completion completion;
};
void wakeme_after_rcu(struct rcu_head *head);
#define rcu_lock_acquire(a) do { } while (0)
#define rcu_lock_release(a) do { } while (0)
#define rcu_lockdep_assert(c, s) do { } while (0)
#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
/* Let CBMC non-deterministically choose between normal and expedited grace periods. */
bool rcu_gp_is_normal(void);
bool rcu_gp_is_expedited(void);
/* Do the same for old versions of rcu. */
#define rcu_expedited (rcu_gp_is_expedited())
#endif
#ifndef PERCPU_H
#define PERCPU_H
#include <stddef.h>
#include "bug_on.h"
#include "preempt.h"
#define __percpu
/* Maximum size of any percpu data. */
#define PERCPU_OFFSET (4 * sizeof(long))
/* Ignore alignment, as CBMC doesn't care about false sharing. */
#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
static inline void *__alloc_percpu(size_t size, size_t align)
{
BUG();
return NULL;
}
static inline void free_percpu(void *ptr)
{
BUG();
}
#define per_cpu_ptr(ptr, cpu) \
((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
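/*
 * per_cpu_ptr() works because DEFINE_PER_CPU() (below) lays each variable out
 * as a char[NR_CPUS][PERCPU_OFFSET] array, so CPU n's copy lives at a fixed
 * byte offset of PERCPU_OFFSET * n from the base pointer.
 */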
#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
/* Make CBMC use atomics to work around bug. */
#ifdef RUN
#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
#else
/*
* Split the atomic into a read and a write so that it has the least
* possible ordering.
*/
#define THIS_CPU_ADD_HELPER(ptr, x) \
do { \
typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
typeof(ptr) this_cpu_add_helper_x = (x); \
typeof(*ptr) this_cpu_add_helper_temp; \
__CPROVER_atomic_begin(); \
this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
__CPROVER_atomic_end(); \
this_cpu_add_helper_temp += this_cpu_add_helper_x; \
__CPROVER_atomic_begin(); \
*(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
__CPROVER_atomic_end(); \
} while (0)
#endif
/*
* For some reason CBMC needs an atomic operation even though this is percpu
* data.
*/
#define __this_cpu_add(pcp, n) \
do { \
BUG_ON(preemptible()); \
THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
(typeof(pcp)) (n)); \
} while (0)
#define this_cpu_add(pcp, n) \
do { \
int this_cpu_add_impl_cpu = get_cpu(); \
THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
(typeof(pcp)) (n)); \
put_cpu(); \
} while (0)
/*
* This will cause a compiler warning because of the cast from char[][] to
* type*. This will cause a compile time error if type is too big.
*/
#define DEFINE_PER_CPU(type, name) \
char name[NR_CPUS][PERCPU_OFFSET]; \
typedef char percpu_too_big_##name \
[sizeof(type) > PERCPU_OFFSET ? -1 : 1]
#define for_each_possible_cpu(cpu) \
for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
#endif
#include <config.h>
#include "preempt.h"
#include "assume.h"
#include "locks.h"
/* Support NR_CPUS of at most 64 */
#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
#define CPU_PREEMPTION_LOCKS_INIT1 \
CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
#define CPU_PREEMPTION_LOCKS_INIT2 \
CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
#define CPU_PREEMPTION_LOCKS_INIT3 \
CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
#define CPU_PREEMPTION_LOCKS_INIT4 \
CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
#define CPU_PREEMPTION_LOCKS_INIT5 \
CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
/*
* Simulate disabling preemption by locking a particular cpu. NR_CPUS
* should be the actual number of cpus, not just the maximum.
*/
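/*
 * The #if blocks below append a power-of-two-sized chunk of initializers for
 * each bit set in NR_CPUS - 1; together with the leading entry this yields
 * exactly 1 + (NR_CPUS - 1) = NR_CPUS initializers.
 */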
struct lock_impl cpu_preemption_locks[NR_CPUS] = {
CPU_PREEMPTION_LOCKS_INIT0
#if (NR_CPUS - 1) & 1
, CPU_PREEMPTION_LOCKS_INIT0
#endif
#if (NR_CPUS - 1) & 2
, CPU_PREEMPTION_LOCKS_INIT1
#endif
#if (NR_CPUS - 1) & 4
, CPU_PREEMPTION_LOCKS_INIT2
#endif
#if (NR_CPUS - 1) & 8
, CPU_PREEMPTION_LOCKS_INIT3
#endif
#if (NR_CPUS - 1) & 16
, CPU_PREEMPTION_LOCKS_INIT4
#endif
#if (NR_CPUS - 1) & 32
, CPU_PREEMPTION_LOCKS_INIT5
#endif
};
#undef CPU_PREEMPTION_LOCKS_INIT0
#undef CPU_PREEMPTION_LOCKS_INIT1
#undef CPU_PREEMPTION_LOCKS_INIT2
#undef CPU_PREEMPTION_LOCKS_INIT3
#undef CPU_PREEMPTION_LOCKS_INIT4
#undef CPU_PREEMPTION_LOCKS_INIT5
__thread int thread_cpu_id;
__thread int preempt_disable_count;
void preempt_disable(void)
{
BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
if (preempt_disable_count++)
return;
thread_cpu_id = nondet_int();
assume(thread_cpu_id >= 0);
assume(thread_cpu_id < NR_CPUS);
lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
}
void preempt_enable(void)
{
BUG_ON(preempt_disable_count < 1);
if (--preempt_disable_count)
return;
lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
}
#ifndef PREEMPT_H
#define PREEMPT_H
#include <stdbool.h>
#include "bug_on.h"
/* This flag contains garbage if preempt_disable_count is 0. */
extern __thread int thread_cpu_id;
/* Support recursive preemption disabling. */
extern __thread int preempt_disable_count;
void preempt_disable(void);
void preempt_enable(void);
static inline void preempt_disable_notrace(void)
{
preempt_disable();
}
static inline void preempt_enable_no_resched(void)
{
preempt_enable();
}
static inline void preempt_enable_notrace(void)
{
preempt_enable();
}
static inline int preempt_count(void)
{
return preempt_disable_count;
}
static inline bool preemptible(void)
{
return !preempt_count();
}
static inline int get_cpu(void)
{
preempt_disable();
return thread_cpu_id;
}
static inline void put_cpu(void)
{
preempt_enable();
}
static inline void might_sleep(void)
{
BUG_ON(preempt_disable_count);
}
#endif
#include <config.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <stddef.h>
#include <string.h>
#include <sys/types.h>
#include "int_typedefs.h"
#include "barriers.h"
#include "bug_on.h"
#include "locks.h"
#include "misc.h"
#include "preempt.h"
#include "percpu.h"
#include "workqueues.h"
#include <linux/srcu.h>
/* Functions needed from modify_srcu.c */
bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
void srcu_flip(struct srcu_struct *sp);
/* Simpler implementation of synchronize_srcu that ignores batching. */
void synchronize_srcu(struct srcu_struct *sp)
{
int idx;
/*
* This code assumes that try_check_zero will succeed anyway,
* so there is no point in multiple tries.
*/
const int trycount = 1;
might_sleep();
/* Ignore the lock, as multiple writers aren't working yet anyway. */
idx = 1 ^ (sp->completed & 1);
/* For comments see srcu_advance_batches. */
assume(try_check_zero(sp, idx, trycount));
srcu_flip(sp);
assume(try_check_zero(sp, idx^1, trycount));
}
#ifndef WORKQUEUES_H
#define WORKQUEUES_H
#include <stdbool.h>
#include "barriers.h"
#include "bug_on.h"
#include "int_typedefs.h"
#include <linux/types.h>
/* Stub workqueue implementation. */
struct work_struct;
typedef void (*work_func_t)(struct work_struct *work);
void delayed_work_timer_fn(unsigned long __data);
struct work_struct {
/* atomic_long_t data; */
unsigned long data;
struct list_head entry;
work_func_t func;
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
#endif
};
struct timer_list {
struct hlist_node entry;
unsigned long expires;
void (*function)(unsigned long);
unsigned long data;
u32 flags;
int slack;
};
struct delayed_work {
struct work_struct work;
struct timer_list timer;
/* target workqueue and CPU ->timer uses to queue ->work */
struct workqueue_struct *wq;
int cpu;
};
static inline bool schedule_work(struct work_struct *work)
{
BUG();
return true;
}
static inline bool schedule_work_on(int cpu, struct work_struct *work)
{
BUG();
return true;
}
static inline bool queue_work(struct workqueue_struct *wq,
struct work_struct *work)
{
BUG();
return true;
}
static inline bool queue_delayed_work(struct workqueue_struct *wq,
struct delayed_work *dwork,
unsigned long delay)
{
BUG();
return true;
}
#define INIT_WORK(w, f) \
do { \
(w)->data = 0; \
(w)->func = (f); \
} while (0)
#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
#define __WORK_INITIALIZER(n, f) { \
.data = 0, \
.entry = { &(n).entry, &(n).entry }, \
.func = f \
}
/* Don't bother initializing timer. */
#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
.work = __WORK_INITIALIZER((n).work, (f)), \
}
#define DECLARE_WORK(n, f) \
struct work_struct n = __WORK_INITIALIZER(n, f)
#define DECLARE_DELAYED_WORK(n, f) \
struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
#endif
CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
all:
for i in ./*.pass; do \
echo $$i ; \
CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
done
for i in ./*.fail; do \
echo $$i ; \
CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
done
#include <src/combined_source.c>
int x;
int y;
int __unbuffered_tpr_x;
int __unbuffered_tpr_y;
DEFINE_SRCU(ss);
void rcu_reader(void)
{
int idx;
#ifndef FORCE_FAILURE_3
idx = srcu_read_lock(&ss);
#endif
might_sleep();
__unbuffered_tpr_y = READ_ONCE(y);
#ifdef FORCE_FAILURE
srcu_read_unlock(&ss, idx);
idx = srcu_read_lock(&ss);
#endif
WRITE_ONCE(x, 1);
#ifndef FORCE_FAILURE_3
srcu_read_unlock(&ss, idx);
#endif
might_sleep();
}
void *thread_update(void *arg)
{
WRITE_ONCE(y, 1);
#ifndef FORCE_FAILURE_2
synchronize_srcu(&ss);
#endif
might_sleep();
__unbuffered_tpr_x = READ_ONCE(x);
return NULL;
}
void *thread_process_reader(void *arg)
{
rcu_reader();
return NULL;
}
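/*
 * In the default (no FORCE_FAILURE*) configuration, the assertion in main()
 * encodes the store-buffering outcome that SRCU forbids: if the reader's read
 * of y misses the update (y == 0), its critical section must complete before
 * synchronize_srcu() returns, so the updater's subsequent read of x must
 * observe x == 1; both reads being zero is an error.
 */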
int main(int argc, char *argv[])
{
pthread_t tu;
pthread_t tpr;
if (pthread_create(&tu, NULL, thread_update, NULL))
abort();
if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
abort();
if (pthread_join(tu, NULL))
abort();
if (pthread_join(tpr, NULL))
abort();
assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
#ifdef ASSERT_END
assert(0);
#endif
return 0;
}
#!/bin/sh
# This script expects a mode (either --should-pass or --should-fail) followed by
# an input file. The script uses the following environment variables. The test C
# source file is expected to be named test.c in the directory containing the
# input file.
#
# CBMC: The command to run CBMC. Default: cbmc
# CBMC_FLAGS: Additional flags to pass to CBMC
# NR_CPUS: Number of cpus to run tests with. Default specified by the test
# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
# kernel: Version included in the linux kernel source.
# simple: Use try_check_zero directly.
#
# The input file is a script that is sourced by this file. It can define any of
# the following variables to configure the test.
#
# test_cbmc_options: Extra options to pass to CBMC.
# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
# The test is expected to pass if it is run with fewer. (Only
# useful for .fail files)
# default_cpus: Quantity of CPUs to use for the test, if not specified on the
# command line. Default: larger of 2 and min_cpus_fail.
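#
# Example invocation, mirroring the tests/store_buffering Makefile above (the
# input file name is illustrative):
#
#	NR_CPUS=2 sh ../test_script.sh --should-pass ./foo.pass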
set -e
if test "$#" -ne 2; then
echo "Expected one option followed by an input file" 1>&2
exit 99
fi
if test "x$1" = "x--should-pass"; then
should_pass="yes"
elif test "x$1" = "x--should-fail"; then
should_pass="no"
else
echo "Unrecognized argument '$1'" 1>&2
# Exit code 99 indicates a hard error.
exit 99
fi
CBMC=${CBMC:-cbmc}
SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
case ${SYNC_SRCU_MODE} in
kernel) sync_srcu_mode_flags="" ;;
simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
*)
echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
exit 99
;;
esac
min_cpus_fail=1
c_file=`dirname "$2"`/test.c
# Source the input file.
. $2
if test ${min_cpus_fail} -gt 2; then
default_default_cpus=${min_cpus_fail}
else
default_default_cpus=2
fi
default_cpus=${default_cpus:-${default_default_cpus}}
cpus=${NR_CPUS:-${default_cpus}}
# Check if there are too few cpus to make the test fail.
if test $cpus -lt ${min_cpus_fail:-0}; then
should_pass="yes"
fi
cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
if ${CBMC} ${cbmc_opts} "${c_file}"; then
# Verification successful. Make sure that it was supposed to verify.
test "x${should_pass}" = xyes
else
cbmc_exit_status=$?
# An exit status of 10 indicates a failed verification.
# (see cbmc_parse_optionst::do_bmc in the CBMC source code)
if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
:
else
echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
# Parse errors have exit status 6. Any other type of error
# should be considered a hard error.
if test ${cbmc_exit_status} -ne 6 && \
test ${cbmc_exit_status} -ne 10; then
exit 99
else
exit 1
fi
fi
fi