Commit 38e387ee authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] improve preemption on SMP

SMP locking latencies are one of the last architectural problems that cause
millisecond-category scheduling delays.  CONFIG_PREEMPT tries to solve some of
the SMP issues, but there are still lots of problems remaining: spinlocks
nested at multiple levels, spinning with irqs turned off, and non-nested
spinning with preemption turned off permanently.

The nesting problem goes like this: if a piece of kernel code (e.g.  the MM
or ext3's journalling code) does the following:

	spin_lock(&spinlock_1);
	...
	spin_lock(&spinlock_2);
	...

then even with CONFIG_PREEMPT enabled, current kernels may spin on
spinlock_2 indefinitely.  A number of critical sections break their long
paths by using cond_resched_lock(), but this does not break the path on
SMP, because need_resched() *of the other CPU* is not set, so
cond_resched_lock() doesn't notice that a reschedule is due.

To solve this problem I've introduced a new spinlock field,
lock->break_lock, which signals to the holding CPU that a
spinlock-break is requested by another CPU.  This field is only set if a
CPU is spinning in a spinlock function [at any locking depth], so the
default overhead is zero.  I've extended cond_resched_lock() to check for
this flag - in this case we can also save a reschedule.  I've added the
lock_need_resched(lock) and need_lockbreak(lock) methods to check for the
need to break out of a critical section.
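
As an illustration (not part of this patch), a long-running critical
section could now poll for contention like this - the data structure and
the helper functions around the lock are made up, only cond_resched_lock()
and the break_lock mechanism are real:

	spin_lock(&cache->lock);
	while (cache_has_work(cache)) {
		shrink_one_entry(cache);
		/*
		 * Break the critical section if another CPU is spinning
		 * on cache->lock (break_lock) or a reschedule is due;
		 * the lock is reacquired before this returns:
		 */
		cond_resched_lock(&cache->lock);
	}
	spin_unlock(&cache->lock);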

Another latency problem was that the stock kernel, even with CONFIG_PREEMPT
enabled, didn't have any spin-nicely preemption logic for the following,
commonly used SMP locking primitives: read_lock(), spin_lock_irqsave(),
spin_lock_irq(), spin_lock_bh(), read_lock_irqsave(), read_lock_irq(),
read_lock_bh(), write_lock_irqsave(), write_lock_irq(), write_lock_bh().
Only spin_lock() and write_lock() [the two simplest cases] were covered.

In addition to the preemption latency problems, the _irq() variants in the
above list didn't do any IRQ-enabling while spinning - possibly resulting in
excessive irqs-off sections of code!

preempt-smp.patch fixes all these latency problems by spinning irq-nicely
(if possible) and by requesting lock-breaks if needed.  Two
architecture-level changes were necessary for this: the addition of the
break_lock field to spinlock_t and rwlock_t, and the addition of the
_raw_read_trylock() function.
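
To sketch what this means in practice: with CONFIG_PREEMPT, the contended
path of the new locking functions (generated by the BUILD_LOCK_OPS() macro
in kernel/spinlock.c) is roughly equivalent to the following, written out
here without the macro for the plain spin_lock() case:

	void __lockfunc _spin_lock(spinlock_t *lock)
	{
		preempt_disable();
		for (;;) {
			if (likely(_raw_spin_trylock(lock)))
				break;
			/* contended: spin preemptibly and request a lock-break */
			preempt_enable();
			if (!lock->break_lock)
				lock->break_lock = 1;
			cpu_relax();
			preempt_disable();
		}
	}

The _irqsave()/_irq()/_bh() variants follow the same pattern, but restore
the caller's IRQ state and re-enable preemption between trylock attempts,
which is what eliminates the irqs-off spinning described above.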

Testing done by Mark H Johnson and myself indicates SMP latencies comparable
to the UP kernel - while they were basically indefinitely high without this
patch.

I successfully test-compiled and test-booted this patch on top of BK-curr
using the following .config combinations: SMP && PREEMPT, !SMP && PREEMPT,
SMP && !PREEMPT and !SMP && !PREEMPT on x86, !SMP && !PREEMPT and SMP &&
PREEMPT on x64.  I also test-booted x86 with the generic_raw_read_trylock
function to check that it works fine.  Essentially the same patch has been
in testing as part of the voluntary-preempt patches for some time already.

NOTE to architecture maintainers: generic_raw_read_trylock() is a crude
version that should be replaced with the proper arch-optimized version
ASAP.

From: Hugh Dickins <hugh@veritas.com>

The i386 and x86_64 _raw_read_trylocks in preempt-smp.patch are too
successful: atomic_read() returns a signed integer.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 18f27594
@@ -23,6 +23,9 @@ typedef struct {
 	struct task_struct * task;
 	const char *base_file;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -96,6 +99,9 @@ static inline int _raw_spin_trylock(spinlock_t *lock)
 typedef struct {
 	volatile unsigned int write_lock:1, read_counter:31;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } /*__attribute__((aligned(32)))*/ rwlock_t;
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
...
@@ -17,6 +17,9 @@
  */
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
@@ -70,6 +73,9 @@ static inline void _raw_spin_unlock(spinlock_t *lock)
  */
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
@@ -143,6 +149,8 @@ static inline void _raw_read_unlock(rwlock_t *rw)
 	: "cc", "memory");
 }
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 static inline int _raw_write_trylock(rwlock_t *rw)
 {
 	unsigned long tmp;
...
@@ -19,6 +19,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #define SPINLOCK_MAGIC 0xdead4ead
@@ -166,6 +169,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RWLOCK_MAGIC 0xdeaf1eed
@@ -212,6 +218,16 @@ static inline void _raw_write_lock(rwlock_t *rw)
 #define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
 #define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+	atomic_dec(count);
+	if (atomic_read(count) >= 0)
+		return 1;
+	atomic_inc(count);
+	return 0;
+}
+
 static inline int _raw_write_trylock(rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
...
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
typedef struct { typedef struct {
volatile unsigned int lock; volatile unsigned int lock;
#ifdef CONFIG_PREEMPT
unsigned int break_lock;
#endif
} spinlock_t; } spinlock_t;
#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
...@@ -116,6 +119,9 @@ do { \ ...@@ -116,6 +119,9 @@ do { \
typedef struct { typedef struct {
volatile unsigned int read_counter : 31; volatile unsigned int read_counter : 31;
volatile unsigned int write_lock : 1; volatile unsigned int write_lock : 1;
#ifdef CONFIG_PREEMPT
unsigned int break_lock;
#endif
} rwlock_t; } rwlock_t;
#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
...@@ -190,6 +196,8 @@ do { \ ...@@ -190,6 +196,8 @@ do { \
#endif /* !ASM_SUPPORTED */ #endif /* !ASM_SUPPORTED */
#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
#define _raw_write_unlock(x) \ #define _raw_write_unlock(x) \
({ \ ({ \
smp_mb__before_clear_bit(); /* need barrier before releasing lock... */ \ smp_mb__before_clear_bit(); /* need barrier before releasing lock... */ \
......
@@ -17,6 +17,9 @@
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
@@ -127,6 +130,9 @@ static inline unsigned int _raw_spin_trylock(spinlock_t *lock)
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
@@ -246,6 +252,8 @@ static inline void _raw_write_unlock(rwlock_t *rw)
 	: "memory");
 }
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 static inline int _raw_write_trylock(rwlock_t *rw)
 {
 	unsigned int tmp;
...
@@ -142,6 +142,9 @@ do { \
 typedef struct {
 	spinlock_t lock;
 	volatile int counter;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RW_LOCK_UNLOCKED (rwlock_t) { __SPIN_LOCK_UNLOCKED, 0 }
@@ -150,6 +153,8 @@ typedef struct {
 #define rwlock_is_locked(lp) ((lp)->counter != 0)
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 /* read_lock, read_unlock are pretty straightforward.  Of course it somehow
  * sucks we end up saving/restoring flags twice for read_lock_irqsave aso. */
...
@@ -176,6 +176,9 @@ typedef struct {
 	void *previous;
 	struct task_struct * task;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #define __lock_aligned __attribute__((__section__(".data.lock_aligned")))
...
@@ -13,6 +13,9 @@ typedef struct {
 	volatile unsigned long owner_pc;
 	volatile unsigned long owner_cpu;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #ifdef __KERNEL__
@@ -83,6 +86,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	volatile unsigned long owner_pc;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -192,5 +198,7 @@ extern int _raw_write_trylock(rwlock_t *rw);
 #endif
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 #endif /* __ASM_SPINLOCK_H */
 #endif /* __KERNEL__ */
@@ -23,10 +23,16 @@
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 typedef struct {
 	volatile signed int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #ifdef __KERNEL__
@@ -216,6 +222,8 @@ static void __inline__ _raw_read_unlock(rwlock_t *rw)
 	: "cr0", "memory");
 }
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 /*
  * This returns the old value in the lock,
  * so we got the write lock if the return value is 0.
...
@@ -36,6 +36,9 @@
 typedef struct {
 	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } __attribute__ ((aligned (4))) spinlock_t;
 #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
@@ -105,6 +108,9 @@ extern inline void _raw_spin_unlock(spinlock_t *lp)
 typedef struct {
 	volatile unsigned long lock;
 	volatile unsigned long owner_pc;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
@@ -211,6 +217,8 @@ typedef struct {
 	  "m" ((rw)->lock) : "2", "3", "cc", "memory" )
 #endif /* __s390x__ */
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 extern inline int _raw_write_trylock(rwlock_t *rw)
 {
 	unsigned long result, reg;
...
@@ -17,6 +17,9 @@
  */
 typedef struct {
 	volatile unsigned long lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
@@ -68,6 +71,9 @@ static inline void _raw_spin_unlock(spinlock_t *lock)
 typedef struct {
 	spinlock_t lock;
 	atomic_t counter;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RW_LOCK_BIAS 0x01000000
@@ -105,6 +111,8 @@ static inline void _raw_write_unlock(rwlock_t *rw)
 	_raw_spin_unlock(&rw->lock);
 }
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 static inline int _raw_write_trylock(rwlock_t *rw)
 {
 	if (atomic_sub_and_test(RW_LOCK_BIAS, &rw->counter))
...
@@ -16,6 +16,9 @@
 struct _spinlock_debug {
 	unsigned char lock;
 	unsigned long owner_pc;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 };
 typedef struct _spinlock_debug spinlock_t;
@@ -36,6 +39,9 @@ struct _rwlock_debug {
 	volatile unsigned int lock;
 	unsigned long owner_pc;
 	unsigned long reader_pc[NR_CPUS];
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 };
 typedef struct _rwlock_debug rwlock_t;
@@ -79,8 +85,14 @@ do { unsigned long flags; \
 #else /* !CONFIG_DEBUG_SPINLOCK */
-typedef unsigned char spinlock_t;
-#define SPIN_LOCK_UNLOCKED	0
+typedef struct {
+	unsigned char lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
+} spinlock_t;
+#define SPIN_LOCK_UNLOCKED	{ 0, }
 #define spin_lock_init(lock)   (*((unsigned char *)(lock)) = 0)
 #define spin_is_locked(lock)    (*((volatile unsigned char *)(lock)) != 0)
@@ -137,7 +149,12 @@ extern __inline__ void _raw_spin_unlock(spinlock_t *lock)
  * XXX This might create some problems with my dual spinlock
  * XXX scheme, deadlocks etc. -DaveM
  */
-typedef struct { volatile unsigned int lock; } rwlock_t;
+typedef struct {
+	volatile unsigned int lock;
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
+} rwlock_t;
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
...
@@ -304,6 +304,8 @@ do { unsigned long flags; \
 #endif /* CONFIG_DEBUG_SPINLOCK */
+#define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 #endif /* !(__ASSEMBLY__) */
 #endif /* !(__SPARC64_SPINLOCK_H) */
...
@@ -18,6 +18,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } spinlock_t;
 #define SPINLOCK_MAGIC 0xdead4ead
@@ -139,6 +142,9 @@ typedef struct {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	unsigned magic;
 #endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
 } rwlock_t;
 #define RWLOCK_MAGIC 0xdeaf1eed
@@ -185,6 +191,16 @@ static inline void _raw_write_lock(rwlock_t *rw)
 #define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
 #define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+	atomic_dec(count);
+	if (atomic_read(count) >= 0)
+		return 1;
+	atomic_inc(count);
+	return 0;
+}
+
 static inline int _raw_write_trylock(rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
...
@@ -1072,23 +1072,7 @@ static inline void cond_resched(void)
 	__cond_resched();
 }
-/*
- * cond_resched_lock() - if a reschedule is pending, drop the given lock,
- * call schedule, and on return reacquire the lock.
- *
- * This works OK both with and without CONFIG_PREEMPT.  We do strange low-level
- * operations here to prevent schedule() from being called twice (once via
- * spin_unlock(), once by hand).
- */
-static inline void cond_resched_lock(spinlock_t * lock)
-{
-	if (need_resched()) {
-		_raw_spin_unlock(lock);
-		preempt_enable_no_resched();
-		__cond_resched();
-		spin_lock(lock);
-	}
-}
+extern int cond_resched_lock(spinlock_t * lock);
 /* Reevaluate whether the task has signals pending delivery.
    This is required every time the blocked sigset_t changes.
...
@@ -41,6 +41,7 @@
 #include <asm/spinlock.h>
 int __lockfunc _spin_trylock(spinlock_t *lock);
+int __lockfunc _read_trylock(rwlock_t *lock);
 int __lockfunc _write_trylock(rwlock_t *lock);
 void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t);
@@ -73,6 +74,7 @@ void __lockfunc _write_unlock_irq(rwlock_t *lock) __releases(rwlock_t);
 void __lockfunc _write_unlock_bh(rwlock_t *lock) __releases(rwlock_t);
 int __lockfunc _spin_trylock_bh(spinlock_t *lock);
+int __lockfunc generic_raw_read_trylock(rwlock_t *lock);
 int in_lock_functions(unsigned long addr);
 #else
@@ -219,11 +221,15 @@ typedef struct {
 #define _raw_read_unlock(lock)	do { (void)(lock); } while(0)
 #define _raw_write_lock(lock)	do { (void)(lock); } while(0)
 #define _raw_write_unlock(lock)	do { (void)(lock); } while(0)
+#define _raw_read_trylock(lock)	({ (void)(lock); (1); })
 #define _raw_write_trylock(lock) ({ (void)(lock); (1); })
 #define _spin_trylock(lock)	({preempt_disable(); _raw_spin_trylock(lock) ? \
				1 : ({preempt_enable(); 0;});})
+#define _read_trylock(lock)	({preempt_disable();_raw_read_trylock(lock) ? \
+				1 : ({preempt_enable(); 0;});})
 #define _write_trylock(lock)	({preempt_disable(); _raw_write_trylock(lock) ? \
				1 : ({preempt_enable(); 0;});})
@@ -425,16 +431,12 @@ do { \
  * methods are defined as nops in the case they are not required.
  */
 #define spin_trylock(lock)	__cond_lock(_spin_trylock(lock))
+#define read_trylock(lock)	__cond_lock(_read_trylock(lock))
 #define write_trylock(lock)	__cond_lock(_write_trylock(lock))
-/* Where's read_trylock? */
 #define spin_lock(lock)		_spin_lock(lock)
 #define write_lock(lock)	_write_lock(lock)
 #define read_lock(lock)		_read_lock(lock)
-#define spin_unlock(lock)	_spin_unlock(lock)
-#define write_unlock(lock)	_write_unlock(lock)
-#define read_unlock(lock)	_read_unlock(lock)
 #ifdef CONFIG_SMP
 #define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
@@ -454,6 +456,11 @@ do { \
 #define write_lock_irq(lock)		_write_lock_irq(lock)
 #define write_lock_bh(lock)		_write_lock_bh(lock)
+#define spin_unlock(lock)	_spin_unlock(lock)
+#define write_unlock(lock)	_write_unlock(lock)
+#define read_unlock(lock)	_read_unlock(lock)
 #define spin_unlock_irqrestore(lock, flags)	_spin_unlock_irqrestore(lock, flags)
 #define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
@@ -490,6 +497,7 @@ extern void _metered_read_lock (rwlock_t *lock);
 extern void _metered_read_unlock (rwlock_t *lock);
 extern void _metered_write_lock (rwlock_t *lock);
 extern void _metered_write_unlock (rwlock_t *lock);
+extern int _metered_read_trylock (rwlock_t *lock);
 extern int _metered_write_trylock(rwlock_t *lock);
 #endif
@@ -519,8 +527,11 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr)
 	preempt_disable();
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 	while (test_and_set_bit(bitnum, addr)) {
-		while (test_bit(bitnum, addr))
+		while (test_bit(bitnum, addr)) {
+			preempt_enable();
 			cpu_relax();
+			preempt_disable();
+		}
 	}
 #endif
 	__acquire(bitlock);
...
@@ -3441,6 +3441,37 @@ void __sched __cond_resched(void)
 EXPORT_SYMBOL(__cond_resched);
+/*
+ * cond_resched_lock() - if a reschedule is pending, drop the given lock,
+ * call schedule, and on return reacquire the lock.
+ *
+ * This works OK both with and without CONFIG_PREEMPT.  We do strange low-level
+ * operations here to prevent schedule() from being called twice (once via
+ * spin_unlock(), once by hand).
+ */
+int cond_resched_lock(spinlock_t * lock)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
+	if (lock->break_lock) {
+		lock->break_lock = 0;
+		spin_unlock(lock);
+		cpu_relax();
+		spin_lock(lock);
+	}
+#endif
+	if (need_resched()) {
+		_raw_spin_unlock(lock);
+		preempt_enable_no_resched();
+		set_current_state(TASK_RUNNING);
+		schedule();
+		spin_lock(lock);
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cond_resched_lock);
 /**
  * yield - yield the current processor to other threads.
  *
...
@@ -2,6 +2,8 @@
  * Copyright (2004) Linus Torvalds
  *
  * Author: Zwane Mwaikambo <zwane@fsmlabs.com>
+ *
+ * Copyright (2004) Ingo Molnar
  */
 #include <linux/config.h>
@@ -11,6 +13,17 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
+/*
+ * Generic declaration of the raw read_trylock() function,
+ * architectures are supposed to optimize this:
+ */
+int __lockfunc generic_raw_read_trylock(rwlock_t *lock)
+{
+	_raw_read_lock(lock);
+	return 1;
+}
+EXPORT_SYMBOL(generic_raw_read_trylock);
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
@@ -22,86 +35,29 @@ int __lockfunc _spin_trylock(spinlock_t *lock)
 }
 EXPORT_SYMBOL(_spin_trylock);

+int __lockfunc _read_trylock(rwlock_t *lock)
+{
+	preempt_disable();
+	if (_raw_read_trylock(lock))
+		return 1;
+
+	preempt_enable();
+	return 0;
+}
+EXPORT_SYMBOL(_read_trylock);
+
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	preempt_disable();
 	if (_raw_write_trylock(lock))
 		return 1;

 	preempt_enable();
 	return 0;
 }
 EXPORT_SYMBOL(_write_trylock);

-#ifdef CONFIG_PREEMPT
-/*
- * This could be a long-held lock.  If another CPU holds it for a long time,
- * and that CPU is not asked to reschedule then *this* CPU will spin on the
- * lock for a long time, even if *this* CPU is asked to reschedule.
- *
- * So what we do here, in the slow (contended) path is to spin on the lock by
- * hand while permitting preemption.
- *
- * Called inside preempt_disable().
- */
-static inline void __preempt_spin_lock(spinlock_t *lock)
-{
-	if (preempt_count() > 1) {
-		_raw_spin_lock(lock);
-		return;
-	}
-
-	do {
-		preempt_enable();
-		while (spin_is_locked(lock))
-			cpu_relax();
-		preempt_disable();
-	} while (!_raw_spin_trylock(lock));
-}
-
-void __lockfunc _spin_lock(spinlock_t *lock)
-{
-	preempt_disable();
-	if (unlikely(!_raw_spin_trylock(lock)))
-		__preempt_spin_lock(lock);
-}
-
-static inline void __preempt_write_lock(rwlock_t *lock)
-{
-	if (preempt_count() > 1) {
-		_raw_write_lock(lock);
-		return;
-	}
-
-	do {
-		preempt_enable();
-		while (rwlock_is_locked(lock))
-			cpu_relax();
-		preempt_disable();
-	} while (!_raw_write_trylock(lock));
-}
-
-void __lockfunc _write_lock(rwlock_t *lock)
-{
-	preempt_disable();
-	if (unlikely(!_raw_write_trylock(lock)))
-		__preempt_write_lock(lock);
-}
-
-#else
-
-void __lockfunc _spin_lock(spinlock_t *lock)
-{
-	preempt_disable();
-	_raw_spin_lock(lock);
-}
-
-void __lockfunc _write_lock(rwlock_t *lock)
-{
-	preempt_disable();
-	_raw_write_lock(lock);
-}
-
-#endif
-
-EXPORT_SYMBOL(_spin_lock);
-EXPORT_SYMBOL(_write_lock);
+#ifndef CONFIG_PREEMPT

 void __lockfunc _read_lock(rwlock_t *lock)
 {
@@ -110,27 +66,6 @@ void __lockfunc _read_lock(rwlock_t *lock)
 }
 EXPORT_SYMBOL(_read_lock);

-void __lockfunc _spin_unlock(spinlock_t *lock)
-{
-	_raw_spin_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(_spin_unlock);
-
-void __lockfunc _write_unlock(rwlock_t *lock)
-{
-	_raw_write_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(_write_unlock);
-
-void __lockfunc _read_unlock(rwlock_t *lock)
-{
-	_raw_read_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(_read_unlock);
-
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 {
 	unsigned long flags;
@@ -212,6 +147,130 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
 }
 EXPORT_SYMBOL(_write_lock_bh);

+void __lockfunc _spin_lock(spinlock_t *lock)
+{
+	preempt_disable();
+	_raw_spin_lock(lock);
+}
+EXPORT_SYMBOL(_spin_lock);
+
+void __lockfunc _write_lock(rwlock_t *lock)
+{
+	preempt_disable();
+	_raw_write_lock(lock);
+}
+EXPORT_SYMBOL(_write_lock);
+
+#else /* CONFIG_PREEMPT: */
+
+/*
+ * This could be a long-held lock. We both prepare to spin for a long
+ * time (making _this_ CPU preemptable if possible), and we also signal
+ * towards that other CPU that it should break the lock ASAP.
+ *
+ * (We do this in a function because inlining it would be excessive.)
+ */
+#define BUILD_LOCK_OPS(op, locktype) \
+void __lockfunc _##op##_lock(locktype *lock) \
+{ \
+	preempt_disable(); \
+	for (;;) { \
+		if (likely(_raw_##op##_trylock(lock))) \
+			break; \
+		preempt_enable(); \
+		if (!(lock)->break_lock) \
+			(lock)->break_lock = 1; \
+		cpu_relax(); \
+		preempt_disable(); \
+	} \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock); \
+ \
+unsigned long __lockfunc _##op##_lock_irqsave(locktype *lock) \
+{ \
+	unsigned long flags; \
+ \
+	preempt_disable(); \
+	for (;;) { \
+		local_irq_save(flags); \
+		if (likely(_raw_##op##_trylock(lock))) \
+			break; \
+		local_irq_restore(flags); \
+ \
+		preempt_enable(); \
+		if (!(lock)->break_lock) \
+			(lock)->break_lock = 1; \
+		cpu_relax(); \
+		preempt_disable(); \
+	} \
+	return flags; \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock_irqsave); \
+ \
+void __lockfunc _##op##_lock_irq(locktype *lock) \
+{ \
+	_##op##_lock_irqsave(lock); \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock_irq); \
+ \
+void __lockfunc _##op##_lock_bh(locktype *lock) \
+{ \
+	unsigned long flags; \
+ \
+	/*							*/ \
+	/* Careful: we must exclude softirqs too, hence the	*/ \
+	/* irq-disabling. We use the generic preemption-aware	*/ \
+	/* function:						*/ \
+	/**/ \
+	flags = _##op##_lock_irqsave(lock); \
+	local_bh_disable(); \
+	local_irq_restore(flags); \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock_bh)
+
+/*
+ * Build preemption-friendly versions of the following
+ * lock-spinning functions:
+ *
+ *	_[spin|read|write]_lock()
+ *	_[spin|read|write]_lock_irq()
+ *	_[spin|read|write]_lock_irqsave()
+ *	_[spin|read|write]_lock_bh()
+ */
+BUILD_LOCK_OPS(spin, spinlock_t);
+BUILD_LOCK_OPS(read, rwlock_t);
+BUILD_LOCK_OPS(write, rwlock_t);
+
+#endif /* CONFIG_PREEMPT */
+
+void __lockfunc _spin_unlock(spinlock_t *lock)
+{
+	_raw_spin_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(_spin_unlock);
+
+void __lockfunc _write_unlock(rwlock_t *lock)
+{
+	_raw_write_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(_write_unlock);
+
+void __lockfunc _read_unlock(rwlock_t *lock)
+{
+	_raw_read_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(_read_unlock);
+
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
 	_raw_spin_unlock(lock);
...