Commit 2faf4338 authored by Andrew Morton, committed by Richard Henderson

[PATCH] Fix an SMP+preempt latency problem

Here is spin_lock():

#define spin_lock(lock) \
do { \
        preempt_disable(); \
        _raw_spin_lock(lock); \
} while(0)


Here is the scenario:

CPU0:
	spin_lock(some_lock);
	do_very_long_thing();	/* This has cond_resched()s in it */

CPU1:
	spin_lock(some_lock);

Now suppose that the scheduler tries to schedule a task on CPU1.  Nothing
happens, because CPU1 is spinning on the lock with preemption disabled.  CPU0
will happily hold the lock for a long time because nobody has set
need_resched() against CPU0.

This problem can cause scheduling latencies of many tens of milliseconds on
SMP kernels, in situations which UP kernels handle quite happily.


This patch fixes the problem by changing the spin_lock() and write_lock()
contended slowpath to spin on the lock by hand, while polling for preemption
requests.
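
In outline, the lock macros keep an inline trylock fast path and only fall
into an out-of-line spinner when contended; the spinner drops back out of the
preempt-disabled state whenever a reschedule is pending.  Roughly (this is a
condensed restatement of the spin_lock()/__preempt_spin_lock() changes in the
diff below, not the literal patch text):

#define spin_lock(lock) \
do { \
        preempt_disable(); \
        if (unlikely(!_raw_spin_trylock(lock)))  /* contended: go out of line */ \
                __preempt_spin_lock(lock); \
} while (0)

void __preempt_spin_lock(spinlock_t *lock)
{
        /* Spin by hand so we can notice need_resched() on *this* CPU. */
        while (!_raw_spin_trylock(lock)) {
                if (need_resched()) {
                        preempt_enable_no_resched();    /* allow preemption... */
                        __cond_resched();               /* ...and schedule away */
                        preempt_disable();
                }
                cpu_relax();
        }
}

(The real __preempt_spin_lock() below also falls back to a plain
_raw_spin_lock() when preempt_count() > 1, since preemption cannot legally be
re-enabled from inside a nested preempt_disable() section.)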

I would have done read_lock() too, but we don't seem to have read_trylock()
primitives.
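
(For illustration only: if a _raw_read_trylock() primitive existed, read_lock()
could presumably take the same shape; the names below are hypothetical and are
not part of this patch.)

/* Hypothetical sketch -- this tree has no _raw_read_trylock(). */
#define read_lock(lock) \
do { \
        preempt_disable(); \
        if (unlikely(!_raw_read_trylock(lock)))  /* does not exist today */ \
                __preempt_read_lock(lock);       /* hypothetical spinner, same shape as above */ \
} while (0)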

The patch also shrinks the kernel by 30k due to not having separate
out-of-line spinning code for each spin_lock() callsite.
parent b4adddd6
@@ -85,31 +85,37 @@
  * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various
  * methods are defined as nops in the case they are not required.
  */
 
-#define spin_lock(lock) \
-do { \
-        preempt_disable(); \
-        _raw_spin_lock(lock); \
-} while(0)
-
 #define spin_trylock(lock)	({preempt_disable(); _raw_spin_trylock(lock) ? \
 				1 : ({preempt_enable(); 0;});})
 
-#define spin_unlock(lock) \
-do { \
-        _raw_spin_unlock(lock); \
-        preempt_enable(); \
-} while (0)
-
-#define read_lock(lock) \
-do { \
-        preempt_disable(); \
-        _raw_read_lock(lock); \
-} while(0)
-
-#define read_unlock(lock) \
-do { \
-        _raw_read_unlock(lock); \
-        preempt_enable(); \
-} while(0)
+#define write_trylock(lock)	({preempt_disable();_raw_write_trylock(lock) ? \
+				1 : ({preempt_enable(); 0;});})
+
+/* Where's read_trylock? */
+
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
+
+void __preempt_spin_lock(spinlock_t *lock);
+void __preempt_write_lock(rwlock_t *lock);
+
+#define spin_lock(lock) \
+do { \
+        preempt_disable(); \
+        if (unlikely(!_raw_spin_trylock(lock))) \
+                __preempt_spin_lock(lock); \
+} while (0)
+
+#define write_lock(lock) \
+do { \
+        preempt_disable(); \
+        if (unlikely(!_raw_write_trylock(lock))) \
+                __preempt_write_lock(lock); \
+} while (0)
+
+#else
+
+#define spin_lock(lock) \
+do { \
+        preempt_disable(); \
+        _raw_spin_lock(lock); \
+} while(0)
 
 #define write_lock(lock) \
@@ -117,6 +123,19 @@ do { \
         preempt_disable(); \
         _raw_write_lock(lock); \
 } while(0)
 
+#endif
+
+#define read_lock(lock) \
+do { \
+        preempt_disable(); \
+        _raw_read_lock(lock); \
+} while(0)
+
+#define spin_unlock(lock) \
+do { \
+        _raw_spin_unlock(lock); \
+        preempt_enable(); \
+} while (0)
+
 #define write_unlock(lock) \
 do { \
@@ -124,8 +143,11 @@ do { \
         preempt_enable(); \
 } while(0)
 
-#define write_trylock(lock)	({preempt_disable();_raw_write_trylock(lock) ? \
-				1 : ({preempt_enable(); 0;});})
+#define read_unlock(lock) \
+do { \
+        _raw_read_unlock(lock); \
+        preempt_enable(); \
+} while(0)
 
 #define spin_lock_irqsave(lock, flags) \
 do { \
...
@@ -491,6 +491,10 @@ EXPORT_SYMBOL(do_settimeofday);
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 EXPORT_SYMBOL(__might_sleep);
 #endif
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
+EXPORT_SYMBOL(__preempt_spin_lock);
+EXPORT_SYMBOL(__preempt_write_lock);
+#endif
 #if !defined(__ia64__)
 EXPORT_SYMBOL(loops_per_jiffy);
 #endif
...
@@ -2278,3 +2278,50 @@ void __might_sleep(char *file, int line)
 #endif
 }
 #endif
+
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
+/*
+ * This could be a long-held lock. If another CPU holds it for a long time,
+ * and that CPU is not asked to reschedule then *this* CPU will spin on the
+ * lock for a long time, even if *this* CPU is asked to reschedule.
+ *
+ * So what we do here, in the slow (contended) path is to spin on the lock by
+ * hand while permitting preemption.
+ *
+ * Called inside preempt_disable().
+ */
+void __preempt_spin_lock(spinlock_t *lock)
+{
+        if (preempt_count() > 1) {
+                _raw_spin_lock(lock);
+                return;
+        }
+
+        while (!_raw_spin_trylock(lock)) {
+                if (need_resched()) {
+                        preempt_enable_no_resched();
+                        __cond_resched();
+                        preempt_disable();
+                }
+                cpu_relax();
+        }
+}
+
+void __preempt_write_lock(rwlock_t *lock)
+{
+        if (preempt_count() > 1) {
+                _raw_write_lock(lock);
+                return;
+        }
+
+        while (!_raw_write_trylock(lock)) {
+                if (need_resched()) {
+                        preempt_enable_no_resched();
+                        __cond_resched();
+                        preempt_disable();
+                }
+                cpu_relax();
+        }
+}
+#endif