Commit ec332cd3 authored by Robert Love, committed by Linus Torvalds

[PATCH] Re: [PATCH] Preemptible Kernel for 2.5

On Sat, 2002-02-09 at 01:43, Linus Torvalds wrote:

> That will clean up all your issues with header file ordering.

You are right, it did.  I removed all the sched.h dependencies and this
reduced the size of the patch greatly.  I now use current_thread_info()
and none of the header or include hackery from before.  I've tested this
both with and without preemption enabled, with success.

I appreciate your help with this.

Again, this is a minimal i386-only patch.  I have other arches,
documentation, etc.  Patch against 2.5.4-pre5.  Enjoy,

	Robert Love
parent d7b65475
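
For anyone skimming the diff below: the whole mechanism rests on a per-thread preempt_count (added to struct thread_info) that spin_lock()/spin_unlock() and the new preempt_disable()/preempt_enable() macros increment and decrement; the kernel only allows an involuntary context switch when that count has dropped back to zero and TIF_NEED_RESCHED is set. The following is a minimal, stand-alone user-space sketch of that counting discipline, not code from the patch; the fake_* names are illustrative only.

/*
 * Minimal user-space sketch of the preempt_count discipline this patch
 * introduces.  The fake_* names are illustrative; in the real patch the
 * count lives in struct thread_info and preempt_schedule() re-enters the
 * kernel's schedule().
 */
#include <stdio.h>

static int preempt_count;	/* 0 => preemptable, >0 => inside a critical region */
static int need_resched;	/* set asynchronously, e.g. by an interrupt         */

static void fake_preempt_schedule(void)
{
	printf("rescheduling\n");
	need_resched = 0;
}

static void fake_preempt_disable(void)
{
	++preempt_count;	/* e.g. taken implicitly by spin_lock() */
}

static void fake_preempt_enable(void)
{
	/* only reschedule once every nested critical section has been left */
	if (--preempt_count == 0 && need_resched)
		fake_preempt_schedule();
}

int main(void)
{
	fake_preempt_disable();		/* outer critical section              */
	fake_preempt_disable();		/* nesting is allowed                  */
	need_resched = 1;		/* a wakeup arrives mid-critical-section */
	fake_preempt_enable();		/* count 2 -> 1: still unsafe, no switch */
	fake_preempt_enable();		/* count 1 -> 0: now we reschedule       */
	return 0;
}

In the patch itself, the resume_kernel path in entry.S and preempt_schedule() in kernel/sched.c play the roles that fake_preempt_enable() and fake_preempt_schedule() play in this sketch.
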
@@ -25,6 +25,16 @@ CONFIG_SMP
   If you don't know what to do here, say N.
 
+CONFIG_PREEMPT
+  This option reduces the latency of the kernel when reacting to
+  real-time or interactive events by allowing a low priority process to
+  be preempted even if it is in kernel mode executing a system call.
+  This allows applications to run more reliably even when the system is
+  under load.
+
+  Say Y here if you are building a kernel for a desktop, embedded
+  or real-time system.  Say N if you are unsure.
+
 CONFIG_X86
   This is Linux's home port.  Linux was originally native to the Intel
   386, and runs on all the later x86 processors including the Intel
......
@@ -167,6 +167,7 @@ fi
 bool 'Math emulation' CONFIG_MATH_EMULATION
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
+bool 'Preemptible Kernel' CONFIG_PREEMPT
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
    dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
@@ -180,9 +181,12 @@ else
    bool 'Multiquad NUMA system' CONFIG_MULTIQUAD
 fi
 
-if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
-   define_bool CONFIG_HAVE_DEC_LOCK y
+if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then
+   if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then
+      define_bool CONFIG_HAVE_DEC_LOCK y
+   fi
 fi
 
 endmenu
 
 mainmenu_option next_comment
......
@@ -69,6 +69,37 @@ IF_MASK		= 0x00000200
 NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
+/* These are offsets into the irq_stat structure
+ * There is one per cpu and it is aligned to 32
+ * byte boundary (we put that here as a shift count)
+ */
+irq_array_shift			= CONFIG_X86_L1_CACHE_SHIFT
+
+irq_stat_local_irq_count	= 4
+irq_stat_local_bh_count		= 8
+
+#ifdef CONFIG_SMP
+#define GET_CPU_INDX	movl TI_CPU(%ebx),%eax;  \
+			shll $irq_array_shift,%eax
+#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx); \
+			     GET_CPU_INDX
+#define CPU_INDX (,%eax)
+#else
+#define GET_CPU_INDX
+#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx)
+#define CPU_INDX
+#endif
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop	cli
+#define init_ret_intr \
+	cli; \
+	decl TI_PRE_COUNT(%ebx);
+#else
+#define preempt_stop
+#define init_ret_intr
+#define resume_kernel	restore_all
+#endif
+
 #define SAVE_ALL \
 	cld; \
 	pushl %es; \
@@ -176,11 +207,12 @@ ENTRY(ret_from_fork)
 	ALIGN
 ENTRY(ret_from_intr)
 	GET_THREAD_INFO(%ebx)
+	init_ret_intr
 ret_from_exception:
 	movl EFLAGS(%esp),%eax		# mix EFLAGS and CS
 	movb CS(%esp),%al
 	testl $(VM_MASK | 3),%eax
-	jz restore_all			# returning to kernel-space or vm86-space
+	jz resume_kernel		# returning to kernel or vm86-space
 ENTRY(resume_userspace)
 	cli				# make sure we don't miss an interrupt setting need_resched
 					# or sigpending between sampling and the iret
@@ -189,6 +221,22 @@ ENTRY(resume_userspace)
 	jne work_pending
 	jmp restore_all
 
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+	cmpl $0,TI_PRE_COUNT(%ebx)
+	jnz restore_all
+	movl TI_FLAGS(%ebx),%ecx
+	testb $_TIF_NEED_RESCHED,%cl
+	jz restore_all
+	movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
+	addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
+	jnz restore_all
+	incl TI_PRE_COUNT(%ebx)
+	sti
+	call SYMBOL_NAME(preempt_schedule)
+	jmp ret_from_intr
+#endif
+
 	# system call handler stub
 	ALIGN
 ENTRY(system_call)
@@ -302,6 +350,7 @@ error_code:
 	GET_THREAD_INFO(%ebx)
 	call *%edi
 	addl $8,%esp
+	preempt_stop
 	jmp ret_from_exception
 
 ENTRY(coprocessor_error)
@@ -321,12 +370,14 @@ ENTRY(device_not_available)
 	movl %cr0,%eax
 	testl $0x4,%eax			# EM (math emulation bit)
 	jne device_not_available_emulate
+	preempt_stop
 	call SYMBOL_NAME(math_state_restore)
 	jmp ret_from_exception
 device_not_available_emulate:
 	pushl $0			# temporary storage for ORIG_EIP
 	call SYMBOL_NAME(math_emulate)
 	addl $4,%esp
+	preempt_stop
 	jmp ret_from_exception
 
 ENTRY(debug)
......
@@ -10,6 +10,7 @@
 
 #include <linux/config.h>
 #include <linux/sched.h>
+#include <linux/spinlock.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/math_emu.h>
@@ -63,6 +64,7 @@ void save_init_fpu( struct task_struct *tsk )
 
 void kernel_fpu_begin(void)
 {
+	preempt_disable();
 	if (test_thread_flag(TIF_USEDFPU)) {
 		__save_init_fpu(current);
 		return;
......
@@ -497,7 +497,7 @@ void smp_migrate_task(int cpu, task_t *p)
 	/*
 	 * The target CPU will unlock the migration spinlock:
 	 */
-	spin_lock(&migration_lock);
+	_raw_spin_lock(&migration_lock);
 	new_task = p;
 	send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR);
 }
@@ -511,7 +511,7 @@ asmlinkage void smp_task_migration_interrupt(void)
 	ack_APIC_irq();
 	p = new_task;
-	spin_unlock(&migration_lock);
+	_raw_spin_unlock(&migration_lock);
 	sched_task_migrated(p);
 }
 
 /*
......
@@ -710,6 +710,8 @@ asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
  *
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
  * Don't touch unless you *really* know how it works.
+ *
+ * Must be called with kernel preemption disabled.
  */
 asmlinkage void math_state_restore(struct pt_regs regs)
 {
......
@@ -420,8 +420,8 @@ static int exec_mmap(void)
 		active_mm = current->active_mm;
 		current->mm = mm;
 		current->active_mm = mm;
-		task_unlock(current);
 		activate_mm(active_mm, mm);
+		task_unlock(current);
 		mm_release();
 		if (old_mm) {
 			if (active_mm != old_mm) BUG();
......
@@ -36,6 +36,8 @@ typedef struct {
 
 #define synchronize_irq()	barrier()
 
+#define release_irqlock(cpu)	do { } while (0)
+
 #else
 
 #include <asm/atomic.h>
......
@@ -88,6 +88,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
 	enum fixed_addresses idx;
 	unsigned long vaddr;
 
+	preempt_disable();
 	if (page < highmem_start_page)
 		return page_address(page);
 
@@ -109,8 +110,10 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
 	unsigned long vaddr = (unsigned long) kvaddr;
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
-	if (vaddr < FIXADDR_START) // FIXME
+	if (vaddr < FIXADDR_START) { // FIXME
+		preempt_enable();
 		return;
+	}
 
 	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		BUG();
@@ -122,6 +125,8 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
 	pte_clear(kmap_pte-idx);
 	__flush_tlb_one(vaddr);
 #endif
+
+	preempt_enable();
 }
 
 #endif /* __KERNEL__ */
......
@@ -96,6 +96,18 @@ extern char _stext, _etext;
 #define __STR(x) #x
 #define STR(x) __STR(x)
 
+#define GET_THREAD_INFO \
+	"movl $-8192, %ebx\n\t" \
+	"andl %esp, %ebx\n\t"
+
+#ifdef CONFIG_PREEMPT
+#define BUMP_LOCK_COUNT \
+	GET_THREAD_INFO \
+	"incl 16(%ebx)\n\t"
+#else
+#define BUMP_LOCK_COUNT
+#endif
+
 #define SAVE_ALL \
 	"cld\n\t" \
 	"pushl %es\n\t" \
@@ -109,7 +121,8 @@ extern char _stext, _etext;
 	"pushl %ebx\n\t" \
 	"movl $" STR(__KERNEL_DS) ",%edx\n\t" \
 	"movl %edx,%ds\n\t" \
-	"movl %edx,%es\n\t"
+	"movl %edx,%es\n\t" \
+	BUMP_LOCK_COUNT
 
 #define IRQ_NAME2(nr) nr##_interrupt(void)
 #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
......
@@ -12,6 +12,7 @@
 #define __ASM_I386_I387_H
 
 #include <linux/sched.h>
+#include <linux/spinlock.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
 #include <asm/user.h>
@@ -24,7 +25,7 @@ extern void save_init_fpu( struct task_struct *tsk );
 extern void restore_fpu( struct task_struct *tsk );
 
 extern void kernel_fpu_begin(void);
-#define kernel_fpu_end() stts()
+#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
 
 #define unlazy_fpu( tsk ) do { \
......
@@ -75,20 +75,26 @@ static inline pgd_t *get_pgd_fast(void)
 {
 	unsigned long *ret;
 
+	preempt_disable();
 	if ((ret = pgd_quicklist) != NULL) {
 		pgd_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		pgtable_cache_size--;
-	} else
+		preempt_enable();
+	} else {
+		preempt_enable();
 		ret = (unsigned long *)get_pgd_slow();
+	}
 	return (pgd_t *)ret;
 }
 
 static inline void free_pgd_fast(pgd_t *pgd)
 {
+	preempt_disable();
 	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
 	pgd_quicklist = (unsigned long *) pgd;
 	pgtable_cache_size++;
+	preempt_enable();
 }
 
 static inline void free_pgd_slow(pgd_t *pgd)
@@ -119,19 +125,23 @@ static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
 {
 	unsigned long *ret;
 
+	preempt_disable();
 	if ((ret = (unsigned long *)pte_quicklist) != NULL) {
 		pte_quicklist = (unsigned long *)(*ret);
 		ret[0] = ret[1];
 		pgtable_cache_size--;
 	}
+	preempt_enable();
 	return (pte_t *)ret;
 }
 
 static inline void pte_free_fast(pte_t *pte)
 {
+	preempt_disable();
 	*(unsigned long *)pte = (unsigned long) pte_quicklist;
 	pte_quicklist = (unsigned long *) pte;
 	pgtable_cache_size++;
+	preempt_enable();
 }
 
 static __inline__ void pte_free_slow(pte_t *pte)
......
@@ -10,7 +10,15 @@
 
 extern spinlock_t kernel_flag;
 
+#ifdef CONFIG_SMP
 #define kernel_locked()		spin_is_locked(&kernel_flag)
+#else
+#ifdef CONFIG_PREEMPT
+#define kernel_locked()		preempt_get_count()
+#else
+#define kernel_locked()		1
+#endif
+#endif
 
 /*
  * Release global kernel lock and global interrupt lock
@@ -43,6 +51,11 @@ do { \
  */
 static __inline__ void lock_kernel(void)
 {
+#ifdef CONFIG_PREEMPT
+	if (current->lock_depth == -1)
+		spin_lock(&kernel_flag);
+	++current->lock_depth;
+#else
 #if 1
 	if (!++current->lock_depth)
 		spin_lock(&kernel_flag);
@@ -55,6 +68,7 @@ static __inline__ void lock_kernel(void)
 		:"=m" (__dummy_lock(&kernel_flag)),
 		 "=m" (current->lock_depth));
 #endif
+#endif
 }
 
 static __inline__ void unlock_kernel(void)
......
@@ -5,9 +5,9 @@
 #include <asm/hardirq.h>
 
 #define __cpu_bh_enable(cpu) \
-		do { barrier(); local_bh_count(cpu)--; } while (0)
+		do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0)
 #define cpu_bh_disable(cpu) \
-		do { local_bh_count(cpu)++; barrier(); } while (0)
+		do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0)
 
 #define local_bh_disable()	cpu_bh_disable(smp_processor_id())
 #define __local_bh_enable()	__cpu_bh_enable(smp_processor_id())
@@ -22,7 +22,7 @@
  * If you change the offsets in irq_stat then you have to
  * update this code as well.
  */
-#define local_bh_enable()						\
+#define _local_bh_enable()						\
 do {									\
 	unsigned int *ptr = &local_bh_count(smp_processor_id());	\
 									\
@@ -45,4 +45,6 @@ do { \
 		/* no registers clobbered */ );				\
 } while (0)
 
+#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0)
+
 #endif /* __ASM_SOFTIRQ_H */
@@ -77,7 +77,7 @@ typedef struct {
 	:"=m" (lock->lock) : : "memory"
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
 	if (lock->magic != SPINLOCK_MAGIC)
@@ -97,7 +97,7 @@ static inline void spin_unlock(spinlock_t *lock)
 	:"=q" (oldval), "=m" (lock->lock) \
 	:"0" (oldval) : "memory"
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
 {
 	char oldval = 1;
 #if SPINLOCK_DEBUG
@@ -113,7 +113,7 @@ static inline void spin_unlock(spinlock_t *lock)
 #endif
 
-static inline int spin_trylock(spinlock_t *lock)
+static inline int _raw_spin_trylock(spinlock_t *lock)
 {
 	char oldval;
 	__asm__ __volatile__(
@@ -123,7 +123,7 @@ static inline int spin_trylock(spinlock_t *lock)
 	return oldval > 0;
 }
 
-static inline void spin_lock(spinlock_t *lock)
+static inline void _raw_spin_lock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
 	__label__ here;
@@ -179,7 +179,7 @@ typedef struct {
  */
 /* the spinlock helpers are in arch/i386/kernel/semaphore.c */
 
-static inline void read_lock(rwlock_t *rw)
+static inline void _raw_read_lock(rwlock_t *rw)
 {
 #if SPINLOCK_DEBUG
 	if (rw->magic != RWLOCK_MAGIC)
@@ -188,7 +188,7 @@ static inline void read_lock(rwlock_t *rw)
 	__build_read_lock(rw, "__read_lock_failed");
 }
 
-static inline void write_lock(rwlock_t *rw)
+static inline void _raw_write_lock(rwlock_t *rw)
 {
 #if SPINLOCK_DEBUG
 	if (rw->magic != RWLOCK_MAGIC)
@@ -197,10 +197,10 @@ static inline void write_lock(rwlock_t *rw)
 	__build_write_lock(rw, "__write_lock_failed");
 }
 
-#define read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+#define _raw_read_unlock(rw)	asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
 
-static inline int write_trylock(rwlock_t *lock)
+static inline int _raw_write_trylock(rwlock_t *lock)
 {
 	atomic_t *count = (atomic_t *)lock;
 	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
......
@@ -25,6 +25,7 @@ struct thread_info {
 	struct exec_domain	*exec_domain;	/* execution domain */
 	__u32			flags;		/* low level flags */
 	__u32			cpu;		/* current CPU */
+	__s32			preempt_count;	/* 0 => preemptable, <0 => BUG */
 
 	mm_segment_t		addr_limit;	/* thread address space:
 						   0-0xBFFFFFFF for user-thead
@@ -41,7 +42,8 @@ struct thread_info {
 #define TI_EXEC_DOMAIN	0x00000004
 #define TI_FLAGS	0x00000008
 #define TI_CPU		0x0000000C
-#define TI_ADDR_LIMIT	0x00000010
+#define TI_PRE_COUNT	0x00000010
+#define TI_ADDR_LIMIT	0x00000014
 
 #endif
......
@@ -171,11 +171,11 @@ static inline void br_write_unlock (enum brlock_indices idx)
 }
 
 #else
-# define br_read_lock(idx)	((void)(idx))
-# define br_read_unlock(idx)	((void)(idx))
-# define br_write_lock(idx)	((void)(idx))
-# define br_write_unlock(idx)	((void)(idx))
-#endif
+# define br_read_lock(idx)	({ (void)(idx); preempt_disable(); })
+# define br_read_unlock(idx)	({ (void)(idx); preempt_enable(); })
+# define br_write_lock(idx)	({ (void)(idx); preempt_disable(); })
+# define br_write_unlock(idx)	({ (void)(idx); preempt_enable(); })
+#endif	/* CONFIG_SMP */
 
 /*
  * Now enumerate all of the possible sw/hw IRQ protected
......
@@ -91,6 +91,7 @@ extern unsigned long nr_running(void);
 #define TASK_UNINTERRUPTIBLE	2
 #define TASK_ZOMBIE		4
 #define TASK_STOPPED		8
+#define PREEMPT_ACTIVE		0x4000000
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
......
@@ -81,7 +81,9 @@ extern volatile int smp_msg_id;
 #define smp_processor_id()			0
 #define hard_smp_processor_id()			0
 #define smp_threads_ready			1
+#ifndef CONFIG_PREEMPT
 #define kernel_lock()
+#endif
 #define cpu_logical_map(cpu)			0
 #define cpu_number_map(cpu)			0
 #define smp_call_function(func,info,retry,wait)	({ 0; })
......
@@ -3,7 +3,7 @@
 
 #include <linux/config.h>
 
-#ifndef CONFIG_SMP
+#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
 
 #define lock_kernel()				do { } while(0)
 #define unlock_kernel()				do { } while(0)
......
@@ -2,6 +2,10 @@
 #define __LINUX_SPINLOCK_H
 
 #include <linux/config.h>
+#include <linux/linkage.h>
+#include <linux/compiler.h>
+#include <linux/thread_info.h>
+#include <linux/kernel.h>
 
 /*
  * These are the generic versions of the spinlocks and read-write
@@ -62,8 +66,10 @@
 
 #if (DEBUG_SPINLOCKS < 1)
 
+#ifndef CONFIG_PREEMPT
 #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
 #define ATOMIC_DEC_AND_LOCK
+#endif
 
 /*
  * Your basic spinlocks, allowing only a single CPU anywhere
@@ -79,11 +85,11 @@
 #endif
 
 #define spin_lock_init(lock)	do { } while(0)
-#define spin_lock(lock)		(void)(lock) /* Not "unused variable". */
+#define _raw_spin_lock(lock)	(void)(lock) /* Not "unused variable". */
 #define spin_is_locked(lock)	(0)
-#define spin_trylock(lock)	({1; })
+#define _raw_spin_trylock(lock)	({1; })
 #define spin_unlock_wait(lock)	do { } while(0)
-#define spin_unlock(lock)	do { } while(0)
+#define _raw_spin_unlock(lock)	do { } while(0)
 
 #elif (DEBUG_SPINLOCKS < 2)
@@ -142,13 +148,79 @@ typedef struct {
 #endif
 
 #define rwlock_init(lock)	do { } while(0)
-#define read_lock(lock)		(void)(lock) /* Not "unused variable". */
-#define read_unlock(lock)	do { } while(0)
-#define write_lock(lock)	(void)(lock) /* Not "unused variable". */
-#define write_unlock(lock)	do { } while(0)
+#define _raw_read_lock(lock)	(void)(lock) /* Not "unused variable". */
+#define _raw_read_unlock(lock)	do { } while(0)
+#define _raw_write_lock(lock)	(void)(lock) /* Not "unused variable". */
+#define _raw_write_unlock(lock)	do { } while(0)
 
 #endif /* !SMP */
 
+#ifdef CONFIG_PREEMPT
+
+asmlinkage void preempt_schedule(void);
+
+#define preempt_get_count()	(current_thread_info()->preempt_count)
+
+#define preempt_disable() \
+do { \
+	++current_thread_info()->preempt_count; \
+	barrier(); \
+} while (0)
+
+#define preempt_enable_no_resched() \
+do { \
+	--current_thread_info()->preempt_count; \
+	barrier(); \
+} while (0)
+
+#define preempt_enable() \
+do { \
+	--current_thread_info()->preempt_count; \
+	barrier(); \
+	if (unlikely(!(current_thread_info()->preempt_count) && \
+		test_thread_flag(TIF_NEED_RESCHED))) \
+			preempt_schedule(); \
+} while (0)
+
+#define spin_lock(lock) \
+do { \
+	preempt_disable(); \
+	_raw_spin_lock(lock); \
+} while(0)
+
+#define spin_trylock(lock)	({preempt_disable(); _raw_spin_trylock(lock) ? \
+				1 : ({preempt_enable(); 0;});})
+#define spin_unlock(lock) \
+do { \
+	_raw_spin_unlock(lock); \
+	preempt_enable(); \
+} while (0)
+
+#define read_lock(lock)		({preempt_disable(); _raw_read_lock(lock);})
+#define read_unlock(lock)	({_raw_read_unlock(lock); preempt_enable();})
+#define write_lock(lock)	({preempt_disable(); _raw_write_lock(lock);})
+#define write_unlock(lock)	({_raw_write_unlock(lock); preempt_enable();})
+#define write_trylock(lock)	({preempt_disable();_raw_write_trylock(lock) ? \
+				1 : ({preempt_enable(); 0;});})
+
+#else
+
+#define preempt_get_count()		do { } while (0)
+#define preempt_disable()		do { } while (0)
+#define preempt_enable_no_resched()	do { } while (0)
+#define preempt_enable()		do { } while (0)
+
+#define spin_lock(lock)		_raw_spin_lock(lock)
+#define spin_trylock(lock)	_raw_spin_trylock(lock)
+#define spin_unlock(lock)	_raw_spin_unlock(lock)
+
+#define read_lock(lock)		_raw_read_lock(lock)
+#define read_unlock(lock)	_raw_read_unlock(lock)
+#define write_lock(lock)	_raw_write_lock(lock)
+#define write_unlock(lock)	_raw_write_unlock(lock)
+#define write_trylock(lock)	_raw_write_trylock(lock)
+#endif
+
 /* "lock on reference count zero" */
 #ifndef ATOMIC_DEC_AND_LOCK
 #include <asm/atomic.h>
......
@@ -390,8 +390,8 @@ static inline void __exit_mm(struct task_struct * tsk)
 		/* more a memory barrier than a real lock */
 		task_lock(tsk);
 		tsk->mm = NULL;
-		task_unlock(tsk);
 		enter_lazy_tlb(mm, current, smp_processor_id());
+		task_unlock(tsk);
 		mmput(mm);
 	}
 }
......
@@ -650,6 +650,13 @@ int do_fork(unsigned long clone_flags, unsigned long stack_start,
 	if (p->binfmt && p->binfmt->module)
 		__MOD_INC_USE_COUNT(p->binfmt->module);
 
+#ifdef CONFIG_PREEMPT
+	/*
+	 * schedule_tail drops this_rq()->lock so we compensate with a count
+	 * of 1.  Also, we want to start with kernel preemption disabled.
+	 */
+	p->thread_info->preempt_count = 1;
+#endif
 	p->did_exec = 0;
 	p->swappable = 0;
 	p->state = TASK_UNINTERRUPTIBLE;
......
@@ -445,6 +445,9 @@ EXPORT_SYMBOL(sleep_on_timeout);
 EXPORT_SYMBOL(interruptible_sleep_on);
 EXPORT_SYMBOL(interruptible_sleep_on_timeout);
 EXPORT_SYMBOL(schedule);
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(preempt_schedule);
+#endif
 EXPORT_SYMBOL(schedule_timeout);
 EXPORT_SYMBOL(sys_sched_yield);
 EXPORT_SYMBOL(set_user_nice);
......
@@ -61,10 +61,12 @@ static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
 	struct runqueue *__rq;
 
 repeat_lock_task:
+	preempt_disable();
 	__rq = task_rq(p);
 	spin_lock_irqsave(&__rq->lock, *flags);
 	if (unlikely(__rq != task_rq(p))) {
 		spin_unlock_irqrestore(&__rq->lock, *flags);
+		preempt_enable();
 		goto repeat_lock_task;
 	}
 	return __rq;
@@ -73,6 +75,7 @@ static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
 static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
 {
 	spin_unlock_irqrestore(&rq->lock, *flags);
+	preempt_enable();
 }
 
 /*
  * Adding/removing a task to/from a priority array:
@@ -195,6 +198,7 @@ static inline void resched_task(task_t *p)
 #ifdef CONFIG_SMP
 	int need_resched, nrpolling;
 
+	preempt_disable();
 	/* minimise the chance of sending an interrupt to poll_idle() */
 	nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
 	need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
@@ -202,6 +206,7 @@ static inline void resched_task(task_t *p)
 	if (!need_resched && !nrpolling && (p->thread_info->cpu != smp_processor_id()))
 		smp_send_reschedule(p->thread_info->cpu);
+	preempt_enable();
 #else
 	set_tsk_need_resched(p);
 #endif
@@ -219,6 +224,7 @@ void wait_task_inactive(task_t * p)
 	runqueue_t *rq;
 
 repeat:
+	preempt_disable();
 	rq = task_rq(p);
 	while (unlikely(rq->curr == p)) {
 		cpu_relax();
@@ -227,9 +233,11 @@ void wait_task_inactive(task_t * p)
 	rq = lock_task_rq(p, &flags);
 	if (unlikely(rq->curr == p)) {
 		unlock_task_rq(rq, &flags);
+		preempt_enable();
 		goto repeat;
 	}
 	unlock_task_rq(rq, &flags);
+	preempt_enable();
 }
 
 /*
@@ -295,7 +303,10 @@ int wake_up_process(task_t * p)
 
 void wake_up_forked_process(task_t * p)
 {
-	runqueue_t *rq = this_rq();
+	runqueue_t *rq;
+
+	preempt_disable();
+	rq = this_rq();
 
 	p->state = TASK_RUNNING;
 	if (!rt_task(p)) {
@@ -308,6 +319,7 @@ void wake_up_forked_process(task_t * p)
 	p->thread_info->cpu = smp_processor_id();
 	activate_task(p, rq);
 	spin_unlock_irq(&rq->lock);
+	preempt_enable();
 }
 
 asmlinkage void schedule_tail(task_t *prev)
@@ -635,17 +647,31 @@ void scheduling_functions_start_here(void) { }
  */
 asmlinkage void schedule(void)
 {
-	task_t *prev = current, *next;
-	runqueue_t *rq = this_rq();
+	task_t *prev, *next;
+	runqueue_t *rq;
 	prio_array_t *array;
 	list_t *queue;
 	int idx;
 
 	if (unlikely(in_interrupt()))
 		BUG();
+
+	preempt_disable();
+	prev = current;
+	rq = this_rq();
+
 	release_kernel_lock(prev, smp_processor_id());
 	spin_lock_irq(&rq->lock);
 
+#ifdef CONFIG_PREEMPT
+	/*
+	 * if entering from preempt_schedule, off a kernel preemption,
+	 * go straight to picking the next task.
+	 */
+	if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
+		goto pick_next_task;
+#endif
+
 	switch (prev->state) {
 	case TASK_RUNNING:
 		prev->sleep_timestamp = jiffies;
@@ -659,7 +685,7 @@ asmlinkage void schedule(void)
 	default:
 		deactivate_task(prev, rq);
 	}
-#if CONFIG_SMP
+#if CONFIG_SMP || CONFIG_PREEMPT
 pick_next_task:
 #endif
 	if (unlikely(!rq->nr_running)) {
@@ -707,9 +733,25 @@ asmlinkage void schedule(void)
 	spin_unlock_irq(&rq->lock);
 
 	reacquire_kernel_lock(current);
+	preempt_enable_no_resched();
 	return;
 }
 
+#ifdef CONFIG_PREEMPT
+/*
+ * this is the entry point to schedule() from in-kernel preemption.
+ */
+asmlinkage void preempt_schedule(void)
+{
+	do {
+		current_thread_info()->preempt_count += PREEMPT_ACTIVE;
+		schedule();
+		current_thread_info()->preempt_count -= PREEMPT_ACTIVE;
+		barrier();
+	} while (test_thread_flag(TIF_NEED_RESCHED));
+}
+#endif /* CONFIG_PREEMPT */
 /*
  * The core wakeup function.  Non-exclusive wakeups (nr_exclusive == 0) just
  * wake everything up.  If it's an exclusive wakeup (nr_exclusive == small +ve
@@ -1105,9 +1147,12 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
 
 asmlinkage long sys_sched_yield(void)
 {
-	runqueue_t *rq = this_rq();
+	runqueue_t *rq;
 	prio_array_t *array;
 
+	preempt_disable();
+	rq = this_rq();
+
 	/*
 	 * Decrease the yielding task's priority by one, to avoid
 	 * livelocks.  This priority loss is temporary, it's recovered
@@ -1134,6 +1179,7 @@ asmlinkage long sys_sched_yield(void)
 		__set_bit(current->prio, array->bitmap);
 	}
 	spin_unlock(&rq->lock);
+	preempt_enable_no_resched();
 	schedule();
......
@@ -132,7 +132,7 @@ static struct file_operations socket_file_ops = {
 
 static struct net_proto_family *net_families[NPROTO];
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 static atomic_t net_family_lockct = ATOMIC_INIT(0);
 static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
......