Commit b7cd5844 authored by Linus Torvalds

Import 2.1.133pre4

parent 9390bd47
......@@ -38,13 +38,6 @@ unsigned int local_irq_count[NR_CPUS];
unsigned int local_bh_count[NR_CPUS];
unsigned long hardirq_no[NR_CPUS];
#define RTC_IRQ 8
#ifdef CONFIG_RTC
#define TIMER_IRQ 0 /* timer is the pit */
#else
#define TIMER_IRQ RTC_IRQ /* the timer is, in fact, the rtc */
#endif
#if NR_IRQS > 64
# error Unable to handle more than 64 irq levels.
#endif
......
......@@ -21,3 +21,11 @@ extern void isa_device_interrupt(unsigned long vector, struct pt_regs * regs);
extern void srm_device_interrupt(unsigned long vector, struct pt_regs * regs);
extern void handle_irq(int irq, int ack, struct pt_regs * regs);
#define RTC_IRQ 8
#ifdef CONFIG_RTC
#define TIMER_IRQ 0 /* timer is the pit */
#else
#define TIMER_IRQ RTC_IRQ /* timer is the rtc */
#endif
......@@ -92,6 +92,12 @@ ruffian_device_interrupt(unsigned long vector, struct pt_regs *regs)
i = ffz(~pld);
pld &= pld - 1; /* clear least bit set */
if (i == 7) { /* if ISA int */
/* Ruffian does not have the RTC connected to
the CPU timer interrupt. Instead, it uses the
PIT connected to IRQ 0. So we must detect that
and route that specifically to where we expected
to find the timer interrupt come in. */
/* Copy this code from isa_device_interrupt because
we need to hook into int 0 for the timer. I
refuse to soil device_interrupt with ifdefs. */
......@@ -107,7 +113,7 @@ ruffian_device_interrupt(unsigned long vector, struct pt_regs *regs)
if (j == 7 && !(inb(0x20) & 0x80)) {
/* It's only a passive release... */
} else if (j == 0) {
handle_irq(8, -1, regs); /* fake it */
handle_irq(TIMER_IRQ, -1, regs);
ruffian_ack_irq(0);
} else {
handle_irq(j, j, regs);
......
......@@ -35,12 +35,7 @@
#include <linux/timex.h>
#include "proto.h"
#ifdef CONFIG_RTC
#define TIMER_IRQ 0 /* using pit for timer */
#else
#define TIMER_IRQ 8 /* using rtc for timer */
#endif
#include "irq.h"
static int set_rtc_mmss(unsigned long);
......
......@@ -153,10 +153,10 @@ ENTRY(lcall7)
ALIGN
.globl ret_from_fork
ret_from_fork:
GET_CURRENT(%ebx)
#ifdef __SMP__
lock ; btrl $0, SYMBOL_NAME(scheduler_lock)
call SYMBOL_NAME(schedule_tail)
#endif /* __SMP__ */
GET_CURRENT(%ebx)
jmp ret_from_sys_call
/*
......
......@@ -83,7 +83,6 @@ EXPORT_SYMBOL(__global_cli);
EXPORT_SYMBOL(__global_sti);
EXPORT_SYMBOL(__global_save_flags);
EXPORT_SYMBOL(__global_restore_flags);
EXPORT_SYMBOL(smp_message_pass);
EXPORT_SYMBOL(mtrr_hook);
#endif
......
......@@ -953,7 +953,7 @@ static inline void self_IPI(unsigned int irq)
if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
desc->status = status | IRQ_REPLAY;
send_IPI(APIC_DEST_SELF, IO_APIC_VECTOR(irq));
send_IPI_self(IO_APIC_VECTOR(irq));
}
}
......
......@@ -189,7 +189,7 @@ BUILD_IRQ(60) BUILD_IRQ(61) BUILD_IRQ(62) BUILD_IRQ(63)
/*
* The following vectors are part of the Linux architecture, there
* is no hardware IRQ pin equivalent for them, they are triggered
* through the ICC by us (IPIs), via smp_message_pass():
* through the ICC by us (IPIs)
*/
BUILD_SMP_INTERRUPT(reschedule_interrupt)
BUILD_SMP_INTERRUPT(invalidate_interrupt)
......@@ -297,7 +297,7 @@ int get_irq_list(char *buf)
}
p += sprintf(p, "NMI: %10u\n", atomic_read(&nmi_counter));
#ifdef __SMP__
p += sprintf(p, "IPI: %10lu\n", ipi_count);
p += sprintf(p, "ERR: %10lu\n", ipi_count);
#endif
return p - buf;
}
......@@ -989,22 +989,22 @@ __initfunc(void init_IRQ(void))
*/
/* IPI for rescheduling */
set_intr_gate(0x30, reschedule_interrupt);
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for invalidation */
set_intr_gate(0x31, invalidate_interrupt);
set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
/* IPI for CPU halt */
set_intr_gate(0x40, stop_cpu_interrupt);
set_intr_gate(STOP_CPU_VECTOR, stop_cpu_interrupt);
/* self generated IPI for local APIC timer */
set_intr_gate(0x41, apic_timer_interrupt);
set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
/* IPI for MTRR control */
set_intr_gate(0x50, mtrr_interrupt);
set_intr_gate(MTRR_CHANGE_VECTOR, mtrr_interrupt);
/* IPI vector for APIC spurious interrupts */
set_intr_gate(0xff, spurious_interrupt);
set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
#endif
request_region(0x20,0x20,"pic1");
request_region(0xa0,0x20,"pic2");
......
......@@ -40,8 +40,29 @@ typedef struct {
unsigned int depth; /* Disable depth for nested irq disables */
} irq_desc_t;
/*
* Special IRQ vectors used by the SMP architecture:
*
* (some of the following vectors are 'rare', they might be merged
* into a single vector to save vector space. TLB, reschedule and
* local APIC vectors are performance-critical.)
*/
#define RESCHEDULE_VECTOR 0x30
#define INVALIDATE_TLB_VECTOR 0x31
#define STOP_CPU_VECTOR 0x40
#define LOCAL_TIMER_VECTOR 0x41
#define MTRR_CHANGE_VECTOR 0x50
/*
* First vector available to drivers: (vectors 0x51-0xfe)
*/
#define IRQ0_TRAP_VECTOR 0x51
/*
* This IRQ should never happen, but we print a message nevertheless.
*/
#define SPURIOUS_APIC_VECTOR 0xff
extern irq_desc_t irq_desc[NR_IRQS];
extern int irq_vector[NR_IRQS];
#define IO_APIC_VECTOR(irq) irq_vector[irq]
......@@ -56,17 +77,18 @@ extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
* Interrupt entry/exit code at both C and assembly level
*/
void mask_irq(unsigned int irq);
void unmask_irq(unsigned int irq);
void disable_8259A_irq(unsigned int irq);
int i8259A_irq_pending(unsigned int irq);
void ack_APIC_irq(void);
void setup_IO_APIC(void);
int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
void make_8259A_irq(unsigned int irq);
void send_IPI(int dest, int vector);
void init_pic_mode(void);
void print_IO_APIC(void);
extern void mask_irq(unsigned int irq);
extern void unmask_irq(unsigned int irq);
extern void disable_8259A_irq(unsigned int irq);
extern int i8259A_irq_pending(unsigned int irq);
extern void ack_APIC_irq(void);
extern void setup_IO_APIC(void);
extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
extern void make_8259A_irq(unsigned int irq);
extern void FASTCALL(send_IPI_self(int vector));
extern void smp_send_mtrr(void);
extern void init_pic_mode(void);
extern void print_IO_APIC(void);
extern unsigned long long io_apic_irqs;
......
......@@ -164,6 +164,9 @@
#include <asm/bitops.h>
#include <asm/atomic.h>
#include <asm/hardirq.h>
#include "irq.h"
#define MTRR_VERSION "1.26 (19981001)"
#define TRUE 1
......@@ -612,7 +615,7 @@ static void do_all_cpus (void (*handler) (struct set_mtrr_context *ctxt,
/* Send a message to all other CPUs and wait for them to enter the
barrier */
atomic_set (&undone_count, smp_num_cpus - 1);
smp_message_pass (MSG_ALL_BUT_SELF, MSG_MTRR_CHANGE, 0, 0);
smp_send_mtrr();
/* Wait for it to be done */
timeout = jiffies + JIFFIE_TIMEOUT;
while ( (atomic_read (&undone_count) > 0) &&
......
......@@ -140,11 +140,18 @@ int cpu_idle(void *unused)
current->priority = 0;
current->counter = -100;
while(1) {
if (current_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched)
if (current_cpu_data.hlt_works_ok && !hlt_counter &&
!current->need_resched)
__asm__("hlt");
/*
* although we are an idle CPU, we do not want to
* get into the scheduler unnecessarily.
*/
if (current->need_resched) {
schedule();
check_pgt_cache();
}
}
}
#endif
......
This diff is collapsed.
......@@ -72,6 +72,8 @@ extern int setup_x86_irq(int, struct irqaction *);
unsigned long cpu_hz; /* Detected as we calibrate the TSC */
cycles_t cacheflush_time;
/* Number of usecs that the last interrupt was delayed */
static int delay_at_last_interrupt;
......@@ -96,7 +98,6 @@ static unsigned long do_fast_gettimeoffset(void)
:"=a" (eax), "=d" (edx));
/* .. relative to previous jiffy (32 bits is enough) */
edx = 0;
eax -= last_tsc_low; /* tsc_low delta */
/*
......@@ -110,11 +111,11 @@ static unsigned long do_fast_gettimeoffset(void)
__asm__("mull %2"
:"=a" (eax), "=d" (edx)
:"r" (fast_gettimeoffset_quotient),
"0" (eax), "1" (edx));
:"g" (fast_gettimeoffset_quotient),
"0" (eax));
/* our adjusted time offset in microseconds */
return edx + delay_at_last_interrupt;
return delay_at_last_interrupt + edx;
}
/* This function must be called with interrupts disabled
......@@ -240,17 +241,26 @@ void do_gettimeofday(struct timeval *tv)
{
extern volatile unsigned long lost_ticks;
unsigned long flags;
unsigned long usec, sec;
read_lock_irqsave(&xtime_lock, flags);
*tv = xtime;
tv->tv_usec += do_gettimeoffset();
if (lost_ticks)
tv->tv_usec += lost_ticks * (1000000/HZ);
usec = do_gettimeoffset();
{
unsigned long lost = lost_ticks;
if (lost)
usec += lost * (1000000 / HZ);
}
sec = xtime.tv_sec;
usec += xtime.tv_usec;
read_unlock_irqrestore(&xtime_lock, flags);
while (tv->tv_usec >= 1000000) {
tv->tv_usec -= 1000000;
tv->tv_sec++;
while (usec >= 1000000) {
usec -= 1000000;
sec++;
}
tv->tv_sec = sec;
tv->tv_usec = usec;
}
void do_settimeofday(struct timeval *tv)
......@@ -377,13 +387,6 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *reg
else
last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
}
#if 0
/* As we return to user mode fire off the other CPU schedulers.. this is
basically because we don't yet share IRQ's around. This message is
rigged to be safe on the 386 - basically it's a hack, so don't look
closely for now.. */
smp_message_pass(MSG_ALL_BUT_SELF, MSG_RESCHEDULE, 0L, 0);
#endif
#ifdef CONFIG_MCA
if( MCA_bus ) {
......@@ -639,5 +642,13 @@ __initfunc(void time_init(void))
printk("Detected %ld Hz processor.\n", cpu_hz);
}
}
/*
* Rough estimation for SMP scheduling, this is the number of
* cycles it takes for a fully memory-limited process to flush
* the SMP-local cache.
*/
cacheflush_time = cpu_hz/10000;
setup_x86_irq(0, &irq0);
}
......@@ -45,9 +45,13 @@ SECTIONS
. = ALIGN(4096);
__init_end = .;
. = ALIGN(32);
.data.cacheline_aligned : { *(.data.cacheline_aligned) }
. = ALIGN(4096);
.data.page_aligned : { *(.data.idt) }
__bss_start = .; /* BSS */
.bss : {
*(.bss)
......
......@@ -15,7 +15,7 @@ typedef struct { volatile int counter; } atomic_t;
typedef struct { int counter; } atomic_t;
#endif
#define ATOMIC_INIT(i) { (i) }
#define ATOMIC_INIT(i) ( (atomic_t) { (i) } )
#define atomic_read(v) ((v)->counter)
#define atomic_set(v,i) ((v)->counter = (i))
......
......@@ -458,7 +458,7 @@ __EXTERN_INLINE unsigned int apecs_inb(unsigned long addr)
__EXTERN_INLINE void apecs_outb(unsigned char b, unsigned long addr)
{
unsigned int w;
unsigned long w;
w = __kernel_insbl(b, addr & 3);
*(vuip) ((addr << 5) + APECS_IO + 0x00) = w;
......@@ -473,7 +473,7 @@ __EXTERN_INLINE unsigned int apecs_inw(unsigned long addr)
__EXTERN_INLINE void apecs_outw(unsigned short b, unsigned long addr)
{
unsigned int w;
unsigned long w;
w = __kernel_inswl(b, addr & 3);
*(vuip) ((addr << 5) + APECS_IO + 0x08) = w;
......
......@@ -326,7 +326,7 @@ __EXTERN_INLINE unsigned int cia_inb(unsigned long addr)
__EXTERN_INLINE void cia_outb(unsigned char b, unsigned long addr)
{
unsigned int w = __kernel_insbl(b, addr & 3);
unsigned long w = __kernel_insbl(b, addr & 3);
*(vuip) ((addr << 5) + CIA_IO + 0x00) = w;
wmb();
}
......@@ -340,7 +340,7 @@ __EXTERN_INLINE unsigned int cia_inw(unsigned long addr)
__EXTERN_INLINE void cia_outw(unsigned short b, unsigned long addr)
{
unsigned int w = __kernel_inswl(b, addr & 3);
unsigned long w = __kernel_inswl(b, addr & 3);
*(vuip) ((addr << 5) + CIA_IO + 0x08) = w;
wmb();
}
......
......@@ -262,7 +262,7 @@ __EXTERN_INLINE unsigned int lca_inb(unsigned long addr)
__EXTERN_INLINE void lca_outb(unsigned char b, unsigned long addr)
{
unsigned int w;
unsigned long w;
w = __kernel_insbl(b, addr & 3);
*(vuip) ((addr << 5) + LCA_IO + 0x00) = w;
......@@ -277,7 +277,7 @@ __EXTERN_INLINE unsigned int lca_inw(unsigned long addr)
__EXTERN_INLINE void lca_outw(unsigned short b, unsigned long addr)
{
unsigned int w;
unsigned long w;
w = __kernel_inswl(b, addr & 3);
*(vuip) ((addr << 5) + LCA_IO + 0x08) = w;
......@@ -340,7 +340,7 @@ __EXTERN_INLINE unsigned long lca_readq(unsigned long addr)
__EXTERN_INLINE void lca_writeb(unsigned char b, unsigned long addr)
{
unsigned long msb;
unsigned int w;
unsigned long w;
if (addr >= (1UL << 24)) {
msb = addr & 0xf8000000;
......@@ -354,7 +354,7 @@ __EXTERN_INLINE void lca_writeb(unsigned char b, unsigned long addr)
__EXTERN_INLINE void lca_writew(unsigned short b, unsigned long addr)
{
unsigned long msb;
unsigned int w;
unsigned long w;
if (addr >= (1UL << 24)) {
msb = addr & 0xf8000000;
......
......@@ -264,7 +264,7 @@ __EXTERN_INLINE void mcpcia_outb(unsigned char b, unsigned long in_addr)
{
unsigned long addr = in_addr & 0xffffffffUL;
unsigned long hose = (in_addr >> 32) & 3;
unsigned int w;
unsigned long w;
w = __kernel_insbl(b, addr & 3);
*(vuip) ((addr << 5) + MCPCIA_IO(hose) + 0x00) = w;
......@@ -283,7 +283,7 @@ __EXTERN_INLINE void mcpcia_outw(unsigned short b, unsigned long in_addr)
{
unsigned long addr = in_addr & 0xffffffffUL;
unsigned long hose = (in_addr >> 32) & 3;
unsigned int w;
unsigned long w;
w = __kernel_inswl(b, addr & 3);
*(vuip) ((addr << 5) + MCPCIA_IO(hose) + 0x08) = w;
......
......@@ -326,7 +326,7 @@ __EXTERN_INLINE unsigned int pyxis_inb(unsigned long addr)
__EXTERN_INLINE void pyxis_outb(unsigned char b, unsigned long addr)
{
unsigned int w;
unsigned long w;
w = __kernel_insbl(b, addr & 3);
*(vuip) ((addr << 5) + PYXIS_IO + 0x00) = w;
......@@ -341,7 +341,7 @@ __EXTERN_INLINE unsigned int pyxis_inw(unsigned long addr)
__EXTERN_INLINE void pyxis_outw(unsigned short b, unsigned long addr)
{
unsigned int w;
unsigned long w;
w = __kernel_inswl(b, addr & 3);
*(vuip) ((addr << 5) + PYXIS_IO + 0x08) = w;
......
......@@ -378,7 +378,7 @@ __EXTERN_INLINE unsigned int t2_inw(unsigned long addr)
__EXTERN_INLINE void t2_outw(unsigned short b, unsigned long addr)
{
unsigned int w;
unsigned long w;
w = __kernel_inswl(b, addr & 3);
*(vuip) ((addr << 5) + T2_IO + 0x08) = w;
......
......@@ -3,7 +3,6 @@
#include <linux/config.h>
#include <asm/system.h>
#include <asm/machvec.h>
/* We don't use IO slowdowns on the Alpha, but.. */
#define __SLOW_DOWN_IO do { } while (0)
......@@ -19,6 +18,7 @@
#endif
#ifdef __KERNEL__
#include <asm/machvec.h>
/*
* We try to avoid hae updates (thus the cache), but when we
......@@ -78,6 +78,7 @@ extern void _sethae (unsigned long addr); /* cached version */
* There are different chipsets to interface the Alpha CPUs to the world.
*/
#ifdef __KERNEL__
#ifdef CONFIG_ALPHA_GENERIC
/* In a generic kernel, we always go through the machine vector. */
......@@ -147,6 +148,7 @@ extern void _sethae (unsigned long addr); /* cached version */
#undef __WANT_IO_DEF
#endif /* GENERIC */
#endif /* __KERNEL__ */
/*
* The convention used for inb/outb etc. is that names starting with
......@@ -172,6 +174,7 @@ extern void _writew(unsigned short b, unsigned long addr);
extern void _writel(unsigned int b, unsigned long addr);
extern void _writeq(unsigned long b, unsigned long addr);
#ifdef __KERNEL__
/*
* The platform header files may define some of these macros to use
* the inlined versions where appropriate. These macros may also be
......@@ -216,6 +219,27 @@ extern void _writeq(unsigned long b, unsigned long addr);
# define outl_p outl
#endif
#else
/* Userspace declarations. */
extern unsigned int inb (unsigned long port);
extern unsigned int inw (unsigned long port);
extern unsigned int inl (unsigned long port);
extern void outb (unsigned char b,unsigned long port);
extern void outw (unsigned short w,unsigned long port);
extern void outl (unsigned int l,unsigned long port);
extern unsigned long readb(unsigned long addr);
extern unsigned long readw(unsigned long addr);
extern unsigned long readl(unsigned long addr);
extern void writeb(unsigned char b, unsigned long addr);
extern void writew(unsigned short b, unsigned long addr);
extern void writel(unsigned int b, unsigned long addr);
#endif /* __KERNEL__ */
#ifdef __KERNEL__
/*
* The "address" in IO memory space is not clearly either an integer or a
* pointer. We will accept both, thus the casts.
......@@ -257,8 +281,6 @@ static inline void iounmap(void *addr)
# define writeq(v,a) _writeq((v),(unsigned long)(a))
#endif
#ifdef __KERNEL__
/*
* String version of IO memory access ops:
*/
......
......@@ -117,6 +117,7 @@ extern inline void disable_bh(int nr)
{
bh_mask &= ~(1 << nr);
atomic_inc(&bh_mask_count[nr]);
synchronize_bh();
}
extern inline void enable_bh(int nr)
......
......@@ -11,4 +11,7 @@
#define __FINIT .previous
#define __INITDATA .section ".data.init",#alloc,#write
#define __cacheline_aligned __attribute__ \
((__section__ (".data.cacheline_aligned")))
#endif
......@@ -185,10 +185,6 @@ extern inline int cpu_logical_map(int cpu)
extern void smp_callin(void);
extern void smp_boot_cpus(void);
extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers) */
extern void smp_message_pass(int target, int msg, unsigned long data, int wait);
extern volatile unsigned long smp_proc_in_lock[NR_CPUS]; /* for computing process time */
extern volatile int smp_process_available;
/*
* APIC handlers: Note according to the Intel specification update
......@@ -237,9 +233,7 @@ extern __inline int hard_smp_processor_id(void)
* processes are run.
*/
#define PROC_CHANGE_PENALTY 10 /* Schedule penalty */
#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
#define SMP_FROM_INT 1
#define SMP_FROM_SYSCALL 2
#endif
#endif
torvalds@penguin.transmeta.com
\ No newline at end of file
......@@ -12,4 +12,22 @@
(1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
<< (SHIFT_SCALE-SHIFT_HZ)) / HZ)
/*
* Standard way to access the cycle counter on i586+ CPUs.
* Currently only used on SMP.
*/
typedef unsigned long long cycles_t;
extern cycles_t cacheflush_time;
static inline cycles_t get_cycles (void)
{
cycles_t value;
__asm__("rdtsc"
:"=a" (*(((int *)&value)+0)),
"=d" (*(((int *)&value)+1)));
return value;
}
#endif
......@@ -11,6 +11,7 @@ extern unsigned long event;
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/timex.h>
#include <asm/system.h>
#include <asm/semaphore.h>
......@@ -219,6 +220,7 @@ struct task_struct {
/* various fields */
long counter;
long priority;
cycles_t avg_slice;
/* SMP and runqueue state */
int has_cpu;
int processor;
......@@ -336,7 +338,7 @@ struct task_struct {
*/
#define INIT_TASK \
/* state etc */ { 0,0,0,KERNEL_DS,&default_exec_domain,0, \
/* counter */ DEF_PRIORITY,DEF_PRIORITY, \
/* counter */ DEF_PRIORITY,DEF_PRIORITY,0, \
/* SMP */ 0,0,0,-1, \
/* schedlink */ &init_task,&init_task, &init_task, &init_task, \
/* binfmt */ NULL, \
......
......@@ -11,11 +11,21 @@
#include <asm/smp.h>
/*
* main IPI interface, handles INIT, TLB flush, STOP, etc. (defined in asm header):
*
* extern void smp_message_pass(int target, int msg, unsigned long data, int wait);
* main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
* (defined in asm header):
*/
/*
* stops all CPUs but the current one:
*/
extern void smp_send_stop(void);
/*
* sends a 'reschedule' event to another CPU:
*/
extern void FASTCALL(smp_send_reschedule(int cpu));
/*
* Boot processor call to load the other CPU's
*/
......@@ -61,7 +71,6 @@ extern volatile int smp_msg_id;
#define smp_num_cpus 1
#define smp_processor_id() 0
#define hard_smp_processor_id() 0
#define smp_message_pass(t,m,d,w)
#define smp_threads_ready 1
#define kernel_lock()
#define cpu_logical_map(cpu) 0
......
......@@ -1177,6 +1177,7 @@ asmlinkage void __init start_kernel(void)
*/
smp_init();
kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
current->need_resched = 1;
cpu_idle(NULL);
}
......
......@@ -50,7 +50,7 @@ NORET_TYPE void panic(const char * fmt, ...)
unblank_console();
#ifdef __SMP__
smp_message_pass(MSG_ALL_BUT_SELF, MSG_STOP_CPU, 0, 0);
smp_send_stop();
#endif
if (panic_timeout > 0)
{
......
......@@ -12,6 +12,7 @@
* 1998-12-24 Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
* serialize accesses to xtime/lost_ticks).
* Copyright (C) 1998 Andrea Arcangeli
* 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
*/
/*
......@@ -96,47 +97,110 @@ struct kernel_stat kstat = { 0 };
void scheduling_functions_start_here(void) { }
static inline void reschedule_idle(struct task_struct * p)
#ifdef __SMP__
static void reschedule_idle_slow(struct task_struct * p)
{
/*
* (see reschedule_idle() for an explanation first ...)
*
* Pass #2
*
* We try to find another (idle) CPU for this woken-up process.
*
* On SMP, we mostly try to see if the CPU the task used
* to run on is idle.. but we will use another idle CPU too,
* at this point we already know that this CPU is not
* willing to reschedule in the near future.
*
* An idle CPU is definitely wasted, especially if this CPU is
* running long-timeslice processes. The following algorithm is
* pretty good at finding the best idle CPU to send this process
* to.
*
* [We can try to preempt low-priority processes on other CPUs in
* 2.3. Also we can try to use the avg_slice value to predict
* 'likely reschedule' events even on other CPUs.]
*/
int best_cpu = p->processor, this_cpu = smp_processor_id();
struct task_struct **idle = task, *tsk, *target_tsk;
int i = smp_num_cpus;
target_tsk = NULL;
do {
tsk = *idle;
idle++;
if (tsk->has_cpu) {
if (tsk->processor == this_cpu)
continue;
target_tsk = tsk;
if (tsk->processor == best_cpu) {
/*
* For SMP, we try to see if the CPU the task used
* to run on is idle..
* bingo, we couldn't get a better
* CPU, activate it.
*/
#if 0
goto send; /* this one helps GCC ... */
}
}
} while (--i > 0);
/*
* Disable this for now. Ingo has some interesting
* code that looks too complex, and I have some ideas,
* but in the meantime.. One problem is that "wakeup()"
* can be (and is) called before we've even initialized
* SMP completely, so..
* found any idle CPU?
*/
#ifdef __SMP__
int want_cpu = p->processor;
if (target_tsk) {
send:
target_tsk->need_resched = 1;
smp_send_reschedule(target_tsk->processor);
return;
}
}
#endif /* __SMP__ */
static inline void reschedule_idle(struct task_struct * p)
{
if (p->policy != SCHED_OTHER || p->counter > current->counter + 3) {
current->need_resched = 1;
return;
}
#ifdef __SMP__
/*
* Don't even try to find another CPU for us if the task
* ran on this one before..
* ("wakeup()" should not be called before we've initialized
* SMP completely. [Linus, is there any exception to this?]
* Basically a not-yet initialized SMP subsystem can be
* considered as a not-yet working scheduler, simply don't use
* it before it's up and running ...)
*
* SMP rescheduling is done in 2 passes:
* - pass #1: faster: 'quick decisions'
* - pass #2: slower: 'lets try and find another CPU'
*/
if (want_cpu != smp_processor_id()) {
struct task_struct **idle = task;
int i = smp_num_cpus;
do {
struct task_struct *tsk = *idle;
idle++;
/* Something like this.. */
if (tsk->has_cpu && tsk->processor == want_cpu) {
tsk->need_resched = 1;
smp_send_reschedule(want_cpu);
/*
* Pass #1
*
* There are two metrics here:
*
* first, a 'cutoff' interval, currently ~250 usecs on
* x86 CPUs. If the current process has longer average
* timeslices than this, then we utilize the idle CPU.
*
* second, if the wakeup comes from a process context,
* then the two processes are 'related'. (they form a
* 'gang')
*
* An idle CPU is almost always a bad thing, thus we skip
* the idle-CPU utilization only if both these conditions
* are true. (ie. a 'process-gang' rescheduling with rather
* high frequency should stay on the same CPU).
*
* [We can switch to something more finegrained in 2.3.]
*/
if ((current->avg_slice < cacheflush_time) && !in_interrupt())
return;
}
} while (--i > 0);
}
#endif
#endif
if (p->policy != SCHED_OTHER || p->counter > current->counter + 3)
current->need_resched = 1;
reschedule_idle_slow(p);
#endif /* __SMP__ */
}
/*
......@@ -244,6 +308,8 @@ static void process_timeout(unsigned long __data)
wake_up_process(p);
}
int _PROC_CHANGE_PENALTY = 13;
/*
* This is the function that decides how desirable a process is..
* You can weigh different processes against each other depending
......@@ -488,6 +554,63 @@ signed long schedule_timeout(signed long timeout)
return timeout < 0 ? 0 : timeout;
}
/*
* This one aligns per-CPU data on cacheline boundaries.
*/
static union {
struct schedule_data {
struct task_struct * prev;
long prevstate;
cycles_t last_schedule;
} schedule_data;
char __pad [L1_CACHE_BYTES];
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
static inline void __schedule_tail (void)
{
#ifdef __SMP__
struct schedule_data * sched_data;
/*
* We might have switched CPUs:
*/
sched_data = & aligned_data[smp_processor_id()].schedule_data;
/*
* Subtle. In the rare event that we got a wakeup to 'prev' just
* during the reschedule (this is possible, the scheduler is pretty
* parallel), we should do another reschedule in the next task's
* context. schedule() will do the right thing next time around.
* this is equivalent to 'delaying' the wakeup until the reschedule
* has finished.
*/
if (sched_data->prev->state != sched_data->prevstate)
current->need_resched = 1;
/*
* Release the previous process ...
*
* We have dropped all locks, and we must make sure that we
* only mark the previous process as no longer having a CPU
* after all other state has been seen by other CPU's. Thus
* the memory barrier!
*/
mb();
sched_data->prev->has_cpu = 0;
#endif /* __SMP__ */
}
/*
* schedule_tail() is getting called from the fork return path. This
* cleans up all remaining scheduler things, without impacting the
* common case.
*/
void schedule_tail (void)
{
__schedule_tail();
}
/*
* 'schedule()' is the scheduler function. It's a very simple and nice
* scheduler: it's not perfect, but certainly works for most things.
......@@ -500,11 +623,18 @@ signed long schedule_timeout(signed long timeout)
*/
asmlinkage void schedule(void)
{
struct schedule_data * sched_data;
struct task_struct * prev, * next;
int this_cpu;
prev = current;
this_cpu = prev->processor;
/*
* 'sched_data' is protected by the fact that we can run
* only one process per CPU.
*/
sched_data = & aligned_data[this_cpu].schedule_data;
if (in_interrupt())
goto scheduling_in_interrupt;
release_kernel_lock(prev, this_cpu);
......@@ -519,6 +649,7 @@ asmlinkage void schedule(void)
/* move an exhausted RR process to be last.. */
prev->need_resched = 0;
if (!prev->counter && prev->policy == SCHED_RR) {
prev->counter = prev->priority;
move_last_runqueue(prev);
......@@ -534,6 +665,9 @@ asmlinkage void schedule(void)
del_from_runqueue(prev);
case TASK_RUNNING:
}
sched_data->prevstate = prev->state;
{
struct task_struct * p = init_task.next_run;
/*
......@@ -580,27 +714,49 @@ asmlinkage void schedule(void)
}
}
/*
* maintain the per-process 'average timeslice' value.
* (this has to be recalculated even if we reschedule to
* the same process) Currently this is only used on SMP:
*/
#ifdef __SMP__
next->has_cpu = 1;
#endif
{
cycles_t t, this_slice;
t = get_cycles();
this_slice = t - sched_data->last_schedule;
sched_data->last_schedule = t;
/*
* Simple, exponentially fading average calculation:
*/
prev->avg_slice = this_slice + prev->avg_slice;
prev->avg_slice >>= 1;
}
/*
* We drop the scheduler lock early (it's a global spinlock),
* thus we have to lock the previous process from getting
* rescheduled during switch_to().
*/
prev->has_cpu = 1;
next->has_cpu = 1;
next->processor = this_cpu;
spin_unlock(&scheduler_lock);
#endif /* __SMP__ */
if (prev != next) {
#ifdef __SMP__
next->processor = this_cpu;
sched_data->prev = prev;
#endif
kstat.context_swtch++;
get_mmu_context(next);
switch_to(prev,next);
}
spin_unlock(&scheduler_lock);
__schedule_tail();
}
/*
* At this point "prev" is "current", as we just
* switched into it (from an even more "previous"
* prev)
*/
reacquire_kernel_lock(prev);
reacquire_kernel_lock(current);
return;
scheduling_in_interrupt:
......@@ -608,7 +764,6 @@ asmlinkage void schedule(void)
*(int *)0 = 0;
}
rwlock_t waitqueue_lock = RW_LOCK_UNLOCKED;
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment