Commit 7e54bc75, authored Feb 11, 2002 by Ingo Molnar
merge to the -K3 scheduler.
parent 14d39718

Showing 14 changed files with 249 additions and 210 deletions:
arch/i386/kernel/entry.S            +2    -0
fs/binfmt_elf.c                     +1    -1
fs/proc/array.c                     +2    -6
include/asm-i386/bitops.h           +1    -1
include/asm-i386/mmu_context.h      +4   -10
include/linux/init_task.h           +2    -1
include/linux/sched.h               +8   -63
kernel/exit.c                       +3   -24
kernel/fork.c                       +1    -4
kernel/ksyms.c                      +2    -0
kernel/sched.c                    +218   -85
kernel/sys.c                        +2    -2
kernel/timer.c                      +1   -11
mm/oom_kill.c                       +2    -2
arch/i386/kernel/entry.S

@@ -190,9 +190,11 @@ ENTRY(lcall27)
 ENTRY(ret_from_fork)
+#if CONFIG_SMP
	pushl %ebx
	call SYMBOL_NAME(schedule_tail)
	addl $4, %esp
+#endif
	GET_THREAD_INFO(%ebx)
	jmp syscall_exit
fs/binfmt_elf.c

@@ -1119,7 +1119,7 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
	psinfo.pr_state = i;
	psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
	psinfo.pr_zomb = psinfo.pr_sname == 'Z';
-	psinfo.pr_nice = current->__nice;
+	psinfo.pr_nice = task_nice(current);
	psinfo.pr_flag = current->flags;
	psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
	psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
fs/proc/array.c

@@ -336,12 +336,8 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
	/* scale priority and nice values from timeslices to -20..20 */
	/* to make it look like a "normal" Unix priority/nice value */
-	priority = task->prio;
-	if (priority >= MAX_RT_PRIO)
-		priority -= MAX_RT_PRIO;
-	else
-		priority = priority - 100;
-	nice = task->__nice;
+	priority = task_prio(task);
+	nice = task_nice(task);

	read_lock(&tasklist_lock);
	ppid = task->pid ? task->p_opptr->pid : 0;
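The dropped open-coded scaling and the new task_prio()/task_nice() helpers agree because MAX_RT_PRIO is now 100, so both branches of the old conditional subtract the same constant. A minimal user-space sketch of that equivalence (the helpers are re-declared here for illustration; in the kernel they live in kernel/sched.c):

#include <assert.h>

#define MAX_RT_PRIO 100

/* The new helper added in kernel/sched.c: prio as shown in /proc. */
static int new_task_prio(int prio)
{
        return prio - 100;
}

/* The open-coded scaling removed from fs/proc/array.c. */
static int old_proc_priority(int prio)
{
        if (prio >= MAX_RT_PRIO)
                return prio - MAX_RT_PRIO;
        return prio - 100;
}

int main(void)
{
        int prio;

        /* With MAX_RT_PRIO == 100 both paths subtract 100, so the
         * helper reproduces the old output for every priority. */
        for (prio = 0; prio < 140; prio++)
                assert(old_proc_priority(prio) == new_task_prio(prio));
        return 0;
}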
include/asm-i386/bitops.h

@@ -358,7 +358,7 @@ static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
 * @offset: The bitnumber to start searching at
 * @size: The maximum size to search
 */
-static __inline__ int find_next_bit (void * addr, int size, int offset)
+static __inline__ int find_next_bit(void * addr, int size, int offset)
{
	unsigned long *p = ((unsigned long *) addr) + (offset >> 5);
	int set = 0, bit = offset & 31, res;
include/asm-i386/mmu_context.h

@@ -8,14 +8,10 @@
 /*
  * Every architecture must define this function. It's the fastest
- * way of searching a 168-bit bitmap where the first 128 bits are
- * unlikely to be set. It's guaranteed that at least one of the 168
+ * way of searching a 140-bit bitmap where the first 100 bits are
+ * unlikely to be set. It's guaranteed that at least one of the 140
  * bits is cleared.
  */
-#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
-# error update this function.
-#endif
 static inline int sched_find_first_bit(unsigned long *b)
 {
	if (unlikely(b[0]))

@@ -24,11 +20,9 @@ static inline int sched_find_first_bit(unsigned long *b)
		return __ffs(b[1]) + 32;
	if (unlikely(b[2]))
		return __ffs(b[2]) + 64;
-	if (unlikely(b[3]))
+	if (b[3])
		return __ffs(b[3]) + 96;
-	if (b[4])
-		return __ffs(b[4]) + 128;
-	return __ffs(b[5]) + 32 + 128;
+	return __ffs(b[4]) + 128;
 }

 /*
  * possibly do the LDT unload here?
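A user-space sketch of the search described in the comment, assuming 32-bit longs as on i386; __builtin_ctzl stands in for the kernel's __ffs(), and find_first_prio() mirrors the new five-word sched_find_first_bit():

#include <stdio.h>

/* Find-first-set within one 32-bit word; the kernel uses __ffs(). */
static int ffs_word(unsigned long w)
{
        return __builtin_ctzl(w);
}

/* Mirrors the new sched_find_first_bit(): the first 100 bits are
 * the (usually empty) RT priorities, hence the cold-path hints on
 * b[0]..b[2] in the kernel version. */
static int find_first_prio(unsigned long *b)
{
        if (b[0])
                return ffs_word(b[0]);
        if (b[1])
                return ffs_word(b[1]) + 32;
        if (b[2])
                return ffs_word(b[2]) + 64;
        if (b[3])
                return ffs_word(b[3]) + 96;
        return ffs_word(b[4]) + 128;
}

int main(void)
{
        /* Only bit 120 set: a nice-0 SCHED_OTHER task (96 + 24). */
        unsigned long bitmap[5] = { 0, 0, 0, 1UL << 24, 0 };

        printf("%d\n", find_first_prio(bitmap));        /* prints 120 */
        return 0;
}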
include/linux/init_task.h

@@ -45,7 +45,8 @@
     thread_info:	&init_thread_info,	\
     flags:		0,			\
     lock_depth:		-1,			\
-    __nice:		DEF_USER_NICE,		\
+    prio:		120,			\
+    static_prio:	120,			\
     policy:		SCHED_OTHER,		\
     cpus_allowed:	-1,			\
     mm:			NULL,			\
include/linux/sched.h

@@ -150,7 +150,7 @@ extern void trap_init(void);
 extern void update_process_times(int user);
 extern void update_one_process(struct task_struct *p, unsigned long user,
			unsigned long system, int cpu);
-extern void scheduler_tick(struct task_struct *p);
+extern void scheduler_tick(int user_tick, int system);
 extern void sched_task_migrated(struct task_struct *p);
 extern void smp_migrate_task(int cpu, task_t *task);
 extern unsigned long cache_decay_ticks;

@@ -241,18 +241,16 @@ struct task_struct {
	int lock_depth;		/* Lock depth */

-	int prio;
-	long __nice;
+	int prio, static_prio;
	list_t run_list;
	prio_array_t *array;

+	unsigned int time_slice;
	unsigned long sleep_avg;
	unsigned long sleep_timestamp;

	unsigned long policy;
	unsigned long cpus_allowed;
-	unsigned int time_slice;

	struct task_struct *next_task, *prev_task;

@@ -385,66 +383,12 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
  */
 #define _STK_LIM (8*1024*1024)

-/*
- * RT priorites go from 0 to 99, but internally we max
- * them out at 128 to make it easier to search the
- * scheduler bitmap.
- */
-#define MAX_RT_PRIO		128
-
-/*
- * The lower the priority of a process, the more likely it is
- * to run. Priority of a process goes from 0 to 167. The 0-99
- * priority range is allocated to RT tasks, the 128-167 range
- * is for SCHED_OTHER tasks.
- */
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
-
-/*
- * Scales user-nice values [ -20 ... 0 ... 19 ]
- * to static priority [ 128 ... 167 (MAX_PRIO-1) ]
- *
- * User-nice value of -20 == static priority 128, and
- * user-nice value 19 == static priority 167. The lower
- * the priority value, the higher the task's priority.
- */
-#define NICE_TO_PRIO(n)		(MAX_RT_PRIO + (n) + 20)
-#define DEF_USER_NICE		0
-
-/*
- * Default timeslice is 150 msecs, maximum is 300 msecs.
- * Minimum timeslice is 10 msecs.
- *
- * These are the 'tuning knobs' of the scheduler:
- */
-#define MIN_TIMESLICE		( 10 * HZ / 1000)
-#define MAX_TIMESLICE		(300 * HZ / 1000)
-#define CHILD_FORK_PENALTY	95
-#define PARENT_FORK_PENALTY	100
-#define EXIT_WEIGHT		3
-#define PRIO_INTERACTIVE_RATIO	20
-#define PRIO_CPU_HOG_RATIO	60
-#define PRIO_BONUS_RATIO	70
-#define INTERACTIVE_DELTA	3
-#define MAX_SLEEP_AVG		(2*HZ)
-#define STARVATION_LIMIT	(2*HZ)
-
-#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
-#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
-
-/*
- * NICE_TO_TIMESLICE scales nice values [ -20 ... 19 ]
- * to time slice values.
- *
- * The higher a process's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority process gets MIN_TIMESLICE worth of execution time.
- */
-#define NICE_TO_TIMESLICE(n) (MIN_TIMESLICE + \
-	((MAX_TIMESLICE - MIN_TIMESLICE) * (19-(n))) / 39)
-
 extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
 extern void set_user_nice(task_t *p, long nice);
+extern int task_prio(task_t *p);
+extern int task_nice(task_t *p);
+extern int idle_cpu(int cpu);

 asmlinkage long sys_sched_yield(void);
 #define yield() sys_sched_yield()

@@ -526,6 +470,7 @@ extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
						signed long timeout));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
 extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
+extern void FASTCALL(sched_exit(task_t * p));

 #define wake_up(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
 #define wake_up_nr(x, nr)		__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
kernel/exit.c

@@ -31,8 +31,6 @@ int getrusage(struct task_struct *, int, struct rusage *);
 static void release_task(struct task_struct * p)
 {
-	unsigned long flags;
-
	if (p == current)
		BUG();
 #ifdef CONFIG_SMP

@@ -46,25 +44,7 @@ static void release_task(struct task_struct * p)
	current->cmin_flt += p->min_flt + p->cmin_flt;
	current->cmaj_flt += p->maj_flt + p->cmaj_flt;
	current->cnswap += p->nswap + p->cnswap;
-	/*
-	 * Potentially available timeslices are retrieved
-	 * here - this way the parent does not get penalized
-	 * for creating too many processes.
-	 *
-	 * (this cannot be used to artificially 'generate'
-	 * timeslices, because any timeslice recovered here
-	 * was given away by the parent in the first place.)
-	 */
-	__save_flags(flags);
-	__cli();
-	current->time_slice += p->time_slice;
-	if (current->time_slice > MAX_TIMESLICE)
-		current->time_slice = MAX_TIMESLICE;
-	if (p->sleep_avg < current->sleep_avg)
-		current->sleep_avg = (current->sleep_avg * EXIT_WEIGHT +
-			p->sleep_avg) / (EXIT_WEIGHT + 1);
-	__restore_flags(flags);
+	sched_exit(p);
	p->pid = 0;
	put_task_struct(p);
 }

@@ -172,9 +152,8 @@ void reparent_to_init(void)
	current->exit_signal = SIGCHLD;

	current->ptrace = 0;
-	if ((current->policy == SCHED_OTHER) &&
-		(current->__nice < DEF_USER_NICE))
-		set_user_nice(current, DEF_USER_NICE);
+	if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
+		set_user_nice(current, 0);
	/* cpus_allowed? */
	/* rt_priority? */
	/* signals? */
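The reclaimed logic reappears as sched_exit() in kernel/sched.c (see that diff below); the clamp is what makes the removed comment's no-free-timeslices claim hold. A standalone sketch of the arithmetic, with HZ assumed to be 100:

#include <stdio.h>

#define HZ		100
#define MAX_TIMESLICE	(300 * HZ / 1000)

/* The reclaim-and-clamp step from sched_exit(), made standalone. */
static unsigned int reclaim_timeslice(unsigned int parent_slice,
                                      unsigned int child_slice)
{
        parent_slice += child_slice;
        if (parent_slice > MAX_TIMESLICE)
                parent_slice = MAX_TIMESLICE;
        return parent_slice;
}

int main(void)
{
        /* The parent recovers the exiting child's leftover ticks, but
         * never beyond MAX_TIMESLICE: forking and reaping children
         * cannot be used to accumulate extra CPU time. */
        printf("%u\n", reclaim_timeslice(25, 20));      /* clamped to 30 */
        return 0;
}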
kernel/fork.c

@@ -749,14 +749,11 @@ int do_fork(unsigned long clone_flags, unsigned long stack_start,
		 * runqueue lock is not a problem.
		 */
		current->time_slice = 1;
-		scheduler_tick(current);
+		scheduler_tick(0, 0);
	}
	p->sleep_timestamp = jiffies;
	__restore_flags(flags);

-	if (p->policy == SCHED_OTHER)
-		p->prio = MAX_PRIO - 1 - ((MAX_PRIO - 1 - p->prio) * 1) / 3;
-
	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
kernel/ksyms.c

@@ -455,6 +455,8 @@ EXPORT_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(schedule_timeout);
 EXPORT_SYMBOL(sys_sched_yield);
 EXPORT_SYMBOL(set_user_nice);
+EXPORT_SYMBOL(task_nice);
+EXPORT_SYMBOL_GPL(idle_cpu);
 EXPORT_SYMBOL(jiffies);
 EXPORT_SYMBOL(xtime);
 EXPORT_SYMBOL(do_gettimeofday);
kernel/sched.c

@@ -20,8 +20,107 @@
 #include <linux/interrupt.h>
 #include <linux/completion.h>
 #include <asm/mmu_context.h>
+#include <linux/kernel_stat.h>

-#define BITMAP_SIZE ((((MAX_PRIO+7)/8)+sizeof(long)-1)/sizeof(long))
+/*
+ * Priority of a process goes from 0 to 139. The 0-99
+ * priority range is allocated to RT tasks, the 100-139
+ * range is for SCHED_OTHER tasks. Priority values are
+ * inverted: lower p->prio value means higher priority.
+ */
+#define MAX_RT_PRIO		100
+#define MAX_PRIO		(MAX_RT_PRIO + 40)
+
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ 100 ... 139 (MAX_PRIO-1) ],
+ * and back.
+ */
+#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
+#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
+
+/*
+ * 'User priority' is the nice value converted to something we
+ * can work with better when scaling various scheduler parameters,
+ * it's a [ 0 ... 39 ] range.
+ */
+#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
+#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
+#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs,
+ * maximum timeslice is 300 msecs. Timeslices get refilled after
+ * they expire.
+ */
+#define MIN_TIMESLICE		( 10 * HZ / 1000)
+#define MAX_TIMESLICE		(300 * HZ / 1000)
+#define CHILD_PENALTY		95
+#define PARENT_PENALTY		100
+#define EXIT_WEIGHT		3
+#define PRIO_BONUS_RATIO	25
+#define INTERACTIVE_DELTA	2
+#define MAX_SLEEP_AVG		(2*HZ)
+#define STARVATION_LIMIT	(2*HZ)
+
+/*
+ * If a task is 'interactive' then we reinsert it in the active
+ * array after it has expired its current timeslice. (it will not
+ * continue to run immediately, it will still roundrobin with
+ * other interactive tasks.)
+ *
+ * This part scales the interactivity limit depending on niceness.
+ *
+ * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
+ * Here are a few examples of different nice levels:
+ *
+ *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
+ *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
+ *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
+ *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
+ *
+ * (the X axis represents the possible -5 ... 0 ... +5 dynamic
+ *  priority range a task can explore, a value of '1' means the
+ *  task is rated interactive.)
+ *
+ * Ie. nice +19 tasks can never get 'interactive' enough to be
+ * reinserted into the active array. And only heavily CPU-hog nice -20
+ * tasks will be expired. Default nice 0 tasks are somewhere between,
+ * it takes some effort for them to get interactive, but it's not
+ * too hard.
+ */
+
+#define SCALE(v1,v1_max,v2_max) \
+	(v1) * (v2_max) / (v1_max)
+
+#define DELTA(p) \
+	(SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
+		INTERACTIVE_DELTA)
+
+#define TASK_INTERACTIVE(p) \
+	((p)->prio <= (p)->static_prio - DELTA(p))
+
+/*
+ * TASK_TIMESLICE scales user-nice values [ -20 ... 19 ]
+ * to time slice values.
+ *
+ * The higher a process's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority process gets MIN_TIMESLICE worth of execution time.
+ */
+#define TASK_TIMESLICE(p) (MIN_TIMESLICE + \
+	((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/39))
+
+/*
+ * These are the runqueue data structures:
+ */
+#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))

 typedef struct runqueue runqueue_t;
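To make the new constants concrete, a user-space sketch that evaluates the conversion and timeslice macros across the nice range; HZ is assumed to be 100 (the i386 default at the time), and TASK_TIMESLICE() is restated on a plain static priority instead of a task_t:

#include <stdio.h>

#define HZ			100
#define MAX_RT_PRIO		100
#define MAX_PRIO		(MAX_RT_PRIO + 40)
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define MIN_TIMESLICE		( 10 * HZ / 1000)
#define MAX_TIMESLICE		(300 * HZ / 1000)

/* TASK_TIMESLICE() with (p)->static_prio replaced by a parameter. */
#define STATIC_PRIO_TIMESLICE(sp) (MIN_TIMESLICE + \
        ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(sp))/39))

int main(void)
{
        int nice;

        /* nice -20 -> static_prio 100 -> 30 ticks (300 ms at HZ=100),
         * nice   0 -> static_prio 120 -> 15 ticks (150 ms),
         * nice +19 -> static_prio 139 ->  1 tick  ( 10 ms). */
        for (nice = -20; nice <= 19; nice++)
                printf("nice %3d -> static_prio %3d -> timeslice %2d ticks\n",
                       nice, NICE_TO_PRIO(nice),
                       STATIC_PRIO_TIMESLICE(NICE_TO_PRIO(nice)));
        return 0;
}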
@@ -54,8 +153,7 @@ static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
 #define this_rq()		cpu_rq(smp_processor_id())
 #define task_rq(p)		cpu_rq((p)->thread_info->cpu)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
-#define rt_task(p)		((p)->policy != SCHED_OTHER)
+#define rt_task(p)		((p)->prio < MAX_RT_PRIO)

 static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
 {

@@ -78,6 +176,7 @@ static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
	spin_unlock_irqrestore(&rq->lock, *flags);
	preempt_enable();
 }
+
 /*
  * Adding/removing a task to/from a priority array:
  */

@@ -97,66 +196,25 @@ static inline void enqueue_task(struct task_struct *p, prio_array_t *array)
	p->array = array;
 }

-/*
- * A task is 'heavily interactive' if it either has reached the
- * bottom 25% of the SCHED_OTHER priority range, or if it is below
- * its default priority by at least 3 priority levels. In this
- * case we favor it by reinserting it on the active array,
- * even after it expired its current timeslice.
- *
- * A task is a 'CPU hog' if it's either in the upper 25% of the
- * SCHED_OTHER priority range, or if's not an interactive task.
- *
- * A task can get a priority bonus by being 'somewhat
- * interactive' - and it will get a priority penalty for
- * being a CPU hog.
- *
- */
-#define PRIO_INTERACTIVE \
-		(MAX_RT_PRIO + MAX_USER_PRIO*PRIO_INTERACTIVE_RATIO/100)
-#define PRIO_CPU_HOG \
-		(MAX_RT_PRIO + MAX_USER_PRIO*PRIO_CPU_HOG_RATIO/100)
-
-#define TASK_INTERACTIVE(p) \
-	(((p)->prio <= PRIO_INTERACTIVE) || \
-		(((p)->prio < PRIO_CPU_HOG) && \
-			((p)->prio <= NICE_TO_PRIO((p)->__nice) - INTERACTIVE_DELTA)))
-
-/*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks:
- */
-#define EXPIRED_STARVING(rq) \
-		((rq)->expired_timestamp && \
-		(jiffies - (rq)->expired_timestamp >= \
-			STARVATION_LIMIT * ((rq)->nr_running) + 1))
-
 static inline int effective_prio(task_t *p)
 {
	int bonus, prio;

	/*
	 * Here we scale the actual sleep average [0 .... MAX_SLEEP_AVG]
-	 * into the -14 ... +14 bonus/penalty range.
+	 * into the -5 ... 0 ... +5 bonus/penalty range.
	 *
-	 * We use 70% of the full 0...39 priority range so that:
+	 * We use 25% of the full 0...39 priority range so that:
	 *
-	 * 1) nice +19 CPU hogs do not preempt nice 0 CPU hogs.
-	 * 2) nice -20 interactive tasks do not get preempted by
-	 *    nice 0 interactive tasks.
+	 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
+	 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
	 *
	 * Both properties are important to certain workloads.
	 */
	bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
			MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;

-	prio = NICE_TO_PRIO(p->__nice) - bonus;
+	prio = p->static_prio - bonus;
	if (prio < MAX_RT_PRIO)
		prio = MAX_RT_PRIO;
	if (prio > MAX_PRIO-1)
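With the new constants, MAX_USER_PRIO*PRIO_BONUS_RATIO/100 is 10, so the bonus expression maps sleep_avg in [0, MAX_SLEEP_AVG] onto [-5, +5], exactly the range the updated comment names. A standalone sketch of that one line:

#include <stdio.h>

#define HZ			100
#define MAX_USER_PRIO		40
#define PRIO_BONUS_RATIO	25
#define MAX_SLEEP_AVG		(2*HZ)

/* The bonus computation from effective_prio(), with p->sleep_avg
 * passed in as a plain signed value. */
static int bonus(long sleep_avg)
{
        return MAX_USER_PRIO*PRIO_BONUS_RATIO*sleep_avg/MAX_SLEEP_AVG/100 -
                        MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
}

int main(void)
{
        /* A task that never sleeps gets the full -5 penalty, a task
         * that sleeps MAX_SLEEP_AVG gets the full +5 bonus. */
        printf("%d %d %d\n",
               bonus(0), bonus(MAX_SLEEP_AVG/2), bonus(MAX_SLEEP_AVG));
        /* prints: -5 0 5 */
        return 0;
}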
@@ -191,7 +249,6 @@ static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
	rq->nr_running--;
	dequeue_task(p, p->array);
	p->array = NULL;
-	p->sleep_timestamp = jiffies;
 }

 static inline void resched_task(task_t *p)

@@ -305,16 +362,20 @@ int wake_up_process(task_t * p)
 void wake_up_forked_process(task_t * p)
 {
	runqueue_t *rq;
+
	preempt_disable();
	rq = this_rq();

	p->state = TASK_RUNNING;
	if (!rt_task(p)) {
-		p->sleep_avg = p->sleep_avg * CHILD_FORK_PENALTY / 100;
+		/*
+		 * We decrease the sleep average of forking parents
+		 * and children as well, to keep max-interactive tasks
+		 * from forking tasks that are max-interactive.
+		 */
+		current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
+		p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
		p->prio = effective_prio(p);
-		current->sleep_avg = current->sleep_avg * PARENT_FORK_PENALTY / 100;
	}
	spin_lock_irq(&rq->lock);
	p->thread_info->cpu = smp_processor_id();

@@ -323,10 +384,37 @@ void wake_up_forked_process(task_t * p)
	preempt_enable();
 }

+/*
+ * Potentially available exiting-child timeslices are
+ * retrieved here - this way the parent does not get
+ * penalized for creating too many processes.
+ *
+ * (this cannot be used to 'generate' timeslices
+ * artificially, because any timeslice recovered here
+ * was given away by the parent in the first place.)
+ */
+void sched_exit(task_t * p)
+{
+	__cli();
+	current->time_slice += p->time_slice;
+	if (unlikely(current->time_slice > MAX_TIMESLICE))
+		current->time_slice = MAX_TIMESLICE;
+	__sti();
+	/*
+	 * If the child was a (relative-) CPU hog then decrease
+	 * the sleep_avg of the parent as well.
+	 */
+	if (p->sleep_avg < current->sleep_avg)
+		current->sleep_avg = (current->sleep_avg * EXIT_WEIGHT +
+			p->sleep_avg) / (EXIT_WEIGHT + 1);
+}
+
 #if CONFIG_SMP
 asmlinkage void schedule_tail(task_t *prev)
 {
	spin_unlock_irq(&this_rq()->lock);
 }
 #endif
+
 static inline void context_switch(task_t *prev, task_t *next)
 {

@@ -403,6 +491,7 @@ static inline unsigned int double_lock_balance(runqueue_t *this_rq,
	}
	return nr_running;
 }
+
 /*
  * Current runqueue is empty, or rebalance tick: if there is an
  * inbalance (current runqueue is too short) then pull from

@@ -492,13 +581,13 @@ static void load_balance(runqueue_t *this_rq, int idle)
	array = busiest->active;
 new_array:
-	/*
-	 * Load-balancing does not affect RT tasks, so we start the
-	 * searching at priority 128.
-	 */
-	idx = MAX_RT_PRIO;
+	/* Start searching at priority 0: */
+	idx = 0;
 skip_bitmap:
-	idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
+	if (!idx)
+		idx = sched_find_first_bit(array->bitmap);
+	else
+		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
	if (idx == MAX_PRIO) {
		if (array == busiest->expired) {
			array = busiest->active;

@@ -576,19 +665,44 @@ static inline void idle_tick(void)
 #endif

 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks:
+ */
+#define EXPIRED_STARVING(rq) \
+		((rq)->expired_timestamp && \
+		(jiffies - (rq)->expired_timestamp >= \
+			STARVATION_LIMIT * ((rq)->nr_running) + 1))
+
+/*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  */
-void scheduler_tick(task_t *p)
+void scheduler_tick(int user_tick, int system)
 {
+	int cpu = smp_processor_id();
	runqueue_t *rq = this_rq();
-#if CONFIG_SMP
-	unsigned long now = jiffies;
+	task_t *p = current;

-	if (p == rq->idle)
-		return idle_tick();
+	if (p == rq->idle) {
+		if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
+			kstat.per_cpu_system[cpu] += system;
+#if CONFIG_SMP
+		idle_tick();
 #endif
+		return;
+	}
+	if (TASK_NICE(p) > 0)
+		kstat.per_cpu_nice[cpu] += user_tick;
+	else
+		kstat.per_cpu_user[cpu] += user_tick;
+	kstat.per_cpu_system[cpu] += system;
+
	/* Task might have expired already, but not scheduled off yet */
	if (p->array != rq->active) {
		set_tsk_need_resched(p);
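The EXPIRED_STARVING() deadline grows with runqueue length, so interactive reinsertion is cut off sooner on a lightly loaded queue than on a busy one. A small sketch of the test with the rq fields passed explicitly (values are illustrative):

#include <stdio.h>

#define HZ			100
#define STARVATION_LIMIT	(2*HZ)

/* EXPIRED_STARVING() with the runqueue fields as parameters. */
static int expired_starving(unsigned long now,
                            unsigned long expired_timestamp,
                            unsigned long nr_running)
{
        return expired_timestamp &&
                (now - expired_timestamp >=
                        STARVATION_LIMIT * nr_running + 1);
}

int main(void)
{
        /* With 4 runnable tasks the first expired task may wait up to
         * 2*HZ*4 = 800 ticks (8 s at HZ=100) before interactivity
         * reinsertion stops: 900 ticks of waiting trips the test. */
        printf("%d\n", expired_starving(1000, 100, 4)); /* prints 1 */
        return 0;
}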
@@ -601,7 +715,7 @@ void scheduler_tick(task_t *p)
	 * FIFO tasks have no timeslices.
	 */
	if ((p->policy == SCHED_RR) && !--p->time_slice) {
-		p->time_slice = NICE_TO_TIMESLICE(p->__nice);
+		p->time_slice = TASK_TIMESLICE(p);
		set_tsk_need_resched(p);

		/* put it at the end of the queue: */

@@ -624,7 +738,7 @@ void scheduler_tick(task_t *p)
		dequeue_task(p, rq->active);
		set_tsk_need_resched(p);
		p->prio = effective_prio(p);
-		p->time_slice = NICE_TO_TIMESLICE(p->__nice);
+		p->time_slice = TASK_TIMESLICE(p);

		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
			if (!rq->expired_timestamp)

@@ -635,7 +749,7 @@ void scheduler_tick(task_t *p)
	}
 out:
 #if CONFIG_SMP
-	if (!(now % BUSY_REBALANCE_TICK))
+	if (!(jiffies % BUSY_REBALANCE_TICK))
		load_balance(rq, 0);
 #endif
	spin_unlock(&rq->lock);

@@ -656,12 +770,12 @@ asmlinkage void schedule(void)
	if (unlikely(in_interrupt()))
		BUG();
+
	preempt_disable();
	prev = current;
	rq = this_rq();

	release_kernel_lock(prev, smp_processor_id());
-	prev->sleep_timestamp = jiffies;
	spin_lock_irq(&rq->lock);

 #ifdef CONFIG_PREEMPT

@@ -672,19 +786,17 @@ asmlinkage void schedule(void)
	if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
		goto pick_next_task;
 #endif
	switch (prev->state) {
+	case TASK_RUNNING:
+		prev->sleep_timestamp = jiffies;
+		break;
	case TASK_INTERRUPTIBLE:
		if (unlikely(signal_pending(prev))) {
			prev->state = TASK_RUNNING;
+			prev->sleep_timestamp = jiffies;
			break;
		}
	default:
		deactivate_task(prev, rq);
-	case TASK_RUNNING:
-		;
	}
 #if CONFIG_SMP || CONFIG_PREEMPT
 pick_next_task:

@@ -905,6 +1017,8 @@ void set_cpus_allowed(task_t *p, unsigned long new_mask)
	new_mask &= cpu_online_map;
	if (!new_mask)
		BUG();
+	if (p != current)
+		BUG();

	p->cpus_allowed = new_mask;
	/*

@@ -929,7 +1043,7 @@ void set_user_nice(task_t *p, long nice)
	prio_array_t *array;
	runqueue_t *rq;

-	if (p->__nice == nice)
+	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
		return;
	/*
	 * We have to be careful, if called from sys_setpriority(),

@@ -937,13 +1051,13 @@ void set_user_nice(task_t *p, long nice)
	 */
	rq = lock_task_rq(p, &flags);
	if (rt_task(p)) {
-		p->__nice = nice;
+		p->static_prio = NICE_TO_PRIO(nice);
		goto out_unlock;
	}
	array = p->array;
	if (array)
		dequeue_task(p, array);
-	p->__nice = nice;
+	p->static_prio = NICE_TO_PRIO(nice);
	p->prio = NICE_TO_PRIO(nice);
	if (array) {
		enqueue_task(p, array);

@@ -951,8 +1065,7 @@ void set_user_nice(task_t *p, long nice)
	 * If the task is running and lowered its priority,
	 * or increased its priority then reschedule its CPU:
	 */
-	if ((nice < p->__nice) ||
-		((p->__nice < nice) && (p == rq->curr)))
+	if ((NICE_TO_PRIO(nice) < p->static_prio) || (p == rq->curr))
		resched_task(rq->curr);
	}
 out_unlock:

@@ -985,7 +1098,7 @@ asmlinkage long sys_nice(int increment)
	if (increment > 40)
		increment = 40;

-	nice = current->__nice + increment;
+	nice = PRIO_TO_NICE(current->static_prio) + increment;
	if (nice < -20)
		nice = -20;
	if (nice > 19)

@@ -996,6 +1109,27 @@ asmlinkage long sys_nice(int increment)
 #endif

+/*
+ * This is the priority value as seen by users in /proc
+ *
+ * RT tasks are offset by -200. Normal tasks are centered
+ * around 0, value goes from -16 to +15.
+ */
+int task_prio(task_t *p)
+{
+	return p->prio - 100;
+}
+
+int task_nice(task_t *p)
+{
+	return TASK_NICE(p);
+}
+
+int idle_cpu(int cpu)
+{
+	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
+}
+
 static inline task_t *find_process_by_pid(pid_t pid)
 {
	return pid ? find_task_by_pid(pid) : current;

@@ -1069,9 +1203,9 @@ static int setscheduler(pid_t pid, int policy, struct sched_param *param)
	p->policy = policy;
	p->rt_priority = lp.sched_priority;
	if (rt_task(p))
		p->prio = 99 - p->rt_priority;
	else
-		p->prio = NICE_TO_PRIO(p->__nice);
+		p->prio = p->static_prio;
	if (array)
		activate_task(p, task_rq(p));

@@ -1186,7 +1320,6 @@ asmlinkage long sys_sched_yield(void)
	return 0;
 }

-
 asmlinkage long sys_sched_get_priority_max(int policy)
 {
	int ret = -EINVAL;

@@ -1232,7 +1365,7 @@ asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
	p = find_process_by_pid(pid);
	if (p)
		jiffies_to_timespec(p->policy & SCHED_FIFO ?
-				0 : NICE_TO_TIMESLICE(p->__nice), &t);
+				0 : TASK_TIMESLICE(p), &t);
	read_unlock(&tasklist_lock);
	if (p)
		retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
kernel/sys.c

@@ -221,7 +221,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
		}
		if (error == -ESRCH)
			error = 0;
-		if (niceval < p->__nice && !capable(CAP_SYS_NICE))
+		if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
			error = -EACCES;
		else
			set_user_nice(p, niceval);

@@ -250,7 +250,7 @@ asmlinkage long sys_getpriority(int which, int who)
		long niceval;
		if (!proc_sel(p, which, who))
			continue;
-		niceval = 20 - p->__nice;
+		niceval = 20 - task_nice(p);
		if (niceval > retval)
			retval = niceval;
	}
kernel/timer.c

@@ -584,17 +584,7 @@ void update_process_times(int user_tick)
	int cpu = smp_processor_id(), system = user_tick ^ 1;

	update_one_process(p, user_tick, system, cpu);
-	if (p->pid) {
-		if (p->__nice > 0)
-			kstat.per_cpu_nice[cpu] += user_tick;
-		else
-			kstat.per_cpu_user[cpu] += user_tick;
-		kstat.per_cpu_system[cpu] += system;
-	} else {
-		if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
-			kstat.per_cpu_system[cpu] += system;
-	}
-	scheduler_tick(p);
+	scheduler_tick(user_tick, system);
 }
mm/oom_kill.c

@@ -82,7 +82,7 @@ static int badness(struct task_struct *p)
	 * Niced processes are most likely less important, so double
	 * their badness points.
	 */
-	if (p->__nice > 0)
+	if (task_nice(p) > 0)
		points *= 2;

	/*

@@ -146,7 +146,7 @@ void oom_kill_task(struct task_struct *p)
	 * all the memory it needs. That way it should be able to
	 * exit() and clear out its resources quickly...
	 */
-	p->time_slice = 2 * MAX_TIMESLICE;
+	p->time_slice = HZ;
	p->flags |= PF_MEMALLOC | PF_MEMDIE;

	/* This process has hardware access, be more careful. */