Commit 4b81e400 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] cpumask: optimize various uses of new cpumasks

From: Paul Jackson <pj@sgi.com>

Make use of for_each_cpu_mask() macro to simplify and optimize a couple of
sparc64 per-CPU loops.

Optimize a bit of cpumask code for asm-i386/mach-es7000

Convert physids_complement() to use both args in the files
include/asm-i386/mpspec.h, include/asm-x86_64/mpspec.h.

Remove cpumask hack from asm-x86_64/topology.h routine pcibus_to_cpumask().

Clarify and slightly optimize several cpumask manipulations in kernel/sched.c
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 5ffa67fc
...@@ -406,14 +406,8 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c ...@@ -406,14 +406,8 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
int i; int i;
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
for (i = 0; i < NR_CPUS; i++) { for_each_cpu_mask(i, mask)
if (cpu_isset(i, mask)) { spitfire_xcall_helper(data0, data1, data2, pstate, i);
spitfire_xcall_helper(data0, data1, data2, pstate, i);
cpu_clear(i, mask);
if (cpus_empty(mask))
break;
}
}
} }
/* Cheetah now allows to send the whole 64-bytes of data in the interrupt /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
...@@ -456,25 +450,19 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -456,25 +450,19 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
nack_busy_id = 0; nack_busy_id = 0;
{ {
cpumask_t work_mask = mask;
int i; int i;
for (i = 0; i < NR_CPUS; i++) { for_each_cpu_mask(i, mask) {
if (cpu_isset(i, work_mask)) { u64 target = (i << 14) | 0x70;
u64 target = (i << 14) | 0x70;
if (!is_jalapeno)
if (!is_jalapeno) target |= (nack_busy_id << 24);
target |= (nack_busy_id << 24); __asm__ __volatile__(
__asm__ __volatile__( "stxa %%g0, [%0] %1\n\t"
"stxa %%g0, [%0] %1\n\t" "membar #Sync\n\t"
"membar #Sync\n\t" : /* no outputs */
: /* no outputs */ : "r" (target), "i" (ASI_INTR_W));
: "r" (target), "i" (ASI_INTR_W)); nack_busy_id++;
nack_busy_id++;
cpu_clear(i, work_mask);
if (cpus_empty(work_mask))
break;
}
} }
} }
...@@ -507,7 +495,6 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -507,7 +495,6 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
printk("CPU[%d]: mondo stuckage result[%016lx]\n", printk("CPU[%d]: mondo stuckage result[%016lx]\n",
smp_processor_id(), dispatch_stat); smp_processor_id(), dispatch_stat);
} else { } else {
cpumask_t work_mask = mask;
int i, this_busy_nack = 0; int i, this_busy_nack = 0;
/* Delay some random time with interrupts enabled /* Delay some random time with interrupts enabled
...@@ -518,22 +505,17 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -518,22 +505,17 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
/* Clear out the mask bits for cpus which did not /* Clear out the mask bits for cpus which did not
* NACK us. * NACK us.
*/ */
for (i = 0; i < NR_CPUS; i++) { for_each_cpu_mask(i, mask) {
if (cpu_isset(i, work_mask)) { u64 check_mask;
u64 check_mask;
if (is_jalapeno)
if (is_jalapeno) check_mask = (0x2UL << (2*i));
check_mask = (0x2UL << (2*i)); else
else check_mask = (0x2UL <<
check_mask = (0x2UL << this_busy_nack);
this_busy_nack); if ((dispatch_stat & check_mask) == 0)
if ((dispatch_stat & check_mask) == 0) cpu_clear(i, mask);
cpu_clear(i, mask); this_busy_nack += 2;
this_busy_nack += 2;
cpu_clear(i, work_mask);
if (cpus_empty(work_mask))
break;
}
} }
goto retry; goto retry;
......
...@@ -10,9 +10,8 @@ static inline void send_IPI_mask(cpumask_t mask, int vector) ...@@ -10,9 +10,8 @@ static inline void send_IPI_mask(cpumask_t mask, int vector)
static inline void send_IPI_allbutself(int vector) static inline void send_IPI_allbutself(int vector)
{ {
cpumask_t mask = cpumask_of_cpu(smp_processor_id()); cpumask_t mask = cpu_online_map;
cpus_complement(mask); cpu_clear(smp_processor_id(), mask);
cpus_and(mask, mask, cpu_online_map);
if (!cpus_empty(mask)) if (!cpus_empty(mask))
send_IPI_mask(mask, vector); send_IPI_mask(mask, vector);
} }
......
...@@ -53,7 +53,7 @@ typedef struct physid_mask physid_mask_t; ...@@ -53,7 +53,7 @@ typedef struct physid_mask physid_mask_t;
#define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
#define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
#define physids_clear(map) bitmap_zero((map).mask, MAX_APICS) #define physids_clear(map) bitmap_zero((map).mask, MAX_APICS)
#define physids_complement(map) bitmap_complement((map).mask, (map).mask, MAX_APICS) #define physids_complement(dst, src) bitmap_complement((dst).mask,(src).mask, MAX_APICS)
#define physids_empty(map) bitmap_empty((map).mask, MAX_APICS) #define physids_empty(map) bitmap_empty((map).mask, MAX_APICS)
#define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS) #define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
#define physids_weight(map) bitmap_weight((map).mask, MAX_APICS) #define physids_weight(map) bitmap_weight((map).mask, MAX_APICS)
......
...@@ -212,7 +212,7 @@ typedef struct physid_mask physid_mask_t; ...@@ -212,7 +212,7 @@ typedef struct physid_mask physid_mask_t;
#define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
#define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) #define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
#define physids_clear(map) bitmap_zero((map).mask, MAX_APICS) #define physids_clear(map) bitmap_zero((map).mask, MAX_APICS)
#define physids_complement(map) bitmap_complement((map).mask, (map).mask, MAX_APICS) #define physids_complement(dst, src) bitmap_complement((dst).mask, (src).mask, MAX_APICS)
#define physids_empty(map) bitmap_empty((map).mask, MAX_APICS) #define physids_empty(map) bitmap_empty((map).mask, MAX_APICS)
#define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS) #define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
#define physids_weight(map) bitmap_weight((map).mask, MAX_APICS) #define physids_weight(map) bitmap_weight((map).mask, MAX_APICS)
......
...@@ -20,9 +20,11 @@ extern cpumask_t node_to_cpumask[]; ...@@ -20,9 +20,11 @@ extern cpumask_t node_to_cpumask[];
#define node_to_first_cpu(node) (__ffs(node_to_cpumask[node])) #define node_to_first_cpu(node) (__ffs(node_to_cpumask[node]))
#define node_to_cpumask(node) (node_to_cpumask[node]) #define node_to_cpumask(node) (node_to_cpumask[node])
static inline unsigned long pcibus_to_cpumask(int bus) static inline cpumask_t pcibus_to_cpumask(int bus)
{ {
return mp_bus_to_cpumask[bus] & cpu_online_map; cpumask_t tmp;
cpus_and(tmp, mp_bus_to_cpumask[bus], cpu_online_map);
return tmp;
} }
#define NODE_BALANCE_RATE 30 /* CHECKME */ #define NODE_BALANCE_RATE 30 /* CHECKME */
......
...@@ -696,10 +696,9 @@ static int wake_idle(int cpu, task_t *p) ...@@ -696,10 +696,9 @@ static int wake_idle(int cpu, task_t *p)
return cpu; return cpu;
cpus_and(tmp, sd->span, cpu_online_map); cpus_and(tmp, sd->span, cpu_online_map);
for_each_cpu_mask(i, tmp) { cpus_and(tmp, tmp, p->cpus_allowed);
if (!cpu_isset(i, p->cpus_allowed))
continue;
for_each_cpu_mask(i, tmp) {
if (idle_cpu(i)) if (idle_cpu(i))
return i; return i;
} }
...@@ -3335,7 +3334,7 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask) ...@@ -3335,7 +3334,7 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
runqueue_t *rq; runqueue_t *rq;
rq = task_rq_lock(p, &flags); rq = task_rq_lock(p, &flags);
if (any_online_cpu(new_mask) == NR_CPUS) { if (!cpus_intersects(new_mask, cpu_online_map)) {
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
...@@ -3510,8 +3509,7 @@ static void migrate_all_tasks(int src_cpu) ...@@ -3510,8 +3509,7 @@ static void migrate_all_tasks(int src_cpu)
if (dest_cpu == NR_CPUS) if (dest_cpu == NR_CPUS)
dest_cpu = any_online_cpu(tsk->cpus_allowed); dest_cpu = any_online_cpu(tsk->cpus_allowed);
if (dest_cpu == NR_CPUS) { if (dest_cpu == NR_CPUS) {
cpus_clear(tsk->cpus_allowed); cpus_setall(tsk->cpus_allowed);
cpus_complement(tsk->cpus_allowed);
dest_cpu = any_online_cpu(tsk->cpus_allowed); dest_cpu = any_online_cpu(tsk->cpus_allowed);
/* Don't tell them about moving exiting tasks /* Don't tell them about moving exiting tasks
...@@ -3827,7 +3825,7 @@ void sched_domain_debug(void) ...@@ -3827,7 +3825,7 @@ void sched_domain_debug(void)
int j; int j;
char str[NR_CPUS]; char str[NR_CPUS];
struct sched_group *group = sd->groups; struct sched_group *group = sd->groups;
cpumask_t groupmask, tmp; cpumask_t groupmask;
cpumask_scnprintf(str, NR_CPUS, sd->span); cpumask_scnprintf(str, NR_CPUS, sd->span);
cpus_clear(groupmask); cpus_clear(groupmask);
...@@ -3857,8 +3855,7 @@ void sched_domain_debug(void) ...@@ -3857,8 +3855,7 @@ void sched_domain_debug(void)
if (!cpus_weight(group->cpumask)) if (!cpus_weight(group->cpumask))
printk(" ERROR empty group:"); printk(" ERROR empty group:");
cpus_and(tmp, groupmask, group->cpumask); if (cpus_intersects(groupmask, group->cpumask))
if (cpus_weight(tmp) > 0)
printk(" ERROR repeated CPUs:"); printk(" ERROR repeated CPUs:");
cpus_or(groupmask, groupmask, group->cpumask); cpus_or(groupmask, groupmask, group->cpumask);
...@@ -3877,8 +3874,7 @@ void sched_domain_debug(void) ...@@ -3877,8 +3874,7 @@ void sched_domain_debug(void)
sd = sd->parent; sd = sd->parent;
if (sd) { if (sd) {
cpus_and(tmp, groupmask, sd->span); if (!cpus_subset(groupmask, sd->span))
if (!cpus_equal(tmp, groupmask))
printk(KERN_DEBUG "ERROR parent span is not a superset of domain->span\n"); printk(KERN_DEBUG "ERROR parent span is not a superset of domain->span\n");
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment