Commit 92704a1c authored by David S. Miller's avatar David S. Miller Committed by David S. Miller

[SPARC64]: Refine code sequences to get the cpu id.

On uniprocessor, it's always zero, so optimize for that.

On SMP, the jmpl to the stub kills the return address stack in the cpu
branch prediction logic, so expand the code sequence inline and use a
code patching section to fix things up.  This also allows better and
explicit register selection, which will be taken advantage of in a
future changeset.

The hard_smp_processor_id() function is big, so do not inline it.

Fix up tests for Jalapeno to also test for Serrano chips too.  These
tests want "jbus Ultra-IIIi" cases to match, so that is what we should
test for.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f4e841da
...@@ -1628,84 +1628,10 @@ __flushw_user: ...@@ -1628,84 +1628,10 @@ __flushw_user:
2: retl 2: retl
nop nop
/* Read cpu ID from hardware, return in %g6. #ifdef CONFIG_SMP
* (callers_pc - 4) is in %g1. Patched at boot time. .globl hard_smp_processor_id
* hard_smp_processor_id:
* Default is spitfire implementation. __GET_CPUID(%o0)
*
* The instruction sequence needs to be 5 instructions
* in order to fit the longest implementation, which is
* currently starfire.
*/
.align 32
.globl __get_cpu_id
__get_cpu_id:
ldxa [%g0] ASI_UPA_CONFIG, %g6
srlx %g6, 17, %g6
jmpl %g1 + 0x4, %g0
and %g6, 0x1f, %g6
nop
__get_cpu_id_cheetah_safari:
ldxa [%g0] ASI_SAFARI_CONFIG, %g6
srlx %g6, 17, %g6
jmpl %g1 + 0x4, %g0
and %g6, 0x3ff, %g6
nop
__get_cpu_id_cheetah_jbus:
ldxa [%g0] ASI_JBUS_CONFIG, %g6
srlx %g6, 17, %g6
jmpl %g1 + 0x4, %g0
and %g6, 0x1f, %g6
nop
__get_cpu_id_starfire:
sethi %hi(0x1fff40000d0 >> 9), %g6
sllx %g6, 9, %g6
or %g6, 0xd0, %g6
jmpl %g1 + 0x4, %g0
lduwa [%g6] ASI_PHYS_BYPASS_EC_E, %g6
.globl per_cpu_patch
per_cpu_patch:
sethi %hi(this_is_starfire), %o0
lduw [%o0 + %lo(this_is_starfire)], %o1
sethi %hi(__get_cpu_id_starfire), %o0
brnz,pn %o1, 10f
or %o0, %lo(__get_cpu_id_starfire), %o0
sethi %hi(tlb_type), %o0
lduw [%o0 + %lo(tlb_type)], %o1
brz,pt %o1, 11f
nop
rdpr %ver, %o0
srlx %o0, 32, %o0
sethi %hi(0x003e0016), %o1
or %o1, %lo(0x003e0016), %o1
cmp %o0, %o1
sethi %hi(__get_cpu_id_cheetah_jbus), %o0
be,pn %icc, 10f
or %o0, %lo(__get_cpu_id_cheetah_jbus), %o0
sethi %hi(__get_cpu_id_cheetah_safari), %o0
or %o0, %lo(__get_cpu_id_cheetah_safari), %o0
10:
sethi %hi(__get_cpu_id), %o1
or %o1, %lo(__get_cpu_id), %o1
lduw [%o0 + 0x00], %o2
stw %o2, [%o1 + 0x00]
flush %o1 + 0x00
lduw [%o0 + 0x04], %o2
stw %o2, [%o1 + 0x04]
flush %o1 + 0x04
lduw [%o0 + 0x08], %o2
stw %o2, [%o1 + 0x08]
flush %o1 + 0x08
lduw [%o0 + 0x0c], %o2
stw %o2, [%o1 + 0x0c]
flush %o1 + 0x0c
lduw [%o0 + 0x10], %o2
stw %o2, [%o1 + 0x10]
flush %o1 + 0x10
11:
retl retl
nop nop
#endif
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <asm/cache.h> #include <asm/cache.h>
#include <asm/cpudata.h> #include <asm/cpudata.h>
#include <asm/auxio.h> #include <asm/auxio.h>
#include <asm/head.h>
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static void distribute_irqs(void); static void distribute_irqs(void);
...@@ -153,7 +154,8 @@ void enable_irq(unsigned int irq) ...@@ -153,7 +154,8 @@ void enable_irq(unsigned int irq)
unsigned long ver; unsigned long ver;
__asm__ ("rdpr %%ver, %0" : "=r" (ver)); __asm__ ("rdpr %%ver, %0" : "=r" (ver));
if ((ver >> 32) == 0x003e0016) { if ((ver >> 32) == __JALAPENO_ID ||
(ver >> 32) == __SERRANO_ID) {
/* We set it to our JBUS ID. */ /* We set it to our JBUS ID. */
__asm__ __volatile__("ldxa [%%g0] %1, %0" __asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (tid) : "=r" (tid)
......
...@@ -490,6 +490,58 @@ void register_prom_callbacks(void) ...@@ -490,6 +490,58 @@ void register_prom_callbacks(void)
"' linux-.soft2 to .soft2"); "' linux-.soft2 to .soft2");
} }
static void __init per_cpu_patch(void)
{
#ifdef CONFIG_SMP
struct cpuid_patch_entry *p;
unsigned long ver;
int is_jbus;
if (tlb_type == spitfire && !this_is_starfire)
return;
__asm__ ("rdpr %%ver, %0" : "=r" (ver));
is_jbus = ((ver >> 32) == __JALAPENO_ID ||
(ver >> 32) == __SERRANO_ID);
p = &__cpuid_patch;
while (p < &__cpuid_patch_end) {
unsigned long addr = p->addr;
unsigned int *insns;
switch (tlb_type) {
case spitfire:
insns = &p->starfire[0];
break;
case cheetah:
case cheetah_plus:
if (is_jbus)
insns = &p->cheetah_jbus[0];
else
insns = &p->cheetah_safari[0];
break;
default:
prom_printf("Unknown cpu type, halting.\n");
prom_halt();
};
*(unsigned int *) (addr + 0) = insns[0];
__asm__ __volatile__("flush %0" : : "r" (addr + 0));
*(unsigned int *) (addr + 4) = insns[1];
__asm__ __volatile__("flush %0" : : "r" (addr + 4));
*(unsigned int *) (addr + 8) = insns[2];
__asm__ __volatile__("flush %0" : : "r" (addr + 8));
*(unsigned int *) (addr + 12) = insns[3];
__asm__ __volatile__("flush %0" : : "r" (addr + 12));
p++;
}
#endif
}
void __init setup_arch(char **cmdline_p) void __init setup_arch(char **cmdline_p)
{ {
/* Initialize PROM console and command line. */ /* Initialize PROM console and command line. */
...@@ -507,8 +559,8 @@ void __init setup_arch(char **cmdline_p) ...@@ -507,8 +559,8 @@ void __init setup_arch(char **cmdline_p)
/* Work out if we are starfire early on */ /* Work out if we are starfire early on */
check_if_starfire(); check_if_starfire();
/* Now we know enough to patch the __get_cpu_id() /* Now we know enough to patch the get_cpuid sequences
* trampoline used by trap code. * used by trap code.
*/ */
per_cpu_patch(); per_cpu_patch();
......
...@@ -424,7 +424,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c ...@@ -424,7 +424,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{ {
u64 pstate, ver; u64 pstate, ver;
int nack_busy_id, is_jalapeno; int nack_busy_id, is_jbus;
if (cpus_empty(mask)) if (cpus_empty(mask))
return; return;
...@@ -434,7 +434,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -434,7 +434,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
* derivative processor. * derivative processor.
*/ */
__asm__ ("rdpr %%ver, %0" : "=r" (ver)); __asm__ ("rdpr %%ver, %0" : "=r" (ver));
is_jalapeno = ((ver >> 32) == 0x003e0016); is_jbus = ((ver >> 32) == __JALAPENO_ID ||
(ver >> 32) == __SERRANO_ID);
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
...@@ -459,7 +460,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -459,7 +460,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
for_each_cpu_mask(i, mask) { for_each_cpu_mask(i, mask) {
u64 target = (i << 14) | 0x70; u64 target = (i << 14) | 0x70;
if (!is_jalapeno) if (!is_jbus)
target |= (nack_busy_id << 24); target |= (nack_busy_id << 24);
__asm__ __volatile__( __asm__ __volatile__(
"stxa %%g0, [%0] %1\n\t" "stxa %%g0, [%0] %1\n\t"
...@@ -512,7 +513,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -512,7 +513,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
for_each_cpu_mask(i, mask) { for_each_cpu_mask(i, mask) {
u64 check_mask; u64 check_mask;
if (is_jalapeno) if (is_jbus)
check_mask = (0x2UL << (2*i)); check_mask = (0x2UL << (2*i));
else else
check_mask = (0x2UL << check_mask = (0x2UL <<
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/timer.h> #include <asm/timer.h>
#include <asm/kdebug.h> #include <asm/kdebug.h>
#include <asm/head.h>
#ifdef CONFIG_KMOD #ifdef CONFIG_KMOD
#include <linux/kmod.h> #include <linux/kmod.h>
#endif #endif
...@@ -788,7 +789,8 @@ void __init cheetah_ecache_flush_init(void) ...@@ -788,7 +789,8 @@ void __init cheetah_ecache_flush_init(void)
cheetah_error_log[i].afsr = CHAFSR_INVALID; cheetah_error_log[i].afsr = CHAFSR_INVALID;
__asm__ ("rdpr %%ver, %0" : "=r" (ver)); __asm__ ("rdpr %%ver, %0" : "=r" (ver));
if ((ver >> 32) == 0x003e0016) { if ((ver >> 32) == __JALAPENO_ID ||
(ver >> 32) == __SERRANO_ID) {
cheetah_error_table = &__jalapeno_error_table[0]; cheetah_error_table = &__jalapeno_error_table[0];
cheetah_afsr_errors = JPAFSR_ERRORS; cheetah_afsr_errors = JPAFSR_ERRORS;
} else if ((ver >> 32) == 0x003e0015) { } else if ((ver >> 32) == 0x003e0015) {
......
...@@ -74,6 +74,9 @@ SECTIONS ...@@ -74,6 +74,9 @@ SECTIONS
__tsb_phys_patch = .; __tsb_phys_patch = .;
.tsb_phys_patch : { *(.tsb_phys_patch) } .tsb_phys_patch : { *(.tsb_phys_patch) }
__tsb_phys_patch_end = .; __tsb_phys_patch_end = .;
__cpuid_patch = .;
.cpuid_patch : { *(.cpuid_patch) }
__cpuid_patch_end = .;
. = ALIGN(8192); . = ALIGN(8192);
__initramfs_start = .; __initramfs_start = .;
.init.ramfs : { *(.init.ramfs) } .init.ramfs : { *(.init.ramfs) }
......
...@@ -60,9 +60,18 @@ struct trap_per_cpu { ...@@ -60,9 +60,18 @@ struct trap_per_cpu {
} __attribute__((aligned(64))); } __attribute__((aligned(64)));
extern struct trap_per_cpu trap_block[NR_CPUS]; extern struct trap_per_cpu trap_block[NR_CPUS];
extern void init_cur_cpu_trap(void); extern void init_cur_cpu_trap(void);
extern void per_cpu_patch(void);
extern void setup_tba(void); extern void setup_tba(void);
#ifdef CONFIG_SMP
struct cpuid_patch_entry {
unsigned int addr;
unsigned int cheetah_safari[4];
unsigned int cheetah_jbus[4];
unsigned int starfire[4];
};
extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
#endif
#endif /* !(__ASSEMBLY__) */ #endif /* !(__ASSEMBLY__) */
#define TRAP_PER_CPU_THREAD 0x00 #define TRAP_PER_CPU_THREAD 0x00
...@@ -70,35 +79,58 @@ extern void setup_tba(void); ...@@ -70,35 +79,58 @@ extern void setup_tba(void);
#define TRAP_BLOCK_SZ_SHIFT 6 #define TRAP_BLOCK_SZ_SHIFT 6
/* Clobbers %g1, loads %g6 with local processor's cpuid */ #ifdef CONFIG_SMP
#define __GET_CPUID \
ba,pt %xcc, __get_cpu_id; \ #define __GET_CPUID(REG) \
rd %pc, %g1; /* Spitfire implementation (default). */ \
661: ldxa [%g0] ASI_UPA_CONFIG, REG; \
srlx REG, 17, REG; \
and REG, 0x1f, REG; \
nop; \
.section .cpuid_patch, "ax"; \
/* Instruction location. */ \
.word 661b; \
/* Cheetah Safari implementation. */ \
ldxa [%g0] ASI_SAFARI_CONFIG, REG; \
srlx REG, 17, REG; \
and REG, 0x3ff, REG; \
nop; \
/* Cheetah JBUS implementation. */ \
ldxa [%g0] ASI_JBUS_CONFIG, REG; \
srlx REG, 17, REG; \
and REG, 0x1f, REG; \
nop; \
/* Starfire implementation. */ \
sethi %hi(0x1fff40000d0 >> 9), REG; \
sllx REG, 9, REG; \
or REG, 0xd0, REG; \
lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\
.previous;
/* Clobbers %g1, current address space PGD phys address into %g7. */ /* Clobbers %g1, current address space PGD phys address into %g7. */
#define TRAP_LOAD_PGD_PHYS \ #define TRAP_LOAD_PGD_PHYS \
__GET_CPUID \ __GET_CPUID(%g1) \
sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \
sethi %hi(trap_block), %g7; \ sethi %hi(trap_block), %g7; \
sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \
or %g7, %lo(trap_block), %g7; \ or %g7, %lo(trap_block), %g7; \
add %g7, %g6, %g7; \ add %g7, %g1, %g7; \
ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
/* Clobbers %g1, loads local processor's IRQ work area into %g6. */ /* Clobbers %g1, loads local processor's IRQ work area into %g6. */
#define TRAP_LOAD_IRQ_WORK \ #define TRAP_LOAD_IRQ_WORK \
__GET_CPUID \ __GET_CPUID(%g1) \
sethi %hi(__irq_work), %g1; \ sethi %hi(__irq_work), %g6; \
sllx %g6, 6, %g6; \ sllx %g1, 6, %g1; \
or %g1, %lo(__irq_work), %g1; \ or %g6, %lo(__irq_work), %g6; \
add %g1, %g6, %g6; add %g6, %g1, %g6;
/* Clobbers %g1, loads %g6 with current thread info pointer. */ /* Clobbers %g1, loads %g6 with current thread info pointer. */
#define TRAP_LOAD_THREAD_REG \ #define TRAP_LOAD_THREAD_REG \
__GET_CPUID \ __GET_CPUID(%g1) \
sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ sethi %hi(trap_block), %g6; \
sethi %hi(trap_block), %g1; \ sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \
or %g1, %lo(trap_block), %g1; \ or %g6, %lo(trap_block), %g6; \
ldx [%g1 + %g6], %g6; ldx [%g6 + %g1], %g6;
/* Given the current thread info pointer in %g6, load the per-cpu /* Given the current thread info pointer in %g6, load the per-cpu
* area base of the current processor into %g5. REG1, REG2, and REG3 are * area base of the current processor into %g5. REG1, REG2, and REG3 are
...@@ -109,7 +141,6 @@ extern void setup_tba(void); ...@@ -109,7 +141,6 @@ extern void setup_tba(void);
* trap will load the fully resolved %g5 per-cpu base. This can corrupt * trap will load the fully resolved %g5 per-cpu base. This can corrupt
* the calculations done by the macro mid-stream. * the calculations done by the macro mid-stream.
*/ */
#ifdef CONFIG_SMP
#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \ #define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \
ldub [%g6 + TI_CPU], REG1; \ ldub [%g6 + TI_CPU], REG1; \
sethi %hi(__per_cpu_shift), REG3; \ sethi %hi(__per_cpu_shift), REG3; \
...@@ -118,8 +149,26 @@ extern void setup_tba(void); ...@@ -118,8 +149,26 @@ extern void setup_tba(void);
ldx [REG2 + %lo(__per_cpu_base)], REG2; \ ldx [REG2 + %lo(__per_cpu_base)], REG2; \
sllx REG1, REG3, REG3; \ sllx REG1, REG3, REG3; \
add REG3, REG2, %g5; add REG3, REG2, %g5;
#else #else
/* Uniprocessor versions, we know the cpuid is zero. */
#define TRAP_LOAD_PGD_PHYS \
sethi %hi(trap_block), %g7; \
or %g7, %lo(trap_block), %g7; \
ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
#define TRAP_LOAD_IRQ_WORK \
sethi %hi(__irq_work), %g6; \
or %g6, %lo(__irq_work), %g6;
#define TRAP_LOAD_THREAD_REG \
sethi %hi(trap_block), %g6; \
ldx [%g6 + %lo(trap_block)], %g6;
/* No per-cpu areas on uniprocessor, so no need to load %g5. */
#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) #define LOAD_PER_CPU_BASE(REG1, REG2, REG3)
#endif
#endif /* !(CONFIG_SMP) */
#endif /* _SPARC64_CPUDATA_H */ #endif /* _SPARC64_CPUDATA_H */
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#define __CHEETAH_ID 0x003e0014 #define __CHEETAH_ID 0x003e0014
#define __JALAPENO_ID 0x003e0016 #define __JALAPENO_ID 0x003e0016
#define __SERRANO_ID 0x003e0022
#define CHEETAH_MANUF 0x003e #define CHEETAH_MANUF 0x003e
#define CHEETAH_IMPL 0x0014 /* Ultra-III */ #define CHEETAH_IMPL 0x0014 /* Ultra-III */
......
...@@ -37,33 +37,7 @@ extern cpumask_t phys_cpu_present_map; ...@@ -37,33 +37,7 @@ extern cpumask_t phys_cpu_present_map;
* General functions that each host system must provide. * General functions that each host system must provide.
*/ */
static __inline__ int hard_smp_processor_id(void) extern int hard_smp_processor_id(void);
{
if (tlb_type == cheetah || tlb_type == cheetah_plus) {
unsigned long cfg, ver;
__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
if ((ver >> 32) == 0x003e0016) {
__asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (cfg)
: "i" (ASI_JBUS_CONFIG));
return ((cfg >> 17) & 0x1f);
} else {
__asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (cfg)
: "i" (ASI_SAFARI_CONFIG));
return ((cfg >> 17) & 0x3ff);
}
} else if (this_is_starfire != 0) {
return starfire_hard_smp_processor_id();
} else {
unsigned long upaconfig;
__asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (upaconfig)
: "i" (ASI_UPA_CONFIG));
return ((upaconfig >> 17) & 0x1f);
}
}
#define raw_smp_processor_id() (current_thread_info()->cpu) #define raw_smp_processor_id() (current_thread_info()->cpu)
extern void smp_setup_cpu_possible_map(void); extern void smp_setup_cpu_possible_map(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment