Commit 326f372c authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: SLB rewrite

From: Anton Blanchard <anton@samba.org>

The current SLB handling code has a number of problems:

- We loop trying to find an empty SLB entry before deciding to cast one
  out.  On large working sets this really hurts since the SLB is always full
  and we end up looping through all 64 entries unnecessarily.

- During castout we currently invalidate the entry we are replacing.  This
  is to avoid a nasty race where the entry is in the ERAT but not the SLB and
  another cpu does a tlbie that removes the ERAT entry at a critical point.
  If this race is fixed, the invalidate at castout can be removed.

- The SLB prefault code doesn't work properly

The following patch addresses all the above concerns and adds some more
optimisations (a rough C sketch of the new castout policy follows the list
of changes below):

- use CPU feature sections to nop out segment-table-only code

- SLB invalidate the kernel stack segment on context switch (avoids having
  to do an SLB invalidate at each castout)

- optimise the flush on context switch: the lazy tlb code avoids it being
  called when going from userspace to a kernel thread, but it still gets
  called when going from a kernel thread back to userspace.  In many cases
  we are returning to the same userspace task, so we now check for this and
  avoid the flush

- use the optimised POWER4 mtcrf where possible
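
For illustration, a minimal C sketch (not kernel code) of the castout policy
the new do_slb_bolted below implements: take the next slot round robin, wrap
back to 1 so the bolted entry in slot 0 is never cast out, and retry if the
chosen slot currently maps the kernel stack segment.  The struct and names
here are invented for the sketch; in the patch this is assembly, the
round-robin pointer lives in PACASTABRR and the per-slot ESID comes from
slbmfee.

#define SLB_NUM_ENTRIES 64

/* stand-in for the paca fields and slbmfee used by the real code */
struct slb_state {
	unsigned long stab_rr;			/* round-robin pointer (PACASTABRR) */
	unsigned long esid[SLB_NUM_ENTRIES];	/* ESID currently in each slot */
};

static unsigned long slb_castout_slot(struct slb_state *s, unsigned long stack_esid)
{
	unsigned long entry;

	do {
		entry = s->stab_rr;
		/* advance the round robin, never handing out bolted slot 0 */
		s->stab_rr = (entry + 1 < SLB_NUM_ENTRIES) ? entry + 1 : 1;
	} while (s->esid[entry] == stack_esid);	/* never cast out our kernel stack */

	return entry;
}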
parent 79c57724
@@ -646,12 +646,14 @@ fast_exception_return:
  */
 	.globl DataAccess_common
 DataAccess_common:
+BEGIN_FTR_SECTION
 	mfspr	r22,DAR
 	srdi	r22,r22,60
 	cmpi	0,r22,0xc
 
 	/* Segment fault on a bolted segment. Go off and map that segment. */
 	beq-	.do_stab_bolted
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 stab_bolted_user_return:
 	EXCEPTION_PROLOG_COMMON
 	ld	r3,_DSISR(r1)
@@ -661,10 +663,12 @@ stab_bolted_user_return:
 	rlwinm	r4,r3,32-23,29,29	/* DSISR_STORE -> _PAGE_RW */
 	ld	r3,_DAR(r1)		/* into the hash table */
 
+BEGIN_FTR_SECTION
 	beq+	2f			/* If so handle it */
 	li	r4,0x300		/* Trap number */
 	bl	.do_stab_SI
 	b	1f
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 
 2:	li	r5,0x300
 	bl	.do_hash_page_DSI	/* Try to handle as hpte fault */
@@ -690,7 +694,7 @@ DataAccessSLB_common:
 	EXCEPTION_PROLOG_COMMON
 	ld	r3,_DAR(r1)
 	li	r4,0x380		/* Exception vector */
-	bl	.ste_allocate
+	bl	.slb_allocate
 	or.	r3,r3,r3		/* Check return code */
 	beq	fast_exception_return	/* Return if we succeeded */
 	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -705,12 +709,14 @@ DataAccessSLB_common:
 InstructionAccess_common:
 	EXCEPTION_PROLOG_COMMON
 
+BEGIN_FTR_SECTION
 	andis.	r0,r23,0x0020		/* no ste found? */
 	beq+	2f
 	mr	r3,r22			/* SRR0 at interrupt */
 	li	r4,0x400		/* Trap number */
 	bl	.do_stab_SI
 	b	1f
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 
 2:	mr	r3,r22
 	li	r5,0x400
@@ -730,7 +736,7 @@ InstructionAccessSLB_common:
 	EXCEPTION_PROLOG_COMMON
 	mr	r3,r22			/* SRR0 = NIA */
 	li	r4,0x480		/* Exception vector */
-	bl	.ste_allocate
+	bl	.slb_allocate
 	or.	r3,r3,r3		/* Check return code */
 	beq+	fast_exception_return	/* Return if we succeeded */
@@ -1006,48 +1012,27 @@ _GLOBAL(do_stab_bolted)
  * r20 - r23, SRR0 and SRR1 are saved in the exception frame.
  * We assume we aren't going to take any exceptions during this procedure.
  */
+/* XXX note fix masking in get_kernel_vsid to match */
 _GLOBAL(do_slb_bolted)
 	stw	r23,EX_CCR(r21)		/* save CR in exc. frame */
 
-	/* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
-	mfspr	r21,DAR
-	rldicl	r20,r21,36,32		/* Permits a full 32b of ESID */
-	rldicr	r20,r20,15,48
-	rldicl	r21,r21,4,60
-	or	r20,r20,r21
-
-	li	r21,9			/* VSID_RANDOMIZER */
-	sldi	r21,r21,32
-	oris	r21,r21,58231
-	ori	r21,r21,39831
-
-	mulld	r20,r20,r21
-	clrldi	r20,r20,28		/* r20 = vsid */
-
-	/* Search the SLB for a free entry */
-	li	r22,1
-1:
-	slbmfee	r23,r22
-	rldicl	r23,r23,37,63
-	cmpwi	r23,0
-	beq	4f			/* Found an invalid entry */
-
-	addi	r22,r22,1
-	cmpldi	r22,64
-	blt	1b
-
-	/* No free entry - just take the next entry, round-robin */
-	/* XXX we should get the number of SLB entries from the naca */
+	/*
+	 * We take the next entry, round robin. Previously we tried
+	 * to find a free slot first but that took too long. Unfortunately
+	 * we dont have any LRU information to help us choose a slot.
+	 */
+
+	/* r20 = paca */
+	/* use a cpu feature mask if we ever change our slb size */
 SLB_NUM_ENTRIES = 64
-2:	mfspr	r21,SPRG3
-	ld	r22,PACASTABRR(r21)
-	addi	r23,r22,1
-	cmpdi	r23,SLB_NUM_ENTRIES
-	blt	3f
-	li	r23,1
-3:	std	r23,PACASTABRR(r21)
+1:	ld	r22,PACASTABRR(r20)
+	addi	r21,r22,1
+	cmpdi	r21,SLB_NUM_ENTRIES
+	blt+	2f
+	li	r21,1			/* dont touch bolted slot 0 */
+2:	std	r21,PACASTABRR(r20)
 
-	/* r20 = vsid, r22 = entry */
+	/* r20 = paca, r22 = entry */
 
 	/*
 	 * Never cast out the segment for our kernel stack. Since we
@@ -1056,48 +1041,86 @@ SLB_NUM_ENTRIES = 64
 	 * which gets invalidated due to a tlbie from another cpu at a
 	 * non recoverable point (after setting srr0/1) - Anton
 	 */
-	slbmfee	r23,r22
-	srdi	r23,r23,28
+	slbmfee	r21,r22
+	srdi	r21,r21,27
 
 	/*
 	 * This is incorrect (r1 is not the kernel stack) if we entered
 	 * from userspace but there is no critical window from userspace
 	 * so this should be OK. Also if we cast out the userspace stack
 	 * segment while in userspace we will fault it straight back in.
	 */
-	srdi	r21,r1,28
-	cmpd	r21,r23
-	beq-	2b
+	srdi	r23,r1,27
+	ori	r23,r23,1
+	cmpd	r23,r21
+	beq-	1b
 
-	/* Put together the vsid portion of the entry. */
-4:	li	r21,0
-	rldimi	r21,r20,12,0
-	ori	r20,r21,1024
-	ori	r20,r20,128		/* set class bit for kernel region */
-#ifndef CONFIG_PPC_ISERIES
-	ori	r20,r20,256		/* map kernel region with large ptes */
-#endif
-
-	/* Put together the esid portion of the entry. */
-	mfspr	r21,DAR			/* Get the new esid */
-	rldicl	r21,r21,36,28		/* Permits a full 36b of ESID */
-	li	r23,0
-	rldimi	r23,r21,28,0		/* Insert esid */
-	oris	r21,r23,2048		/* valid bit */
-	rldimi	r21,r22,0,52		/* Insert entry */
+	/* r20 = paca, r22 = entry */
+
+	/* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
+	mfspr	r21,DAR
+	rldicl	r23,r21,36,51
+	sldi	r23,r23,15
+	srdi	r21,r21,60
+	or	r23,r23,r21
+
+	/* VSID_RANDOMIZER */
+	li	r21,9
+	sldi	r21,r21,32
+	oris	r21,r21,58231
+	ori	r21,r21,39831
+
+	/* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */
+	mulld	r23,r23,r21
+	clrldi	r23,r23,28
+
+	/* r20 = paca, r22 = entry, r23 = vsid */
+
+	/* Put together slb word1 */
+	sldi	r23,r23,12
+
+BEGIN_FTR_SECTION
+	/* set kp and c bits */
+	ori	r23,r23,0x480
+END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+BEGIN_FTR_SECTION
+	/* set kp, l and c bits */
+	ori	r23,r23,0x580
+END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
+
+	/* r20 = paca, r22 = entry, r23 = slb word1 */
+
+	/* Put together slb word0 */
+	mfspr	r21,DAR
+	rldicr	r21,r21,0,35		/* get the new esid */
+	oris	r21,r21,2048		/* set valid bit */
+	rldimi	r21,r22,0,52		/* insert entry */
+
+	/* r20 = paca, r21 = slb word0, r23 = slb word1 */
 
 	/*
 	 * No need for an isync before or after this slbmte. The exception
 	 * we enter with and the rfid we exit with are context synchronizing.
 	 */
-	slbmte	r20,r21
+	slbmte	r23,r21
 
 	/* All done -- return from exception. */
-	mfsprg	r20,3			/* Load the PACA pointer */
 	ld	r21,PACAEXCSP(r20)	/* Get the exception frame pointer */
 	addi	r21,r21,EXC_FRAME_SIZE
 	lwz	r23,EX_CCR(r21)		/* get saved CR */
 	/* note that this is almost identical to maskable_exception_exit */
-	mtcr	r23			/* restore CR */
+	/*
+	 * Until everyone updates binutils hardwire the POWER4 optimised
+	 * single field mtcrf
+	 */
+#if 0
+	.machine push
+	.machine "power4"
+	mtcrf	0x80,r23
+	.machine pop
+#else
+	.long 0x7ef80120
+#endif
 
 	mfmsr	r22
 	li	r23, MSR_RI
@@ -1107,10 +1130,10 @@ SLB_NUM_ENTRIES = 64
 	ld	r22,EX_SRR0(r21)	/* Get SRR0 from exc. frame */
 	ld	r23,EX_SRR1(r21)	/* Get SRR1 from exc. frame */
 	mtspr	SRR0,r22
 	mtspr	SRR1,r23
 
 	ld	r22,EX_R22(r21)		/* restore r22 and r23 */
 	ld	r23,EX_R23(r21)
-	mfspr	r20,SPRG2
+	ld	r20,EX_R20(r21)
 	mfspr	r21,SPRG1
 	rfid
......
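
As a cross-check on the assembly above, here is the VSID computation from the
new do_slb_bolted rendered as C.  The constant is simply the value built by
the li/sldi/oris/ori sequence and the mask matches the clrldi that keeps 36
bits; the macro and function names are mine, not taken from the kernel
headers.

#include <stdint.h>

#define VSID_RANDOMIZER 0x9e3779b97ULL		/* (9 << 32) | (58231 << 16) | 39831 */
#define VSID_MASK       ((1ULL << 36) - 1)	/* clrldi rX,rX,28 keeps the low 36 bits */

static uint64_t bolted_vsid(uint64_t ea)
{
	/* ordinal = (((ea >> 28) & 0x1fff) << 15) | (ea >> 60), as the comment says */
	uint64_t ordinal = (((ea >> 28) & 0x1fff) << 15) | (ea >> 60);

	/* mulld wraps modulo 2^64, which unsigned multiplication also does */
	return (ordinal * VSID_RANDOMIZER) & VSID_MASK;
}

On a related note, the hardwired ".long 0x7ef80120" in the exception exit path
decodes to the POWER4 single-field form "mtcrf 0x80,r23" (primary opcode 31,
RS = 23, the single-field bit set, FXM = 0x80, extended opcode 144), which is
exactly what the #if 0 block spells out for the day binutils accepts the
mnemonic.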
@@ -41,7 +41,6 @@ struct systemcfg *systemcfg;
 	.xStab_data = {							\
 		.real = (asrr),		/* Real pointer to segment table */ \
 		.virt = (asrv),		/* Virt pointer to segment table */ \
-		.next_round_robin = 1	/* Round robin index */		\
 	},								\
 	.lpQueuePtr = (lpq),		/* &xItLpQueue, */		\
 	/* .xRtas = {							\
......
@@ -151,7 +151,31 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	local_irq_save(flags);
 	last = _switch(old_thread, new_thread);
+
+	/*
+	 * force our kernel stack out of the ERAT and SLB, this is to
+	 * avoid the race where we it hangs around in the ERAT but not the
+	 * SLB and the ERAT gets invalidated at just the wrong moment by
+	 * another CPU doing a tlbie.
+	 *
+	 * We definitely dont want to flush our bolted segment, so check
+	 * for that first.
+	 */
+	if ((cur_cpu_spec->cpu_features & CPU_FTR_SLB) &&
+	    GET_ESID((unsigned long)_get_SP()) != GET_ESID(PAGE_OFFSET)) {
+		union {
+			unsigned long word0;
+			slb_dword0 data;
+		} esid_data;
+
+		esid_data.word0 = 0;
+		/* class bit is in valid field for slbie instruction */
+		esid_data.data.v = 1;
+		esid_data.data.esid = GET_ESID((unsigned long)_get_SP());
+		asm volatile("isync; slbie %0; isync" : : "r" (esid_data));
+	}
+
 	local_irq_restore(flags);
 
 	return last;
 }
......
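
Purely as an illustration of what the esid_data union above ends up passing to
slbie: assuming slb_dword0 places the ESID in the upper 36 bits and its v bit
at bit 27 (the position slbie treats as the class bit, hence the comment in
the patch), the operand could be written with plain shifts as below.  The
28/27 bit positions and the helper name are my assumptions, not something the
patch states.

/* sketch only: bit positions assumed from the architected SLB entry layout */
static inline unsigned long stack_slbie_operand(unsigned long sp)
{
	unsigned long esid = sp >> 28;		/* GET_ESID() of the stack pointer */

	/*
	 * Kernel entries are created with the class (C) bit set, so the
	 * invalidate must carry class = 1 to match.
	 */
	return (esid << 28) | (1UL << 27);
}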
@@ -135,10 +135,17 @@ extern firmware_feature_t firmware_features_table[];
 #define COMMON_USER_PPC64	(PPC_FEATURE_32 | PPC_FEATURE_64 | \
 				 PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU)
 
-#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_SLB | CPU_FTR_16M_PAGE | \
+#define CPU_FTR_PPCAS_ARCH_V2_BASE (CPU_FTR_SLB | \
 				 CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
 				 CPU_FTR_NODSISRALIGN)
+
+/* iSeries doesn't support large pages */
+#ifdef CONFIG_PPC_ISERIES
+#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_PPCAS_ARCH_V2_BASE)
+#else
+#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE)
+#endif
 
 #define COMMON_PPC64_FW	(0)
 #endif
......
@@ -27,14 +27,6 @@ typedef unsigned long mm_context_t;
 #define CONTEXT_LOW_HPAGES	0
 #endif
 
-/*
- * Define the size of the cache used for segment table entries.  The first
- * entry is used as a cache pointer, therefore the actual number of entries
- * stored is one less than defined here.  Do not change this value without
- * considering the impact it will have on the layout of the paca in paca.h.
- */
-#define STAB_CACHE_SIZE 16
-
 /*
  * Hardware Segment Lookaside Buffer Entry
  * This structure has been padded out to two 64b doublewords (actual SLBE's are
......
@@ -139,6 +139,7 @@ destroy_context(struct mm_struct *mm)
 }
 
 extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm);
+extern void flush_slb(struct task_struct *tsk, struct mm_struct *mm);
 
 /*
  * switch_mm is the entry point called from the architecture independent
@@ -154,7 +155,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	: : );
 #endif /* CONFIG_ALTIVEC */
-	flush_stab(tsk, next);
+
+	/* No need to flush userspace segments if the mm doesnt change */
+	if (prev == next)
+		return;
+
+	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB)
+		flush_slb(tsk, next);
+	else
+		flush_stab(tsk, next);
+
 	cpu_set(smp_processor_id(), next->cpu_vm_mask);
 }
......
@@ -63,20 +63,15 @@ struct paca_struct {
 	u16 xPacaIndex;			/* Logical processor number		0x18 */
 	u16 xHwProcNum;			/* Physical processor number		0x1A */
 	u32 default_decr;		/* Default decrementer value		0x1c */
-	u64 unused1;
-	u64 xKsave;			/* Saved Kernel stack addr or zero	0x28 */
-	u64 pvr;			/* Processor version register		0x30 */
-	u8 *exception_sp;		/*					0x38 */
-
-	struct ItLpQueue *lpQueuePtr;	/* LpQueue handled by this processor	0x40 */
-	u64 xTOC;			/* Kernel TOC address			0x48 */
-	STAB xStab_data;		/* Segment table information		0x50,0x58,0x60 */
-	u8 xSegments[STAB_CACHE_SIZE];	/* Cache of used stab entries		0x68,0x70 */
-	u8 xProcEnabled;		/* 1=soft enabled			0x78 */
-	u8 unused2;
-	u8 prof_enabled;		/* 1=iSeries profiling enabled		0x7A */
-	u8 stab_cache_pointer;
-	u8 resv1[4];			/* 0x7B-0x7F */
+	u64 xKsave;			/* Saved Kernel stack addr or zero	0x20 */
+	u64 pvr;			/* Processor version register		0x28 */
+	struct ItLpQueue *lpQueuePtr;	/* LpQueue handled by this processor	0x30 */
+	u64 xTOC;			/* Kernel TOC address			0x38 */
+	STAB xStab_data;		/* Segment table information		0x40,0x48,0x50 */
+	u8 *exception_sp;		/*					0x58 */
+	u8 xProcEnabled;		/*					0x59 */
+	u8 prof_enabled;		/* 1=iSeries profiling enabled		0x60 */
+	u8 resv1[30];			/*					0x61-0x7F */
 
 /*=====================================================================================
  * CACHE_LINE_2			0x0080 - 0x00FF
......