Commit a7159a87, authored by Anthony Yznaga, committed by David S. Miller

sparc64: speed up etrap/rtrap on NG2 and later processors

For many sun4v processor types, reading or writing a privileged register
has a latency of 40 to 70 cycles.  Use a combination of the low-latency
allclean, otherw, normalw, and nop instructions in etrap and rtrap to
replace 2 rdpr and 5 wrpr instructions and improve etrap/rtrap
performance.  allclean, otherw, and normalw are available on NG2 and
later processors.

The average ticks to execute the flush windows trap ("ta 0x3") with and
without this patch on select platforms:

 CPU            Not patched     Patched    % Latency Change

 NG2            1762            1558            -11.58
 NG4            3619            3204            -11.47
 M7             3015            2624            -12.97
 SPARC64-X      829             770              -7.12
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5bd0ea91
...@@ -73,6 +73,8 @@ struct sun4v_1insn_patch_entry { ...@@ -73,6 +73,8 @@ struct sun4v_1insn_patch_entry {
}; };
extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch, extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
__sun4v_1insn_patch_end; __sun4v_1insn_patch_end;
extern struct sun4v_1insn_patch_entry __fast_win_ctrl_1insn_patch,
__fast_win_ctrl_1insn_patch_end;
struct sun4v_2insn_patch_entry { struct sun4v_2insn_patch_entry {
unsigned int addr; unsigned int addr;
......
...@@ -38,7 +38,11 @@ etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1) ...@@ -38,7 +38,11 @@ etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1)
or %g1, %g3, %g1 or %g1, %g3, %g1
bne,pn %xcc, 1f bne,pn %xcc, 1f
sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2 sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
wrpr %g0, 7, %cleanwin 661: wrpr %g0, 7, %cleanwin
.section .fast_win_ctrl_1insn_patch, "ax"
.word 661b
.word 0x85880000 ! allclean
.previous
sethi %hi(TASK_REGOFF), %g2 sethi %hi(TASK_REGOFF), %g2
sethi %hi(TSTATE_PEF), %g3 sethi %hi(TSTATE_PEF), %g3
...@@ -88,16 +92,30 @@ etrap_save: save %g2, -STACK_BIAS, %sp ...@@ -88,16 +92,30 @@ etrap_save: save %g2, -STACK_BIAS, %sp
bne,pn %xcc, 3f bne,pn %xcc, 3f
mov PRIMARY_CONTEXT, %l4 mov PRIMARY_CONTEXT, %l4
rdpr %canrestore, %g3 661: rdpr %canrestore, %g3
.section .fast_win_ctrl_1insn_patch, "ax"
.word 661b
nop
.previous
rdpr %wstate, %g2 rdpr %wstate, %g2
wrpr %g0, 0, %canrestore 661: wrpr %g0, 0, %canrestore
.section .fast_win_ctrl_1insn_patch, "ax"
.word 661b
nop
.previous
sll %g2, 3, %g2 sll %g2, 3, %g2
/* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR. */ /* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR. */
mov 1, %l5 mov 1, %l5
sth %l5, [%l6 + TI_SYS_NOERROR] sth %l5, [%l6 + TI_SYS_NOERROR]
wrpr %g3, 0, %otherwin 661: wrpr %g3, 0, %otherwin
.section .fast_win_ctrl_1insn_patch, "ax"
.word 661b
.word 0x87880000 ! otherw
.previous
wrpr %g2, 0, %wstate wrpr %g2, 0, %wstate
sethi %hi(sparc64_kern_pri_context), %g2 sethi %hi(sparc64_kern_pri_context), %g2
ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3 ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
......
...@@ -224,10 +224,19 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 ...@@ -224,10 +224,19 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
rdpr %otherwin, %l2 rdpr %otherwin, %l2
srl %l1, 3, %l1 srl %l1, 3, %l1
wrpr %l2, %g0, %canrestore 661: wrpr %l2, %g0, %canrestore
.section .fast_win_ctrl_1insn_patch, "ax"
.word 661b
.word 0x89880000 ! normalw
.previous
wrpr %l1, %g0, %wstate wrpr %l1, %g0, %wstate
brnz,pt %l2, user_rtt_restore brnz,pt %l2, user_rtt_restore
wrpr %g0, %g0, %otherwin 661: wrpr %g0, %g0, %otherwin
.section .fast_win_ctrl_1insn_patch, "ax"
.word 661b
nop
.previous
ldx [%g6 + TI_FLAGS], %g3 ldx [%g6 + TI_FLAGS], %g3
wr %g0, ASI_AIUP, %asi wr %g0, ASI_AIUP, %asi
......
...@@ -300,6 +300,11 @@ static void __init sun4v_patch(void) ...@@ -300,6 +300,11 @@ static void __init sun4v_patch(void)
break; break;
} }
if (sun4v_chip_type != SUN4V_CHIP_NIAGARA1) {
sun4v_patch_1insn_range(&__fast_win_ctrl_1insn_patch,
&__fast_win_ctrl_1insn_patch_end);
}
sun4v_hvapi_init(); sun4v_hvapi_init();
} }
......
...@@ -159,6 +159,11 @@ SECTIONS ...@@ -159,6 +159,11 @@ SECTIONS
*(.pud_huge_patch) *(.pud_huge_patch)
__pud_huge_patch_end = .; __pud_huge_patch_end = .;
} }
.fast_win_ctrl_1insn_patch : {
__fast_win_ctrl_1insn_patch = .;
*(.fast_win_ctrl_1insn_patch)
__fast_win_ctrl_1insn_patch_end = .;
}
PERCPU_SECTION(SMP_CACHE_BYTES) PERCPU_SECTION(SMP_CACHE_BYTES)
#ifdef CONFIG_JUMP_LABEL #ifdef CONFIG_JUMP_LABEL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment