Commit ba9ccbcf authored by David Mosberger's avatar David Mosberger

ia64: Finish the fsyscall support (finally!). Now fsyscall stubs will

	run faster than break-based syscall stubs, even if there is
	no light-weight syscall handler.
	Adds a new boot command-line option "nolwsys" which can be used
	to turn off light-weight system call handlers.  Good for
	performance measurement and (potentially) for debugging.
parent ce2070ec
...@@ -1464,3 +1464,6 @@ sys_call_table: ...@@ -1464,3 +1464,6 @@ sys_call_table:
data8 ia64_ni_syscall data8 ia64_ni_syscall
data8 ia64_ni_syscall data8 ia64_ni_syscall
data8 ia64_ni_syscall data8 ia64_ni_syscall
data8 ia64_ni_syscall
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
This diff is collapsed.
...@@ -11,12 +11,10 @@ ...@@ -11,12 +11,10 @@
#include <asm/sigcontext.h> #include <asm/sigcontext.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/page.h>
.section .text.gate, "ax" .section .text.gate, "ax"
.start_gate: .start_gate:
#if CONFIG_FSYS #if CONFIG_FSYS
#include <asm/errno.h> #include <asm/errno.h>
...@@ -49,6 +47,7 @@ END(syscall_via_break) ...@@ -49,6 +47,7 @@ END(syscall_via_break)
* all other "scratch" registers: undefined * all other "scratch" registers: undefined
* all "preserved" registers: same as on entry * all "preserved" registers: same as on entry
*/ */
GLOBAL_ENTRY(syscall_via_epc) GLOBAL_ENTRY(syscall_via_epc)
.prologue .prologue
.altrp b6 .altrp b6
...@@ -65,19 +64,38 @@ GLOBAL_ENTRY(syscall_via_epc) ...@@ -65,19 +64,38 @@ GLOBAL_ENTRY(syscall_via_epc)
} }
;; ;;
rsm psr.be rsm psr.be
movl r18=fsyscall_table movl r14=fsyscall_table
mov r16=IA64_KR(CURRENT) mov r16=IA64_KR(CURRENT) // 12 cycle read latency
mov r19=255 mov r19=NR_syscalls-1
;;
shladd r18=r17,3,r18
cmp.geu p6,p0=r19,r17 // (syscall > 0 && syscall <= 1024+255)?
;; ;;
shladd r18=r17,3,r14
srlz.d // ensure little-endian byteorder is in effect srlz.d // ensure little-endian byteorder is in effect
cmp.ne p8,p0=r0,r0 // p8 <- FALSE
/* Note: if r17 is a NaT, p6 will be set to zero. */
cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
;;
(p6) ld8 r18=[r18] (p6) ld8 r18=[r18]
mov r29=psr // read psr (12 cyc load latency)
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;; ;;
(p6) mov b7=r18 (p6) mov b7=r18
(p6) tbit.z p8,p0=r18,0
(p8) br.dptk.many b7
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
#if 1/*def CONFIG_ITANIUM*/
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
(p6) br.sptk.many b7 (p6) br.sptk.many b7
#else
/* We can't do this until gate is a proper ELF DSO. */
(p6) brl.cond.sptk fsys_bubble_down
#endif
mov r10=-1 mov r10=-1
mov r8=ENOSYS mov r8=ENOSYS
...@@ -85,24 +103,6 @@ GLOBAL_ENTRY(syscall_via_epc) ...@@ -85,24 +103,6 @@ GLOBAL_ENTRY(syscall_via_epc)
br.ret.sptk.many b6 br.ret.sptk.many b6
END(syscall_via_epc) END(syscall_via_epc)
#if 0
GLOBAL_ENTRY(fsys_fallback_syscall)
/*
 * It would be better/faster to do the SAVE_MIN magic directly here, but for now
* we simply fall back on doing a system-call via break. Good enough
* to get started. (Note: we have to do this through the gate page again, since
* the br.ret will switch us back to user-level privilege.)
*
* XXX Move this back to fsys.S after changing it over to avoid break 0x100000.
*/
movl r2=(syscall_via_break - .start_gate) + GATE_ADDR
;;
MCKINLEY_E9_WORKAROUND
mov b7=r2
br.ret.sptk.many b7
END(fsys_fallback_syscall)
#endif
#endif /* CONFIG_FSYS */ #endif /* CONFIG_FSYS */
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) # define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
......
...@@ -637,7 +637,6 @@ END(daccess_bit) ...@@ -637,7 +637,6 @@ END(daccess_bit)
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
ENTRY(break_fault) ENTRY(break_fault)
.global ia64_enter_syscall
/* /*
* The streamlined system call entry/exit paths only save/restore the initial part * The streamlined system call entry/exit paths only save/restore the initial part
* of pt_regs. This implies that the callers of system-calls must adhere to the * of pt_regs. This implies that the callers of system-calls must adhere to the
...@@ -654,7 +653,7 @@ ENTRY(break_fault) ...@@ -654,7 +653,7 @@ ENTRY(break_fault)
* to prevent leaking bits from kernel to user level. * to prevent leaking bits from kernel to user level.
*/ */
DBG_FAULT(11) DBG_FAULT(11)
mov r16=IA64_KR(CURRENT) // r16 = current (physical); 12 cycle read lat. mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
mov r17=cr.iim mov r17=cr.iim
mov r18=__IA64_BREAK_SYSCALL mov r18=__IA64_BREAK_SYSCALL
mov r21=ar.fpsr mov r21=ar.fpsr
...@@ -673,7 +672,7 @@ ENTRY(break_fault) ...@@ -673,7 +672,7 @@ ENTRY(break_fault)
;; ;;
ld1 r17=[r16] // load current->thread.on_ustack flag ld1 r17=[r16] // load current->thread.on_ustack flag
st1 [r16]=r0 // clear current->thread.on_ustack flag st1 [r16]=r0 // clear current->thread.on_ustack flag
adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
;; ;;
invala invala
...@@ -682,6 +681,7 @@ ENTRY(break_fault) ...@@ -682,6 +681,7 @@ ENTRY(break_fault)
extr.u r8=r29,41,2 // extract ei field from cr.ipsr extr.u r8=r29,41,2 // extract ei field from cr.ipsr
;; ;;
cmp.eq p6,p7=2,r8 // isr.ei==2? cmp.eq p6,p7=2,r8 // isr.ei==2?
mov r2=r1 // setup r2 for ia64_syscall_setup
;; ;;
(p6) mov r8=0 // clear ei to 0 (p6) mov r8=0 // clear ei to 0
(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped (p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
...@@ -691,19 +691,25 @@ ENTRY(break_fault) ...@@ -691,19 +691,25 @@ ENTRY(break_fault)
dep r29=r8,r29,41,2 // insert new ei into cr.ipsr dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
;; ;;
ia64_enter_syscall:
// switch from user to kernel RBS: // switch from user to kernel RBS:
MINSTATE_START_SAVE_MIN_VIRT MINSTATE_START_SAVE_MIN_VIRT
br.call.sptk.many b7=setup_syscall_via_break br.call.sptk.many b7=ia64_syscall_setup
;; ;;
mov r3=255 MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS
;;
srlz.i // guarantee that interruption collection is on
;;
(p15) ssm psr.i // restore psr.i
;;
mov r3=NR_syscalls - 1
movl r16=sys_call_table movl r16=sys_call_table
adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
movl r2=ia64_ret_from_syscall movl r2=ia64_ret_from_syscall
;; ;;
shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
mov rp=r2 // set the real return addr mov rp=r2 // set the real return addr
;; ;;
(p7) add r20=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall (p7) add r20=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall
...@@ -764,11 +770,44 @@ END(interrupt) ...@@ -764,11 +770,44 @@ END(interrupt)
* fault ever gets "unreserved", simply moved the following code to a more * fault ever gets "unreserved", simply moved the following code to a more
* suitable spot... * suitable spot...
* *
* setup_syscall_via_break() is a separate subroutine so that it can * ia64_syscall_setup() is a separate subroutine so that it can
* allocate stacked registers so it can safely demine any * allocate stacked registers so it can safely demine any
* potential NaT values from the input registers. * potential NaT values from the input registers.
*
* On entry:
* - executing on bank 0 or bank 1 register set (doesn't matter)
* - r1: stack pointer
* - r2: current task pointer
* - r3: preserved
* - r11: original contents (saved ar.pfs to be saved)
* - r12: original contents (sp to be saved)
* - r13: original contents (tp to be saved)
* - r15: original contents (syscall # to be saved)
* - r18: saved bsp (after switching to kernel stack)
* - r19: saved b6
* - r20: saved r1 (gp)
* - r21: saved ar.fpsr
* - r22: kernel's register backing store base (krbs_base)
* - r23: saved ar.bspstore
* - r24: saved ar.rnat
* - r25: saved ar.unat
* - r26: saved ar.pfs
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
* - executing on bank 1 registers
* - psr.ic enabled, interrupts restored
* - r1: kernel's gp
* - r3: preserved (same as on entry)
* - r12: points to kernel stack
* - r13: points to current task
* - p15: TRUE if interrupts need to be re-enabled
* - ar.fpsr: set to kernel settings
*/ */
ENTRY(setup_syscall_via_break) GLOBAL_ENTRY(ia64_syscall_setup)
#if PT(B6) != 0 #if PT(B6) != 0
# error This code assumes that b6 is the first field in pt_regs. # error This code assumes that b6 is the first field in pt_regs.
#endif #endif
...@@ -786,7 +825,7 @@ ENTRY(setup_syscall_via_break) ...@@ -786,7 +825,7 @@ ENTRY(setup_syscall_via_break)
;; ;;
st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
mov r28=b0 mov r28=b0 // save b0 (2 cyc)
(p8) mov in0=-1 (p8) mov in0=-1
;; ;;
...@@ -836,23 +875,19 @@ ENTRY(setup_syscall_via_break) ...@@ -836,23 +875,19 @@ ENTRY(setup_syscall_via_break)
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
(p14) mov in6=-1 (p14) mov in6=-1
mov r13=IA64_KR(CURRENT) // establish `current' mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer movl r1=__gp // establish kernel global pointer
;; ;;
(p8) mov in7=-1 (p8) mov in7=-1
tnat.nz p9,p0=r15 tnat.nz p9,p0=r15
MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
movl r17=FPSR_DEFAULT movl r17=FPSR_DEFAULT
;; ;;
srlz.i // guarantee that interruption collection is on
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
(p9) mov r15=-1
(p15) ssm psr.i // restore psr.i
mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
(p9) mov r15=-1
br.ret.sptk.many b7 br.ret.sptk.many b7
END(setup_syscall_via_break) END(ia64_syscall_setup)
.org ia64_ivt+0x3c00 .org ia64_ivt+0x3c00
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <asm/sal.h> #include <asm/sal.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/tlb.h> #include <asm/tlb.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
...@@ -569,6 +570,29 @@ count_reserved_pages (u64 start, u64 end, void *arg) ...@@ -569,6 +570,29 @@ count_reserved_pages (u64 start, u64 end, void *arg)
return 0; return 0;
} }
#ifdef CONFIG_FSYS
/*
* Boot command-line option "nolwsys" can be used to disable the use of any light-weight
* system call handler. When this option is in effect, all fsyscalls will end up bubbling
* down into the kernel and calling the normal (heavy-weight) syscall handler. This is
* useful for performance testing, but conceivably could also come in handy for debugging
* purposes.
*/
static int nolwsys;
/*
 * Handler for the "nolwsys" boot option.  The option takes no argument, so
 * `s` is unused; we simply record that light-weight system call handlers
 * should be disabled.  Returning 1 tells the boot-option parser that the
 * option was recognized and consumed.
 */
static int __init
nolwsys_setup (char *s)
{
nolwsys = 1;
return 1;
}
__setup("nolwsys", nolwsys_setup);
#endif /* CONFIG_FSYS */
void void
mem_init (void) mem_init (void)
{ {
...@@ -622,6 +646,25 @@ mem_init (void) ...@@ -622,6 +646,25 @@ mem_init (void)
if (num_pgt_pages > (u64) pgt_cache_water[1]) if (num_pgt_pages > (u64) pgt_cache_water[1])
pgt_cache_water[1] = num_pgt_pages; pgt_cache_water[1] = num_pgt_pages;
#ifdef CONFIG_FSYS
{
int i;
/*
 * For fsyscall entry points with no light-weight handler, use the ordinary
* (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
* code can tell them apart.
*/
for (i = 0; i < NR_syscalls; ++i) {
extern unsigned long fsyscall_table[NR_syscalls];
extern unsigned long sys_call_table[NR_syscalls];
if (!fsyscall_table[i] || nolwsys)
fsyscall_table[i] = sys_call_table[i] | 1;
}
}
#endif
/* install the gate page in the global page table: */ /* install the gate page in the global page table: */
put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR); put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR);
......
...@@ -247,6 +247,8 @@ ...@@ -247,6 +247,8 @@
#define __NR_sys_clock_getres 1255 #define __NR_sys_clock_getres 1255
#define __NR_sys_clock_nanosleep 1256 #define __NR_sys_clock_nanosleep 1256
#define NR_syscalls 256 /* length of syscall table */
#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr); extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment