Commit ba9ccbcf authored by David Mosberger's avatar David Mosberger

ia64: Finish the fsyscall support (finally!). Now fsyscall stubs will

	run faster than break-based syscall stubs, even if there is
	no light-weight syscall handler.
	Adds a new boot command-line option "nolwsys" which can be used
	to turn off light-weight system call handlers.  Good for
	performance measurement and (potentially) for debugging.
parent ce2070ec
......@@ -1464,3 +1464,6 @@ sys_call_table:
data8 ia64_ni_syscall
data8 ia64_ni_syscall
data8 ia64_ni_syscall
data8 ia64_ni_syscall
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
......@@ -16,6 +16,7 @@
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/unistd.h>
#include "entry.h"
......@@ -41,59 +42,10 @@
* ar.pfs = previous frame-state (as passed into the fsyscall handler)
*/
#if 1
ENTRY(fsys_fallback_syscall)
/*
* This is called for system calls which are entered via epc, but don't
* have a light-weight handler. We need to bubble down into the kernel,
* and that requires setting up a minimal pt_regs structure, and initializing
* the CPU state more or less as if an interruption had occurred. To make
* syscall-restarts work, we setup pt_regs such that cr_iip points to the
* second instruction in syscall_via_break. Decrementing the IP hence will
* restart the syscall via break and not decrementing IP will return us
* to the caller, as usual.
*
* Register use in this routine (as visible in the code below):
* - r1: loaded with IA64_KR(CURRENT); used as base for the on_ustack flag address
* - r16: address of current->thread.on_ustack
* - r19: copy of b6
* - r20: copy of the caller's r1
* - r21/r25/r26/r27: copies of ar.fpsr/ar.unat/ar.pfs/ar.rsc
* - r28: value to be used as cr.iip (GATE_ADDR; see XXX note below)
* - r29: psr with PSR_ONE_BITS or'ed in
* - r31: copy of the predicate registers
* Control is then handed to ia64_enter_syscall.
*/
# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
| IA64_PSR_DT | IA64_PSR_RT)
/*
* Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
* to synthesize.
*/
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN)
mov r29=psr // r29 = current psr
movl r9=PSR_PRESERVED_BITS
mov r20=r1 // save caller's r1 in r20 before r1 is overwritten below
movl r8=PSR_ONE_BITS
;;
mov r1=IA64_KR(CURRENT) // r1 = current task; 12 cycle read lat.
and r9=r9,r29 // r9 = psr restricted to PSR_PRESERVED_BITS
or r29=r8,r29 // r29 = psr with PSR_ONE_BITS forced on
;;
mov psr.l=r9 // slam the door
mov r21=ar.fpsr // save ar.fpsr
mov r26=ar.pfs // save ar.pfs
mov r25=ar.unat // save ar.unat
mov r27=ar.rsc // save ar.rsc
mov r19=b6 // save b6
;;
srlz.i // ensure new psr.l has been established
movl r28=GATE_ADDR // cr.iip XXX fix me!! Should be: GATE_ADDR(syscall_via_break)
invala
mov r31=pr // save predicates
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r1 // r16 = &current->thread.on_ustack
;;
st1 [r16]=r0 // clear current->thread.on_ustack flag
cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
br.cond.sptk.many ia64_enter_syscall // continue on the common syscall entry path
END(fsys_fallback_syscall)
#endif
ENTRY(fsys_ni_syscall)
.prologue
.altrp b6
.body
mov r8=ENOSYS
mov r10=-1
MCKINLEY_E9_WORKAROUND
......@@ -101,6 +53,9 @@ ENTRY(fsys_ni_syscall)
END(fsys_ni_syscall)
ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
......@@ -116,6 +71,9 @@ ENTRY(fsys_getpid)
END(fsys_getpid)
ENTRY(fsys_getppid)
.prologue
.altrp b6
.body
add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
;;
ld8 r17=[r17] // r17 = current->group_leader
......@@ -161,6 +119,9 @@ ENTRY(fsys_getppid)
END(fsys_getppid)
ENTRY(fsys_set_tid_address)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
......@@ -200,6 +161,9 @@ END(fsys_set_tid_address)
*/
ENTRY(fsys_gettimeofday)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
movl r3=THIS_CPU(cpu_info)
......@@ -213,7 +177,7 @@ ENTRY(fsys_gettimeofday)
movl r19=xtime // xtime is a timespec struct
ld8 r10=[r10] // r10 <- __per_cpu_offset[0]
movl r21=cpu_info__per_cpu
movl r21=THIS_CPU(cpu_info)
;;
add r10=r21, r10 // r10 <- &cpu_data(time_keeper_id)
tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT
......@@ -368,262 +332,373 @@ EX(.fail, st8 [r9]=r3) // store them in the timeval struct
br.ret.spnt.many b6 // return with r8 set to EINVAL
END(fsys_gettimeofday)
ENTRY(fsys_fallback_syscall)
.prologue
.altrp b6
.body
/*
* We only get here from light-weight syscall handlers. Thus, we already
* know that r15 contains a valid syscall number. No need to re-check.
*
* This stub looks up the heavy-weight handler for syscall r15 in
* sys_call_table and loads the registers that fsys_bubble_down lists in
* its "On entry" comment (r18, r21, r26, r27, r29). There is no branch at
* the end: execution continues into the code that follows this routine.
*/
adds r17=-1024,r15 // r17 = syscall number - 1024 (table index)
movl r14=sys_call_table // r14 = base of sys_call_table
;;
shladd r18=r17,3,r14 // r18 = sys_call_table + 8*r17
;;
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
mov r29=psr // read psr (12 cyc load latency)
mov r27=ar.rsc // r27 = ar.rsc
mov r21=ar.fpsr // r21 = ar.fpsr
mov r26=ar.pfs // r26 = ar.pfs
END(fsys_fallback_syscall)
/* FALL THROUGH */
GLOBAL_ENTRY(fsys_bubble_down)
.prologue
.altrp b6
.body
/*
* We get here for syscalls that don't have a lightweight handler. For those, we
* need to bubble down into the kernel and that requires setting up a minimal
* pt_regs structure, and initializing the CPU state more or less as if an
* interruption had occurred. To make syscall-restarts work, we setup pt_regs
* such that cr_iip points to the second instruction in syscall_via_break.
* Decrementing the IP hence will restart the syscall via break and not
* decrementing IP will return us to the caller, as usual. Note that we preserve
* the value of psr.pp rather than initializing it from dcr.pp. This makes it
* possible to distinguish fsyscall execution from other privileged execution.
*
* On entry:
* - normal fsyscall handler register usage, except that we also have:
* - r18: address of syscall entry point
* - r21: ar.fpsr
* - r26: ar.pfs
* - r27: ar.rsc
* - r29: psr
*
* Outline of the code below:
* 1. Save ar.unat, ar.bspstore, pr, r1, b6 and ar.rnat into scratch registers
* and write psr.l with only PSR_PRESERVED_BITS kept.
* 2. After the srlz.i, compute the memory stack base in r1, clear the on_ustack
* flag and point ar.bspstore at the RBS base computed in r22.
* 3. Call ia64_syscall_setup, then branch to the handler in b6 with rp set to
* ia64_ret_from_syscall, or to ia64_trace_syscall when TIF_SYSCALL_TRACE
* is set in the thread flags.
*/
# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
| IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_RT | IA64_PSR_IC)
/*
* Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
* to synthesize.
*/
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN)
invala
movl r8=PSR_ONE_BITS
mov r25=ar.unat // save ar.unat (5 cyc)
movl r9=PSR_PRESERVED_BITS
mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
movl r28=GATE_ADDR // cr.iip XXX fix me!! Should be: GATE_ADDR(syscall_via_break)
;;
mov r23=ar.bspstore // save ar.bspstore (12 cyc)
mov r31=pr // save pr (2 cyc)
mov r20=r1 // save caller's gp in r20
;;
mov r2=r16 // copy current task addr to addl-addressable register
and r9=r9,r29 // r9 = psr restricted to PSR_PRESERVED_BITS
mov r19=b6 // save b6 (2 cyc)
;;
mov psr.l=r9 // slam the door (17 cyc to srlz.i)
or r29=r8,r29 // construct cr.ipsr value to save
addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
;;
mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
lfetch.fault.excl.nt1 [r22] // prefetch the line at the RBS base
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 // r16 = &current->thread.on_ustack
// ensure previous insn group is issued before we stall for srlz.i:
;;
srlz.i // ensure new psr.l has been established
/////////////////////////////////////////////////////////////////////////////
////////// from this point on, execution is not interruptible anymore
/////////////////////////////////////////////////////////////////////////////
addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
;;
st1 [r16]=r0 // clear current->thread.on_ustack flag
mov ar.bspstore=r22 // switch to kernel RBS
mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // r3 = address of the thread-info flags word
;;
ld4 r3=[r3] // r3 = current_thread_info()->flags
mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
br.call.sptk.many b7=ia64_syscall_setup // returns via b7 with r3 unchanged
;;
ssm psr.i // set psr.i
movl r2=ia64_ret_from_syscall
;;
mov rp=r2 // set the real return addr
tbit.z p8,p0=r3,TIF_SYSCALL_TRACE // p8 <- TRUE if TIF_SYSCALL_TRACE bit is clear
(p8) br.call.sptk.many b6=b6 // ignore this return addr
br.cond.sptk ia64_trace_syscall // reached only when p8 is FALSE (trace bit set)
END(fsys_bubble_down)
.rodata
.align 8
.globl fsyscall_table
data8 fsys_bubble_down
fsyscall_table:
data8 fsys_ni_syscall
data8 fsys_fallback_syscall // exit // 1025
data8 fsys_fallback_syscall // read
data8 fsys_fallback_syscall // write
data8 fsys_fallback_syscall // open
data8 fsys_fallback_syscall // close
data8 fsys_fallback_syscall // creat // 1030
data8 fsys_fallback_syscall // link
data8 fsys_fallback_syscall // unlink
data8 fsys_fallback_syscall // execve
data8 fsys_fallback_syscall // chdir
data8 fsys_fallback_syscall // fchdir // 1035
data8 fsys_fallback_syscall // utimes
data8 fsys_fallback_syscall // mknod
data8 fsys_fallback_syscall // chmod
data8 fsys_fallback_syscall // chown
data8 fsys_fallback_syscall // lseek // 1040
data8 0 // exit // 1025
data8 0 // read
data8 0 // write
data8 0 // open
data8 0 // close
data8 0 // creat // 1030
data8 0 // link
data8 0 // unlink
data8 0 // execve
data8 0 // chdir
data8 0 // fchdir // 1035
data8 0 // utimes
data8 0 // mknod
data8 0 // chmod
data8 0 // chown
data8 0 // lseek // 1040
data8 fsys_getpid // getpid
data8 fsys_getppid // getppid
data8 fsys_fallback_syscall // mount
data8 fsys_fallback_syscall // umount
data8 fsys_fallback_syscall // setuid // 1045
data8 fsys_fallback_syscall // getuid
data8 fsys_fallback_syscall // geteuid
data8 fsys_fallback_syscall // ptrace
data8 fsys_fallback_syscall // access
data8 fsys_fallback_syscall // sync // 1050
data8 fsys_fallback_syscall // fsync
data8 fsys_fallback_syscall // fdatasync
data8 fsys_fallback_syscall // kill
data8 fsys_fallback_syscall // rename
data8 fsys_fallback_syscall // mkdir // 1055
data8 fsys_fallback_syscall // rmdir
data8 fsys_fallback_syscall // dup
data8 fsys_fallback_syscall // pipe
data8 fsys_fallback_syscall // times
data8 fsys_fallback_syscall // brk // 1060
data8 fsys_fallback_syscall // setgid
data8 fsys_fallback_syscall // getgid
data8 fsys_fallback_syscall // getegid
data8 fsys_fallback_syscall // acct
data8 fsys_fallback_syscall // ioctl // 1065
data8 fsys_fallback_syscall // fcntl
data8 fsys_fallback_syscall // umask
data8 fsys_fallback_syscall // chroot
data8 fsys_fallback_syscall // ustat
data8 fsys_fallback_syscall // dup2 // 1070
data8 fsys_fallback_syscall // setreuid
data8 fsys_fallback_syscall // setregid
data8 fsys_fallback_syscall // getresuid
data8 fsys_fallback_syscall // setresuid
data8 fsys_fallback_syscall // getresgid // 1075
data8 fsys_fallback_syscall // setresgid
data8 fsys_fallback_syscall // getgroups
data8 fsys_fallback_syscall // setgroups
data8 fsys_fallback_syscall // getpgid
data8 fsys_fallback_syscall // setpgid // 1080
data8 fsys_fallback_syscall // setsid
data8 fsys_fallback_syscall // getsid
data8 fsys_fallback_syscall // sethostname
data8 fsys_fallback_syscall // setrlimit
data8 fsys_fallback_syscall // getrlimit // 1085
data8 fsys_fallback_syscall // getrusage
data8 0 // mount
data8 0 // umount
data8 0 // setuid // 1045
data8 0 // getuid
data8 0 // geteuid
data8 0 // ptrace
data8 0 // access
data8 0 // sync // 1050
data8 0 // fsync
data8 0 // fdatasync
data8 0 // kill
data8 0 // rename
data8 0 // mkdir // 1055
data8 0 // rmdir
data8 0 // dup
data8 0 // pipe
data8 0 // times
data8 0 // brk // 1060
data8 0 // setgid
data8 0 // getgid
data8 0 // getegid
data8 0 // acct
data8 0 // ioctl // 1065
data8 0 // fcntl
data8 0 // umask
data8 0 // chroot
data8 0 // ustat
data8 0 // dup2 // 1070
data8 0 // setreuid
data8 0 // setregid
data8 0 // getresuid
data8 0 // setresuid
data8 0 // getresgid // 1075
data8 0 // setresgid
data8 0 // getgroups
data8 0 // setgroups
data8 0 // getpgid
data8 0 // setpgid // 1080
data8 0 // setsid
data8 0 // getsid
data8 0 // sethostname
data8 0 // setrlimit
data8 0 // getrlimit // 1085
data8 0 // getrusage
data8 fsys_gettimeofday // gettimeofday
data8 fsys_fallback_syscall // settimeofday
data8 fsys_fallback_syscall // select
data8 fsys_fallback_syscall // poll // 1090
data8 fsys_fallback_syscall // symlink
data8 fsys_fallback_syscall // readlink
data8 fsys_fallback_syscall // uselib
data8 fsys_fallback_syscall // swapon
data8 fsys_fallback_syscall // swapoff // 1095
data8 fsys_fallback_syscall // reboot
data8 fsys_fallback_syscall // truncate
data8 fsys_fallback_syscall // ftruncate
data8 fsys_fallback_syscall // fchmod
data8 fsys_fallback_syscall // fchown // 1100
data8 fsys_fallback_syscall // getpriority
data8 fsys_fallback_syscall // setpriority
data8 fsys_fallback_syscall // statfs
data8 fsys_fallback_syscall // fstatfs
data8 fsys_fallback_syscall // gettid // 1105
data8 fsys_fallback_syscall // semget
data8 fsys_fallback_syscall // semop
data8 fsys_fallback_syscall // semctl
data8 fsys_fallback_syscall // msgget
data8 fsys_fallback_syscall // msgsnd // 1110
data8 fsys_fallback_syscall // msgrcv
data8 fsys_fallback_syscall // msgctl
data8 fsys_fallback_syscall // shmget
data8 fsys_fallback_syscall // shmat
data8 fsys_fallback_syscall // shmdt // 1115
data8 fsys_fallback_syscall // shmctl
data8 fsys_fallback_syscall // syslog
data8 fsys_fallback_syscall // setitimer
data8 fsys_fallback_syscall // getitimer
data8 fsys_fallback_syscall // 1120
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // vhangup
data8 fsys_fallback_syscall // lchown
data8 fsys_fallback_syscall // remap_file_pages // 1125
data8 fsys_fallback_syscall // wait4
data8 fsys_fallback_syscall // sysinfo
data8 fsys_fallback_syscall // clone
data8 fsys_fallback_syscall // setdomainname
data8 fsys_fallback_syscall // newuname // 1130
data8 fsys_fallback_syscall // adjtimex
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // init_module
data8 fsys_fallback_syscall // delete_module
data8 fsys_fallback_syscall // 1135
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // quotactl
data8 fsys_fallback_syscall // bdflush
data8 fsys_fallback_syscall // sysfs
data8 fsys_fallback_syscall // personality // 1140
data8 fsys_fallback_syscall // afs_syscall
data8 fsys_fallback_syscall // setfsuid
data8 fsys_fallback_syscall // setfsgid
data8 fsys_fallback_syscall // getdents
data8 fsys_fallback_syscall // flock // 1145
data8 fsys_fallback_syscall // readv
data8 fsys_fallback_syscall // writev
data8 fsys_fallback_syscall // pread64
data8 fsys_fallback_syscall // pwrite64
data8 fsys_fallback_syscall // sysctl // 1150
data8 fsys_fallback_syscall // mmap
data8 fsys_fallback_syscall // munmap
data8 fsys_fallback_syscall // mlock
data8 fsys_fallback_syscall // mlockall
data8 fsys_fallback_syscall // mprotect // 1155
data8 fsys_fallback_syscall // mremap
data8 fsys_fallback_syscall // msync
data8 fsys_fallback_syscall // munlock
data8 fsys_fallback_syscall // munlockall
data8 fsys_fallback_syscall // sched_getparam // 1160
data8 fsys_fallback_syscall // sched_setparam
data8 fsys_fallback_syscall // sched_getscheduler
data8 fsys_fallback_syscall // sched_setscheduler
data8 fsys_fallback_syscall // sched_yield
data8 fsys_fallback_syscall // sched_get_priority_max // 1165
data8 fsys_fallback_syscall // sched_get_priority_min
data8 fsys_fallback_syscall // sched_rr_get_interval
data8 fsys_fallback_syscall // nanosleep
data8 fsys_fallback_syscall // nfsservctl
data8 fsys_fallback_syscall // prctl // 1170
data8 fsys_fallback_syscall // getpagesize
data8 fsys_fallback_syscall // mmap2
data8 fsys_fallback_syscall // pciconfig_read
data8 fsys_fallback_syscall // pciconfig_write
data8 fsys_fallback_syscall // perfmonctl // 1175
data8 fsys_fallback_syscall // sigaltstack
data8 fsys_fallback_syscall // rt_sigaction
data8 fsys_fallback_syscall // rt_sigpending
data8 fsys_fallback_syscall // rt_sigprocmask
data8 fsys_fallback_syscall // rt_sigqueueinfo // 1180
data8 fsys_fallback_syscall // rt_sigreturn
data8 fsys_fallback_syscall // rt_sigsuspend
data8 fsys_fallback_syscall // rt_sigtimedwait
data8 fsys_fallback_syscall // getcwd
data8 fsys_fallback_syscall // capget // 1185
data8 fsys_fallback_syscall // capset
data8 fsys_fallback_syscall // sendfile
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // socket // 1190
data8 fsys_fallback_syscall // bind
data8 fsys_fallback_syscall // connect
data8 fsys_fallback_syscall // listen
data8 fsys_fallback_syscall // accept
data8 fsys_fallback_syscall // getsockname // 1195
data8 fsys_fallback_syscall // getpeername
data8 fsys_fallback_syscall // socketpair
data8 fsys_fallback_syscall // send
data8 fsys_fallback_syscall // sendto
data8 fsys_fallback_syscall // recv // 1200
data8 fsys_fallback_syscall // recvfrom
data8 fsys_fallback_syscall // shutdown
data8 fsys_fallback_syscall // setsockopt
data8 fsys_fallback_syscall // getsockopt
data8 fsys_fallback_syscall // sendmsg // 1205
data8 fsys_fallback_syscall // recvmsg
data8 fsys_fallback_syscall // pivot_root
data8 fsys_fallback_syscall // mincore
data8 fsys_fallback_syscall // madvise
data8 fsys_fallback_syscall // newstat // 1210
data8 fsys_fallback_syscall // newlstat
data8 fsys_fallback_syscall // newfstat
data8 fsys_fallback_syscall // clone2
data8 fsys_fallback_syscall // getdents64
data8 fsys_fallback_syscall // getunwind // 1215
data8 fsys_fallback_syscall // readahead
data8 fsys_fallback_syscall // setxattr
data8 fsys_fallback_syscall // lsetxattr
data8 fsys_fallback_syscall // fsetxattr
data8 fsys_fallback_syscall // getxattr // 1220
data8 fsys_fallback_syscall // lgetxattr
data8 fsys_fallback_syscall // fgetxattr
data8 fsys_fallback_syscall // listxattr
data8 fsys_fallback_syscall // llistxattr
data8 fsys_fallback_syscall // flistxattr // 1225
data8 fsys_fallback_syscall // removexattr
data8 fsys_fallback_syscall // lremovexattr
data8 fsys_fallback_syscall // fremovexattr
data8 fsys_fallback_syscall // tkill
data8 fsys_fallback_syscall // futex // 1230
data8 fsys_fallback_syscall // sched_setaffinity
data8 fsys_fallback_syscall // sched_getaffinity
data8 0 // settimeofday
data8 0 // select
data8 0 // poll // 1090
data8 0 // symlink
data8 0 // readlink
data8 0 // uselib
data8 0 // swapon
data8 0 // swapoff // 1095
data8 0 // reboot
data8 0 // truncate
data8 0 // ftruncate
data8 0 // fchmod
data8 0 // fchown // 1100
data8 0 // getpriority
data8 0 // setpriority
data8 0 // statfs
data8 0 // fstatfs
data8 0 // gettid // 1105
data8 0 // semget
data8 0 // semop
data8 0 // semctl
data8 0 // msgget
data8 0 // msgsnd // 1110
data8 0 // msgrcv
data8 0 // msgctl
data8 0 // shmget
data8 0 // shmat
data8 0 // shmdt // 1115
data8 0 // shmctl
data8 0 // syslog
data8 0 // setitimer
data8 0 // getitimer
data8 0 // 1120
data8 0
data8 0
data8 0 // vhangup
data8 0 // lchown
data8 0 // remap_file_pages // 1125
data8 0 // wait4
data8 0 // sysinfo
data8 0 // clone
data8 0 // setdomainname
data8 0 // newuname // 1130
data8 0 // adjtimex
data8 0
data8 0 // init_module
data8 0 // delete_module
data8 0 // 1135
data8 0
data8 0 // quotactl
data8 0 // bdflush
data8 0 // sysfs
data8 0 // personality // 1140
data8 0 // afs_syscall
data8 0 // setfsuid
data8 0 // setfsgid
data8 0 // getdents
data8 0 // flock // 1145
data8 0 // readv
data8 0 // writev
data8 0 // pread64
data8 0 // pwrite64
data8 0 // sysctl // 1150
data8 0 // mmap
data8 0 // munmap
data8 0 // mlock
data8 0 // mlockall
data8 0 // mprotect // 1155
data8 0 // mremap
data8 0 // msync
data8 0 // munlock
data8 0 // munlockall
data8 0 // sched_getparam // 1160
data8 0 // sched_setparam
data8 0 // sched_getscheduler
data8 0 // sched_setscheduler
data8 0 // sched_yield
data8 0 // sched_get_priority_max // 1165
data8 0 // sched_get_priority_min
data8 0 // sched_rr_get_interval
data8 0 // nanosleep
data8 0 // nfsservctl
data8 0 // prctl // 1170
data8 0 // getpagesize
data8 0 // mmap2
data8 0 // pciconfig_read
data8 0 // pciconfig_write
data8 0 // perfmonctl // 1175
data8 0 // sigaltstack
data8 0 // rt_sigaction
data8 0 // rt_sigpending
data8 0 // rt_sigprocmask
data8 0 // rt_sigqueueinfo // 1180
data8 0 // rt_sigreturn
data8 0 // rt_sigsuspend
data8 0 // rt_sigtimedwait
data8 0 // getcwd
data8 0 // capget // 1185
data8 0 // capset
data8 0 // sendfile
data8 0
data8 0
data8 0 // socket // 1190
data8 0 // bind
data8 0 // connect
data8 0 // listen
data8 0 // accept
data8 0 // getsockname // 1195
data8 0 // getpeername
data8 0 // socketpair
data8 0 // send
data8 0 // sendto
data8 0 // recv // 1200
data8 0 // recvfrom
data8 0 // shutdown
data8 0 // setsockopt
data8 0 // getsockopt
data8 0 // sendmsg // 1205
data8 0 // recvmsg
data8 0 // pivot_root
data8 0 // mincore
data8 0 // madvise
data8 0 // newstat // 1210
data8 0 // newlstat
data8 0 // newfstat
data8 0 // clone2
data8 0 // getdents64
data8 0 // getunwind // 1215
data8 0 // readahead
data8 0 // setxattr
data8 0 // lsetxattr
data8 0 // fsetxattr
data8 0 // getxattr // 1220
data8 0 // lgetxattr
data8 0 // fgetxattr
data8 0 // listxattr
data8 0 // llistxattr
data8 0 // flistxattr // 1225
data8 0 // removexattr
data8 0 // lremovexattr
data8 0 // fremovexattr
data8 0 // tkill
data8 0 // futex // 1230
data8 0 // sched_setaffinity
data8 0 // sched_getaffinity
data8 fsys_set_tid_address // set_tid_address
data8 fsys_fallback_syscall // unused
data8 fsys_fallback_syscall // unused // 1235
data8 fsys_fallback_syscall // exit_group
data8 fsys_fallback_syscall // lookup_dcookie
data8 fsys_fallback_syscall // io_setup
data8 fsys_fallback_syscall // io_destroy
data8 fsys_fallback_syscall // io_getevents // 1240
data8 fsys_fallback_syscall // io_submit
data8 fsys_fallback_syscall // io_cancel
data8 fsys_fallback_syscall // epoll_create
data8 fsys_fallback_syscall // epoll_ctl
data8 fsys_fallback_syscall // epoll_wait // 1245
data8 fsys_fallback_syscall // restart_syscall
data8 fsys_fallback_syscall // semtimedop
data8 fsys_fallback_syscall // timer_create
data8 fsys_fallback_syscall // timer_settime
data8 fsys_fallback_syscall // timer_gettime // 1250
data8 fsys_fallback_syscall // timer_getoverrun
data8 fsys_fallback_syscall // timer_delete
data8 fsys_fallback_syscall // clock_settime
data8 fsys_fallback_syscall // clock_gettime
data8 fsys_fallback_syscall // clock_getres // 1255
data8 fsys_fallback_syscall // clock_nanosleep
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // 1260
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // 1265
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // 1270
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall // 1275
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 fsys_fallback_syscall
data8 0 // unused
data8 0 // unused // 1235
data8 0 // exit_group
data8 0 // lookup_dcookie
data8 0 // io_setup
data8 0 // io_destroy
data8 0 // io_getevents // 1240
data8 0 // io_submit
data8 0 // io_cancel
data8 0 // epoll_create
data8 0 // epoll_ctl
data8 0 // epoll_wait // 1245
data8 0 // restart_syscall
data8 0 // semtimedop
data8 0 // timer_create
data8 0 // timer_settime
data8 0 // timer_gettime // 1250
data8 0 // timer_getoverrun
data8 0 // timer_delete
data8 0 // clock_settime
data8 0 // clock_gettime
data8 0 // clock_getres // 1255
data8 0 // clock_nanosleep
data8 0
data8 0
data8 0
data8 0 // 1260
data8 0
data8 0
data8 0
data8 0
data8 0 // 1265
data8 0
data8 0
data8 0
data8 0
data8 0 // 1270
data8 0
data8 0
data8 0
data8 0
data8 0 // 1275
data8 0
data8 0
data8 0
data8 0
.org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
......@@ -11,12 +11,10 @@
#include <asm/sigcontext.h>
#include <asm/system.h>
#include <asm/unistd.h>
#include <asm/page.h>
.section .text.gate, "ax"
.start_gate:
#if CONFIG_FSYS
#include <asm/errno.h>
......@@ -49,6 +47,7 @@ END(syscall_via_break)
* all other "scratch" registers: undefined
* all "preserved" registers: same as on entry
*/
GLOBAL_ENTRY(syscall_via_epc)
.prologue
.altrp b6
......@@ -65,19 +64,38 @@ GLOBAL_ENTRY(syscall_via_epc)
}
;;
rsm psr.be
movl r18=fsyscall_table
movl r14=fsyscall_table
mov r16=IA64_KR(CURRENT)
mov r19=255
;;
shladd r18=r17,3,r18
cmp.geu p6,p0=r19,r17 // (syscall > 0 && syscall <= 1024+255)?
mov r16=IA64_KR(CURRENT) // 12 cycle read latency
mov r19=NR_syscalls-1
;;
shladd r18=r17,3,r14
srlz.d // ensure little-endian byteorder is in effect
cmp.ne p8,p0=r0,r0 // p8 <- FALSE
/* Note: if r17 is a NaT, p6 will be set to zero. */
cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
;;
(p6) ld8 r18=[r18]
mov r29=psr // read psr (12 cyc load latency)
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;;
(p6) mov b7=r18
(p6) tbit.z p8,p0=r18,0
(p8) br.dptk.many b7
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
#if 1/*def CONFIG_ITANIUM*/
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
(p6) br.sptk.many b7
#else
/* We can't do this until gate is a proper ELF DSO. */
(p6) brl.cond.sptk fsys_bubble_down
#endif
mov r10=-1
mov r8=ENOSYS
......@@ -85,24 +103,6 @@ GLOBAL_ENTRY(syscall_via_epc)
br.ret.sptk.many b6
END(syscall_via_epc)
#if 0
GLOBAL_ENTRY(fsys_fallback_syscall)
/*
* It would be better/faster to do the SAVE_MIN magic directly here, but for now
* we simply fall back on doing a system-call via break. Good enough
* to get started. (Note: we have to do this through the gate page again, since
* the br.ret will switch us back to user-level privilege.)
*
* XXX Move this back to fsys.S after changing it over to avoid break 0x100000.
*/
movl r2=(syscall_via_break - .start_gate) + GATE_ADDR // r2 = GATE_ADDR + offset of syscall_via_break from .start_gate
;;
MCKINLEY_E9_WORKAROUND
mov b7=r2 // b7 = target computed above
br.ret.sptk.many b7 // "return" to syscall_via_break (see note above on privilege)
END(fsys_fallback_syscall)
#endif
#endif /* CONFIG_FSYS */
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
......
......@@ -637,7 +637,6 @@ END(daccess_bit)
/////////////////////////////////////////////////////////////////////////////////////////
// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
ENTRY(break_fault)
.global ia64_enter_syscall
/*
* The streamlined system call entry/exit paths only save/restore the initial part
* of pt_regs. This implies that the callers of system-calls must adhere to the
......@@ -654,7 +653,7 @@ ENTRY(break_fault)
* to prevent leaking bits from kernel to user level.
*/
DBG_FAULT(11)
mov r16=IA64_KR(CURRENT) // r16 = current (physical); 12 cycle read lat.
mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
mov r17=cr.iim
mov r18=__IA64_BREAK_SYSCALL
mov r21=ar.fpsr
......@@ -673,7 +672,7 @@ ENTRY(break_fault)
;;
ld1 r17=[r16] // load current->thread.on_ustack flag
st1 [r16]=r0 // clear current->thread.on_ustack flag
adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
;;
invala
......@@ -682,6 +681,7 @@ ENTRY(break_fault)
extr.u r8=r29,41,2 // extract ei field from cr.ipsr
;;
cmp.eq p6,p7=2,r8 // isr.ei==2?
mov r2=r1 // setup r2 for ia64_syscall_setup
;;
(p6) mov r8=0 // clear ei to 0
(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
......@@ -691,19 +691,25 @@ ENTRY(break_fault)
dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
;;
ia64_enter_syscall:
// switch from user to kernel RBS:
MINSTATE_START_SAVE_MIN_VIRT
br.call.sptk.many b7=setup_syscall_via_break
br.call.sptk.many b7=ia64_syscall_setup
;;
mov r3=255
MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS
;;
srlz.i // guarantee that interruption collection is on
;;
(p15) ssm psr.i // restore psr.i
;;
mov r3=NR_syscalls - 1
movl r16=sys_call_table
adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
movl r2=ia64_ret_from_syscall
;;
shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ?
cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
mov rp=r2 // set the real return addr
;;
(p7) add r20=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall
......@@ -764,11 +770,44 @@ END(interrupt)
* fault ever gets "unreserved", simply moved the following code to a more
* suitable spot...
*
* setup_syscall_via_break() is a separate subroutine so that it can
* ia64_syscall_setup() is a separate subroutine so that it can
* allocate stacked registers so it can safely demine any
* potential NaT values from the input registers.
*
* On entry:
* - executing on bank 0 or bank 1 register set (doesn't matter)
* - r1: stack pointer
* - r2: current task pointer
* - r3: preserved
* - r11: original contents (saved ar.pfs to be saved)
* - r12: original contents (sp to be saved)
* - r13: original contents (tp to be saved)
* - r15: original contents (syscall # to be saved)
* - r18: saved bsp (after switching to kernel stack)
* - r19: saved b6
* - r20: saved r1 (gp)
* - r21: saved ar.fpsr
* - r22: kernel's register backing store base (krbs_base)
* - r23: saved ar.bspstore
* - r24: saved ar.rnat
* - r25: saved ar.unat
* - r26: saved ar.pfs
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
* - executing on bank 1 registers
* - psr.ic enabled, interrupts restored
* - r1: kernel's gp
* - r3: preserved (same as on entry)
* - r12: points to kernel stack
* - r13: points to current task
* - p15: TRUE if interrupts need to be re-enabled
* - ar.fpsr: set to kernel settings
*/
ENTRY(setup_syscall_via_break)
GLOBAL_ENTRY(ia64_syscall_setup)
#if PT(B6) != 0
# error This code assumes that b6 is the first field in pt_regs.
#endif
......@@ -786,7 +825,7 @@ ENTRY(setup_syscall_via_break)
;;
st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
mov r28=b0
mov r28=b0 // save b0 (2 cyc)
(p8) mov in0=-1
;;
......@@ -836,23 +875,19 @@ ENTRY(setup_syscall_via_break)
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
(p14) mov in6=-1
mov r13=IA64_KR(CURRENT) // establish `current'
mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer
;;
(p8) mov in7=-1
tnat.nz p9,p0=r15
MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
movl r17=FPSR_DEFAULT
;;
srlz.i // guarantee that interruption collection is on
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
(p9) mov r15=-1
(p15) ssm psr.i // restore psr.i
mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
(p9) mov r15=-1
br.ret.sptk.many b7
END(setup_syscall_via_break)
END(ia64_syscall_setup)
.org ia64_ivt+0x3c00
/////////////////////////////////////////////////////////////////////////////////////////
......
......@@ -27,6 +27,7 @@
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/tlb.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
......@@ -569,6 +570,29 @@ count_reserved_pages (u64 start, u64 end, void *arg)
return 0;
}
#ifdef CONFIG_FSYS
/*
* Boot command-line option "nolwsys" can be used to disable the use of any light-weight
* system call handler. When this option is in effect, all fsyscalls will end up bubbling
* down into the kernel and calling the normal (heavy-weight) syscall handler. This is
* useful for performance testing, but conceivably could also come in handy for debugging
* purposes.
*/
/* Zero by default; set to 1 by nolwsys_setup(). Tested by mem_init() when it fills in fsyscall_table. */
static int nolwsys;
/*
* Handler registered for the "nolwsys" option through __setup() below.
* The argument string S is not examined; the handler only sets the nolwsys flag.
* Always returns 1 (presumably meaning "option handled" under the __setup()
* convention -- TODO confirm against the __setup() documentation).
*/
static int __init
nolwsys_setup (char *s)
{
nolwsys = 1;
return 1;
}
__setup("nolwsys", nolwsys_setup);
#endif /* CONFIG_FSYS */
void
mem_init (void)
{
......@@ -622,6 +646,25 @@ mem_init (void)
if (num_pgt_pages > (u64) pgt_cache_water[1])
pgt_cache_water[1] = num_pgt_pages;
#ifdef CONFIG_FSYS
{
int i;
/*
* For fsyscall entry points with no light-weight handler, use the ordinary
* (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
* code can tell them apart.
*/
for (i = 0; i < NR_syscalls; ++i) {
extern unsigned long fsyscall_table[NR_syscalls];
extern unsigned long sys_call_table[NR_syscalls];
if (!fsyscall_table[i] || nolwsys)
fsyscall_table[i] = sys_call_table[i] | 1;
}
}
#endif
/* install the gate page in the global page table: */
put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR);
......
......@@ -247,6 +247,8 @@
#define __NR_sys_clock_getres 1255
#define __NR_sys_clock_nanosleep 1256
#define NR_syscalls 256 /* length of syscall table */
#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment