Commit 47651db7 authored by David Mosberger, committed by Tony Luck

[IA64] speedup ptrace by avoiding kernel-stack walk

This patch changes the syscall entry path to store the
current-frame-mask (CFM) in pt_regs->cr_ifs.  This just takes one
extra instruction (a "dep" to clear the bits other than 0-37) and is
free in terms of cycles.

The advantage of doing this is that it lets ptrace() avoid having to
walk the stack to determine the end of the user-level backing-store of
a process which is in the middle of a system-call.  Since this is what
strace does all the time, this speeds up strace quite a bit (by ~50%).
More importantly, it makes the syscall vs. non-syscall case much more
symmetric, which is always something I wanted.

Note that the change to ivt.S looks big but this is just a rippling
effect of instruction-scheduling to keep syscall latency the same.
All that's really going on there is that instead of storing 0 into
cr_ifs member we store the low 38 bits of ar.pfs.
Signed-off-by: David Mosberger <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent de4514ac
......@@ -852,7 +852,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
add r17=PT(R11),r1 // initialize second base pointer
;;
alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
st8 [r16]=r29,PT(CR_IFS)-PT(CR_IPSR) // save cr.ipsr
st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
tnat.nz p8,p0=in0
st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
......@@ -860,31 +860,31 @@ GLOBAL_ENTRY(ia64_syscall_setup)
(pKStk) mov r18=r0 // make sure r18 isn't NaT
;;
st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
mov r28=b0 // save b0 (2 cyc)
(p8) mov in0=-1
;;
st8 [r16]=r0,PT(AR_PFS)-PT(CR_IFS) // clear cr.ifs
st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
(p9) mov in1=-1
dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
(p8) mov in0=-1
;;
st8 [r16]=r26,PT(AR_RNAT)-PT(AR_PFS) // save ar.pfs
st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
tnat.nz p10,p0=in2
(p9) mov in1=-1
(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
tbit.nz p15,p0=r29,IA64_PSR_I_BIT
tnat.nz p11,p0=in3
tnat.nz p10,p0=in2
;;
(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field
(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
tnat.nz p11,p0=in3
;;
(p10) mov in2=-1
tnat.nz p12,p0=in4 // [I0]
(p11) mov in3=-1
tnat.nz p12,p0=in4
tnat.nz p13,p0=in5
;;
(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
......@@ -892,28 +892,29 @@ GLOBAL_ENTRY(ia64_syscall_setup)
;;
st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
st8 [r17]=r28,PT(R1)-PT(B0) // save b0
(p12) mov in4=-1
tnat.nz p13,p0=in5 // [I0]
;;
st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs"
st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
(p13) mov in5=-1
(p12) mov in4=-1
;;
.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
tnat.nz p14,p0=in6
(p13) mov in5=-1
;;
st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
st8.spill [r17]=r15 // save r15
tnat.nz p8,p0=in7
tnat.nz p14,p0=in6
;;
stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
(p14) mov in6=-1
tnat.nz p8,p0=in7
mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer
;;
(p14) mov in6=-1
(p8) mov in7=-1
tnat.nz p9,p0=r15
......
/*
* Kernel support for the ptrace() and syscall tracing interfaces.
*
* Copyright (C) 1999-2003 Hewlett-Packard Co
* Copyright (C) 1999-2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Derived from the x86 and Alpha versions. Most of the code in here
......@@ -304,7 +304,6 @@ put_rnat (struct task_struct *task, struct switch_stack *sw,
long num_regs, nbits;
struct pt_regs *pt;
unsigned long cfm, *urbs_kargs;
struct unw_frame_info info;
pt = ia64_task_regs(task);
kbsp = (unsigned long *) sw->ar_bspstore;
......@@ -316,11 +315,8 @@ put_rnat (struct task_struct *task, struct switch_stack *sw,
* If entered via syscall, don't allow user to set rnat bits
* for syscall args.
*/
unw_init_from_blocked_task(&info,task);
if (unw_unwind_to_user(&info) == 0) {
unw_get_cfm(&info,&cfm);
urbs_kargs = ia64_rse_skip_regs(urbs_end,-(cfm & 0x7f));
}
cfm = pt->cr_ifs;
urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f));
}
if (urbs_kargs >= urnat_addr)
......@@ -480,27 +476,18 @@ ia64_poke (struct task_struct *child, struct switch_stack *child_stack, unsigned
unsigned long
ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, unsigned long *cfmp)
{
unsigned long *krbs, *bspstore, cfm;
struct unw_frame_info info;
unsigned long *krbs, *bspstore, cfm = pt->cr_ifs;
long ndirty;
krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
bspstore = (unsigned long *) pt->ar_bspstore;
ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
cfm = pt->cr_ifs & ~(1UL << 63);
if (in_syscall(pt)) {
/*
* If bit 63 of cr.ifs is cleared, the kernel was entered via a system
* call and we need to recover the CFM that existed on entry to the
* kernel by unwinding the kernel stack.
*/
unw_init_from_blocked_task(&info, child);
if (unw_unwind_to_user(&info) == 0) {
unw_get_cfm(&info, &cfm);
ndirty += (cfm & 0x7f);
}
}
if (in_syscall(pt))
ndirty += (cfm & 0x7f);
else
cfm &= ~(1UL << 63); /* clear valid bit */
if (cfmp)
*cfmp = cfm;
return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
......
......@@ -290,12 +290,10 @@ setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratc
if (on_sig_stack((unsigned long) sc))
flags |= IA64_SC_FLAG_ONSTACK;
if ((ifs & (1UL << 63)) == 0) {
/* if cr_ifs isn't valid, we got here through a syscall */
if ((ifs & (1UL << 63)) == 0)
/* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
flags |= IA64_SC_FLAG_IN_SYSCALL;
cfm = scr->ar_pfs & ((1UL << 38) - 1);
} else
cfm = ifs & ((1UL << 38) - 1);
cfm = ifs & ((1UL << 38) - 1);
ia64_flush_fph(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
flags |= IA64_SC_FLAG_FPH_VALID;
......
......@@ -2,7 +2,7 @@
#define _ASM_IA64_PTRACE_H
/*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* Copyright (C) 1998-2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
* Copyright (C) 2003 Intel Co
......@@ -110,7 +110,11 @@ struct pt_regs {
unsigned long cr_ipsr; /* interrupted task's psr */
unsigned long cr_iip; /* interrupted task's instruction pointer */
unsigned long cr_ifs; /* interrupted task's function state */
/*
* interrupted task's function state; if bit 63 is cleared, it
* contains syscall's ar.pfs.pfm:
*/
unsigned long cr_ifs;
unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
unsigned long ar_pfs; /* prev function state */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment