Commit 47651db7 authored by David Mosberger's avatar David Mosberger Committed by Tony Luck

[IA64] speedup ptrace by avoiding kernel-stack walk

This patch changes the syscall entry path to store the
current-frame-mask (CFM) in pt_regs->cr_ifs.  This just takes one
extra instruction (a "dep" to clear the bits other than 0-37) and is
free in terms of cycles.

The advantage of doing this is that it lets ptrace() avoid having to
walk the stack to determine the end of the user-level backing-store of
a process which is in the middle of a system-call.  Since this is what
strace does all the time, this speeds up strace quite a bit (by ~50%).
More importantly, it makes the syscall vs. non-syscall case much more
symmetric, which is always something I wanted.

Note that the change to ivt.S looks big but this is just a rippling
effect of instruction-scheduling to keep syscall latency the same.
All that's really going on there is that instead of storing 0 into
cr_ifs member we store the low 38 bits of ar.pfs.
Signed-off-by: David Mosberger <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent de4514ac
...@@ -852,7 +852,7 @@ GLOBAL_ENTRY(ia64_syscall_setup) ...@@ -852,7 +852,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
add r17=PT(R11),r1 // initialize second base pointer add r17=PT(R11),r1 // initialize second base pointer
;; ;;
alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
st8 [r16]=r29,PT(CR_IFS)-PT(CR_IPSR) // save cr.ipsr st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
tnat.nz p8,p0=in0 tnat.nz p8,p0=in0
st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
...@@ -860,31 +860,31 @@ GLOBAL_ENTRY(ia64_syscall_setup) ...@@ -860,31 +860,31 @@ GLOBAL_ENTRY(ia64_syscall_setup)
(pKStk) mov r18=r0 // make sure r18 isn't NaT (pKStk) mov r18=r0 // make sure r18 isn't NaT
;; ;;
st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
mov r28=b0 // save b0 (2 cyc) mov r28=b0 // save b0 (2 cyc)
(p8) mov in0=-1
;; ;;
st8 [r16]=r0,PT(AR_PFS)-PT(CR_IFS) // clear cr.ifs
st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
(p9) mov in1=-1 dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
(p8) mov in0=-1
;; ;;
st8 [r16]=r26,PT(AR_RNAT)-PT(AR_PFS) // save ar.pfs st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
tnat.nz p10,p0=in2 (p9) mov in1=-1
(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 (pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
tbit.nz p15,p0=r29,IA64_PSR_I_BIT tbit.nz p15,p0=r29,IA64_PSR_I_BIT
tnat.nz p11,p0=in3 tnat.nz p10,p0=in2
;; ;;
(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field (pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field
(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field (pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
tnat.nz p11,p0=in3
;;
(p10) mov in2=-1 (p10) mov in2=-1
tnat.nz p12,p0=in4 // [I0]
(p11) mov in3=-1 (p11) mov in3=-1
tnat.nz p12,p0=in4
tnat.nz p13,p0=in5
;; ;;
(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat (pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore (pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
...@@ -892,28 +892,29 @@ GLOBAL_ENTRY(ia64_syscall_setup) ...@@ -892,28 +892,29 @@ GLOBAL_ENTRY(ia64_syscall_setup)
;; ;;
st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
st8 [r17]=r28,PT(R1)-PT(B0) // save b0 st8 [r17]=r28,PT(R1)-PT(B0) // save b0
(p12) mov in4=-1 tnat.nz p13,p0=in5 // [I0]
;; ;;
st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs"
st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
(p13) mov in5=-1 (p12) mov in4=-1
;; ;;
.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 .mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 .mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
tnat.nz p14,p0=in6 (p13) mov in5=-1
;; ;;
st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
st8.spill [r17]=r15 // save r15 st8.spill [r17]=r15 // save r15
tnat.nz p8,p0=in7 tnat.nz p14,p0=in6
;; ;;
stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
(p14) mov in6=-1 tnat.nz p8,p0=in7
mov r13=r2 // establish `current' mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer movl r1=__gp // establish kernel global pointer
;; ;;
(p14) mov in6=-1
(p8) mov in7=-1 (p8) mov in7=-1
tnat.nz p9,p0=r15 tnat.nz p9,p0=r15
......
/* /*
* Kernel support for the ptrace() and syscall tracing interfaces. * Kernel support for the ptrace() and syscall tracing interfaces.
* *
* Copyright (C) 1999-2003 Hewlett-Packard Co * Copyright (C) 1999-2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com>
* *
* Derived from the x86 and Alpha versions. Most of the code in here * Derived from the x86 and Alpha versions. Most of the code in here
...@@ -304,7 +304,6 @@ put_rnat (struct task_struct *task, struct switch_stack *sw, ...@@ -304,7 +304,6 @@ put_rnat (struct task_struct *task, struct switch_stack *sw,
long num_regs, nbits; long num_regs, nbits;
struct pt_regs *pt; struct pt_regs *pt;
unsigned long cfm, *urbs_kargs; unsigned long cfm, *urbs_kargs;
struct unw_frame_info info;
pt = ia64_task_regs(task); pt = ia64_task_regs(task);
kbsp = (unsigned long *) sw->ar_bspstore; kbsp = (unsigned long *) sw->ar_bspstore;
...@@ -316,11 +315,8 @@ put_rnat (struct task_struct *task, struct switch_stack *sw, ...@@ -316,11 +315,8 @@ put_rnat (struct task_struct *task, struct switch_stack *sw,
* If entered via syscall, don't allow user to set rnat bits * If entered via syscall, don't allow user to set rnat bits
* for syscall args. * for syscall args.
*/ */
unw_init_from_blocked_task(&info,task); cfm = pt->cr_ifs;
if (unw_unwind_to_user(&info) == 0) { urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f));
unw_get_cfm(&info,&cfm);
urbs_kargs = ia64_rse_skip_regs(urbs_end,-(cfm & 0x7f));
}
} }
if (urbs_kargs >= urnat_addr) if (urbs_kargs >= urnat_addr)
...@@ -480,27 +476,18 @@ ia64_poke (struct task_struct *child, struct switch_stack *child_stack, unsigned ...@@ -480,27 +476,18 @@ ia64_poke (struct task_struct *child, struct switch_stack *child_stack, unsigned
unsigned long unsigned long
ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, unsigned long *cfmp) ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, unsigned long *cfmp)
{ {
unsigned long *krbs, *bspstore, cfm; unsigned long *krbs, *bspstore, cfm = pt->cr_ifs;
struct unw_frame_info info;
long ndirty; long ndirty;
krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
bspstore = (unsigned long *) pt->ar_bspstore; bspstore = (unsigned long *) pt->ar_bspstore;
ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
cfm = pt->cr_ifs & ~(1UL << 63);
if (in_syscall(pt)) { if (in_syscall(pt))
/*
* If bit 63 of cr.ifs is cleared, the kernel was entered via a system
* call and we need to recover the CFM that existed on entry to the
* kernel by unwinding the kernel stack.
*/
unw_init_from_blocked_task(&info, child);
if (unw_unwind_to_user(&info) == 0) {
unw_get_cfm(&info, &cfm);
ndirty += (cfm & 0x7f); ndirty += (cfm & 0x7f);
} else
} cfm &= ~(1UL << 63); /* clear valid bit */
if (cfmp) if (cfmp)
*cfmp = cfm; *cfmp = cfm;
return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
......
...@@ -290,11 +290,9 @@ setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratc ...@@ -290,11 +290,9 @@ setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratc
if (on_sig_stack((unsigned long) sc)) if (on_sig_stack((unsigned long) sc))
flags |= IA64_SC_FLAG_ONSTACK; flags |= IA64_SC_FLAG_ONSTACK;
if ((ifs & (1UL << 63)) == 0) { if ((ifs & (1UL << 63)) == 0)
/* if cr_ifs isn't valid, we got here through a syscall */ /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
flags |= IA64_SC_FLAG_IN_SYSCALL; flags |= IA64_SC_FLAG_IN_SYSCALL;
cfm = scr->ar_pfs & ((1UL << 38) - 1);
} else
cfm = ifs & ((1UL << 38) - 1); cfm = ifs & ((1UL << 38) - 1);
ia64_flush_fph(current); ia64_flush_fph(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID)) { if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define _ASM_IA64_PTRACE_H #define _ASM_IA64_PTRACE_H
/* /*
* Copyright (C) 1998-2003 Hewlett-Packard Co * Copyright (C) 1998-2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com> * Stephane Eranian <eranian@hpl.hp.com>
* Copyright (C) 2003 Intel Co * Copyright (C) 2003 Intel Co
...@@ -110,7 +110,11 @@ struct pt_regs { ...@@ -110,7 +110,11 @@ struct pt_regs {
unsigned long cr_ipsr; /* interrupted task's psr */ unsigned long cr_ipsr; /* interrupted task's psr */
unsigned long cr_iip; /* interrupted task's instruction pointer */ unsigned long cr_iip; /* interrupted task's instruction pointer */
unsigned long cr_ifs; /* interrupted task's function state */ /*
* interrupted task's function state; if bit 63 is cleared, it
* contains syscall's ar.pfs.pfm:
*/
unsigned long cr_ifs;
unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
unsigned long ar_pfs; /* prev function state */ unsigned long ar_pfs; /* prev function state */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment