Commit b8fcff74 authored by David Mosberger

ia64: Fix fsys_gettimeofday() and tune it some more.

parent 740a17e5
arch/ia64/kernel/fsys.S

@@ -3,11 +3,16 @@
  *
  * Copyright (C) 2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 18-Feb-03 louisk	Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
+ *			probably broke it along the way... ;-)
  */

 #include <asm/asmmacro.h>
 #include <asm/errno.h>
 #include <asm/offsets.h>
+#include <asm/percpu.h>
 #include <asm/thread_info.h>

 /*
@@ -123,187 +128,171 @@ ENTRY(fsys_set_tid_address)
 	br.ret.sptk.many b6
 END(fsys_set_tid_address)

+/*
+ * Note 1: This routine uses floating-point registers, but only with registers that
+ *	   operate on integers.  Because of that, we don't need to set ar.fpsr to the
+ *	   kernel default value.
+ *
+ * Note 2: For now, we will assume that all CPUs run at the same clock-frequency.
+ *	   If that wasn't the case, we would have to disable preemption (e.g.,
+ *	   by disabling interrupts) between reading the ITC and reading
+ *	   local_cpu_data->nsec_per_cyc.
+ *
+ * Note 3: On platforms where the ITC-drift bit is set in the SAL feature vector,
+ *	   we ought to either skip the ITC-based interpolation or run an ntp-like
+ *	   daemon to keep the ITCs from drifting too far apart.
+ */
+
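Note 2 refers to the interpolation step at the heart of this routine: the time since the last timer tick is measured in ITC cycles and scaled to nanoseconds by the per-CPU nsec_per_cyc factor. A minimal C sketch of that computation (the parameter names are illustrative; the real code reaches these values through the IA64_CPUINFO_* offsets):

	/* Cycles-to-nanoseconds interpolation sketch; names are hypothetical. */
	static unsigned long offset_ns(unsigned long now,          /* ar.itc */
	                               unsigned long last_tick,    /* ITC at the last tick */
	                               unsigned long nsec_per_cyc) /* fixed-point scale */
	{
		/* nsec_per_cyc is scaled up by 2^IA64_NSEC_PER_CYC_SHIFT
		 * (a kernel constant), so the product is shifted back down. */
		return ((now - last_tick) * nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT;
	}

If CPUs could run at different frequencies, being preempted between reading ar.itc and reading nsec_per_cyc would pair one CPU's cycle count with another CPU's scale factor, which is why Note 2 would then require preemption to be disabled.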
 ENTRY(fsys_gettimeofday)
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	;;
-	ld4 r9=[r9]
-	;;
-	and r9=TIF_ALLWORK_MASK,r9
-	;;
-	// r32, r33 should contain the 2 args of gettimeofday
-	tnat.nz p6,p7=r32			// in case the args are NaT
-	cmp.ne p8, p0=0, r9
-	;;
-(p7)	tnat.nz p6,p0=r33
-(p8)	br.spnt.many fsys_fallback_syscall
-	;;
-(p6)	adds r8=EINVAL, r0			// r8 = EINVAL
-(p6)	adds r10=-1, r0				// r10 = -1
-(p6)	br.ret.spnt.many b6			// return with r8 set to EINVAL
-	movl r17=xtime_lock
-	movl r19=xtime				// xtime is a timespec struct
-	movl r20=cpu_info__per_cpu
-	movl r26=jiffies
-	movl r27=wall_jiffies
-	movl r31=last_nsec_offset
-	movl r24=2361183241434822607		// for division hack (only for / 1000)
-	;;
-	setf.sig f9=r24				// f9 is used for division hack
-	adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r20
-	adds r22=IA64_CPUINFO_ITM_DELTA_OFFSET, r20
-	adds r30=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r20
-	adds r3=IA64_TIMESPEC_TV_NSEC_OFFSET, r19	// r3 = &xtime->tv_nsec
-while_loop_1:
-	// *** seq = read_seqbegin(&xtime_lock); ***
-	ld4 r23=[r17]				// since &xtime_lock == &xtime_lock->sequence
-#ifdef CONFIG_SMP
-	mf
-#endif
-	;;					// barrier()
-	// now r23 = seq
-	ld8 r14=[r31]				// r14 = old = last_nsec_offset
+	movl r3=THIS_CPU(cpu_info)
+	mov.m r31=ar.itc			// put time stamp into r31 (ITC) == now (35 cyc)
+	movl r19=xtime				// xtime is a timespec struct
+	;;
+#ifdef CONFIG_SMP
+	movl r10=__per_cpu_offset
+	;;
+	ld8 r10=[r10]				// r10 <- __per_cpu_offset[0]
+	movl r21=cpu_info__per_cpu
+	;;
+	add r10=r21, r10			// r10 <- &cpu_data(time_keeper_id)
+#else
+	mov r10=r3
+#endif
+	ld4 r9=[r9]
+	movl r17=xtime_lock
+	;;
+	// r32, r33 should contain the 2 args of gettimeofday
+	adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r10
+	mov r2=-1
+	tnat.nz p6,p7=r32			// guard against NaT args
+	;;
+	adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10
+(p7)	tnat.nz p6,p0=r33
+(p6)	br.cond.spnt.few .fail
+	adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3
+	movl r24=2361183241434822607		// for division hack (only for / 1000)
+	;;
+	ldf8 f7=[r10]				// f7 now contains itm_delta
+	setf.sig f11=r2
+	nop 0
+	adds r20=IA64_TIMESPEC_TV_NSEC_OFFSET, r19	// r20 = &xtime->tv_nsec
+	movl r26=jiffies
+	setf.sig f9=r24				// f9 is used for division hack
+	movl r27=wall_jiffies
+	and r9=TIF_ALLWORK_MASK,r9
+	movl r25=last_nsec_offset
+	;;
+	ldf8 f10=[r8]				// f10 <- local_cpu_data->nsec_per_cyc value
+	cmp.ne p8, p0=0, r9
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+.retry:	// *** seq = read_seqbegin(&xtime_lock); ***
+	ld4.acq r23=[r17]			// since &xtime_lock == &xtime_lock->sequence
+	ld8 r14=[r25]				// r14 (old) = last_nsec_offset
 	ld8 r28=[r26]				// r28 = jiffies
 	ld8 r29=[r27]				// r29 = wall_jiffies
 	;;
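The read_seqbegin/read_seqretry comments name the seqlock reader protocol, which both the old and the new code open-code around the xtime reads. Its C shape is roughly the following sketch (kernel seqlock primitives and the global xtime assumed):

	/* Reader-side seqlock sketch around xtime. */
	static void read_xtime(unsigned long *sec, unsigned long *nsec)
	{
		unsigned long seq;
		do {
			seq   = read_seqbegin(&xtime_lock); /* the ld4/ld4.acq of the sequence word */
			*sec  = xtime.tv_sec;
			*nsec = xtime.tv_nsec;
			/* ... compute the ITC-based offset under the same seq ... */
		} while (read_seqretry(&xtime_lock, seq)); /* seq odd or changed => retry */
	}

The new code folds the two retry conditions into predicates: tbit.nz sets p9 when the sequence was odd (a writer was mid-update), and the (p10) cmp.ne sets p9 when the sequence changed between the two reads.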
-	ld8 r24=[r21]				// r24 now contains itm_next
-	ld8 r25=[r22]				// r25 now contains itm_delta
-	sub r28=r28, r29			// r28 now contains "lost"
-	;;
-	adds r28=1, r28				// r28 now contains "lost + 1"
-	;;
-	setf.sig f6=r28
-	setf.sig f7=r25
-	ld8 r2=[r19]				// r2 = sec = xtime.tv_sec
-	;;
-	ld8 r28=[r3]				// r28 = nsec = xtime.tv_nsec
-	xma.l f8=f6, f7, f0			// put lower 64-bits result of f6 * f7 in f8
-	;;
-	getf.sig r18=f8				// r18 now contains the (lost + 1) * itm_delta
-	;;
-	sub r18=r24, r18			// r18 is last_tick
-	mov r25=ar.itc				// put time stamp into r25 (ITC) == now
-	;;
-	cmp.leu p7, p8=r18, r25			// if last_tick <= now, p7 = 1
-	;;
-(p7)	ld8 r24=[r30]				// r24 contains local_cpu_data->nsec_per_cyc value
-(p7)	sub r25=r25, r18			// elapsed_cycles in r25
-	;;
-(p7)	setf.sig f6=r24
-(p7)	setf.sig f7=r25
-	;;
-(p7)	xma.l f8=f6, f7, f0
-	;;
-(p7)	getf.sig r18=f8				// r18 = elapsed_cycles * local_cpu_data->nsec_per_cyc
-	;;
-(p7)	shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT
-(p8)	ld8 r18=[r31]				// r18 = last_time_offset (is unsigned long)
-	// now end of gettimeoffset, r18 should contain the desired result (offset)
-	// *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
-	;;					// barrier()
-#ifdef CONFIG_SMP
-	mf
-#endif
-	adds r24=1, r0				// r24 = 1
-	ld4 r25=[r17]				// r25 = xtime_lock->sequence (load again)
-	;;
-	and r24=r24, r23			// r24 = seq & 1
-	xor r25=r25, r23			// r25 = xtime_lock->sequence ^ seq
-	;;
-	or r24=r24, r25				// now r24 = read_seqretry(&xtime_lock, seq)
-	;;
-	cmp.ne p7, p0=r24, r0
-	;;
-(p7)	br.spnt.many while_loop_1		// continue
-	cmp.leu p7, p8=r18, r14			// if (offset <= old)
-	;;
-(p7)	mov r18=r14				// offset = old
-(p7)	br.spnt.few loop_exit_1			// break
-	mov ar.ccv=r18				// ar.ccv = offset
-	;;
-	cmpxchg8.acq r25=[r31], r14, ar.ccv	// compare-and-exchange (atomic!)
-	;;
-	cmp.eq p8, p0=r25, r14
-	;;
-(p8)	br.sptk.many loop_exit_1
-	br.sptk.many while_loop_1
-loop_exit_1:
-	// at this point, r28 is nsec and r18 is offset
-	add r3=r28, r18				// r3 = (nsec + offset)
-	;;
-	// now we try to divide r3 by 1000 to get the value in usec instead of nsec
-	shr.u r24=r3, 3
-	;;
-	setf.sig f7=r24
-	;;
-	xmpy.hu f6=f7, f9
-	;;
-	getf.sig r3=f6
-	;;
-	shr.u r3=r3, 4
-	// end of division, r3 is divided by 1000 (=usec)
-	addl r24=1000000, r0			// r24 = 1000000
-	;;
-while_loop_2:
-	cmp.geu p7, p8=r3, r24			// while (usec >= 1000000)
-	;;
-(p8)	br.sptk.many loop_exit_2
-	sub r3=r3, r24				// usec -= 1000000
-	adds r2=1, r2				// ++sec
-	br.many while_loop_2
-loop_exit_2:
-	// finally, r2 = sec and r3 = usec
-	mov r24=r32				// we need to preserve this...
-	;;
-	st8 [r32]=r2, 8
-	;;
-	st8 [r32]=r3				// store them in the timeval struct
-	;;
-	mov r32=r24
-	mov r8=r0				// success
-	MCKINLEY_E9_WORKAROUND
-	br.ret.sptk.many b6			// return to caller
+	ldf8 f8=[r21]				// f8 now contains itm_next
+	sub r28=r29, r28, 1			// r28 now contains "-(lost + 1)"
+	tbit.nz p9, p10=r23, 0			// p9 <- is_odd(r23), p10 <- is_even(r23)
+	;;
+	ld8 r2=[r19]				// r2 = sec = xtime.tv_sec
+	ld8 r29=[r20]				// r29 = nsec = xtime.tv_nsec
+	setf.sig f6=r28				// f6 <- -(lost + 1) (6 cyc)
+	;;
+	mf
+	xma.l f8=f6, f7, f8			// f8 (last_tick) <- -(lost + 1)*itm_delta + itm_next (5 cyc)
+	nop 0
+	setf.sig f12=r31			// f12 <- ITC (6 cyc)
+	// *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
+	ld4 r24=[r17]				// r24 = xtime_lock->sequence (re-read)
+	nop 0
+	;;
+	mov r31=ar.itc				// re-read ITC in case we .retry (35 cyc)
+	xma.l f8=f11, f8, f12			// f8 (elapsed_cycles) <- (-1*last_tick + now) = (now - last_tick)
+	nop 0
+	;;
+	getf.sig r18=f8				// r18 <- (now - last_tick)
+	xmpy.l f8=f8, f10			// f8 <- elapsed_cycles*nsec_per_cyc (5 cyc)
+	add r3=r29, r14				// r3 = (nsec + old)
+	;;
+	cmp.lt p7, p8=r18, r0			// if now < last_tick, set p7 = 1, p8 = 0
+	getf.sig r18=f8				// r18 = elapsed_cycles*nsec_per_cyc (6 cyc)
+	nop 0
+	;;
+(p10)	cmp.ne p9, p0=r23, r24			// if xtime_lock->sequence != seq, set p9
+	shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT	// r18 <- offset
+(p9)	br.spnt.many .retry
+	;;
+	mov ar.ccv=r14				// ar.ccv = old (1 cyc)
+	cmp.leu p7, p8=r18, r14			// if (offset <= old), set p7 = 1, p8 = 0
+	;;
+(p8)	cmpxchg8.rel r24=[r25], r18, ar.ccv	// compare-and-exchange (atomic!)
+(p8)	add r3=r29, r18				// r3 = (nsec + offset)
+	;;
+	shr.u r3=r3, 3				// initiate dividing r3 by 1000
+	;;
+	setf.sig f8=r3				// (6 cyc)
+	mov r10=1000000				// r10 = 1000000
+	;;
+(p8)	cmp.ne.unc p9, p0=r24, r14
+	xmpy.hu f6=f8, f9			// (5 cyc)
+(p9)	br.spnt.many .retry
+	;;
+	getf.sig r3=f6				// (6 cyc)
+	;;
+	shr.u r3=r3, 4				// end of division, r3 is divided by 1000 (=usec)
+1:	cmp.geu p7, p0=r3, r10			// while (usec >= 1000000)
+	;;
+(p7)	sub r3=r3, r10				// usec -= 1000000
+(p7)	adds r2=1, r2				// ++sec
+(p7)	br.spnt.many 1b
+	// finally: r2 = sec, r3 = usec
+EX(.fail, st8 [r32]=r2)
+	adds r9=8, r32
+	mov r8=r0				// success
+	;;
+EX(.fail, st8 [r9]=r3)				// store them in the timeval struct
+	mov r10=0
+	MCKINLEY_E9_WORKAROUND
+	br.ret.sptk.many b6			// return to caller
+
+	/*
+	 * Note: We are NOT clearing the scratch registers here.  Since the only things
+	 *	 in those registers are time-related variables and some addresses (which
+	 *	 can be obtained from System.map), none of this should be security-sensitive
+	 *	 and we should be fine.
+	 */
+
+.fail:	adds r8=EINVAL, r0			// r8 = EINVAL
+	adds r10=-1, r0				// r10 = -1
+	MCKINLEY_E9_WORKAROUND
+	br.ret.spnt.many b6			// return with r8 set to EINVAL
 END(fsys_gettimeofday)
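The "division hack" constant deserves a word: 2361183241434822607 is ceil(2^71 / 1000), so dividing by 1000 becomes a shift right by 3, a 64x64 multiply keeping only the high 64 bits (xmpy.hu), and a final shift right by 4, for 3 + 64 + 4 = 71 bits in total. Dropping the low three bits first keeps the product from overflowing; because they are discarded, the result can come out at most one microsecond low, which is harmless here. A C sketch, using GCC's unsigned __int128 to stand in for the multiply-high:

	/* nsec -> usec, mirroring the shr.u / xmpy.hu / shr.u sequence. */
	static unsigned long ns_to_us(unsigned long nsec)
	{
		const unsigned long m = 2361183241434822607UL; /* ceil(2^71 / 1000) */
		unsigned __int128 prod = (unsigned __int128)(nsec >> 3) * m;
		return (unsigned long)(prod >> 64) >> 4; /* xmpy.hu, then shr.u by 4 */
	}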
 	.rodata
......
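Finally, the cmpxchg8.rel on last_nsec_offset is what keeps gettimeofday monotonic across CPUs: a freshly computed offset is published only if it moves forward, a reader that computes a smaller offset than the last published one uses the published value instead, and a lost race forces a recompute. A C sketch of that logic (kernel cmpxchg and last_nsec_offset assumed):

	/* Monotonicity sketch around last_nsec_offset; returns 1 if the caller
	 * must recompute (the .retry path), 0 otherwise. */
	static int publish_offset(unsigned long *offset)
	{
		unsigned long old = last_nsec_offset;
		if (*offset <= old) {
			*offset = old;	/* (p7) path: never let time step backwards */
			return 0;
		}
		if (cmpxchg(&last_nsec_offset, old, *offset) != old)
			return 1;	/* (p8) path raced with another CPU: .retry */
		return 0;
	}

The removed code appears to have had this inverted (its cmpxchg8.acq used the new offset as the compare value and the stale one as the store value), which is presumably among the "several bugs" the changelog owns up to.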