Commit 0840c8ec, authored by Richard Henderson and committed by Linus Torvalds

[PATCH] alpha strncpy fix

Ported across from a nearly identical fix to the glibc tree.  Under
some conditions we'd read one too many source words and segfault.
parent 1307ef66
...@@ -205,35 +205,30 @@ $u_head: ...@@ -205,35 +205,30 @@ $u_head:
cmpbge zero, t6, t8 # E : cmpbge zero, t6, t8 # E :
beq a2, $u_eocfin # U : beq a2, $u_eocfin # U :
nop lda t6, -1 # E :
nop nop
bne t8, $u_final # U : bne t8, $u_final # U :
lda t6, -1 # E : mask out the bits we have mskql t6, a1, t6 # U : mask out bits already seen
mskql t6, a1, t6 # U : already seen (stall)
stq_u t0, 0(a0) # L : store first output word stq_u t0, 0(a0) # L : store first output word
or t6, t2, t2 # E : (stall)
or t6, t2, t2 # E : cmpbge zero, t2, t8 # E : find nulls in second partial
cmpbge zero, t2, t8 # E : find nulls in second partial (stall) addq a0, 8, a0 # E :
addq a0, 8, a0 # E : subq a2, 1, a2 # E :
subq a2, 1, a2 # E :
bne t8, $u_late_head_exit # U : bne t8, $u_late_head_exit # U :
/* Finally, we've got all the stupid leading edge cases taken care /* Finally, we've got all the stupid leading edge cases taken care
of and we can set up to enter the main loop. */ of and we can set up to enter the main loop. */
extql t2, a1, t1 # U : position hi-bits of lo word extql t2, a1, t1 # U : position hi-bits of lo word
beq a2, $u_eoc # U :
ldq_u t2, 8(a1) # L : read next high-order source word ldq_u t2, 8(a1) # L : read next high-order source word
addq a1, 8, a1 # E : addq a1, 8, a1 # E :
cmpbge zero, t2, t8 # E : (stall) extqh t2, a1, t0 # U : position lo-bits of hi word (stall)
beq a2, $u_eoc # U : cmpbge zero, t2, t8 # E :
nop
nop
bne t8, $u_eos # e1 :
nop
nop
nop nop
bne t8, $u_eos # U :
/* Unaligned copy main loop. In order to avoid reading too much, /* Unaligned copy main loop. In order to avoid reading too much,
the loop is structured to detect zeros in aligned source words. the loop is structured to detect zeros in aligned source words.
...@@ -243,6 +238,7 @@ $u_head: ...@@ -243,6 +238,7 @@ $u_head:
to run as fast as possible. to run as fast as possible.
On entry to this basic block: On entry to this basic block:
t0 == the shifted low-order bits from the current source word
t1 == the shifted high-order bits from the previous source word t1 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word t2 == the unshifted current source word
...@@ -250,25 +246,20 @@ $u_head: ...@@ -250,25 +246,20 @@ $u_head:
.align 4 .align 4
$u_loop: $u_loop:
extqh t2, a1, t0 # U : extract high bits for current word or t0, t1, t0 # E : current dst word now complete
addq a1, 8, a1 # E : subq a2, 1, a2 # E : decrement word count
extql t2, a1, t3 # U : extract low bits for next time extql t2, a1, t1 # U : extract low bits for next time
addq a0, 8, a0 # E : addq a0, 8, a0 # E :
or t0, t1, t0 # E : current dst word now complete stq_u t0, -8(a0) # U : save the current word
beq a2, $u_eoc # U :
ldq_u t2, 0(a1) # U : Latency=3 load high word for next time ldq_u t2, 0(a1) # U : Latency=3 load high word for next time
stq_u t0, -8(a0) # U : save the current word (stall) addq a1, 8, a1 # E :
mov t3, t1 # E :
subq a2, 1, a2 # E : extqh t2, a1, t0 # U : extract low bits (2 cycle stall)
cmpbge zero, t2, t8 # E : test new word for eos (2 cycle stall for data) cmpbge zero, t2, t8 # E : test new word for eos
beq a2, $u_eoc # U : (stall)
nop nop
beq t8, $u_loop # U : beq t8, $u_loop # U :
nop
nop
nop
/* We've found a zero somewhere in the source word we just read. /* We've found a zero somewhere in the source word we just read.
If it resides in the lower half, we have one (probably partial) If it resides in the lower half, we have one (probably partial)
...@@ -276,11 +267,12 @@ $u_loop: ...@@ -276,11 +267,12 @@ $u_loop:
have one full and one partial word left to write out. have one full and one partial word left to write out.
On entry to this basic block: On entry to this basic block:
t0 == the shifted low-order bits from the current source word
t1 == the shifted high-order bits from the previous source word t1 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word. */ t2 == the unshifted current source word. */
$u_eos: $u_eos:
extqh t2, a1, t0 # U : or t0, t1, t0 # E : first (partial) source word complete
or t0, t1, t0 # E : first (partial) source word complete (stall) nop
cmpbge zero, t0, t8 # E : is the null in this first bit? (stall) cmpbge zero, t0, t8 # E : is the null in this first bit? (stall)
bne t8, $u_final # U : (stall) bne t8, $u_final # U : (stall)
...@@ -318,17 +310,26 @@ $u_final: ...@@ -318,17 +310,26 @@ $u_final:
1: stq_u t0, 0(a0) # L : 1: stq_u t0, 0(a0) # L :
ret (t9) # L0 : Latency=3 ret (t9) # L0 : Latency=3
$u_eoc: # end-of-count /* Got to end-of-count before end of string.
extqh t2, a1, t0 # U : On entry to this basic block:
or t0, t1, t0 # E : (stall) t1 == the shifted high-order bits from the previous source word */
cmpbge zero, t0, t8 # E : (stall) $u_eoc:
and a1, 7, t6 # E : avoid final load if possible
sll t10, t6, t6 # U : (stall)
and t6, 0xff, t6 # E : (stall)
bne t6, 1f # U : (stall)
ldq_u t2, 8(a1) # L : load final src word
nop nop
extqh t2, a1, t0 # U : extract low bits for last word (stall)
or t1, t0, t1 # E : (stall)
cmpbge zero, t1, t8 # E :
mov t1, t0 # E :
$u_eocfin: # end-of-count, final word $u_eocfin: # end-of-count, final word
or t10, t8, t8 # E : or t10, t8, t8 # E :
br $u_final # L0 : Latency=3 br $u_final # L0 : Latency=3
nop
nop
/* Unaligned copy entry point. */ /* Unaligned copy entry point. */
.align 4 .align 4
...@@ -349,9 +350,7 @@ $unaligned: ...@@ -349,9 +350,7 @@ $unaligned:
mskql t6, a0, t6 # U : mskql t6, a0, t6 # U :
nop nop
nop nop
nop 1: subq a1, t4, a1 # E : sub dest misalignment from src addr
1:
subq a1, t4, a1 # E : sub dest misalignment from src addr
/* If source misalignment is larger than dest misalignment, we need /* If source misalignment is larger than dest misalignment, we need
extra startup checks to avoid SEGV. */ extra startup checks to avoid SEGV. */
...@@ -396,4 +395,3 @@ $unaligned: ...@@ -396,4 +395,3 @@ $unaligned:
nop nop
.end __stxncpy .end __stxncpy
...@@ -164,26 +164,29 @@ $u_head: ...@@ -164,26 +164,29 @@ $u_head:
or t0, t6, t6 # e1 : mask original data for zero test or t0, t6, t6 # e1 : mask original data for zero test
cmpbge zero, t6, t8 # e0 : cmpbge zero, t6, t8 # e0 :
beq a2, $u_eocfin # .. e1 : beq a2, $u_eocfin # .. e1 :
bne t8, $u_final # e1 : lda t6, -1 # e0 :
bne t8, $u_final # .. e1 :
lda t6, -1 # e1 : mask out the bits we have
mskql t6, a1, t6 # e0 : already seen mskql t6, a1, t6 # e0 : mask out bits already seen
stq_u t0, 0(a0) # e0 : store first output word nop # .. e1 :
or t6, t2, t2 # .. e1 : stq_u t0, 0(a0) # e0 : store first output word
cmpbge zero, t2, t8 # e0 : find nulls in second partial or t6, t2, t2 # .. e1 :
addq a0, 8, a0 # .. e1 : cmpbge zero, t2, t8 # e0 : find nulls in second partial
subq a2, 1, a2 # e0 : addq a0, 8, a0 # .. e1 :
subq a2, 1, a2 # e0 :
bne t8, $u_late_head_exit # .. e1 : bne t8, $u_late_head_exit # .. e1 :
/* Finally, we've got all the stupid leading edge cases taken care /* Finally, we've got all the stupid leading edge cases taken care
of and we can set up to enter the main loop. */ of and we can set up to enter the main loop. */
extql t2, a1, t1 # e0 : position hi-bits of lo word extql t2, a1, t1 # e0 : position hi-bits of lo word
ldq_u t2, 8(a1) # .. e1 : read next high-order source word beq a2, $u_eoc # .. e1 :
addq a1, 8, a1 # e0 : ldq_u t2, 8(a1) # e0 : read next high-order source word
cmpbge zero, t2, t8 # e1 (stall) addq a1, 8, a1 # .. e1 :
beq a2, $u_eoc # e1 : extqh t2, a1, t0 # e0 : position lo-bits of hi word (stall)
bne t8, $u_eos # e1 : cmpbge zero, t2, t8 # .. e1 :
nop # e0 :
bne t8, $u_eos # .. e1 :
/* Unaligned copy main loop. In order to avoid reading too much, /* Unaligned copy main loop. In order to avoid reading too much,
the loop is structured to detect zeros in aligned source words. the loop is structured to detect zeros in aligned source words.
...@@ -193,6 +196,7 @@ $u_head: ...@@ -193,6 +196,7 @@ $u_head:
to run as fast as possible. to run as fast as possible.
On entry to this basic block: On entry to this basic block:
t0 == the shifted low-order bits from the current source word
t1 == the shifted high-order bits from the previous source word t1 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word t2 == the unshifted current source word
...@@ -200,18 +204,18 @@ $u_head: ...@@ -200,18 +204,18 @@ $u_head:
.align 3 .align 3
$u_loop: $u_loop:
extqh t2, a1, t0 # e0 : extract high bits for current word
addq a1, 8, a1 # .. e1 :
extql t2, a1, t3 # e0 : extract low bits for next time
addq a0, 8, a0 # .. e1 :
or t0, t1, t0 # e0 : current dst word now complete or t0, t1, t0 # e0 : current dst word now complete
ldq_u t2, 0(a1) # .. e1 : load high word for next time subq a2, 1, a2 # .. e1 : decrement word count
stq_u t0, -8(a0) # e0 : save the current word stq_u t0, 0(a0) # e0 : save the current word
mov t3, t1 # .. e1 : addq a0, 8, a0 # .. e1 :
subq a2, 1, a2 # e0 : extql t2, a1, t1 # e0 : extract high bits for next time
cmpbge zero, t2, t8 # .. e1 : test new word for eos beq a2, $u_eoc # .. e1 :
beq a2, $u_eoc # e1 : ldq_u t2, 0(a1) # e0 : load high word for next time
beq t8, $u_loop # e1 : addq a1, 8, a1 # .. e1 :
nop # e0 :
cmpbge zero, t2, t8 # e1 : test new word for eos (stall)
extqh t2, a1, t0 # e0 : extract low bits for current word
beq t8, $u_loop # .. e1 :
/* We've found a zero somewhere in the source word we just read. /* We've found a zero somewhere in the source word we just read.
If it resides in the lower half, we have one (probably partial) If it resides in the lower half, we have one (probably partial)
...@@ -219,12 +223,12 @@ $u_loop: ...@@ -219,12 +223,12 @@ $u_loop:
have one full and one partial word left to write out. have one full and one partial word left to write out.
On entry to this basic block: On entry to this basic block:
t0 == the shifted low-order bits from the current source word
t1 == the shifted high-order bits from the previous source word t1 == the shifted high-order bits from the previous source word
t2 == the unshifted current source word. */ t2 == the unshifted current source word. */
$u_eos: $u_eos:
extqh t2, a1, t0 # e0 : or t0, t1, t0 # e0 : first (partial) source word complete
or t0, t1, t0 # e1 : first (partial) source word complete nop # .. e1 :
cmpbge zero, t0, t8 # e0 : is the null in this first bit? cmpbge zero, t0, t8 # e0 : is the null in this first bit?
bne t8, $u_final # .. e1 (zdb) bne t8, $u_final # .. e1 (zdb)
...@@ -260,10 +264,22 @@ $u_final: ...@@ -260,10 +264,22 @@ $u_final:
1: stq_u t0, 0(a0) # e0 : 1: stq_u t0, 0(a0) # e0 :
ret (t9) # .. e1 : ret (t9) # .. e1 :
$u_eoc: # end-of-count /* Got to end-of-count before end of string.
extqh t2, a1, t0 On entry to this basic block:
or t0, t1, t0 t1 == the shifted high-order bits from the previous source word */
cmpbge zero, t0, t8 $u_eoc:
and a1, 7, t6 # e1 :
sll t12, t6, t6 # e0 :
and t6, 0xff, t6 # e0 :
bne t6, 1f # .. e1 :
ldq_u t2, 8(a1) # e0 : load final src word
nop # .. e1 :
extqh t2, a1, t0 # e0 : extract low bits for last word
or t1, t0, t1 # e1 :
1: cmpbge zero, t1, t7
mov t1, t0
$u_eocfin: # end-of-count, final word $u_eocfin: # end-of-count, final word
or t10, t8, t8 or t10, t8, t8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment