• Christophe Leroy's avatar
    powerpc/uaccess: Switch __put_user_size_allowed() to __put_user_asm_goto() · ee0a49a6
    Christophe Leroy authored
    __put_user_asm_goto() provides more flexibility to GCC and avoids using
    a local variable to tell if the write succeeded or not.
    GCC can then avoid emitting a compare (cmp) in the fast path.
    
    See the difference for a small function like the PPC64 version of
    save_general_regs() in arch/powerpc/kernel/signal_32.c:
    
    Before the patch (unreachable nop removed):
    
    0000000000000c10 <.save_general_regs>:
         c10:	39 20 00 2c 	li      r9,44
         c14:	39 40 00 00 	li      r10,0
         c18:	7d 29 03 a6 	mtctr   r9
         c1c:	38 c0 00 00 	li      r6,0
         c20:	48 00 00 14 	b       c34 <.save_general_regs+0x24>
         c30:	42 40 00 40 	bdz     c70 <.save_general_regs+0x60>
         c34:	28 2a 00 27 	cmpldi  r10,39
         c38:	7c c8 33 78 	mr      r8,r6
         c3c:	79 47 1f 24 	rldicr  r7,r10,3,60
         c40:	39 20 00 01 	li      r9,1
         c44:	41 82 00 0c 	beq     c50 <.save_general_regs+0x40>
         c48:	7d 23 38 2a 	ldx     r9,r3,r7
         c4c:	79 29 00 20 	clrldi  r9,r9,32
         c50:	91 24 00 00 	stw     r9,0(r4)
         c54:	2c 28 00 00 	cmpdi   r8,0
         c58:	39 4a 00 01 	addi    r10,r10,1
         c5c:	38 84 00 04 	addi    r4,r4,4
         c60:	41 82 ff d0 	beq     c30 <.save_general_regs+0x20>
         c64:	38 60 ff f2 	li      r3,-14
         c68:	4e 80 00 20 	blr
         c70:	38 60 00 00 	li      r3,0
         c74:	4e 80 00 20 	blr
    
    0000000000000000 <.fixup>:
      cc:	39 00 ff f2 	li      r8,-14
      d0:	48 00 00 00 	b       d0 <.fixup+0xd0>
    			d0: R_PPC64_REL24	.text+0xc54
    
    After the patch:
    
    0000000000001490 <.save_general_regs>:
        1490:	39 20 00 2c 	li      r9,44
        1494:	39 40 00 00 	li      r10,0
        1498:	7d 29 03 a6 	mtctr   r9
        149c:	60 00 00 00 	nop
        14a0:	28 2a 00 27 	cmpldi  r10,39
        14a4:	79 48 1f 24 	rldicr  r8,r10,3,60
        14a8:	39 20 00 01 	li      r9,1
        14ac:	41 82 00 0c 	beq     14b8 <.save_general_regs+0x28>
        14b0:	7d 23 40 2a 	ldx     r9,r3,r8
        14b4:	79 29 00 20 	clrldi  r9,r9,32
        14b8:	91 24 00 00 	stw     r9,0(r4)
        14bc:	39 4a 00 01 	addi    r10,r10,1
        14c0:	38 84 00 04 	addi    r4,r4,4
        14c4:	42 00 ff dc 	bdnz    14a0 <.save_general_regs+0x10>
        14c8:	38 60 00 00 	li      r3,0
        14cc:	4e 80 00 20 	blr
        14d0:	38 60 ff f2 	li      r3,-14
        14d4:	4e 80 00 20 	blr
    Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
    Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
    Link: https://lore.kernel.org/r/94ba5a5138f99522e1562dbcdb38d31aa790dc89.1599216721.git.christophe.leroy@csgroup.eu
    ee0a49a6
uaccess.h 17.4 KB