• Uros Bizjak's avatar
    locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions · 95ece481
    Uros Bizjak authored
    Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions to
    use arch_atomic64_try_cmpxchg().  This implementation avoids one extra
    trip through the CMPXCHG loop.
    
    The value preload before the cmpxchg loop does not need to be atomic:
    a torn read merely makes the first CMPXCHG8B fail, which reloads the
    current value.  Use arch_atomic64_read_nonatomic(v) to load the value
    from the atomic64_t location in a non-atomic way.
    
    The generated code improves from:
    
      1917d5:	31 c9                	xor    %ecx,%ecx
      1917d7:	31 db                	xor    %ebx,%ebx
      1917d9:	89 4c 24 3c          	mov    %ecx,0x3c(%esp)
      1917dd:	8b 74 24 24          	mov    0x24(%esp),%esi
      1917e1:	89 c8                	mov    %ecx,%eax
      1917e3:	89 5c 24 34          	mov    %ebx,0x34(%esp)
      1917e7:	8b 7c 24 28          	mov    0x28(%esp),%edi
      1917eb:	21 ce                	and    %ecx,%esi
      1917ed:	89 74 24 4c          	mov    %esi,0x4c(%esp)
      1917f1:	21 df                	and    %ebx,%edi
      1917f3:	89 de                	mov    %ebx,%esi
      1917f5:	89 7c 24 50          	mov    %edi,0x50(%esp)
      1917f9:	8b 54 24 4c          	mov    0x4c(%esp),%edx
      1917fd:	8b 7c 24 2c          	mov    0x2c(%esp),%edi
      191801:	8b 4c 24 50          	mov    0x50(%esp),%ecx
      191805:	89 d3                	mov    %edx,%ebx
      191807:	89 f2                	mov    %esi,%edx
      191809:	f0 0f c7 0f          	lock cmpxchg8b (%edi)
      19180d:	89 c1                	mov    %eax,%ecx
      19180f:	8b 74 24 34          	mov    0x34(%esp),%esi
      191813:	89 d3                	mov    %edx,%ebx
      191815:	89 44 24 4c          	mov    %eax,0x4c(%esp)
      191819:	8b 44 24 3c          	mov    0x3c(%esp),%eax
      19181d:	89 df                	mov    %ebx,%edi
      19181f:	89 54 24 44          	mov    %edx,0x44(%esp)
      191823:	89 ca                	mov    %ecx,%edx
      191825:	31 de                	xor    %ebx,%esi
      191827:	31 c8                	xor    %ecx,%eax
      191829:	09 f0                	or     %esi,%eax
      19182b:	75 ac                	jne    1917d9 <...>
    
    to:
    
      1912ba:	8b 06                	mov    (%esi),%eax
      1912bc:	8b 56 04             	mov    0x4(%esi),%edx
      1912bf:	89 44 24 3c          	mov    %eax,0x3c(%esp)
      1912c3:	89 c1                	mov    %eax,%ecx
      1912c5:	23 4c 24 34          	and    0x34(%esp),%ecx
      1912c9:	89 d3                	mov    %edx,%ebx
      1912cb:	23 5c 24 38          	and    0x38(%esp),%ebx
      1912cf:	89 54 24 40          	mov    %edx,0x40(%esp)
      1912d3:	89 4c 24 2c          	mov    %ecx,0x2c(%esp)
      1912d7:	89 5c 24 30          	mov    %ebx,0x30(%esp)
      1912db:	8b 5c 24 2c          	mov    0x2c(%esp),%ebx
      1912df:	8b 4c 24 30          	mov    0x30(%esp),%ecx
      1912e3:	f0 0f c7 0e          	lock cmpxchg8b (%esi)
      1912e7:	0f 85 f3 02 00 00    	jne    1915e0 <...>
    Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
    Signed-off-by: Ingo Molnar <mingo@kernel.org>
    Cc: Linus Torvalds <torvalds@linux-foundation.org>
    Link: https://lore.kernel.org/r/20240410062957.322614-3-ubizjak@gmail.com
    95ece481
atomic64_32.h 7.8 KB