Commit a7bea830 authored by Jan Beulich, committed by Ingo Molnar

x86/asm/64: Use 32-bit XOR to zero registers

Some Intel CPUs don't recognize 64-bit XORs as zeroing idioms. Zeroing
idioms don't require execution bandwidth, as they are handled in the
frontend (through register renaming). Use 32-bit XORs instead.
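
For illustration only (not part of this patch), a minimal sketch of the two forms: on x86-64 a write to a 32-bit register zero-extends into the full 64-bit register, so the 32-bit XOR clears the register just the same while dropping the REX.W prefix byte and being reliably treated as a dependency-breaking zeroing idiom.

	# 64-bit form, encoded 48 31 c0: carries a REX.W prefix and is not
	# special-cased as a zeroing idiom on some Intel CPUs.
	xorq	%rax, %rax

	# 32-bit form, encoded 31 c0: one byte shorter; the upper 32 bits of
	# %rax are cleared by the implicit zero-extension, and the rename
	# stage resolves it without consuming an execution port.
	xorl	%eax, %eax
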
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: davem@davemloft.net
Cc: herbert@gondor.apana.org.au
Cc: pavel@ucw.cz
Cc: rjw@rjwysocki.net
Link: http://lkml.kernel.org/r/5B39FF1A02000078001CFB54@prv1-mh.provo.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent c5fcdbf1
@@ -75,7 +75,7 @@
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 	mov LEN, %r8
@@ -66,7 +66,7 @@
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG0, MSG0
 	pxor MSG1, MSG1
@@ -59,7 +59,7 @@
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 	mov LEN, %r8
@@ -258,7 +258,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 .macro GCM_INIT Iv SUBKEY AAD AADLEN
 	mov \AADLEN, %r11
 	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
-	xor %r11, %r11
+	xor %r11d, %r11d
 	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
 	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
 	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
@@ -286,7 +286,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 	movdqu HashKey(%arg2), %xmm13
 	add %arg5, InLen(%arg2)
-	xor %r11, %r11 # initialise the data pointer offset as zero
+	xor %r11d, %r11d # initialise the data pointer offset as zero
 	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
 	sub %r11, %arg5 # sub partial block data used
@@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-	xor %rax,%rax
+	xor %eax, %eax
 	mov %rax, PBlockLen(%arg2)
 	jmp _dec_done_\@
@@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-	xor %rax,%rax
+	xor %eax, %eax
 	mov %rax, PBlockLen(%arg2)
 	jmp _encode_done_\@
@@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
 _get_AAD_done\@:
 	# initialize the data pointer offset as zero
-	xor %r11, %r11
+	xor %r11d, %r11d
 	# start AES for num_initial_blocks blocks
 	mov arg5, %rax # rax = *Y0
@@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
 _get_AAD_done\@:
 	# initialize the data pointer offset as zero
-	xor %r11, %r11
+	xor %r11d, %r11d
 	# start AES for num_initial_blocks blocks
 	mov arg5, %rax # rax = *Y0
@@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	vpxor MSG, MSG, MSG
 	mov %rcx, %r8
@@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG_LO, MSG_LO
 	pxor MSG_HI, MSG_HI
@@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 	mov %rcx, %r8
@@ -96,7 +96,7 @@
 	# cleanup workspace
 	mov $8, %ecx
 	mov %rsp, %rdi
-	xor %rax, %rax
+	xor %eax, %eax
 	rep stosq
 	mov %rbp, %rsp # deallocate workspace
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
  * address given in m16:64.
  */
 	pushq $.Lafter_lret # put return address on stack for unwinder
-	xorq %rbp, %rbp # clear frame pointer
+	xorl %ebp, %ebp # clear frame pointer
 	movq initial_code(%rip), %rax
 	pushq $__KERNEL_CS # set correct cs
 	pushq %rax # target address in negative space
@@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
+DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
@@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
 	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
-	xorq %rax, %rax
+	xorl %eax, %eax
 	ret
 ENDPROC(__memcpy_mcsafe)
 EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
@@ -137,7 +137,7 @@ ENTRY(restore_registers)
 	/* Saved in save_processor_state. */
 	lgdt saved_context_gdt_desc(%rax)
-	xorq %rax, %rax
+	xorl %eax, %eax
 	/* tell the hibernation core that we've just restored the memory */
 	movq %rax, in_suspend(%rip)