Commit 5d7244e7 authored by Jan Beulich, committed by Ingo Molnar

x86-64: Fix memset() to support sizes of 4Gb and above

While currently there doesn't appear to be any reachable in-tree
case where such large memory blocks may be passed to memset()
(alloc_bootmem() being the primary non-reachable one, as it gets
called with suitably large sizes in FLATMEM configurations), we
have recently hit the problem a second time in our Xen kernels.

Rather than working around it a second time, prevent others from
falling into the same trap by fixing this long-standing limitation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F05D992020000780006AA09@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 42693290
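The root cause is that the old routines moved the 64-bit length argument (which arrives in %rdx per the x86-64 calling convention) through 32-bit registers, so counts of 4 GiB or more were silently truncated. A minimal user-space sketch of that truncation follows; the helper below is purely illustrative and is not kernel code:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model (not kernel code) of the old 32-bit sequence:
 * movl %edx,%ecx / shrl $3,%ecx kept only the low 32 bits of the count. */
static uint64_t old_effective_count(uint64_t len)
{
        uint32_t len32 = (uint32_t)len;          /* only the low 32 bits survive */
        return (uint64_t)(len32 >> 3) * 8        /* qwords written by rep stosq */
             + (len32 & 7);                      /* tail bytes written by rep stosb */
}

int main(void)
{
        uint64_t len = (4ULL << 30) + 16;        /* 4 GiB + 16 bytes */
        printf("requested:          %llu bytes\n", (unsigned long long)len);
        printf("old code would set: %llu bytes\n",
               (unsigned long long)old_effective_count(len));
        return 0;
}

For a request of 4 GiB + 16 bytes, the old sequence would have written only 16 bytes.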
@@ -19,16 +19,15 @@
         .section .altinstr_replacement, "ax", @progbits
 .Lmemset_c:
         movq %rdi,%r9
-        movl %edx,%r8d
-        andl $7,%r8d
-        movl %edx,%ecx
-        shrl $3,%ecx
+        movq %rdx,%rcx
+        andl $7,%edx
+        shrq $3,%rcx
         /* expand byte value */
         movzbl %sil,%esi
         movabs $0x0101010101010101,%rax
-        mulq %rsi               /* with rax, clobbers rdx */
+        imulq %rsi,%rax
         rep stosq
-        movl %r8d,%ecx
+        movl %edx,%ecx
         rep stosb
         movq %r9,%rax
         ret
@@ -50,7 +49,7 @@
 .Lmemset_c_e:
         movq %rdi,%r9
         movb %sil,%al
-        movl %edx,%ecx
+        movq %rdx,%rcx
         rep stosb
         movq %r9,%rax
         ret
@@ -61,12 +60,11 @@ ENTRY(memset)
 ENTRY(__memset)
         CFI_STARTPROC
         movq %rdi,%r10
-        movq %rdx,%r11
 
         /* expand byte value */
         movzbl %sil,%ecx
         movabs $0x0101010101010101,%rax
-        mul %rcx                /* with rax, clobbers rdx */
+        imulq %rcx,%rax
 
         /* align dst */
         movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
         CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
-        movl %r11d,%ecx
-        shrl $6,%ecx
+        movq %rdx,%rcx
+        shrq $6,%rcx
         jz .Lhandle_tail
 
         .p2align 4
 .Lloop_64:
-        decl %ecx
+        decq %rcx
         movq %rax,(%rdi)
         movq %rax,8(%rdi)
         movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
          to predict jump tables. */
         .p2align 4
 .Lhandle_tail:
-        movl %r11d,%ecx
+        movl %edx,%ecx
         andl $63&(~7),%ecx
         jz .Lhandle_7
         shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
         jnz .Lloop_8
 
 .Lhandle_7:
-        movl %r11d,%ecx
-        andl $7,%ecx
+        andl $7,%edx
         jz .Lende
         .p2align 4
 .Lloop_1:
-        decl %ecx
+        decl %edx
         movb %al,(%rdi)
         leaq 1(%rdi),%rdi
         jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
 
         CFI_RESTORE_STATE
 .Lbad_alignment:
-        cmpq $7,%r11
+        cmpq $7,%rdx
         jbe .Lhandle_7
         movq %rax,(%rdi)        /* unaligned store */
         movq $8,%r8
         subq %r9,%r8
         addq %r8,%rdi
-        subq %r8,%rdx
+        subq %r8,%rdx
         jmp .Lafter_bad_alignment
 .Lfinal:
         CFI_ENDPROC
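A note on the "/* expand byte value */" step: multiplying the zero-extended fill byte by 0x0101010101010101 replicates it into every byte of a 64-bit word. The switch from mulq to the two-operand imulq matters here because mulq writes its high half to %rdx (as the old comment notes), and %rdx now has to stay live as the 64-bit count. A rough C equivalent of the replication, for illustration only:

#include <stdint.h>

/* Illustrative sketch of the byte-replication trick used above:
 * every byte lane of the 64-bit multiplier picks up a copy of c. */
static uint64_t expand_byte(uint8_t c)
{
        return (uint64_t)c * 0x0101010101010101ULL;
}
/* e.g. expand_byte(0xab) == 0xababababababababULL */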