Commit b9ec40af authored by Christoph Lameter, committed by Tejun Heo

percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support

Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b
instructions.

-tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and
     other cosmetic changes.
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 7c334339
...@@ -451,6 +451,26 @@ do { \ ...@@ -451,6 +451,26 @@ do { \
#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#endif /* !CONFIG_M386 */ #endif /* !CONFIG_M386 */
#ifdef CONFIG_X86_CMPXCHG64
/*
 * percpu_cmpxchg8b_double - compare-and-exchange two adjacent 32-bit
 * per-cpu words with one cmpxchg8b instruction.
 *
 * @pcp1:   first per-cpu word; the 8-byte operation starts at its address
 * @o1, @o2: expected old values (loaded into %eax and %edx)
 * @n1, @n2: new values (loaded into %ebx and %ecx)
 *
 * Evaluates to non-zero (the result of "setz") when the comparison
 * succeeded and the new values were stored, zero otherwise.
 *
 * Constraint notes:
 *  - pcp1 is "+m" because cmpxchg8b both reads and writes the location.
 *  - the word following pcp1 is also written but is not a macro argument,
 *    so a "memory" clobber tells the compiler about that store.
 *  - %edx is "+d" because the instruction overwrites it on a mismatch.
 */
#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)			\
({									\
	char __ret;							\
	typeof(o1) __o1 = (o1);						\
	typeof(o1) __n1 = (n1);						\
	typeof(o2) __o2 = (o2);						\
	typeof(o2) __n2 = (n2);						\
	asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"	\
		    : "=a" (__ret), "+m" (pcp1), "+d" (__o2)		\
		    : "b" (__n1), "c" (__n2), "a" (__o1)		\
		    : "memory");					\
	__ret;								\
})
/*
 * 4-byte-per-word cmpxchg_double variants. All three flavours map to the
 * same percpu_cmpxchg8b_double() implementation. pcp2 is accepted for
 * interface symmetry but is not forwarded: only pcp1 is passed down.
 */
#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#endif /* CONFIG_X86_CMPXCHG64 */
/* /*
* Per cpu atomic 64 bit operations are only available under 64 bit. * Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations. * 32 bit must fall back to generic operations.
...@@ -480,6 +500,34 @@ do { \ ...@@ -480,6 +500,34 @@ do { \
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
/*
 * Pretty complex macro to generate the cmpxchg16b instruction. The
 * instruction is not supported on early AMD64 processors so we must be
 * able to emulate it in software: the default code path calls
 * this_cpu_cmpxchg16b_emu, and the inline cmpxchg16b is the alternative
 * selected by X86_FEATURE_CX16. The address used in the cmpxchg16b
 * instruction must be aligned to a 16 byte boundary.
 *
 * @pcp1:   first per-cpu word; its address is passed in %rsi
 * @o1, @o2: expected old values (%rax, %rdx)
 * @n1, @n2: new values (%rbx, %rcx)
 *
 * Evaluates to non-zero in %al when the exchange was performed.
 *
 * Both code paths read and write 16 bytes of per-cpu memory that are not
 * named by any operand, hence the "memory" clobber. %rdx is "+d" because
 * cmpxchg16b overwrites it when the comparison fails.
 *
 * NOTE(review): the clobber is appended to the last input argument and
 * therefore relies on alternative_io() expanding its variadic inputs at
 * the very end of the asm statement -- confirm against its definition.
 */
#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)			\
({									\
	char __ret;							\
	typeof(o1) __o1 = (o1);						\
	typeof(o1) __n1 = (n1);						\
	typeof(o2) __o2 = (o2);						\
	typeof(o2) __n2 = (n2);						\
	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
		       "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",	\
		       X86_FEATURE_CX16,				\
		       ASM_OUTPUT2("=a" (__ret), "+d" (__o2)),		\
		       "S" (&pcp1), "b" (__n1), "c" (__n2),		\
		       "a" (__o1) : "memory");				\
	__ret;								\
})
/*
 * 8-byte-per-word cmpxchg_double variants. All three flavours map to the
 * same percpu_cmpxchg16b_double() implementation. pcp2 is accepted for
 * interface symmetry but is not forwarded: only pcp1 is passed down.
 */
#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#endif #endif
/* This is not atomic against other CPUs -- CPU preemption needs to be off */ /* This is not atomic against other CPUs -- CPU preemption needs to be off */
......
...@@ -42,4 +42,5 @@ else ...@@ -42,4 +42,5 @@ else
lib-y += memmove_64.o memset_64.o lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
lib-y += cmpxchg16b_emu.o
endif endif
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*
*/
#include <linux/linkage.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#include <asm/dwarf2.h>
.text
/*
 * this_cpu_cmpxchg16b_emu - emulate 'cmpxchg16b %gs:(%rsi)'
 *
 * Syntax: GAS, AT&T operand order.
 *
 * Inputs:
 * %rsi : memory location to compare (addressed relative to %gs)
 * %rax : low 64 bits of old value
 * %rdx : high 64 bits of old value
 * %rbx : low 64 bits of new value
 * %rcx : high 64 bits of new value
 *
 * Output:
 * %al  : 1 if the values matched and the new value was stored, else 0
 *
 * Unlike the real instruction the result is returned in %al, not via ZF,
 * and %rdx:%rax are not reloaded with the current memory contents when
 * the comparison fails. The caller reads %al to get the result.
 *
 * Note that this is only useful for a cpuops operation. Meaning that we
 * do *not* have a fully atomic operation but just an operation that is
 * *atomic* on a single cpu (as provided by the this_cpu_xx class of
 * macros).
 */
ENTRY(this_cpu_cmpxchg16b_emu)
CFI_STARTPROC
	/* Save flags, then block interrupts for the compare + store pair. */
	pushfq
	CFI_ADJUST_CFA_OFFSET 8
	cli

	/* Both halves must match the expected old value. */
	cmpq %gs:(%rsi), %rax
	jne .Lnot_same
	cmpq %gs:8(%rsi), %rdx
	jne .Lnot_same

	/* Match: store the new 16-byte value. */
	movq %rbx, %gs:(%rsi)
	movq %rcx, %gs:8(%rsi)

	/* Restore the caller's flags (including the interrupt flag). */
	popfq
	CFI_ADJUST_CFA_OFFSET -8
	mov $1, %al
	ret

	/* Reached only by the jumps above, with the saved flags still pushed. */
	CFI_ADJUST_CFA_OFFSET 8
.Lnot_same:
	popfq
	CFI_ADJUST_CFA_OFFSET -8
	xor %al,%al
	ret
CFI_ENDPROC
ENDPROC(this_cpu_cmpxchg16b_emu)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment