Commit f233f7f1 authored by Peter Zijlstra (Intel), committed by Ingo Molnar

locking/pvqspinlock, x86: Implement the paravirt qspinlock call patching

We use the regular paravirt call patching to switch between:

  native_queued_spin_lock_slowpath()	__pv_queued_spin_lock_slowpath()
  native_queued_spin_unlock()		__pv_queued_spin_unlock()
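
Note: this patch keeps the native functions as the pv_lock_ops defaults;
the actual switch-over is done by a guest-side enabling patch (Xen/KVM),
which is not part of this commit. As a rough sketch of what such an
override looks like at guest boot (hv_wait()/hv_kick() are placeholder
names for the hypervisor's halt and wakeup hooks, not functions added
here):

  __pv_init_lock_hash();
  pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
  pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
  pv_lock_ops.wait = hv_wait;   /* placeholder: block this vCPU while *ptr == val */
  pv_lock_ops.kick = hv_kick;   /* placeholder: wake up the given vCPU */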

We use a callee-saved call for the unlock function, which reduces the
i-cache footprint and allows 'inlining' of SPIN_UNLOCK functions
again.
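
The callee-saved thunk comes from the existing PV_CALLEE_SAVE_REGS_THUNK()
machinery; a simplified sketch (not the exact macro expansion) of what it
emits for __pv_queued_spin_unlock on x86-64:

  asm(".pushsection .text;"
      "__raw_callee_save___pv_queued_spin_unlock:"
      "push %rcx; push %rdx; push %rsi; push %rdi;"
      "push %r8; push %r9; push %r10; push %r11;"
      "call __pv_queued_spin_unlock;"
      "pop %r11; pop %r10; pop %r9; pop %r8;"
      "pop %rdi; pop %rsi; pop %rdx; pop %rcx;"
      "ret;"
      ".popsection");

Because only the return register has to be assumed clobbered at the call
site, the compiler can keep values live in registers across spin_unlock(),
which is what makes inlining it worthwhile.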

We further optimize the unlock path by patching the direct call with a
"movb $0,%arg1" if we are indeed using the native unlock code. This
makes the unlock code almost as fast as the !PARAVIRT case.
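
The mechanics are the regular paravirt patching pass: native_patch()
selects the DEF_NATIVE() template for pv_lock_ops.queued_spin_unlock and
paravirt_patch_insns() copies it over the call site; apply_paravirt()
then NOP-pads whatever is left of the original call sequence.
Approximately (paraphrased, not a verbatim copy of the helper):

  /* insnbuf/len: the call site; start/end: the movb template from DEF_NATIVE() */
  unsigned insn_len = end - start;

  if (insn_len > len || start == NULL)
          insn_len = len;                 /* template doesn't fit: leave the site as-is */
  else
          memcpy(insnbuf, start, insn_len);  /* overwrite the call with the movb */

  return insn_len;                        /* caller NOP-fills the remaining bytes */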

This significantly lowers the overhead of having
CONFIG_PARAVIRT_SPINLOCKS enabled, even for native code.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Daniel J Blueman <daniel@numascale.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paolo Bonzini <paolo.bonzini@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: virtualization@lists.linux-foundation.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/1429901803-29771-10-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent a23db284
@@ -667,7 +667,7 @@ config PARAVIRT_DEBUG
 config PARAVIRT_SPINLOCKS
         bool "Paravirtualization layer for spinlocks"
         depends on PARAVIRT && SMP
-        select UNINLINE_SPIN_UNLOCK
+        select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCK
         ---help---
           Paravirtualized spinlocks allow a pvops backend to replace the
           spinlock implementation with something virtualization-friendly
...
@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
+#ifdef CONFIG_QUEUED_SPINLOCK
+
+static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
+                                                        u32 val)
+{
+        PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
+}
+
+static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+        PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
+}
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+        PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+        PVOP_VCALL1(pv_lock_ops.kick, cpu);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCK */
+
 static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
                                                         __ticket_t ticket)
 {
@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
         PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
 }
 
-#endif
+#endif /* CONFIG_QUEUED_SPINLOCK */
+
+#endif /* SMP && PARAVIRT_SPINLOCKS */
 
 #ifdef CONFIG_X86_32
 #define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
...
@@ -333,9 +333,19 @@ struct arch_spinlock;
 typedef u16 __ticket_t;
 #endif
 
+struct qspinlock;
+
 struct pv_lock_ops {
+#ifdef CONFIG_QUEUED_SPINLOCK
+        void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+        struct paravirt_callee_save queued_spin_unlock;
+
+        void (*wait)(u8 *ptr, u8 val);
+        void (*kick)(int cpu);
+#else /* !CONFIG_QUEUED_SPINLOCK */
         struct paravirt_callee_save lock_spinning;
         void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
+#endif /* !CONFIG_QUEUED_SPINLOCK */
 };
 
 /* This contains all the paravirt structures: we get a convenient
...
@@ -3,6 +3,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
 
 #define queued_spin_unlock queued_spin_unlock
 /**
@@ -11,11 +12,33 @@
  *
  * A smp_store_release() on the least-significant byte.
  */
-static inline void queued_spin_unlock(struct qspinlock *lock)
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
 {
         smp_store_release((u8 *)lock, 0);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+        pv_queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+        pv_queued_spin_unlock(lock);
+}
+#else
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+        native_queued_spin_unlock(lock);
+}
+#endif
+
 #define virt_queued_spin_lock virt_queued_spin_lock
 
 static inline bool virt_queued_spin_lock(struct qspinlock *lock)
...
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+
+#endif
@@ -8,11 +8,33 @@
 
 #include <asm/paravirt.h>
 
+#ifdef CONFIG_QUEUED_SPINLOCK
+__visible void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+        native_queued_spin_unlock(lock);
+}
+
+PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
+
+bool pv_is_native_spin_unlock(void)
+{
+        return pv_lock_ops.queued_spin_unlock.func ==
+                __raw_callee_save___native_queued_spin_unlock;
+}
+#endif
+
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
+#ifdef CONFIG_QUEUED_SPINLOCK
+        .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+        .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
+        .wait = paravirt_nop,
+        .kick = paravirt_nop,
+#else /* !CONFIG_QUEUED_SPINLOCK */
         .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
         .unlock_kick = paravirt_nop,
-#endif
+#endif /* !CONFIG_QUEUED_SPINLOCK */
+#endif /* SMP */
 };
 EXPORT_SYMBOL(pv_lock_ops);
...
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
 
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
+#endif
+
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
 {
         /* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
         return 0;
 }
 
+extern bool pv_is_native_spin_unlock(void);
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                       unsigned long addr, unsigned len)
 {
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                 PATCH_SITE(pv_mmu_ops, write_cr3);
                 PATCH_SITE(pv_cpu_ops, clts);
                 PATCH_SITE(pv_cpu_ops, read_tsc);
-
-        patch_site:
-                ret = paravirt_patch_insns(ibuf, len, start, end);
-                break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+                case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+                        if (pv_is_native_spin_unlock()) {
+                                start = start_pv_lock_ops_queued_spin_unlock;
+                                end   = end_pv_lock_ops_queued_spin_unlock;
+                                goto patch_site;
+                        }
+#endif
 
         default:
                 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
                 break;
+
+patch_site:
+                ret = paravirt_patch_insns(ibuf, len, start, end);
+                break;
 }
 #undef PATCH_SITE
         return ret;
...
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 DEF_NATIVE(, mov32, "mov %edi, %eax");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCK)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
+#endif
+
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
 {
         return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
                                     start__mov64, end__mov64);
 }
 
+extern bool pv_is_native_spin_unlock(void);
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                       unsigned long addr, unsigned len)
 {
@@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                 PATCH_SITE(pv_cpu_ops, clts);
                 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
                 PATCH_SITE(pv_cpu_ops, wbinvd);
-
-        patch_site:
-                ret = paravirt_patch_insns(ibuf, len, start, end);
-                break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCK)
+                case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+                        if (pv_is_native_spin_unlock()) {
+                                start = start_pv_lock_ops_queued_spin_unlock;
+                                end   = end_pv_lock_ops_queued_spin_unlock;
+                                goto patch_site;
+                        }
+#endif
 
         default:
                 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
                 break;
+
+patch_site:
+                ret = paravirt_patch_insns(ibuf, len, start, end);
+                break;
 }
 #undef PATCH_SITE
         return ret;
...