Commit 3a6bfbc9 authored by Davidlohr Bueso's avatar Davidlohr Bueso Committed by Ingo Molnar

arch, locking: Ciao arch_mutex_cpu_relax()

The arch_mutex_cpu_relax() function, introduced by 34b133f8, is
hacky and ugly. It was added a few years ago to address the fact
that common cpu_relax() calls include yielding on s390, and thus
impact the optimistic spinning functionality of mutexes. Nowadays
we use this function well beyond mutexes: rwsem, qrwlock, mcs and
lockref. Since the macro that defines the call is in the mutex header,
any users must include mutex.h and the naming is misleading as well.

This patch (i) renames the call to cpu_relax_lowlatency  ("relax, but
only if you can do it with very low latency") and (ii) defines it in
each arch's asm/processor.h local header, just like for regular cpu_relax
functions. On all archs, except s390, cpu_relax_lowlatency is simply cpu_relax,
and thus we can take it out of mutex.h. While this can seem redundant,
I believe it is a good choice as it allows us to move out arch specific
logic from generic locking primitives and enables future(?) archs to
transparently define it, similarly to System Z.
Signed-off-by: default avatarDavidlohr Bueso <davidlohr@hp.com>
Signed-off-by: default avatarPeter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bharat Bhushan <r65777@freescale.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Liqin <liqin.linux@gmail.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Howells <dhowells@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Cc: Dominik Dingel <dingel@linux.vnet.ibm.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Joseph Myers <joseph@codesourcery.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Qais Yousef <qais.yousef@imgtec.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Steven Miao <realmz6@gmail.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Stratos Karafotis <stratosk@semaphore.gr>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vasily Kulikov <segoon@openwall.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Wolfram Sang <wsa@the-dreams.de>
Cc: adi-buildroot-devel@lists.sourceforge.net
Cc: linux390@de.ibm.com
Cc: linux-alpha@vger.kernel.org
Cc: linux-am33-list@redhat.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-c6x-dev@linux-c6x.org
Cc: linux-cris-kernel@axis.com
Cc: linux-hexagon@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
Cc: linux@lists.openrisc.net
Cc: linux-m32r-ja@ml.linux-m32r.org
Cc: linux-m32r@ml.linux-m32r.org
Cc: linux-m68k@lists.linux-m68k.org
Cc: linux-metag@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: linux-xtensa@linux-xtensa.org
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/1404079773.2619.4.camel@buesod1.americas.hpqcorp.netSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent acf59377
......@@ -57,6 +57,7 @@ unsigned long get_wchan(struct task_struct *p);
((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp)
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
......
......@@ -62,6 +62,8 @@ unsigned long thread_saved_pc(struct task_struct *t);
#define cpu_relax() do { } while (0)
#endif
#define cpu_relax_lowlatency() cpu_relax()
#define copy_segments(tsk, mm) do { } while (0)
#define release_segments(mm) do { } while (0)
......
......@@ -82,6 +82,8 @@ unsigned long get_wchan(struct task_struct *p);
#define cpu_relax() barrier()
#endif
#define cpu_relax_lowlatency() cpu_relax()
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
......
......@@ -129,6 +129,7 @@ extern void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
/* Thread switching */
extern struct task_struct *cpu_switch_to(struct task_struct *prev,
......
......@@ -92,6 +92,7 @@ extern struct avr32_cpuinfo boot_cpu_data;
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
#define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory")
struct cpu_context {
......
......@@ -99,7 +99,7 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp)
#define cpu_relax() smp_mb()
#define cpu_relax_lowlatency() cpu_relax()
/* Get the Silicon Revision of the chip */
static inline uint32_t __pure bfin_revid(void)
......
......@@ -121,6 +121,7 @@ extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#define cpu_relax() do { } while (0)
#define cpu_relax_lowlatency() cpu_relax()
extern const struct seq_operations cpuinfo_op;
......
......@@ -63,6 +63,7 @@ static inline void release_thread(struct task_struct *dead_task)
#define init_stack (init_thread_union.stack)
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
void default_idle(void);
......
......@@ -56,6 +56,7 @@ struct thread_struct {
}
#define cpu_relax() __vmyield()
#define cpu_relax_lowlatency() cpu_relax()
/*
* Decides where the kernel will search for a free chunk of vm space during
......
......@@ -548,6 +548,7 @@ ia64_eoi (void)
}
#define cpu_relax() ia64_hint(ia64_hint_pause)
#define cpu_relax_lowlatency() cpu_relax()
static inline int
ia64_get_irr(unsigned int vector)
......
......@@ -133,5 +133,6 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(tsk) ((tsk)->thread.sp)
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
#endif /* _ASM_M32R_PROCESSOR_H */
......@@ -176,5 +176,6 @@ unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) ((tsk)->thread.esp0))
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
#endif
......@@ -155,6 +155,7 @@ unsigned long get_wchan(struct task_struct *p);
#define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0)
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
extern void setup_priv(void);
......
......@@ -22,6 +22,7 @@
extern const struct seq_operations cpuinfo_op;
# define cpu_relax() barrier()
# define cpu_relax_lowlatency() cpu_relax()
#define task_pt_regs(tsk) \
(((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1)
......
......@@ -367,6 +367,7 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status)
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
/*
* Return_address is a replacement for __builtin_return_address(count)
......
......@@ -68,7 +68,9 @@ extern struct mn10300_cpuinfo cpu_data[];
extern void identify_cpu(struct mn10300_cpuinfo *);
extern void print_cpu_info(struct mn10300_cpuinfo *);
extern void dodgy_tsc(void);
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
/*
* User space process size: 1.75GB (default).
......
......@@ -101,6 +101,7 @@ extern unsigned long thread_saved_pc(struct task_struct *t);
#define init_stack (init_thread_union.stack)
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
#endif /* __ASSEMBLY__ */
#endif /* __ASM_OPENRISC_PROCESSOR_H */
......@@ -338,6 +338,7 @@ extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30])
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
/* Used as a macro to identify the combined VIPT/PIPT cached
* CPUs which require a guarantee of coherency (no inequivalent
......
......@@ -400,6 +400,8 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#define cpu_relax() barrier()
#endif
#define cpu_relax_lowlatency() cpu_relax()
/* Check that a certain kernel stack pointer is valid in task_struct p */
int validate_sp(unsigned long sp, struct task_struct *p,
unsigned long nbytes);
......
......@@ -217,7 +217,7 @@ static inline void cpu_relax(void)
barrier();
}
#define arch_mutex_cpu_relax() barrier()
#define cpu_relax_lowlatency() barrier()
static inline void psw_set_key(unsigned int key)
{
......
......@@ -24,6 +24,7 @@ extern unsigned long get_wchan(struct task_struct *p);
#define current_text_addr() ({ __label__ _l; _l: &&_l; })
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
#define release_thread(thread) do {} while (0)
/*
......
......@@ -97,6 +97,7 @@ extern struct sh_cpuinfo cpu_data[];
#define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory")
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
void default_idle(void);
void stop_this_cpu(void *);
......
......@@ -119,6 +119,8 @@ extern struct task_struct *last_task_used_math;
int do_mathemu(struct pt_regs *regs, struct task_struct *fpt);
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
extern void (*sparc_idle)(void);
#endif
......
......@@ -216,6 +216,7 @@ unsigned long get_wchan(struct task_struct *task);
"nop\n\t" \
".previous" \
::: "memory")
#define cpu_relax_lowlatency() cpu_relax()
/* Prefetch support. This is tuned for UltraSPARC-III and later.
* UltraSPARC-I will treat these as nops, and UltraSPARC-II has
......
......@@ -266,6 +266,8 @@ static inline void cpu_relax(void)
barrier();
}
#define cpu_relax_lowlatency() cpu_relax()
/* Info on this processor (see fs/proc/cpuinfo.c) */
struct seq_operations;
extern const struct seq_operations cpuinfo_op;
......
......@@ -71,6 +71,7 @@ extern void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
......
......@@ -696,6 +696,8 @@ static inline void cpu_relax(void)
rep_nop();
}
#define cpu_relax_lowlatency() cpu_relax()
/* Stop speculative execution and prefetching of modified code. */
static inline void sync_core(void)
{
......
......@@ -26,6 +26,7 @@ static inline void rep_nop(void)
}
#define cpu_relax() rep_nop()
#define cpu_relax_lowlatency() cpu_relax()
#include <asm/processor-generic.h>
......
......@@ -182,6 +182,7 @@ extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1])
#define cpu_relax() barrier()
#define cpu_relax_lowlatency() cpu_relax()
/* Special register access. */
......
......@@ -176,8 +176,4 @@ extern void mutex_unlock(struct mutex *lock);
extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
#ifndef arch_mutex_cpu_relax
# define arch_mutex_cpu_relax() cpu_relax()
#endif
#endif /* __LINUX_MUTEX_H */
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include "mcs_spinlock.h"
......@@ -79,7 +77,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
break;
}
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
return next;
......@@ -120,7 +118,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
if (need_resched())
goto unqueue;
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
return true;
......@@ -146,7 +144,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
if (smp_load_acquire(&node->locked))
return true;
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
/*
* Or we race against a concurrent unqueue()'s step-B, in which
......
......@@ -27,7 +27,7 @@ struct mcs_spinlock {
#define arch_mcs_spin_lock_contended(l) \
do { \
while (!(smp_load_acquire(l))) \
arch_mutex_cpu_relax(); \
cpu_relax_lowlatency(); \
} while (0)
#endif
......@@ -104,7 +104,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
return;
/* Wait until the next pointer is set */
while (!(next = ACCESS_ONCE(node->next)))
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
/* Pass lock to next waiter. */
......
......@@ -146,7 +146,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
if (need_resched())
break;
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
rcu_read_unlock();
......@@ -464,7 +464,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
osq_unlock(&lock->osq);
slowpath:
......
......@@ -20,7 +20,6 @@
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
#include <asm/qrwlock.h>
/**
......@@ -35,7 +34,7 @@ static __always_inline void
rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
{
while ((cnts & _QW_WMASK) == _QW_LOCKED) {
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
cnts = smp_load_acquire((u32 *)&lock->cnts);
}
}
......@@ -75,7 +74,7 @@ void queue_read_lock_slowpath(struct qrwlock *lock)
* to make sure that the write lock isn't taken.
*/
while (atomic_read(&lock->cnts) & _QW_WMASK)
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
rspin_until_writer_unlock(lock, cnts);
......@@ -114,7 +113,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
cnts | _QW_WAITING) == cnts))
break;
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
/* When no more readers, set the locked flag */
......@@ -125,7 +124,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
_QW_LOCKED) == _QW_WAITING))
break;
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
unlock:
arch_spin_unlock(&lock->lock);
......
......@@ -329,7 +329,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
if (need_resched())
break;
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
rcu_read_unlock();
......@@ -381,7 +381,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
arch_mutex_cpu_relax();
cpu_relax_lowlatency();
}
osq_unlock(&sem->osq);
done:
......
#include <linux/export.h>
#include <linux/lockref.h>
#include <linux/mutex.h>
#if USE_CMPXCHG_LOCKREF
......@@ -29,7 +28,7 @@
if (likely(old.lock_count == prev.lock_count)) { \
SUCCESS; \
} \
arch_mutex_cpu_relax(); \
cpu_relax_lowlatency(); \
} \
} while (0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment