Commit 2c3956c9 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: strengthen I/O and memory barriers

From: Paul Mackerras <paulus@samba.org>

After I sent the recent patch to include/asm-ppc64/io.h which put stronger
barriers in the I/O accessor macros, Paul McKenney pointed out to me that a
writex/outx could still slide out from inside a spinlocked region.  This patch
makes the barriers a bit stronger so that this can't happen.  It means that we
need to use a sync instruction for wmb (a full "heavyweight" sync), since
drivers rely on wmb for ordering between writes to system memory and writes to
a device.
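
As an illustration (a hypothetical driver fragment, not part of this patch --
the device, register and field names are made up, but writel(), wmb() and the
spinlock calls are the usual kernel interfaces):

	spin_lock_irqsave(&dev->lock, flags);
	desc->addr = dma_addr;		/* cacheable stores to system memory */
	desc->len = len;
	wmb();				/* descriptor must be visible before the doorbell */
	writel(DOORBELL_GO, dev->regs + DOORBELL);	/* non-cacheable store to the device */
	spin_unlock_irqrestore(&dev->lock, flags);

With wmb() implemented as eieio, the cacheable descriptor stores and the
non-cacheable doorbell store are not ordered with respect to each other, and
without a trailing sync in the accessor the MMIO store could also slide past
the spin_unlock; using sync in both places closes those holes.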

I have left smp_wmb() as a lighter-weight barrier (the amusingly-named eieio
instruction) that orders stores but doesn't impose any ordering between cacheable
and non-cacheable accesses.  I am assuming here that smp_wmb is only
used for ordering stores to system memory so that another cpu will see them in
order.  It can't be used for enforcing any ordering that a device will see,
because it is just a gcc barrier on UP.
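
A made-up example of the only kind of use smp_wmb() is still meant for --
ordering two stores to cacheable memory so that another CPU sees them in
order (the shared structure and its fields are hypothetical):

	/* producer, CPU 0 */
	shared->data = value;
	smp_wmb();		/* order data before ready */
	shared->ready = 1;

	/* consumer, CPU 1 */
	if (shared->ready) {
		smp_rmb();	/* pairs with the producer's smp_wmb() */
		val = shared->data;
	}

No device ever observes these stores, so eieio on SMP (and a plain compiler
barrier on UP) is sufficient.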

This also changes the spinlock/rwlock unlock code to use lwsync ("light-weight
sync") rather than eieio, since eieio doesn't order loads, and we need to
ensure that loads stay inside the spinlocked region.
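
A hypothetical sketch of the case this protects (counter and counter_lock are
made-up names):

	spin_lock(&counter_lock);
	old = counter;			/* load inside the critical section */
	counter = old + 1;		/* store inside the critical section */
	spin_unlock(&counter_lock);	/* now lwsync, then the store of 0 to the lock word */

With eieio in the unlock path only the store to counter was ordered before the
store that releases the lock; the load of counter could still be performed after
another CPU had already acquired the lock and updated counter.  lwsync orders
both the load and the store ahead of the releasing store.
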
parent 24f59760
@@ -316,6 +316,8 @@ _GLOBAL(_insb)
 	eieio
 	stbu r5,1(r4)
 	bdnz 00b
+	twi 0,r5,0
+	isync
 	blr
 _GLOBAL(_outsb)
@@ -325,8 +327,8 @@ _GLOBAL(_outsb)
 	blelr-
 00:	lbzu r5,1(r4)
 	stb r5,0(r3)
-	eieio
 	bdnz 00b
+	sync
 	blr
 _GLOBAL(_insw)
@@ -338,6 +340,8 @@ _GLOBAL(_insw)
 	eieio
 	sthu r5,2(r4)
 	bdnz 00b
+	twi 0,r5,0
+	isync
 	blr
 _GLOBAL(_outsw)
@@ -346,9 +350,9 @@ _GLOBAL(_outsw)
 	subi r4,r4,2
 	blelr-
 00:	lhzu r5,2(r4)
-	eieio
 	sthbrx r5,0,r3
 	bdnz 00b
+	sync
 	blr
 _GLOBAL(_insl)
@@ -360,6 +364,8 @@ _GLOBAL(_insl)
 	eieio
 	stwu r5,4(r4)
 	bdnz 00b
+	twi 0,r5,0
+	isync
 	blr
 _GLOBAL(_outsl)
@@ -369,8 +375,8 @@ _GLOBAL(_outsl)
 	blelr-
 00:	lwzu r5,4(r4)
 	stwbrx r5,0,r3
-	eieio
 	bdnz 00b
+	sync
 	blr
 /* _GLOBAL(ide_insw) now in drivers/ide/ide-iops.c */
@@ -383,6 +389,8 @@ _GLOBAL(_insw_ns)
 	eieio
 	sthu r5,2(r4)
 	bdnz 00b
+	twi 0,r5,0
+	isync
 	blr
 /* _GLOBAL(ide_outsw) now in drivers/ide/ide-iops.c */
@@ -393,8 +401,8 @@ _GLOBAL(_outsw_ns)
 	blelr-
 00:	lhzu r5,2(r4)
 	sth r5,0(r3)
-	eieio
 	bdnz 00b
+	sync
 	blr
 _GLOBAL(_insl_ns)
@@ -406,6 +414,8 @@ _GLOBAL(_insl_ns)
 	eieio
 	stwu r5,4(r4)
 	bdnz 00b
+	twi 0,r5,0
+	isync
 	blr
 _GLOBAL(_outsl_ns)
@@ -415,8 +425,8 @@ _GLOBAL(_outsl_ns)
 	blelr-
 00:	lwzu r5,4(r4)
 	stw r5,0(r3)
-	eieio
 	bdnz 00b
+	sync
 	blr
 _GLOBAL(abs)
......
@@ -240,14 +240,14 @@ static inline int in_8(volatile unsigned char *addr)
 {
 	int ret;
-	__asm__ __volatile__("eieio; lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
 			: "=r" (ret) : "m" (*addr));
 	return ret;
 }
 static inline void out_8(volatile unsigned char *addr, int val)
 {
-	__asm__ __volatile__("sync; stb%U0%X0 %1,%0"
+	__asm__ __volatile__("stb%U0%X0 %1,%0; sync"
 			: "=m" (*addr) : "r" (val));
 }
@@ -255,7 +255,7 @@ static inline int in_le16(volatile unsigned short *addr)
 {
 	int ret;
-	__asm__ __volatile__("eieio; lhbrx %0,0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync"
 			: "=r" (ret) : "r" (addr), "m" (*addr));
 	return ret;
 }
@@ -264,20 +264,20 @@ static inline int in_be16(volatile unsigned short *addr)
 {
 	int ret;
-	__asm__ __volatile__("eieio; lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
 			: "=r" (ret) : "m" (*addr));
 	return ret;
 }
 static inline void out_le16(volatile unsigned short *addr, int val)
 {
-	__asm__ __volatile__("sync; sthbrx %1,0,%2"
+	__asm__ __volatile__("sthbrx %1,0,%2; sync"
 			: "=m" (*addr) : "r" (val), "r" (addr));
 }
 static inline void out_be16(volatile unsigned short *addr, int val)
 {
-	__asm__ __volatile__("sync; sth%U0%X0 %1,%0"
+	__asm__ __volatile__("sth%U0%X0 %1,%0; sync"
 			: "=m" (*addr) : "r" (val));
 }
@@ -285,7 +285,7 @@ static inline unsigned in_le32(volatile unsigned *addr)
 {
 	unsigned ret;
-	__asm__ __volatile__("eieio; lwbrx %0,0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync"
 			: "=r" (ret) : "r" (addr), "m" (*addr));
 	return ret;
 }
@@ -294,20 +294,20 @@ static inline unsigned in_be32(volatile unsigned *addr)
 {
 	unsigned ret;
-	__asm__ __volatile__("eieio; lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
+	__asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
 			: "=r" (ret) : "m" (*addr));
 	return ret;
 }
 static inline void out_le32(volatile unsigned *addr, int val)
 {
-	__asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr)
+	__asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr)
 			: "r" (val), "r" (addr));
 }
 static inline void out_be32(volatile unsigned *addr, int val)
 {
-	__asm__ __volatile__("sync; stw%U0%X0 %1,%0; eieio"
+	__asm__ __volatile__("stw%U0%X0 %1,%0; eieio"
 			: "=m" (*addr) : "r" (val));
 }
@@ -316,7 +316,7 @@ static inline unsigned long in_le64(volatile unsigned long *addr)
 	unsigned long tmp, ret;
 	__asm__ __volatile__(
-		"eieio; ld %1,0(%2)\n"
+		"ld %1,0(%2)\n"
 		"twi 0,%1,0\n"
 		"isync\n"
 		"rldimi %0,%1,5*8,1*8\n"
@@ -334,7 +334,7 @@ static inline unsigned long in_be64(volatile unsigned long *addr)
 {
 	unsigned long ret;
-	__asm__ __volatile__("eieio; ld %0,0(%1); twi 0,%0,0; isync"
+	__asm__ __volatile__("ld %0,0(%1); twi 0,%0,0; isync"
 			: "=r" (ret) : "m" (*addr));
 	return ret;
 }
@@ -351,13 +351,14 @@ static inline void out_le64(volatile unsigned long *addr, int val)
 		"rldicl %1,%1,32,0\n"
 		"rlwimi %0,%1,8,8,31\n"
 		"rlwimi %0,%1,24,16,23\n"
-		"sync; std %0,0(%2)\n"
+		"std %0,0(%2)\n"
+		"sync"
 		: "=r" (tmp) : "r" (val), "b" (addr) , "m" (*addr));
 }
 static inline void out_be64(volatile unsigned long *addr, int val)
 {
-	__asm__ __volatile__("sync; std %1,0(%0)" : "=m" (*addr) : "r" (val));
+	__asm__ __volatile__("std %1,0(%0); sync" : "=m" (*addr) : "r" (val));
 }
 #ifndef CONFIG_PPC_ISERIES
......
@@ -28,7 +28,7 @@ typedef struct {
 static __inline__ void _raw_spin_unlock(spinlock_t *lock)
 {
-	__asm__ __volatile__("eieio # spin_unlock": : :"memory");
+	__asm__ __volatile__("lwsync # spin_unlock": : :"memory");
 	lock->lock = 0;
 }
@@ -159,7 +159,7 @@ static __inline__ int is_write_locked(rwlock_t *rw)
 static __inline__ void _raw_write_unlock(rwlock_t *rw)
 {
-	__asm__ __volatile__("eieio # write_unlock": : :"memory");
+	__asm__ __volatile__("lwsync # write_unlock": : :"memory");
 	rw->lock = 0;
 }
@@ -223,7 +223,7 @@ static __inline__ void _raw_read_unlock(rwlock_t *rw)
 	unsigned int tmp;
 	__asm__ __volatile__(
-	"eieio # read_unlock\n\
+	"lwsync # read_unlock\n\
 1:	lwarx %0,0,%1\n\
 	addic %0,%0,-1\n\
 	stwcx. %0,0,%1\n\
......
@@ -29,22 +29,26 @@
  * read_barrier_depends() prevents data-dependent loads being reordered
  * across this point (nop on PPC).
  *
- * We can use the eieio instruction for wmb, but since it doesn't
- * give any ordering guarantees about loads, we have to use the
- * stronger but slower sync instruction for mb and rmb.
+ * We have to use the sync instructions for mb(), since lwsync doesn't
+ * order loads with respect to previous stores.  Lwsync is fine for
+ * rmb(), though.
+ * For wmb(), we use sync since wmb is used in drivers to order
+ * stores to system memory with respect to writes to the device.
+ * However, smp_wmb() can be a lighter-weight eieio barrier on
+ * SMP since it is only used to order updates to system memory.
  */
 #define mb()   __asm__ __volatile__ ("sync" : : : "memory")
 #define rmb()  __asm__ __volatile__ ("lwsync" : : : "memory")
-#define wmb()  __asm__ __volatile__ ("eieio" : : : "memory")
+#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define read_barrier_depends()  do { } while(0)
-#define set_mb(var, value)	do { var = value; mb(); } while (0)
-#define set_wmb(var, value)	do { var = value; wmb(); } while (0)
+#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
+#define set_wmb(var, value)	do { var = value; smp_wmb(); } while (0)
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
+#define smp_wmb()	__asm__ __volatile__ ("eieio" : : : "memory")
 #define smp_read_barrier_depends()	read_barrier_depends()
 #else
 #define smp_mb()	__asm__ __volatile__("": : :"memory")
......