Commit 46675716 authored by Dmitriy Vyukov

runtime: add 64-bit atomics

This is a factored-out part of:
https://golang.org/cl/5279048/
(Parallel GC)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/5985047
parent a28a10e1
...@@ -2,3 +2,7 @@ enum {
thechar = '8',
CacheLineSize = 64
};
// prefetches *addr into processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
...@@ -2,3 +2,7 @@ enum {
thechar = '6',
CacheLineSize = 64
};
// prefetches *addr into processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
...@@ -2,3 +2,5 @@ enum {
thechar = '5',
CacheLineSize = 32
};
#define PREFETCH(addr) USED(addr)
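// No prefetch instruction is issued on ARM: USED(addr) only marks the argument as used, so this macro does no actual prefetching.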
...@@ -299,6 +299,33 @@ TEXT runtime·cas(SB), 7, $0
MOVL $1, AX
RET
// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
// Atomically:
// if(*val == *old){
// *val = new;
// return 1;
// } else {
// *old = *val;
// return 0;
// }
TEXT runtime·cas64(SB), 7, $0
MOVL 4(SP), BP
MOVL 8(SP), SI
MOVL 0(SI), AX
MOVL 4(SI), DX
MOVL 12(SP), BX
MOVL 16(SP), CX
LOCK
CMPXCHG8B 0(BP)
JNZ cas64_fail
MOVL $1, AX
RET
cas64_fail:
MOVL AX, 0(SI)
MOVL DX, 4(SI)
XORL AX, AX
RET
// bool casp(void **p, void *old, void *new)
// Atomically:
// if(*p == old){
...@@ -357,6 +384,43 @@ TEXT runtime·atomicstore(SB), 7, $0
XCHGL AX, 0(BX)
RET
// uint64 atomicload64(uint64 volatile* addr);
// so actually
// void atomicload64(uint64 *res, uint64 volatile *addr);
TEXT runtime·atomicload64(SB), 7, $0
MOVL 4(SP), BX
MOVL 8(SP), AX
// MOVQ (%EAX), %MM0
BYTE $0x0f; BYTE $0x6f; BYTE $0x00
// MOVQ %MM0, 0(%EBX)
BYTE $0x0f; BYTE $0x7f; BYTE $0x03
// EMMS
BYTE $0x0F; BYTE $0x77
RET
// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
TEXT runtime·atomicstore64(SB), 7, $0
MOVL 4(SP), AX
// MOVQ and EMMS were introduced on the Pentium MMX.
// MOVQ 0x8(%ESP), %MM0
BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
// MOVQ %MM0, (%EAX)
BYTE $0x0f; BYTE $0x7f; BYTE $0x00
// EMMS
BYTE $0x0F; BYTE $0x77
// This is essentially a no-op, but it provides the required memory fencing.
// It could be replaced with MFENCE, but MFENCE was introduced only with the Pentium 4 (SSE2).
MOVL $0, AX
LOCK
XADDL AX, (SP)
RET
TEXT runtime·prefetch(SB), 7, $0
MOVL 4(SP), AX
// PREFETCHNTA (AX)
BYTE $0x0f; BYTE $0x18; BYTE $0x00
RET
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
...
...@@ -344,6 +344,30 @@ TEXT runtime·cas(SB), 7, $0
MOVL $1, AX
RET
// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
// Atomically:
// if(*val == *old){
// *val = new;
// return 1;
// } else {
// *old = *val;
// return 0;
// }
TEXT runtime·cas64(SB), 7, $0
MOVQ 8(SP), BX
MOVQ 16(SP), BP
MOVQ 0(BP), AX
MOVQ 24(SP), CX
LOCK
CMPXCHGQ CX, 0(BX)
JNZ cas64_fail
MOVL $1, AX
RET
cas64_fail:
MOVQ AX, 0(BP)
MOVL $0, AX
RET
// bool casp(void **val, void *old, void *new)
// Atomically:
// if(*val == old){
...@@ -376,6 +400,15 @@ TEXT runtime·xadd(SB), 7, $0
ADDL CX, AX
RET
TEXT runtime·xadd64(SB), 7, $0
MOVQ 8(SP), BX
MOVQ 16(SP), AX
MOVQ AX, CX
LOCK
XADDQ AX, 0(BX)
ADDQ CX, AX
RET
TEXT runtime·xchg(SB), 7, $0
MOVQ 8(SP), BX
MOVL 16(SP), AX
...@@ -402,6 +435,18 @@ TEXT runtime·atomicstore(SB), 7, $0
XCHGL AX, 0(BX)
RET
TEXT runtime·atomicstore64(SB), 7, $0
MOVQ 8(SP), BX
MOVQ 16(SP), AX
XCHGQ AX, 0(BX)
RET
TEXT runtime·prefetch(SB), 7, $0
MOVQ 8(SP), AX
// PREFETCHNTA (AX)
BYTE $0x0f; BYTE $0x18; BYTE $0x00
RET
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
...
...@@ -17,3 +17,16 @@ runtime·atomicloadp(void* volatile* addr)
{
return *addr;
}
#pragma textflag 7
uint64
runtime·xadd64(uint64 volatile* addr, int64 v)
{
uint64 old;
old = *addr;
while(!runtime·cas64(addr, &old, old+v)) {
// cas64 failed and copied the current value of *addr into old; retry with it.
}
return old+v;
}
...@@ -11,6 +11,13 @@ runtime·atomicload(uint32 volatile* addr)
return *addr;
}
#pragma textflag 7
uint64
runtime·atomicload64(uint64 volatile* addr)
{
return *addr;
}
#pragma textflag 7
void*
runtime·atomicloadp(void* volatile* addr)
...
...@@ -3,6 +3,14 @@
// license that can be found in the LICENSE file.
#include "runtime.h"
#include "arch_GOARCH.h"
static union {
Lock l;
byte pad[CacheLineSize];
} locktab[57];
#define LOCK(addr) (&locktab[((uintptr)(addr)>>3)%nelem(locktab)].l)
// Atomic add and return new value.
#pragma textflag 7
...@@ -80,4 +88,56 @@ runtime·atomicstore(uint32 volatile* addr, uint32 v)
if(runtime·cas(addr, old, v))
return;
}
}
\ No newline at end of file
#pragma textflag 7
bool
runtime·cas64(uint64 volatile *addr, uint64 *old, uint64 new)
{
bool res;
runtime·lock(LOCK(addr));
if(*addr == *old) {
*addr = new;
res = true;
} else {
*old = *addr;
res = false;
}
runtime·unlock(LOCK(addr));
return res;
}
#pragma textflag 7
uint64
runtime·xadd64(uint64 volatile *addr, int64 delta)
{
uint64 res;
runtime·lock(LOCK(addr));
res = *addr + delta;
*addr = res;
runtime·unlock(LOCK(addr));
return res;
}
#pragma textflag 7
uint64
runtime·atomicload64(uint64 volatile *addr)
{
uint64 res;
runtime·lock(LOCK(addr));
res = *addr;
runtime·unlock(LOCK(addr));
return res;
}
#pragma textflag 7
void
runtime·atomicstore64(uint64 volatile *addr, uint64 v)
{
runtime·lock(LOCK(addr));
*addr = v;
runtime·unlock(LOCK(addr));
}
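The only correctness requirement on the LOCK(addr) macro used by the lock-based fallback above is that the same address always selects the same Lock; spreading different addresses across the 57 padded entries merely reduces contention. Below is a minimal standalone C sketch of that index computation (lockindex and NLOCKS are illustrative names, not part of the runtime):

#include <stdint.h>
#include <stdio.h>

enum { NLOCKS = 57 };  /* same table size as locktab above */

/* Same computation as LOCK(addr): discard the low 3 address bits,
   then index into the table modulo its size. */
static int
lockindex(void *addr)
{
	return (int)(((uintptr_t)addr >> 3) % NLOCKS);
}

int
main(void)
{
	uint64_t a, b;

	/* The same address always maps to the same slot; distinct
	   addresses usually land in different slots. */
	printf("&a -> lock %d, &b -> lock %d\n", lockindex(&a), lockindex(&b));
	return 0;
}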
...@@ -4,6 +4,7 @@
#include "runtime.h"
#include "stack.h"
#include "arch_GOARCH.h"
enum {
maxround = sizeof(uintptr),
...@@ -267,6 +268,33 @@ runtime·atoi(byte *p)
return n;
}
static void
TestAtomic64(void)
{
uint64 z64, x64;
z64 = 42;
x64 = 0;
PREFETCH(&z64);
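// z64 (42) differs from x64 (0), so this cas64 must fail and copy z64's value into x64.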
if(runtime·cas64(&z64, &x64, 1))
runtime·throw("cas64 failed");
if(x64 != 42)
runtime·throw("cas64 failed");
if(!runtime·cas64(&z64, &x64, 1))
runtime·throw("cas64 failed");
if(x64 != 42 || z64 != 1)
runtime·throw("cas64 failed");
if(runtime·atomicload64(&z64) != 1)
runtime·throw("load64 failed");
runtime·atomicstore64(&z64, (1ull<<40)+1);
if(runtime·atomicload64(&z64) != (1ull<<40)+1)
runtime·throw("store64 failed");
if(runtime·xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2)
runtime·throw("xadd64 failed");
if(runtime·atomicload64(&z64) != (2ull<<40)+2)
runtime·throw("xadd64 failed");
}
void
runtime·check(void)
{
...@@ -342,6 +370,8 @@ runtime·check(void)
runtime·throw("float32nan2");
if(!(i != i1))
runtime·throw("float32nan3");
TestAtomic64();
}
void
...
...@@ -512,13 +512,17 @@ void runtime·tracebackothers(G*);
int32 runtime·write(int32, void*, int32);
int32 runtime·mincore(void*, uintptr, byte*);
bool runtime·cas(uint32*, uint32, uint32);
bool runtime·cas64(uint64*, uint64*, uint64);
bool runtime·casp(void**, void*, void*);
// Don't confuse with the x86 XADD instruction;
// this one is actually 'addx', that is, add-and-fetch.
uint32 runtime·xadd(uint32 volatile*, int32);
uint64 runtime·xadd64(uint64 volatile*, int64);
uint32 runtime·xchg(uint32 volatile*, uint32);
uint32 runtime·atomicload(uint32 volatile*);
void runtime·atomicstore(uint32 volatile*, uint32);
void runtime·atomicstore64(uint64 volatile*, uint64);
uint64 runtime·atomicload64(uint64 volatile*);
void* runtime·atomicloadp(void* volatile*);
void runtime·atomicstorep(void* volatile*, void*);
void runtime·jmpdefer(byte*, void*);
...