Commit 46675716 authored by Dmitriy Vyukov

runtime: add 64-bit atomics

This is a factored-out part of:
https://golang.org/cl/5279048/
(Parallel GC)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/5985047
parent a28a10e1
src/pkg/runtime/arch_386.h
@@ -2,3 +2,7 @@ enum {
	thechar = '8',
	CacheLineSize = 64
};

// prefetches *addr into the processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
src/pkg/runtime/arch_amd64.h
@@ -2,3 +2,7 @@ enum {
	thechar = '6',
	CacheLineSize = 64
};

// prefetches *addr into the processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
src/pkg/runtime/arch_arm.h
@@ -2,3 +2,5 @@ enum {
	thechar = '5',
	CacheLineSize = 32
};

#define PREFETCH(addr) USED(addr)
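On 386 and amd64, PREFETCH(addr) calls runtime·prefetch, which issues a PREFETCHNTA hint (see the assembly below); on ARM it expands to USED(addr), which merely marks the argument as used and emits no code, so callers can prefetch unconditionally. Outside the Plan 9 toolchain the same hint is spelled __builtin_prefetch in GCC and Clang; the helper below is a hypothetical sketch for illustration, not part of the commit.

#include <stddef.h>

// Walk an array of row pointers, hinting the next row into cache while
// summing the current one. __builtin_prefetch, like PREFETCH, is only a
// hint: it compiles to nothing where no prefetch instruction exists.
static long
sum_rows(long **rows, size_t n)
{
	long total = 0;
	for(size_t i = 0; i < n; i++) {
		if(i+1 < n)
			__builtin_prefetch(rows[i+1]);	// like PREFETCH(rows[i+1])
		total += rows[i][0];
	}
	return total;
}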
src/pkg/runtime/asm_386.s
@@ -299,6 +299,33 @@ TEXT runtime·cas(SB), 7, $0
	MOVL	$1, AX
	RET
// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
// Atomically:
//	if(*val == *old){
//		*val = new;
//		return 1;
//	} else {
//		*old = *val;
//		return 0;
//	}
TEXT runtime·cas64(SB), 7, $0
	MOVL	4(SP), BP	// BP = val
	MOVL	8(SP), SI	// SI = old
	MOVL	0(SI), AX	// DX:AX = *old
	MOVL	4(SI), DX
	MOVL	12(SP), BX	// CX:BX = new
	MOVL	16(SP), CX
	LOCK
	CMPXCHG8B	0(BP)	// if(*val == DX:AX) { *val = CX:BX; ZF = 1 } else { DX:AX = *val; ZF = 0 }
	JNZ	cas64_fail
	MOVL	$1, AX
	RET
cas64_fail:
	MOVL	AX, 0(SI)	// *old = the value actually observed
	MOVL	DX, 4(SI)
	XORL	AX, AX	// return 0
	RET
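Note the failure path: like a C11 "strong" compare-exchange, cas64 writes the value it actually observed back through old, so retry loops never need to re-read *val themselves. For comparison only (this sketch is not part of the commit), the same contract expressed with the GCC/Clang atomic builtins:

#include <stdbool.h>
#include <stdint.h>

// Same contract as runtime·cas64: on success *val becomes new and the
// function returns true; on failure *old is overwritten with the value
// currently in *val and the function returns false.
static bool
cas64(uint64_t *val, uint64_t *old, uint64_t new)
{
	return __atomic_compare_exchange_n(val, old, new,
		false /* strong */, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}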
// bool casp(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
@@ -357,6 +384,43 @@ TEXT runtime·atomicstore(SB), 7, $0
	XCHGL	AX, 0(BX)
	RET
// uint64 atomicload64(uint64 volatile* addr);
// On 386 an 8-byte result is returned through a hidden pointer argument,
// so the actual calling convention is:
// void atomicload64(uint64 *res, uint64 volatile *addr);
TEXT runtime·atomicload64(SB), 7, $0
	MOVL	4(SP), BX	// BX = res
	MOVL	8(SP), AX	// AX = addr
	// An aligned 8-byte MMX load/store is a single memory access, which is
	// what makes this atomic. The instructions are spelled as raw bytes
	// because 8a does not know the MMX mnemonics.
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET
// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
TEXT runtime·atomicstore64(SB), 7, $0
	MOVL	4(SP), AX	// AX = addr
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// This is essentially a no-op, but it provides the required memory fencing.
	// It could be replaced with MFENCE, but MFENCE was introduced only with
	// the Pentium 4 (SSE2).
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET
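The closing LOCK XADDL of zero onto the stack deserves a note: on x86 any LOCK-prefixed read-modify-write acts as a full memory barrier, which is all MFENCE would provide here, and unlike MFENCE it works on pre-SSE2 processors. In portable C the entire function collapses to one builtin; a hedged sketch for comparison (not part of the commit):

#include <stdint.h>

// What runtime·atomicstore64 implements: a 64-bit store that neither
// earlier nor later memory operations may be reordered across.
static void
atomicstore64(volatile uint64_t *addr, uint64_t v)
{
	__atomic_store_n(addr, v, __ATOMIC_SEQ_CST);
}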
TEXT runtime·prefetch(SB), 7, $0
	MOVL	4(SP), AX	// AX = addr
	// PREFETCHNTA (AX)
	BYTE $0x0f; BYTE $0x18; BYTE $0x00
	RET
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
src/pkg/runtime/asm_amd64.s
@@ -344,6 +344,30 @@ TEXT runtime·cas(SB), 7, $0
	MOVL	$1, AX
	RET
// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
// Atomically:
//	if(*val == *old){
//		*val = new;
//		return 1;
//	} else {
//		*old = *val;
//		return 0;
//	}
TEXT runtime·cas64(SB), 7, $0
	MOVQ	8(SP), BX	// BX = val
	MOVQ	16(SP), BP	// BP = old
	MOVQ	0(BP), AX	// AX = *old
	MOVQ	24(SP), CX	// CX = new
	LOCK
	CMPXCHGQ	CX, 0(BX)	// if(*val == AX) { *val = CX; ZF = 1 } else { AX = *val; ZF = 0 }
	JNZ	cas64_fail
	MOVL	$1, AX
	RET
cas64_fail:
	MOVQ	AX, 0(BP)	// *old = the value actually observed
	MOVL	$0, AX
	RET
// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
@@ -376,6 +400,15 @@ TEXT runtime·xadd(SB), 7, $0
	ADDL	CX, AX
	RET
TEXT runtime·xadd64(SB), 7, $0
	MOVQ	8(SP), BX	// BX = addr
	MOVQ	16(SP), AX	// AX = delta
	MOVQ	AX, CX	// save delta
	LOCK
	XADDQ	AX, 0(BX)	// AX = old value of *addr
	ADDQ	CX, AX	// return old + delta, i.e. the new value
	RET
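As the comment on the declarations in runtime.h notes below, runtime·xadd64 is add-and-fetch: XADDQ itself hands back the old value, and the trailing ADDQ turns that into the new one. The two conventions restated with the GCC/Clang builtins, as an illustrative sketch only:

#include <stdint.h>

// fetch_add: what the bare XADDQ instruction provides (returns the old value).
static uint64_t
fetch_add64(uint64_t *addr, int64_t delta)
{
	return __atomic_fetch_add(addr, delta, __ATOMIC_SEQ_CST);
}

// add_fetch: what runtime·xadd64 provides (returns the new value);
// exactly fetch_add plus adding delta once more locally, like the ADDQ above.
static uint64_t
add_fetch64(uint64_t *addr, int64_t delta)
{
	return __atomic_add_fetch(addr, delta, __ATOMIC_SEQ_CST);
}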
TEXT runtime·xchg(SB), 7, $0
	MOVQ	8(SP), BX
	MOVL	16(SP), AX
@@ -402,6 +435,18 @@ TEXT runtime·atomicstore(SB), 7, $0
	XCHGL	AX, 0(BX)
	RET
TEXT runtime·atomicstore64(SB), 7, $0
	MOVQ	8(SP), BX	// BX = addr
	MOVQ	16(SP), AX	// AX = v
	XCHGQ	AX, 0(BX)	// XCHG with memory is implicitly LOCKed, so this both stores and fences
	RET
TEXT runtime·prefetch(SB), 7, $0
	MOVQ	8(SP), AX	// AX = addr
	// PREFETCHNTA (AX)
	BYTE $0x0f; BYTE $0x18; BYTE $0x00
	RET
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
src/pkg/runtime/atomic_386.c
@@ -17,3 +17,16 @@ runtime·atomicloadp(void* volatile* addr)
{
	return *addr;
}
#pragma textflag 7
uint64
runtime·xadd64(uint64 volatile* addr, int64 v)
{
	uint64 old;

	old = *addr;
	while(!runtime·cas64(addr, &old, old+v)) {
		// cas64 already reloaded old with the current *addr; just retry.
	}
	return old+v;
}
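This is the standard CAS-loop recipe for deriving any 64-bit read-modify-write operation from cas64, and it leans on the failure contract noted above: a failed cas64 refreshes old with the current value, so the loop body needs no explicit reload. Any other RMW could be built the same way; for instance, a hypothetical atomic OR (not part of this commit) would look like:

// Sketch only: a 64-bit atomic OR built from runtime·cas64,
// following the same pattern as runtime·xadd64 above.
uint64
atomicor64(uint64 volatile *addr, uint64 bits)
{
	uint64 old;

	old = *addr;
	while(!runtime·cas64(addr, &old, old|bits)) {
		// cas64 refreshed old on failure; just retry
	}
	return old|bits;
}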
src/pkg/runtime/atomic_amd64.c
@@ -11,6 +11,13 @@ runtime·atomicload(uint32 volatile* addr)
	return *addr;
}
#pragma textflag 7
uint64
runtime·atomicload64(uint64 volatile* addr)
{
	// On amd64 an aligned 64-bit load is already atomic,
	// and the x86 memory model keeps loads ordered.
	return *addr;
}
#pragma textflag 7
void*
runtime·atomicloadp(void* volatile* addr)
src/pkg/runtime/atomic_arm.c
@@ -3,6 +3,14 @@
// license that can be found in the LICENSE file.

#include "runtime.h"
#include "arch_GOARCH.h"

static union {
	Lock l;
	byte pad[CacheLineSize];
} locktab[57];

#define LOCK(addr) (&locktab[((uintptr)(addr)>>3)%nelem(locktab)].l)
// Atomic add and return new value.
#pragma textflag 7
@@ -81,3 +89,55 @@ runtime·atomicstore(uint32 volatile* addr, uint32 v)
			return;
	}
}
#pragma textflag 7
bool
runtime·cas64(uint64 volatile *addr, uint64 *old, uint64 new)
{
	bool res;

	runtime·lock(LOCK(addr));
	if(*addr == *old) {
		*addr = new;
		res = true;
	} else {
		*old = *addr;
		res = false;
	}
	runtime·unlock(LOCK(addr));
	return res;
}

#pragma textflag 7
uint64
runtime·xadd64(uint64 volatile *addr, int64 delta)
{
	uint64 res;

	runtime·lock(LOCK(addr));
	res = *addr + delta;
	*addr = res;
	runtime·unlock(LOCK(addr));
	return res;
}

#pragma textflag 7
uint64
runtime·atomicload64(uint64 volatile *addr)
{
	uint64 res;

	runtime·lock(LOCK(addr));
	res = *addr;
	runtime·unlock(LOCK(addr));
	return res;
}

#pragma textflag 7
void
runtime·atomicstore64(uint64 volatile *addr, uint64 v)
{
	runtime·lock(LOCK(addr));
	*addr = v;
	runtime·unlock(LOCK(addr));
}
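The ARM port has no 64-bit atomic instructions to lean on here, so the operations above emulate them under a lock, and the locktab declaration at the top of the file stripes that lock 57 ways: LOCK(addr) shifts the address right by 3 (64-bit values are 8-byte aligned, so the low bits carry no information) and reduces it modulo the table size, and each slot is padded to CacheLineSize so two locks never share a cache line. Because every 64-bit atomic on a given address takes the same lock, the operations stay consistent with one another. A small sketch of just the address-to-slot mapping (names are illustrative, not part of the commit):

#include <stdint.h>
#include <stdio.h>

enum { NLOCK = 57 };

// Mirror of the LOCK(addr) hash: drop the three alignment bits,
// then pick one of 57 padded lock slots.
static int
lockslot(void *addr)
{
	return (int)(((uintptr_t)addr >> 3) % NLOCK);
}

int
main(void)
{
	uint64_t a, b;

	// Nearby but distinct 8-byte slots map to different locks,
	// so unrelated atomics rarely contend on the emulation lock.
	printf("&a -> slot %d, &b -> slot %d\n", lockslot(&a), lockslot(&b));
	return 0;
}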
src/pkg/runtime/runtime.c
@@ -4,6 +4,7 @@
#include "runtime.h"
#include "stack.h"
#include "arch_GOARCH.h"

enum {
	maxround = sizeof(uintptr),
@@ -267,6 +268,33 @@ runtime·atoi(byte *p)
	return n;
}
static void
TestAtomic64(void)
{
	uint64 z64, x64;

	z64 = 42;
	x64 = 0;
	PREFETCH(&z64);
	if(runtime·cas64(&z64, &x64, 1))	// must fail: *old (0) != *val (42) ...
		runtime·throw("cas64 failed");
	if(x64 != 42)	// ... and must write the observed value 42 into x64
		runtime·throw("cas64 failed");
	if(!runtime·cas64(&z64, &x64, 1))	// now *old matches, so it must succeed
		runtime·throw("cas64 failed");
	if(x64 != 42 || z64 != 1)
		runtime·throw("cas64 failed");
	if(runtime·atomicload64(&z64) != 1)
		runtime·throw("load64 failed");
	runtime·atomicstore64(&z64, (1ull<<40)+1);	// a value that doesn't fit in 32 bits
	if(runtime·atomicload64(&z64) != (1ull<<40)+1)
		runtime·throw("store64 failed");
	if(runtime·xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2)
		runtime·throw("xadd64 failed");
	if(runtime·atomicload64(&z64) != (2ull<<40)+2)
		runtime·throw("xadd64 failed");
}
void
runtime·check(void)
{
@@ -342,6 +370,8 @@ runtime·check(void)
		runtime·throw("float32nan2");
	if(!(i != i1))
		runtime·throw("float32nan3");

	TestAtomic64();
}
void
src/pkg/runtime/runtime.h
@@ -512,13 +512,17 @@ void runtime·tracebackothers(G*);
int32	runtime·write(int32, void*, int32);
int32	runtime·mincore(void*, uintptr, byte*);
bool	runtime·cas(uint32*, uint32, uint32);
bool	runtime·cas64(uint64*, uint64*, uint64);
bool	runtime·casp(void**, void*, void*);
// Don't confuse with the XADD x86 instruction: XADD returns the old value,
// while this one is 'addx', that is, add-and-fetch, which returns the new value.
uint32	runtime·xadd(uint32 volatile*, int32);
uint64	runtime·xadd64(uint64 volatile*, int64);
uint32	runtime·xchg(uint32 volatile*, uint32);
uint32	runtime·atomicload(uint32 volatile*);
void	runtime·atomicstore(uint32 volatile*, uint32);
void	runtime·atomicstore64(uint64 volatile*, uint64);
uint64	runtime·atomicload64(uint64 volatile*);
void*	runtime·atomicloadp(void* volatile*);
void	runtime·atomicstorep(void* volatile*, void*);
void	runtime·jmpdefer(byte*, void*);