Commit d42495aa authored by Russ Cox

cmd/cc: add PREFETCH built-in (like SET, USED)

This makes it possible to inline the prefetch of upcoming
memory addresses during garbage collection, instead of
needing to flush registers, make a function call, and
reload registers.  On garbage collection-heavy workloads,
this results in a 5% speedup.

Fixes #3493.

R=dvyukov, ken, r, dave
CC=golang-dev
https://golang.org/cl/5990066
parent 10838165
......@@ -297,6 +297,7 @@ void patch(Prog*, int32);
int sconst(Node*);
int sval(int32);
void gpseudo(int, Sym*, Node*);
void gprefetch(Node*);
/*
* swt.c
......
......@@ -1194,6 +1194,12 @@ gpseudo(int a, Sym *s, Node *n)
pc--;
}
// gprefetch is the back-end hook invoked for the PREFETCH built-in.
// In this back end it is an intentional no-op: the operand n is
// ignored and no instruction is generated.
void
gprefetch(Node *n)
{
	// nothing: no prefetch instruction is emitted here
}
int
sconst(Node *n)
{
......
......@@ -292,6 +292,7 @@ void gbranch(int);
void patch(Prog*, int32);
int sconst(Node*);
void gpseudo(int, Sym*, Node*);
void gprefetch(Node*);
/*
* swt.c
......
......@@ -697,6 +697,11 @@ copyu(Prog *p, Adr *v, Adr *s)
case ACMPB:
case ACMPQ:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
case ACOMISD:
case ACOMISS:
case AUCOMISD:
......
......@@ -185,6 +185,10 @@ regopt(Prog *p)
case ACMPL:
case ACMPQ:
case ACMPW:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
case ACOMISS:
case ACOMISD:
case AUCOMISS:
......
......@@ -1502,6 +1502,18 @@ gpseudo(int a, Sym *s, Node *n)
pc--;
}
// gprefetch generates the code for the PREFETCH built-in.
// The address expression n is moved into a scratch register and a
// PREFETCHNTA instruction is emitted with that register, viewed as an
// indirect (OINDREG) operand, as its argument.
void
gprefetch(Node *n)
{
	Node addr;

	// Put the value of n into a register obtained from regalloc.
	regalloc(&addr, n, Z);
	gmove(n, &addr);

	// Switch the node to OINDREG so the instruction's operand is the
	// indirection through the register, not the register itself.
	addr.op = OINDREG;
	gins(APREFETCHNTA, &addr, Z);

	// The scratch register is no longer needed once the instruction is out.
	regfree(&addr);
}
int
sconst(Node *n)
{
......
......@@ -297,6 +297,7 @@ void gbranch(int);
void patch(Prog*, int32);
int sconst(Node*);
void gpseudo(int, Sym*, Node*);
void gprefetch(Node*);
/*
* swt.c
......
......@@ -602,6 +602,12 @@ copyu(Prog *p, Adr *v, Adr *s)
case ACMPL: /* read only */
case ACMPW:
case ACMPB:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
case AFCOMB:
case AFCOMBP:
......
......@@ -182,6 +182,10 @@ regopt(Prog *p)
case ACMPB:
case ACMPL:
case ACMPW:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
for(z=0; z<BITS; z++)
r->use2.b[z] |= bit.b[z];
break;
......
......@@ -1383,6 +1383,18 @@ gpseudo(int a, Sym *s, Node *n)
pc--;
}
// gprefetch emits a PREFETCHNTA for the address computed by n.
// It is the back-end half of the PREFETCH built-in: n is moved into a
// temporary register, and the instruction is generated with that
// register re-tagged as an OINDREG operand.
void
gprefetch(Node *n)
{
	Node ptr;

	// Evaluate n into a temporary register.
	regalloc(&ptr, n, Z);
	gmove(n, &ptr);

	// Re-tag the register node as OINDREG before handing it to gins, so
	// the operand is the indirection through the register.
	ptr.op = OINDREG;
	gins(APREFETCHNTA, &ptr, Z);

	// Release the temporary.
	regfree(&ptr);
}
int
sconst(Node *n)
{
......
......@@ -7,4 +7,4 @@ include ../../Make.dist
install: y.tab.h
y.tab.h: cc.y
LANG=C LANGUAGE=en_US.UTF8 bison -d -v -y a.y
LANG=C LANGUAGE=en_US.UTF8 bison -d -v -y cc.y
......@@ -300,6 +300,7 @@ enum
OPOSTINC,
OPREDEC,
OPREINC,
OPREFETCH,
OPROTO,
OREGISTER,
ORETURN,
......
......@@ -93,7 +93,7 @@
%token <sval> LSTRING LLSTRING
%token LAUTO LBREAK LCASE LCHAR LCONTINUE LDEFAULT LDO
%token LDOUBLE LELSE LEXTERN LFLOAT LFOR LGOTO
%token LIF LINT LLONG LREGISTER LRETURN LSHORT LSIZEOF LUSED
%token LIF LINT LLONG LPREFETCH LREGISTER LRETURN LSHORT LSIZEOF LUSED
%token LSTATIC LSTRUCT LSWITCH LTYPEDEF LTYPESTR LUNION LUNSIGNED
%token LWHILE LVOID LENUM LSIGNED LCONSTNT LVOLATILE LSET LSIGNOF
%token LRESTRICT LINLINE
......@@ -535,6 +535,10 @@ ulstmnt:
{
$$ = new(OUSED, $3, Z);
}
| LPREFETCH '(' zelist ')' ';'
{
$$ = new(OPREFETCH, $3, Z);
}
| LSET '(' zelist ')' ';'
{
$$ = new(OSET, $3, Z);
......
......@@ -1174,6 +1174,7 @@ struct
"inline", LINLINE, 0,
"int", LINT, TINT,
"long", LLONG, TLONG,
"PREFETCH", LPREFETCH, 0,
"register", LREGISTER, 0,
"restrict", LRESTRICT, 0,
"return", LRETURN, 0,
......
......@@ -528,6 +528,7 @@ loop:
case OSET:
case OUSED:
case OPREFETCH:
usedset(n->left, o);
break;
}
......@@ -542,6 +543,10 @@ usedset(Node *n, int o)
return;
}
complex(n);
if(o == OPREFETCH) {
gprefetch(n);
return;
}
switch(n->op) {
case OADDR: /* volatile */
gins(ANOP, n, Z);
......
......@@ -1497,6 +1497,7 @@ Init onamesinit[] =
OPOSTINC, 0, "POSTINC",
OPREDEC, 0, "PREDEC",
OPREINC, 0, "PREINC",
OPREFETCH, 0, "PREFETCH",
OPROTO, 0, "PROTO",
OREGISTER, 0, "REGISTER",
ORETURN, 0, "RETURN",
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -88,28 +88,29 @@
LIF = 304,
LINT = 305,
LLONG = 306,
LREGISTER = 307,
LRETURN = 308,
LSHORT = 309,
LSIZEOF = 310,
LUSED = 311,
LSTATIC = 312,
LSTRUCT = 313,
LSWITCH = 314,
LTYPEDEF = 315,
LTYPESTR = 316,
LUNION = 317,
LUNSIGNED = 318,
LWHILE = 319,
LVOID = 320,
LENUM = 321,
LSIGNED = 322,
LCONSTNT = 323,
LVOLATILE = 324,
LSET = 325,
LSIGNOF = 326,
LRESTRICT = 327,
LINLINE = 328
LPREFETCH = 307,
LREGISTER = 308,
LRETURN = 309,
LSHORT = 310,
LSIZEOF = 311,
LUSED = 312,
LSTATIC = 313,
LSTRUCT = 314,
LSWITCH = 315,
LTYPEDEF = 316,
LTYPESTR = 317,
LUNION = 318,
LUNSIGNED = 319,
LWHILE = 320,
LVOID = 321,
LENUM = 322,
LSIGNED = 323,
LCONSTNT = 324,
LVOLATILE = 325,
LSET = 326,
LSIGNOF = 327,
LRESTRICT = 328,
LINLINE = 329
};
#endif
/* Tokens. */
......@@ -162,28 +163,29 @@
#define LIF 304
#define LINT 305
#define LLONG 306
#define LREGISTER 307
#define LRETURN 308
#define LSHORT 309
#define LSIZEOF 310
#define LUSED 311
#define LSTATIC 312
#define LSTRUCT 313
#define LSWITCH 314
#define LTYPEDEF 315
#define LTYPESTR 316
#define LUNION 317
#define LUNSIGNED 318
#define LWHILE 319
#define LVOID 320
#define LENUM 321
#define LSIGNED 322
#define LCONSTNT 323
#define LVOLATILE 324
#define LSET 325
#define LSIGNOF 326
#define LRESTRICT 327
#define LINLINE 328
#define LPREFETCH 307
#define LREGISTER 308
#define LRETURN 309
#define LSHORT 310
#define LSIZEOF 311
#define LUSED 312
#define LSTATIC 313
#define LSTRUCT 314
#define LSWITCH 315
#define LTYPEDEF 316
#define LTYPESTR 317
#define LUNION 318
#define LUNSIGNED 319
#define LWHILE 320
#define LVOID 321
#define LENUM 322
#define LSIGNED 323
#define LCONSTNT 324
#define LVOLATILE 325
#define LSET 326
#define LSIGNOF 327
#define LRESTRICT 328
#define LINLINE 329
......@@ -217,7 +219,7 @@ typedef union YYSTYPE
vlong vval;
}
/* Line 1529 of yacc.c. */
#line 221 "y.tab.h"
#line 223 "y.tab.h"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
......
......@@ -2,7 +2,3 @@ enum {
thechar = '8',
CacheLineSize = 64
};
// prefetches *addr into processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
......@@ -2,7 +2,3 @@ enum {
thechar = '6',
CacheLineSize = 64
};
// prefetches *addr into processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
......@@ -2,5 +2,3 @@ enum {
thechar = '5',
CacheLineSize = 32
};
#define PREFETCH(addr) USED(addr)
......@@ -415,11 +415,6 @@ TEXT runtime·atomicstore64(SB), 7, $0
XADDL AX, (SP)
RET
// void runtime·prefetch(void*);
// Out-of-line prefetch helper: loads the 32-bit value at 4(SP)
// (presumably the pointer argument) into AX and issues a PREFETCHNTA
// hint for the memory AX points to, then returns.
TEXT runtime·prefetch(SB), 7, $0
MOVL 4(SP), AX
PREFETCHNTA (AX)
RET
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
......
......@@ -441,11 +441,6 @@ TEXT runtime·atomicstore64(SB), 7, $0
XCHGQ AX, 0(BX)
RET
// void runtime·prefetch(void*);
// Out-of-line prefetch helper: loads the 64-bit value at 8(SP)
// (presumably the pointer argument) into AX and issues a PREFETCHNTA
// hint for the memory AX points to, then returns.
TEXT runtime·prefetch(SB), 7, $0
MOVQ 8(SP), AX
PREFETCHNTA (AX)
RET
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment