Commit d42495aa authored by Russ Cox

cmd/cc: add PREFETCH built-in (like SET, USED)

This makes it possible to inline the prefetch of upcoming
memory addresses during garbage collection, instead of
needing to flush registers, make a function call, and
reload registers.  On garbage collection-heavy workloads,
this results in a 5% speedup.

Fixes #3493.

R=dvyukov, ken, r, dave
CC=golang-dev
https://golang.org/cl/5990066
parent 10838165
...@@ -297,6 +297,7 @@ void patch(Prog*, int32); ...@@ -297,6 +297,7 @@ void patch(Prog*, int32);
int sconst(Node*); int sconst(Node*);
int sval(int32); int sval(int32);
void gpseudo(int, Sym*, Node*); void gpseudo(int, Sym*, Node*);
void gprefetch(Node*);
/* /*
* swt.c * swt.c
......
...@@ -1194,6 +1194,12 @@ gpseudo(int a, Sym *s, Node *n) ...@@ -1194,6 +1194,12 @@ gpseudo(int a, Sym *s, Node *n)
pc--; pc--;
} }
// gprefetch is the code-generation hook invoked for the PREFETCH built-in
// (usedset calls it when the operation is OPREFETCH).
// In this code generator it is intentionally a stub: the argument n is
// ignored and no instruction is emitted.
void
gprefetch(Node *n)
{
// nothing: no prefetch instruction is generated here
}
int int
sconst(Node *n) sconst(Node *n)
{ {
......
...@@ -292,6 +292,7 @@ void gbranch(int); ...@@ -292,6 +292,7 @@ void gbranch(int);
void patch(Prog*, int32); void patch(Prog*, int32);
int sconst(Node*); int sconst(Node*);
void gpseudo(int, Sym*, Node*); void gpseudo(int, Sym*, Node*);
void gprefetch(Node*);
/* /*
* swt.c * swt.c
......
...@@ -697,6 +697,11 @@ copyu(Prog *p, Adr *v, Adr *s) ...@@ -697,6 +697,11 @@ copyu(Prog *p, Adr *v, Adr *s)
case ACMPB: case ACMPB:
case ACMPQ: case ACMPQ:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
case ACOMISD: case ACOMISD:
case ACOMISS: case ACOMISS:
case AUCOMISD: case AUCOMISD:
......
...@@ -185,6 +185,10 @@ regopt(Prog *p) ...@@ -185,6 +185,10 @@ regopt(Prog *p)
case ACMPL: case ACMPL:
case ACMPQ: case ACMPQ:
case ACMPW: case ACMPW:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
case ACOMISS: case ACOMISS:
case ACOMISD: case ACOMISD:
case AUCOMISS: case AUCOMISS:
......
...@@ -1502,6 +1502,18 @@ gpseudo(int a, Sym *s, Node *n) ...@@ -1502,6 +1502,18 @@ gpseudo(int a, Sym *s, Node *n)
pc--; pc--;
} }
// gprefetch generates the code for the PREFETCH built-in.
// The value of n (the address to prefetch) is moved into a register,
// and that register is then used as the operand of a PREFETCHNTA
// instruction. Nothing is returned; the only effect is the emitted code.
void
gprefetch(Node *n)
{
	Node addr;

	// Get a register node shaped like n and load n's value into it.
	regalloc(&addr, n, Z);
	gmove(n, &addr);

	// Switch the node from the register itself to OINDREG
	// (presumably "memory addressed by the register") before emitting.
	addr.op = OINDREG;
	gins(APREFETCHNTA, &addr, Z);

	// Release the register obtained from regalloc above.
	regfree(&addr);
}
int int
sconst(Node *n) sconst(Node *n)
{ {
......
...@@ -297,6 +297,7 @@ void gbranch(int); ...@@ -297,6 +297,7 @@ void gbranch(int);
void patch(Prog*, int32); void patch(Prog*, int32);
int sconst(Node*); int sconst(Node*);
void gpseudo(int, Sym*, Node*); void gpseudo(int, Sym*, Node*);
void gprefetch(Node*);
/* /*
* swt.c * swt.c
......
...@@ -602,6 +602,12 @@ copyu(Prog *p, Adr *v, Adr *s) ...@@ -602,6 +602,12 @@ copyu(Prog *p, Adr *v, Adr *s)
case ACMPL: /* read only */ case ACMPL: /* read only */
case ACMPW: case ACMPW:
case ACMPB: case ACMPB:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
case AFCOMB: case AFCOMB:
case AFCOMBP: case AFCOMBP:
......
...@@ -182,6 +182,10 @@ regopt(Prog *p) ...@@ -182,6 +182,10 @@ regopt(Prog *p)
case ACMPB: case ACMPB:
case ACMPL: case ACMPL:
case ACMPW: case ACMPW:
case APREFETCHT0:
case APREFETCHT1:
case APREFETCHT2:
case APREFETCHNTA:
for(z=0; z<BITS; z++) for(z=0; z<BITS; z++)
r->use2.b[z] |= bit.b[z]; r->use2.b[z] |= bit.b[z];
break; break;
......
...@@ -1383,6 +1383,18 @@ gpseudo(int a, Sym *s, Node *n) ...@@ -1383,6 +1383,18 @@ gpseudo(int a, Sym *s, Node *n)
pc--; pc--;
} }
// gprefetch generates the code for the PREFETCH built-in.
// The value of n (the address to prefetch) is moved into a register,
// and that register is then used as the operand of a PREFETCHNTA
// instruction. Nothing is returned; the only effect is the emitted code.
void
gprefetch(Node *n)
{
	Node addr;

	// Get a register node shaped like n and load n's value into it.
	regalloc(&addr, n, Z);
	gmove(n, &addr);

	// Switch the node from the register itself to OINDREG
	// (presumably "memory addressed by the register") before emitting.
	addr.op = OINDREG;
	gins(APREFETCHNTA, &addr, Z);

	// Release the register obtained from regalloc above.
	regfree(&addr);
}
int int
sconst(Node *n) sconst(Node *n)
{ {
......
...@@ -7,4 +7,4 @@ include ../../Make.dist ...@@ -7,4 +7,4 @@ include ../../Make.dist
install: y.tab.h install: y.tab.h
y.tab.h: cc.y y.tab.h: cc.y
LANG=C LANGUAGE=en_US.UTF8 bison -d -v -y a.y LANG=C LANGUAGE=en_US.UTF8 bison -d -v -y cc.y
...@@ -300,6 +300,7 @@ enum ...@@ -300,6 +300,7 @@ enum
OPOSTINC, OPOSTINC,
OPREDEC, OPREDEC,
OPREINC, OPREINC,
OPREFETCH,
OPROTO, OPROTO,
OREGISTER, OREGISTER,
ORETURN, ORETURN,
......
...@@ -93,7 +93,7 @@ ...@@ -93,7 +93,7 @@
%token <sval> LSTRING LLSTRING %token <sval> LSTRING LLSTRING
%token LAUTO LBREAK LCASE LCHAR LCONTINUE LDEFAULT LDO %token LAUTO LBREAK LCASE LCHAR LCONTINUE LDEFAULT LDO
%token LDOUBLE LELSE LEXTERN LFLOAT LFOR LGOTO %token LDOUBLE LELSE LEXTERN LFLOAT LFOR LGOTO
%token LIF LINT LLONG LREGISTER LRETURN LSHORT LSIZEOF LUSED %token LIF LINT LLONG LPREFETCH LREGISTER LRETURN LSHORT LSIZEOF LUSED
%token LSTATIC LSTRUCT LSWITCH LTYPEDEF LTYPESTR LUNION LUNSIGNED %token LSTATIC LSTRUCT LSWITCH LTYPEDEF LTYPESTR LUNION LUNSIGNED
%token LWHILE LVOID LENUM LSIGNED LCONSTNT LVOLATILE LSET LSIGNOF %token LWHILE LVOID LENUM LSIGNED LCONSTNT LVOLATILE LSET LSIGNOF
%token LRESTRICT LINLINE %token LRESTRICT LINLINE
...@@ -535,6 +535,10 @@ ulstmnt: ...@@ -535,6 +535,10 @@ ulstmnt:
{ {
$$ = new(OUSED, $3, Z); $$ = new(OUSED, $3, Z);
} }
| LPREFETCH '(' zelist ')' ';'
{
$$ = new(OPREFETCH, $3, Z);
}
| LSET '(' zelist ')' ';' | LSET '(' zelist ')' ';'
{ {
$$ = new(OSET, $3, Z); $$ = new(OSET, $3, Z);
......
...@@ -1174,6 +1174,7 @@ struct ...@@ -1174,6 +1174,7 @@ struct
"inline", LINLINE, 0, "inline", LINLINE, 0,
"int", LINT, TINT, "int", LINT, TINT,
"long", LLONG, TLONG, "long", LLONG, TLONG,
"PREFETCH", LPREFETCH, 0,
"register", LREGISTER, 0, "register", LREGISTER, 0,
"restrict", LRESTRICT, 0, "restrict", LRESTRICT, 0,
"return", LRETURN, 0, "return", LRETURN, 0,
......
...@@ -528,6 +528,7 @@ loop: ...@@ -528,6 +528,7 @@ loop:
case OSET: case OSET:
case OUSED: case OUSED:
case OPREFETCH:
usedset(n->left, o); usedset(n->left, o);
break; break;
} }
...@@ -542,6 +543,10 @@ usedset(Node *n, int o) ...@@ -542,6 +543,10 @@ usedset(Node *n, int o)
return; return;
} }
complex(n); complex(n);
if(o == OPREFETCH) {
gprefetch(n);
return;
}
switch(n->op) { switch(n->op) {
case OADDR: /* volatile */ case OADDR: /* volatile */
gins(ANOP, n, Z); gins(ANOP, n, Z);
......
...@@ -1497,6 +1497,7 @@ Init onamesinit[] = ...@@ -1497,6 +1497,7 @@ Init onamesinit[] =
OPOSTINC, 0, "POSTINC", OPOSTINC, 0, "POSTINC",
OPREDEC, 0, "PREDEC", OPREDEC, 0, "PREDEC",
OPREINC, 0, "PREINC", OPREINC, 0, "PREINC",
OPREFETCH, 0, "PREFETCH",
OPROTO, 0, "PROTO", OPROTO, 0, "PROTO",
OREGISTER, 0, "REGISTER", OREGISTER, 0, "REGISTER",
ORETURN, 0, "RETURN", ORETURN, 0, "RETURN",
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -88,28 +88,29 @@ ...@@ -88,28 +88,29 @@
LIF = 304, LIF = 304,
LINT = 305, LINT = 305,
LLONG = 306, LLONG = 306,
LREGISTER = 307, LPREFETCH = 307,
LRETURN = 308, LREGISTER = 308,
LSHORT = 309, LRETURN = 309,
LSIZEOF = 310, LSHORT = 310,
LUSED = 311, LSIZEOF = 311,
LSTATIC = 312, LUSED = 312,
LSTRUCT = 313, LSTATIC = 313,
LSWITCH = 314, LSTRUCT = 314,
LTYPEDEF = 315, LSWITCH = 315,
LTYPESTR = 316, LTYPEDEF = 316,
LUNION = 317, LTYPESTR = 317,
LUNSIGNED = 318, LUNION = 318,
LWHILE = 319, LUNSIGNED = 319,
LVOID = 320, LWHILE = 320,
LENUM = 321, LVOID = 321,
LSIGNED = 322, LENUM = 322,
LCONSTNT = 323, LSIGNED = 323,
LVOLATILE = 324, LCONSTNT = 324,
LSET = 325, LVOLATILE = 325,
LSIGNOF = 326, LSET = 326,
LRESTRICT = 327, LSIGNOF = 327,
LINLINE = 328 LRESTRICT = 328,
LINLINE = 329
}; };
#endif #endif
/* Tokens. */ /* Tokens. */
...@@ -162,28 +163,29 @@ ...@@ -162,28 +163,29 @@
#define LIF 304 #define LIF 304
#define LINT 305 #define LINT 305
#define LLONG 306 #define LLONG 306
#define LREGISTER 307 #define LPREFETCH 307
#define LRETURN 308 #define LREGISTER 308
#define LSHORT 309 #define LRETURN 309
#define LSIZEOF 310 #define LSHORT 310
#define LUSED 311 #define LSIZEOF 311
#define LSTATIC 312 #define LUSED 312
#define LSTRUCT 313 #define LSTATIC 313
#define LSWITCH 314 #define LSTRUCT 314
#define LTYPEDEF 315 #define LSWITCH 315
#define LTYPESTR 316 #define LTYPEDEF 316
#define LUNION 317 #define LTYPESTR 317
#define LUNSIGNED 318 #define LUNION 318
#define LWHILE 319 #define LUNSIGNED 319
#define LVOID 320 #define LWHILE 320
#define LENUM 321 #define LVOID 321
#define LSIGNED 322 #define LENUM 322
#define LCONSTNT 323 #define LSIGNED 323
#define LVOLATILE 324 #define LCONSTNT 324
#define LSET 325 #define LVOLATILE 325
#define LSIGNOF 326 #define LSET 326
#define LRESTRICT 327 #define LSIGNOF 327
#define LINLINE 328 #define LRESTRICT 328
#define LINLINE 329
...@@ -217,7 +219,7 @@ typedef union YYSTYPE ...@@ -217,7 +219,7 @@ typedef union YYSTYPE
vlong vval; vlong vval;
} }
/* Line 1529 of yacc.c. */ /* Line 1529 of yacc.c. */
#line 221 "y.tab.h" #line 223 "y.tab.h"
YYSTYPE; YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1 # define YYSTYPE_IS_DECLARED 1
......
...@@ -2,7 +2,3 @@ enum { ...@@ -2,7 +2,3 @@ enum {
thechar = '8', thechar = '8',
CacheLineSize = 64 CacheLineSize = 64
}; };
// prefetches *addr into processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
...@@ -2,7 +2,3 @@ enum { ...@@ -2,7 +2,3 @@ enum {
thechar = '6', thechar = '6',
CacheLineSize = 64 CacheLineSize = 64
}; };
// prefetches *addr into processor's cache
#define PREFETCH(addr) runtime·prefetch(addr)
void runtime·prefetch(void*);
...@@ -2,5 +2,3 @@ enum { ...@@ -2,5 +2,3 @@ enum {
thechar = '5', thechar = '5',
CacheLineSize = 32 CacheLineSize = 32
}; };
#define PREFETCH(addr) USED(addr)
...@@ -415,11 +415,6 @@ TEXT runtime·atomicstore64(SB), 7, $0 ...@@ -415,11 +415,6 @@ TEXT runtime·atomicstore64(SB), 7, $0
XADDL AX, (SP) XADDL AX, (SP)
RET RET
// void prefetch(void *addr);
// Loads the pointer argument from 4(SP) into AX and issues a
// PREFETCHNTA on the memory it points to, then returns.
TEXT runtime·prefetch(SB), 7, $0
MOVL 4(SP), AX
PREFETCHNTA (AX)
RET
// void jmpdefer(fn, sp); // void jmpdefer(fn, sp);
// called from deferreturn. // called from deferreturn.
// 1. pop the caller // 1. pop the caller
......
...@@ -441,11 +441,6 @@ TEXT runtime·atomicstore64(SB), 7, $0 ...@@ -441,11 +441,6 @@ TEXT runtime·atomicstore64(SB), 7, $0
XCHGQ AX, 0(BX) XCHGQ AX, 0(BX)
RET RET
// void prefetch(void *addr);
// Loads the pointer argument from 8(SP) into AX and issues a
// PREFETCHNTA on the memory it points to, then returns.
TEXT runtime·prefetch(SB), 7, $0
MOVQ 8(SP), AX
PREFETCHNTA (AX)
RET
// void jmpdefer(fn, sp); // void jmpdefer(fn, sp);
// called from deferreturn. // called from deferreturn.
// 1. pop the caller // 1. pop the caller
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment