Commit 3aa006b8 authored by Russ Cox

better 64-bit handling in 8g.

fewer moves, fewer stupid LEALs.
powser1 runs (with evaln commented out).
beginnings of floating point.

R=ken
OCL=29540
CL=29543
parent 63e1b714
......@@ -505,7 +505,7 @@ gmove(Node *f, Node *t)
switch(CASE(ft, tt)) {
default:
fatal("gmove %T -> %T", f, t);
fatal("gmove %lT -> %lT", f->type, t->type);
/*
* integer copy and truncate
......
......@@ -57,7 +57,7 @@ Bconv(Fmt *fp)
if(str[0])
strcat(str, " ");
if(var[i].sym == S) {
sprint(ss, "$%ld", var[i].offset);
sprint(ss, "$%d", var[i].offset);
s = ss;
} else
s = var[i].sym->name;
......@@ -108,7 +108,7 @@ Dconv(Fmt *fp)
i = a->type;
if(i >= D_INDIR) {
if(a->offset)
sprint(str, "%ld(%R)", a->offset, i-D_INDIR);
sprint(str, "%d(%R)", a->offset, i-D_INDIR);
else
sprint(str, "(%R)", i-D_INDIR);
goto brk;
......@@ -117,7 +117,7 @@ Dconv(Fmt *fp)
default:
if(a->offset)
sprint(str, "$%ld,%R", a->offset, i);
sprint(str, "$%d,%R", a->offset, i);
else
sprint(str, "%R", i);
break;
......@@ -127,35 +127,35 @@ Dconv(Fmt *fp)
break;
case D_BRANCH:
sprint(str, "%ld(PC)", a->offset-pc);
sprint(str, "%d(PC)", a->offset-pc);
break;
case D_EXTERN:
sprint(str, "%s+%ld(SB)", a->sym->name, a->offset);
sprint(str, "%s+%d(SB)", a->sym->name, a->offset);
break;
case D_STATIC:
sprint(str, "%s<>+%ld(SB)", a->sym->name,
sprint(str, "%s<>+%d(SB)", a->sym->name,
a->offset);
break;
case D_AUTO:
sprint(str, "%s+%ld(SP)", a->sym->name, a->offset);
sprint(str, "%s+%d(SP)", a->sym->name, a->offset);
break;
case D_PARAM:
if(a->sym)
sprint(str, "%s+%ld(FP)", a->sym->name, a->offset);
sprint(str, "%s+%d(FP)", a->sym->name, a->offset);
else
sprint(str, "%ld(FP)", a->offset);
sprint(str, "%d(FP)", a->offset);
break;
case D_CONST:
sprint(str, "$%ld", a->offset);
sprint(str, "$%d", a->offset);
break;
case D_CONST2:
sprint(str, "$%ld-%ld", a->offset, a->offset2);
sprint(str, "$%d-%d", a->offset, a->offset2);
break;
case D_FCONST:
......
......@@ -27,6 +27,42 @@ is64(Type *t)
return 0;
}
/*
 * noconv reports whether the simplified kinds (simtype[etype]) of
 * t1 and t2 belong to the same group:
 *	8-bit ints, 16-bit ints, 32-bit ints and pointers,
 *	64-bit ints and pointers, float32, float64.
 * It returns 1 when they do and 0 otherwise, including when
 * t1's simplified kind is in none of the groups.
 */
int
noconv(Type *t1, Type *t2)
{
	int k1, k2;

	k1 = simtype[t1->etype];
	k2 = simtype[t2->etype];

	// integers of the same width, regardless of signedness
	if(k1 == TINT8 || k1 == TUINT8)
		return k2 == TINT8 || k2 == TUINT8;
	if(k1 == TINT16 || k1 == TUINT16)
		return k2 == TINT16 || k2 == TUINT16;

	// the pointer kinds are grouped with the integers of their width
	if(k1 == TINT32 || k1 == TUINT32 || k1 == TPTR32)
		return k2 == TINT32 || k2 == TUINT32 || k2 == TPTR32;
	if(k1 == TINT64 || k1 == TUINT64 || k1 == TPTR64)
		return k2 == TINT64 || k2 == TUINT64 || k2 == TPTR64;

	// floats match only the identical float kind
	if(k1 == TFLOAT32 || k1 == TFLOAT64)
		return k2 == k1;

	return 0;
}
/*
* generate:
* res = n;
......@@ -38,7 +74,7 @@ is64(Type *t)
void
cgen(Node *n, Node *res)
{
Node *nl, *nr, *r, n1, n2, rr;
Node *nl, *nr, *r, n1, n2, rr, f0, f1;
Prog *p1, *p2, *p3;
int a;
......@@ -112,6 +148,9 @@ cgen(Node *n, Node *res)
return;
}
if(isfloat[n->type->etype] && isfloat[nl->type->etype])
goto flt;
switch(n->op) {
default:
dump("cgen", n);
......@@ -162,7 +201,7 @@ cgen(Node *n, Node *res)
goto abop;
case OCONV:
if(eqtype(n->type, nl->type)) {
if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
cgen(nl, res);
break;
}
......@@ -303,6 +342,29 @@ uop: // unary
gmove(&n1, res);
tempfree(&n1);
return;
flt: // floating-point. 387 (not SSE2) to interoperate with 6c
nodreg(&f0, n->type, D_F0);
nodreg(&f1, n->type, D_F0+1);
if(nl->ullman >= nr->ullman) {
cgen(nl, &f0);
if(nr->addable)
gins(foptoas(n->op, n->type, 0), nr, &f0);
else {
cgen(nr, &f0);
gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
}
} else {
cgen(nr, &f0);
if(nl->addable)
gins(foptoas(n->op, n->type, Frev), nl, &f0);
else {
cgen(nl, &f0);
gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
}
}
gmove(&f0, res);
return;
}
/*
......@@ -916,7 +978,8 @@ sgen(Node *n, Node *res, int w)
static int
cancgen64(Node *n, Node *res)
{
Node adr1, adr2, t1, t2, r1, r2, r3, r4, r5, nod, *l, *r;
Node t1, t2, ax, dx, cx, ex, fx, zero, *l, *r;
Node lo1, lo2, hi1, hi2;
Prog *p1, *p2;
if(n->op == OCALL)
......@@ -936,14 +999,13 @@ cancgen64(Node *n, Node *res)
return 1;
case OMINUS:
nodconst(&zero, types[TINT32], 0);
cgen(n->left, res);
gins(ANEGL, N, res);
res->xoffset += 4;
regalloc(&nod, types[TINT32], N);
gins(AXORL, &nod, &nod);
gins(ASBBL, res, &nod);
gins(AMOVL, &nod, res);
regfree(&nod);
split64(res, &lo1, &hi1);
gins(ANEGL, N, &lo1);
gins(AADCL, &zero, &hi1);
gins(ANEGL, N, &hi1);
splitclean();
return 1;
case OADD:
......@@ -966,93 +1028,69 @@ cancgen64(Node *n, Node *res)
}
// Setup for binary operation.
tempalloc(&adr1, types[TPTR32]);
agen(l, &adr1);
tempalloc(&adr2, types[TPTR32]);
agen(r, &adr2);
split64(l, &lo1, &hi1);
split64(r, &lo2, &hi2);
nodreg(&r1, types[TPTR32], D_AX);
nodreg(&r2, types[TPTR32], D_DX);
nodreg(&r3, types[TPTR32], D_CX);
nodreg(&ax, types[TPTR32], D_AX);
nodreg(&cx, types[TPTR32], D_CX);
nodreg(&dx, types[TPTR32], D_DX);
// Do op. Leave result in DX:AX.
switch(n->op) {
case OADD:
gins(AMOVL, &lo1, &ax);
gins(AMOVL, &hi1, &dx);
gins(AADDL, &lo2, &ax);
gins(AADCL, &hi2, &dx);
break;
case OSUB:
gmove(&adr1, &r3);
r3.op = OINDREG;
r3.xoffset = 0;
gins(AMOVL, &r3, &r1);
r3.xoffset = 4;
gins(AMOVL, &r3, &r2);
r3.xoffset = 0;
r3.op = OREGISTER;
gmove(&adr2, &r3);
r3.op = OINDREG;
if(n->op == OADD)
gins(AADDL, &r3, &r1);
else
gins(ASUBL, &r3, &r1);
r3.xoffset = 4;
if(n->op == OADD)
gins(AADCL, &r3, &r2);
else
gins(ASBBL, &r3, &r2);
gins(AMOVL, &lo1, &ax);
gins(AMOVL, &hi1, &dx);
gins(ASUBL, &lo2, &ax);
gins(ASBBL, &hi2, &dx);
break;
case OMUL:
regalloc(&r4, types[TPTR32], N);
regalloc(&r5, types[TPTR32], N);
// load args into r2:r1 and r4:r3.
// leave result in r2:r1 (DX:AX)
gmove(&adr1, &r5);
r5.op = OINDREG;
r5.xoffset = 0;
gmove(&r5, &r1);
r5.xoffset = 4;
gmove(&r5, &r2);
r5.xoffset = 0;
r5.op = OREGISTER;
gmove(&adr2, &r5);
r5.op = OINDREG;
gmove(&r5, &r3);
r5.xoffset = 4;
gmove(&r5, &r4);
r5.xoffset = 0;
r5.op = OREGISTER;
// if r2|r4 == 0, use one 32 x 32 -> 64 unsigned multiply
gmove(&r2, &r5);
gins(AORL, &r4, &r5);
// let's call the next two EX and FX.
regalloc(&ex, types[TPTR32], N);
regalloc(&fx, types[TPTR32], N);
// load args into DX:AX and EX:CX.
gins(AMOVL, &lo1, &ax);
gins(AMOVL, &hi1, &dx);
gins(AMOVL, &lo2, &cx);
gins(AMOVL, &hi2, &ex);
// if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
gins(AMOVL, &dx, &fx);
gins(AORL, &ex, &fx);
p1 = gbranch(AJNE, T);
gins(AMULL, &r3, N); // AX (=r1) is implied
gins(AMULL, &cx, N); // implicit &ax
p2 = gbranch(AJMP, T);
patch(p1, pc);
// full 64x64 -> 64, from 32 x 32 -> 64.
gins(AIMULL, &r3, &r2);
gins(AMOVL, &r1, &r5);
gins(AIMULL, &r4, &r5);
gins(AADDL, &r2, &r5);
gins(AMOVL, &r3, &r2);
gins(AMULL, &r2, N); // AX (=r1) is implied
gins(AADDL, &r5, &r2);
// full 64x64 -> 64, from 32x32 -> 64.
gins(AIMULL, &cx, &dx);
gins(AMOVL, &ax, &fx);
gins(AIMULL, &ex, &fx);
gins(AADDL, &dx, &fx);
gins(AMOVL, &cx, &dx);
gins(AMULL, &dx, N); // implicit &ax
gins(AADDL, &fx, &dx);
patch(p2, pc);
regfree(&r4);
regfree(&r5);
break;
regfree(&ex);
regfree(&fx);
break;
}
splitclean();
splitclean();
tempfree(&adr2);
tempfree(&adr1);
// Store result.
gins(AMOVL, &r1, res);
res->xoffset += 4;
gins(AMOVL, &r2, res);
res->xoffset -= 4;
split64(res, &lo1, &hi1);
gins(AMOVL, &ax, &lo1);
gins(AMOVL, &dx, &hi1);
splitclean();
if(r == &t2)
tempfree(&t2);
......@@ -1068,47 +1106,23 @@ cancgen64(Node *n, Node *res)
void
cmp64(Node *nl, Node *nr, int op, Prog *to)
{
int64 x;
Node adr1, adr2, rr;
Prog *br, *p;
Node lo1, hi1, lo2, hi2, rr;
Prog *br;
Type *t;
t = nr->type;
memset(&adr1, 0, sizeof adr1);
memset(&adr2, 0, sizeof adr2);
regalloc(&adr1, types[TPTR32], N);
agen(nl, &adr1);
adr1.op = OINDREG;
nl = &adr1;
x = 0;
if(nr->op == OLITERAL) {
if(!isconst(nr, CTINT))
fatal("bad const in cmp64");
x = mpgetfix(nr->val.u.xval);
} else {
regalloc(&adr2, types[TPTR32], N);
agen(nr, &adr2);
adr2.op = OINDREG;
nr = &adr2;
}
split64(nl, &lo1, &hi1);
split64(nr, &lo2, &hi2);
// compare most significant word
nl->xoffset += 4;
if(nr->op == OLITERAL) {
p = gins(ACMPL, nl, nodintconst((uint32)(x>>32)));
} else {
regalloc(&rr, types[TUINT32], N);
nr->xoffset += 4;
gins(AMOVL, nr, &rr);
gins(ACMPL, nl, &rr);
nr->xoffset -= 4;
t = hi1.type;
if(nl->op == OLITERAL || nr->op == OLITERAL)
gins(ACMPL, &hi1, &hi2);
else {
regalloc(&rr, types[TINT32], N);
gins(AMOVL, &hi1, &rr);
gins(ACMPL, &rr, &hi2);
regfree(&rr);
}
nl->xoffset -= 4;
br = P;
switch(op) {
default:
......@@ -1153,35 +1167,24 @@ cmp64(Node *nl, Node *nr, int op, Prog *to)
}
// compare least significant word
if(nr->op == OLITERAL) {
p = gins(ACMPL, nl, nodintconst((uint32)x));
} else {
regalloc(&rr, types[TUINT32], N);
gins(AMOVL, nr, &rr);
gins(ACMPL, nl, &rr);
t = lo1.type;
if(nl->op == OLITERAL || nr->op == OLITERAL)
gins(ACMPL, &lo1, &lo2);
else {
regalloc(&rr, types[TINT32], N);
gins(AMOVL, &lo1, &rr);
gins(ACMPL, &rr, &lo2);
regfree(&rr);
}
// jump again
switch(op) {
default:
fatal("cmp64 %O %T", op, nr->type);
case OEQ:
case ONE:
case OGE:
case OGT:
case OLE:
case OLT:
patch(gbranch(optoas(op, t), T), to);
break;
}
// point first branch down here if appropriate
if(br != P)
patch(br, pc);
regfree(&adr1);
if(nr == &adr2)
regfree(&adr2);
splitclean();
splitclean();
}
......@@ -43,6 +43,14 @@ struct Prog
void* reg; // pointer to containing Reg struct
};
// Flag bits for the third argument of foptoas.
// They are bit masks and may be OR'ed together (e.g. Frev|Fpop).
enum
{
	Frev	= 0x1,	// NOTE(review): presumably selects the operand-reversed form of the op - confirm in foptoas
	Fpop	= 0x2,	// NOTE(review): presumably selects the form that pops the 387 stack - confirm in foptoas
	Fpop2	= 0x4,	// NOTE(review): presumably a pop-twice form; confirm in foptoas
};
EXTERN Biobuf* bout;
EXTERN int32 dynloc;
EXTERN uchar reg[D_NONE];
......@@ -114,6 +122,7 @@ Prog* gop(int, Node*, Node*, Node*);
void setconst(Addr*, vlong);
void setaddr(Addr*, Node*);
int optoas(int, Type*);
int foptoas(int, Type*, int);
void ginit(void);
void gclean(void);
void regalloc(Node*, Type*, Node*);
......@@ -131,7 +140,10 @@ Plist* newplist(void);
int isfat(Type*);
void sudoclean(void);
int sudoaddable(int, Node*, Addr*);
int dotaddable(Node*, Node*);
void afunclit(Addr*);
void split64(Node*, Node*, Node*);
void splitclean(void);
/*
* list.c
......
......@@ -468,7 +468,7 @@ cgen_asop(Node *n)
hard:
if(nr->ullman > nl->ullman) {
regalloc(&n2, nr->type, N);
tempalloc(&n2, nr->type);
cgen(nr, &n2);
igen(nl, &n1, N);
} else {
......
This diff is collapsed.
......@@ -857,7 +857,7 @@ convconst(Node *con, Type *t, Val *val)
con->val.u.xval = mal(sizeof *con->val.u.xval);
switch(val->ctype) {
default:
fatal("convconst ctype=%d %lT", val->ctype, t->type);
fatal("convconst ctype=%d %lT", val->ctype, t);
case CTINT:
i = mpgetfix(val->u.xval);
break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment