better 64-bit handling in 8g.

fewer moves, fewer stupid LEALs. powser1 runs (with evaln commented out). beginnings of floating point. R=ken OCL=29540 CL=29543

better 64-bit handling in 8g.
fewer moves, fewer stupid LEALs. powser1 runs (with evaln commented out). beginnings of floating point. R=ken OCL=29540 CL=29543
3aa006b8 · Russ Cox · 63e1b714 · 3aa006b8 · 3aa006b8 · 3aa006b8
Commit 3aa006b8 authored May 28, 2009 by Russ Cox
7 changed files
--- a/src/cmd/6g/gsubr.c
+++ b/src/cmd/6g/gsubr.c
@@ -505,7 +505,7 @@ gmove(Node *f, Node *t)

 	switch(CASE(ft, tt)) {
 	default:
-		fatal("gmove %T -> %T", f, t);
+		fatal("gmove %lT -> %lT", f->type, t->type);

 	/*
 	 * integer copy and truncate

--- a/src/cmd/8c/list.c
+++ b/src/cmd/8c/list.c
@@ -57,7 +57,7 @@ Bconv(Fmt *fp)
 		if(str[0])
 			strcat(str, " ");
 		if(var[i].sym == S) {
-			sprint(ss, "$%ld", var[i].offset);
+			sprint(ss, "$%d", var[i].offset);
 			s = ss;
 		} else
 			s = var[i].sym->name;
@@ -108,7 +108,7 @@ Dconv(Fmt *fp)
 	i = a->type;
 	if(i >= D_INDIR) {
 		if(a->offset)
-			sprint(str, "%ld(%R)", a->offset, i-D_INDIR);
+			sprint(str, "%d(%R)", a->offset, i-D_INDIR);
 		else
 			sprint(str, "(%R)", i-D_INDIR);
 		goto brk;
@@ -117,7 +117,7 @@ Dconv(Fmt *fp)

 	default:
 		if(a->offset)
-			sprint(str, "$%ld,%R", a->offset, i);
+			sprint(str, "$%d,%R", a->offset, i);
 		else
 			sprint(str, "%R", i);
 		break;
@@ -127,35 +127,35 @@ Dconv(Fmt *fp)
 		break;

 	case D_BRANCH:
-		sprint(str, "%ld(PC)", a->offset-pc);
+		sprint(str, "%d(PC)", a->offset-pc);
 		break;

 	case D_EXTERN:
-		sprint(str, "%s+%ld(SB)", a->sym->name, a->offset);
+		sprint(str, "%s+%d(SB)", a->sym->name, a->offset);
 		break;

 	case D_STATIC:
-		sprint(str, "%s<>+%ld(SB)", a->sym->name,
+		sprint(str, "%s<>+%d(SB)", a->sym->name,
 			a->offset);
 		break;

 	case D_AUTO:
-		sprint(str, "%s+%ld(SP)", a->sym->name, a->offset);
+		sprint(str, "%s+%d(SP)", a->sym->name, a->offset);
 		break;

 	case D_PARAM:
 		if(a->sym)
-			sprint(str, "%s+%ld(FP)", a->sym->name, a->offset);
+			sprint(str, "%s+%d(FP)", a->sym->name, a->offset);
 		else
-			sprint(str, "%ld(FP)", a->offset);
+			sprint(str, "%d(FP)", a->offset);
 		break;

 	case D_CONST:
-		sprint(str, "$%ld", a->offset);
+		sprint(str, "$%d", a->offset);
 		break;

 	case D_CONST2:
-		sprint(str, "$%ld-%ld", a->offset, a->offset2);
+		sprint(str, "$%d-%d", a->offset, a->offset2);
 		break;

 	case D_FCONST:

--- a/src/cmd/8g/cgen.c
+++ b/src/cmd/8g/cgen.c
@@ -27,6 +27,42 @@ is64(Type *t)
 	return 0;
 }

+int
+noconv(Type *t1, Type *t2)	// reports whether t1 and t2 fall in the same case group below (nonzero) or not (0)
+{
+	int e1, e2;
+
+	e1 = simtype[t1->etype];	// both types are compared through the simtype table, not by raw etype
+	e2 = simtype[t2->etype];
+
+	switch(e1) {	// each group returns 1 only when e2 is a member of the same group
+	case TINT8:
+	case TUINT8:
+		return e2 == TINT8 || e2 == TUINT8;	// signedness is ignored within the group
+
+	case TINT16:
+	case TUINT16:
+		return e2 == TINT16 || e2 == TUINT16;
+
+	case TINT32:
+	case TUINT32:
+	case TPTR32:
+		return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32;	// TPTR32 is grouped with the 32-bit integers
+
+	case TINT64:
+	case TUINT64:
+	case TPTR64:
+		return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64;	// TPTR64 is grouped with the 64-bit integers
+
+	case TFLOAT32:
+		return e2 == TFLOAT32;	// floats match only the identical float type
+
+	case TFLOAT64:
+		return e2 == TFLOAT64;
+	}
+	return 0;	// any e1 not listed above never matches; cgen's OCONV case then does not take its noconv shortcut
+}
+
 /*
 * generate:
 *	res = n;
@@ -38,7 +74,7 @@ is64(Type *t)
 void
 cgen(Node *n, Node *res)
 {
-	Node *nl, *nr, *r, n1, n2, rr;
+	Node *nl, *nr, *r, n1, n2, rr, f0, f1;
 	Prog *p1, *p2, *p3;
 	int a;

@@ -112,6 +148,9 @@ cgen(Node *n, Node *res)
 		return;
 	}

+	if(isfloat[n->type->etype] && isfloat[nl->type->etype])
+		goto flt;
+
 	switch(n->op) {
 	default:
 		dump("cgen", n);
@@ -162,7 +201,7 @@ cgen(Node *n, Node *res)
 		goto abop;

 	case OCONV:
-		if(eqtype(n->type, nl->type)) {
+		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
 			cgen(nl, res);
 			break;
 		}
@@ -303,6 +342,29 @@ uop:	// unary
 	gmove(&n1, res);
 	tempfree(&n1);
 	return;
+
+flt:	// floating-point.  387 (not SSE2) to interoperate with 8c
+	nodreg(&f0, n->type, D_F0);
+	nodreg(&f1, n->type, D_F0+1);
+	if(nl->ullman >= nr->ullman) {
+		cgen(nl, &f0);
+		if(nr->addable)
+			gins(foptoas(n->op, n->type, 0), nr, &f0);
+		else {
+			cgen(nr, &f0);
+			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
+		}
+	} else {
+		cgen(nr, &f0);
+		if(nl->addable)
+			gins(foptoas(n->op, n->type, Frev), nl, &f0);
+		else {
+			cgen(nl, &f0);
+			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
+		}
+	}
+	gmove(&f0, res);
+	return;
 }

 /*
@@ -916,7 +978,8 @@ sgen(Node *n, Node *res, int w)
 static int
 cancgen64(Node *n, Node *res)
 {
-	Node adr1, adr2, t1, t2, r1, r2, r3, r4, r5, nod, *l, *r;
+	Node t1, t2, ax, dx, cx, ex, fx, zero, *l, *r;
+	Node lo1, lo2, hi1, hi2;
 	Prog *p1, *p2;

 	if(n->op == OCALL)
@@ -936,14 +999,13 @@ cancgen64(Node *n, Node *res)
 		return 1;

 	case OMINUS:
+		nodconst(&zero, types[TINT32], 0);
 		cgen(n->left, res);
-		gins(ANEGL, N, res);
-		res->xoffset += 4;
-		regalloc(&nod, types[TINT32], N);
-		gins(AXORL, &nod, &nod);
-		gins(ASBBL, res, &nod);
-		gins(AMOVL, &nod, res);
-		regfree(&nod);
+		split64(res, &lo1, &hi1);
+		gins(ANEGL, N, &lo1);
+		gins(AADCL, &zero, &hi1);
+		gins(ANEGL, N, &hi1);
+		splitclean();
 		return 1;

 	case OADD:
@@ -966,93 +1028,69 @@ cancgen64(Node *n, Node *res)
 	}

 	// Setup for binary operation.
-	tempalloc(&adr1, types[TPTR32]);
-	agen(l, &adr1);		
-	tempalloc(&adr2, types[TPTR32]);
-	agen(r, &adr2);
+	split64(l, &lo1, &hi1);
+	split64(r, &lo2, &hi2);

-	nodreg(&r1, types[TPTR32], D_AX);
-	nodreg(&r2, types[TPTR32], D_DX);
-	nodreg(&r3, types[TPTR32], D_CX);
+	nodreg(&ax, types[TPTR32], D_AX);
+	nodreg(&cx, types[TPTR32], D_CX);
+	nodreg(&dx, types[TPTR32], D_DX);

+	// Do op.  Leave result in DX:AX.
 	switch(n->op) {
 	case OADD:
+		gins(AMOVL, &lo1, &ax);
+		gins(AMOVL, &hi1, &dx);
+		gins(AADDL, &lo2, &ax);
+		gins(AADCL, &hi2, &dx);
+		break;
+
 	case OSUB:
-		gmove(&adr1, &r3);
-		r3.op = OINDREG;
-		r3.xoffset = 0;
-		gins(AMOVL, &r3, &r1);
-		r3.xoffset = 4;
-		gins(AMOVL, &r3, &r2);
-		
-		r3.xoffset = 0;
-		r3.op = OREGISTER;
-		gmove(&adr2, &r3);
-		r3.op = OINDREG;
-		if(n->op == OADD)
-			gins(AADDL, &r3, &r1);
-		else
-			gins(ASUBL, &r3, &r1);
-		r3.xoffset = 4;
-		if(n->op == OADD)
-			gins(AADCL, &r3, &r2);
-		else
-			gins(ASBBL, &r3, &r2);
+		gins(AMOVL, &lo1, &ax);
+		gins(AMOVL, &hi1, &dx);
+		gins(ASUBL, &lo2, &ax);
+		gins(ASBBL, &hi2, &dx);
 		break;

 	case OMUL:
-		regalloc(&r4, types[TPTR32], N);
-		regalloc(&r5, types[TPTR32], N);
-		
-		// load args into r2:r1 and r4:r3.
-		// leave result in r2:r1 (DX:AX)
-		gmove(&adr1, &r5);
-		r5.op = OINDREG;
-		r5.xoffset = 0;
-		gmove(&r5, &r1);
-		r5.xoffset = 4;
-		gmove(&r5, &r2);
-		r5.xoffset = 0;
-		r5.op = OREGISTER;
-		gmove(&adr2, &r5);
-		r5.op = OINDREG;
-		gmove(&r5, &r3);
-		r5.xoffset = 4;
-		gmove(&r5, &r4);
-		r5.xoffset = 0;
-		r5.op = OREGISTER;
-
-		// if r2|r4 == 0, use one 32 x 32 -> 64 unsigned multiply
-		gmove(&r2, &r5);
-		gins(AORL, &r4, &r5);
+		// let's call the next two EX and FX.
+		regalloc(&ex, types[TPTR32], N);
+		regalloc(&fx, types[TPTR32], N);
+
+		// load args into DX:AX and EX:CX.
+		gins(AMOVL, &lo1, &ax);
+		gins(AMOVL, &hi1, &dx);
+		gins(AMOVL, &lo2, &cx);
+		gins(AMOVL, &hi2, &ex);
+
+		// if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
+		gins(AMOVL, &dx, &fx);
+		gins(AORL, &ex, &fx);
 		p1 = gbranch(AJNE, T);
-		gins(AMULL, &r3, N);	// AX (=r1) is implied
+		gins(AMULL, &cx, N);	// implicit &ax
 		p2 = gbranch(AJMP, T);
 		patch(p1, pc);

-		// full 64x64 -> 64, from 32 x 32 -> 64.
-		gins(AIMULL, &r3, &r2);
-		gins(AMOVL, &r1, &r5);
-		gins(AIMULL, &r4, &r5);
-		gins(AADDL, &r2, &r5);
-		gins(AMOVL, &r3, &r2);
-		gins(AMULL, &r2, N);	// AX (=r1) is implied
-		gins(AADDL, &r5, &r2);
+		// full 64x64 -> 64, from 32x32 -> 64.
+		gins(AIMULL, &cx, &dx);
+		gins(AMOVL, &ax, &fx);
+		gins(AIMULL, &ex, &fx);
+		gins(AADDL, &dx, &fx);
+		gins(AMOVL, &cx, &dx);
+		gins(AMULL, &dx, N);	// implicit &ax
+		gins(AADDL, &fx, &dx);
 		patch(p2, pc);
-		regfree(&r4);
-		regfree(&r5);
-		break;

+		regfree(&ex);
+		regfree(&fx);
+		break;
 	}
+	splitclean();
+	splitclean();

-	tempfree(&adr2);
-	tempfree(&adr1);
-
-	// Store result.
-	gins(AMOVL, &r1, res);
-	res->xoffset += 4;
-	gins(AMOVL, &r2, res);
-	res->xoffset -= 4;
+	split64(res, &lo1, &hi1);
+	gins(AMOVL, &ax, &lo1);
+	gins(AMOVL, &dx, &hi1);
+	splitclean();

 	if(r == &t2)
 		tempfree(&t2);
@@ -1068,47 +1106,23 @@ cancgen64(Node *n, Node *res)
 void
 cmp64(Node *nl, Node *nr, int op, Prog *to)
 {
-	int64 x;
-	Node adr1, adr2, rr;
-	Prog *br, *p;
+	Node lo1, hi1, lo2, hi2, rr;
+	Prog *br;
 	Type *t;

-	t = nr->type;
-	
-	memset(&adr1, 0, sizeof adr1);
-	memset(&adr2, 0, sizeof adr2);
-
-	regalloc(&adr1, types[TPTR32], N);
-	agen(nl, &adr1);
-	adr1.op = OINDREG;
-	nl = &adr1;
-	
-	x = 0;
-	if(nr->op == OLITERAL) {
-		if(!isconst(nr, CTINT))
-			fatal("bad const in cmp64");
-		x = mpgetfix(nr->val.u.xval);
-	} else {
-		regalloc(&adr2, types[TPTR32], N);
-		agen(nr, &adr2);
-		adr2.op = OINDREG;
-		nr = &adr2;
-	}
+	split64(nl, &lo1, &hi1);
+	split64(nr, &lo2, &hi2);

 	// compare most significant word
-	nl->xoffset += 4;
-	if(nr->op == OLITERAL) {
-		p = gins(ACMPL, nl, nodintconst((uint32)(x>>32)));
-	} else {
-		regalloc(&rr, types[TUINT32], N);
-		nr->xoffset += 4;
-		gins(AMOVL, nr, &rr);
-		gins(ACMPL, nl, &rr);
-		nr->xoffset -= 4;
+	t = hi1.type;
+	if(nl->op == OLITERAL || nr->op == OLITERAL)
+		gins(ACMPL, &hi1, &hi2);
+	else {
+		regalloc(&rr, types[TINT32], N);
+		gins(AMOVL, &hi1, &rr);
+		gins(ACMPL, &rr, &hi2);
 		regfree(&rr);
 	}
-	nl->xoffset -= 4;
-
 	br = P;
 	switch(op) {
 	default:
@@ -1153,35 +1167,24 @@ cmp64(Node *nl, Node *nr, int op, Prog *to)
 	}

 	// compare least significant word
-	if(nr->op == OLITERAL) {
-		p = gins(ACMPL, nl, nodintconst((uint32)x));
-	} else {
-		regalloc(&rr, types[TUINT32], N);
-		gins(AMOVL, nr, &rr);
-		gins(ACMPL, nl, &rr);
+	t = lo1.type;
+	if(nl->op == OLITERAL || nr->op == OLITERAL)
+		gins(ACMPL, &lo1, &lo2);
+	else {
+		regalloc(&rr, types[TINT32], N);
+		gins(AMOVL, &lo1, &rr);
+		gins(ACMPL, &rr, &lo2);
 		regfree(&rr);
 	}

 	// jump again
-	switch(op) {
-	default:
-		fatal("cmp64 %O %T", op, nr->type);
-	case OEQ:
-	case ONE:
-	case OGE:
-	case OGT:
-	case OLE:
-	case OLT:
 	patch(gbranch(optoas(op, t), T), to);
-		break;	
-	}

 	// point first branch down here if appropriate
 	if(br != P)
 		patch(br, pc);

-	regfree(&adr1);
-	if(nr == &adr2)
-		regfree(&adr2);	
+	splitclean();
+	splitclean();
 }

--- a/src/cmd/8g/gg.h
+++ b/src/cmd/8g/gg.h
@@ -43,6 +43,14 @@ struct	Prog
 	void*	reg;		// pointer to containing Reg struct
 };

+// foptoas flags: bit values for the third argument of foptoas; they may be OR'd together (cgen passes Frev|Fpop)
+enum
+{
+	Frev = 1<<0,	// cgen passes this when the right operand was generated into F0 before the left one
+	Fpop = 1<<1,	// cgen passes this when both operands were cgen'd into F0 and the instruction is issued as F0, F1
+	Fpop2 = 1<<2,	// no use visible in this change; presumably a double pop -- TODO confirm against foptoas
+};
+
 EXTERN	Biobuf*	bout;
 EXTERN	int32	dynloc;
 EXTERN	uchar	reg[D_NONE];
@@ -114,6 +122,7 @@ Prog*	gop(int, Node*, Node*, Node*);
 void	setconst(Addr*, vlong);
 void	setaddr(Addr*, Node*);
 int	optoas(int, Type*);
+int	foptoas(int, Type*, int);
 void	ginit(void);
 void	gclean(void);
 void	regalloc(Node*, Type*, Node*);
@@ -131,7 +140,10 @@ Plist*	newplist(void);
 int	isfat(Type*);
 void	sudoclean(void);
 int	sudoaddable(int, Node*, Addr*);
+int	dotaddable(Node*, Node*);
 void	afunclit(Addr*);
+void	split64(Node*, Node*, Node*);
+void	splitclean(void);

 /*
 * list.c

--- a/src/cmd/8g/ggen.c
+++ b/src/cmd/8g/ggen.c
@@ -468,7 +468,7 @@ cgen_asop(Node *n)

 hard:
 	if(nr->ullman > nl->ullman) {
-		regalloc(&n2, nr->type, N);
+		tempalloc(&n2, nr->type);
 		cgen(nr, &n2);
 		igen(nl, &n1, N);
 	} else {

--- a/src/cmd/8g/gsubr.c
+++ b/src/cmd/8g/gsubr.c
--- a/src/cmd/gc/const.c
+++ b/src/cmd/gc/const.c
@@ -857,7 +857,7 @@ convconst(Node *con, Type *t, Val *val)
 		con->val.u.xval = mal(sizeof *con->val.u.xval);
 		switch(val->ctype) {
 		default:
-			fatal("convconst ctype=%d %lT", val->ctype, t->type);
+			fatal("convconst ctype=%d %lT", val->ctype, t);
 		case CTINT:
 			i = mpgetfix(val->u.xval);
 			break;