// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"cmd/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

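// defframe fills in the final argument size and frame size in the
// function's TEXT instruction, then emits code at the top of the
// function to zero any ambiguously live stack variables so the
// garbage collector never scans uninitialized pointer words.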
func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.U.Argsize = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for l := gc.Curfn.Dcl; l != nil; l = l.Next {
		n = l.N
		if n.Needzero == 0 {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatal("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}

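// zerorange emits instructions after p that zero the stack words from
// frame+lo up to frame+hi (relative to SP). *ax records whether AX
// already holds zero so repeated calls can avoid reloading it.
// Small ranges use individual MOVQ stores, medium ranges jump into
// runtime.duffzero, and large ranges fall back to REP STOSQ.
// It returns the last instruction appended.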
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt%int64(gc.Widthreg) != 0 {
		// should only happen with nacl
		if cnt%int64(gc.Widthptr) != 0 {
			gc.Fatal("zerorange count not a multiple of widthptr %d", cnt)
		}
		p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
		lo += int64(gc.Widthptr)
		cnt -= int64(gc.Widthptr)
	}

	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && (cnt <= int64(128*gc.Widthreg)) {
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
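		// 2 and 128 = magic constants: see ../../runtime/asm_amd64.s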
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 2*(128-cnt/int64(gc.Widthreg)))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

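// appendpp allocates a new Prog with the given opcode and operands
// and links it into the instruction stream immediately after p,
// returning the newly inserted instruction.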
func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 */
func ginscall(f *gc.Node, proc int) {
	if f.Type != nil {
		extra := int32(0)
		if proc == 1 || proc == 2 {
			extra = 2 * int32(gc.Widthptr)
		}
		gc.Setmaxarg(f.Type, extra)
	}

	switch proc {
	default:
		gc.Fatal("ginscall: bad proc %d", proc)

	case 0, // normal call
		-1: // normal call but no return
		if f.Op == gc.ONAME && f.Class == gc.PFUNC {
			if f == gc.Deferreturn {
				// Deferred calls will appear to be returning to
				// the CALL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction byte before the return PC.
				// To avoid that being an unrelated instruction,
				// insert an x86 NOP so that we will have the right line number.
				// x86 NOP 0x90 is really XCHG AX, AX; use that description
				// because the NOP pseudo-instruction would be removed by
				// the linker.
				var reg gc.Node
				gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)

				gins(x86.AXCHGL, &reg, &reg)
			}

			p := gins(obj.ACALL, nil, f)
			gc.Afunclit(&p.To, f)
			if proc == -1 || gc.Noreturn(p) {
				gins(obj.AUNDEF, nil, nil)
			}
			break
		}

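		// Indirect call through a Go func value: the func value is
		// loaded into DX (the closure context register), the code
		// pointer is loaded from its first word into BX, and the
		// call goes through BX.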
		var reg gc.Node
		gc.Nodreg(&reg, gc.Types[gc.Tptr], x86.REG_DX)
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[gc.Tptr], x86.REG_BX)
		gmove(f, &reg)
		reg.Op = gc.OINDREG
		gmove(&reg, &r1)
		reg.Op = gc.OREGISTER
		gins(obj.ACALL, &reg, &r1)

	case 3: // normal call of c function pointer
		gins(obj.ACALL, nil, f)

	case 1, // call in new proc (go)
		2: // deferred call (defer)
		stk := gc.Node{}

		stk.Op = gc.OINDREG
		stk.Val.U.Reg = x86.REG_SP
		stk.Xoffset = 0

		var reg gc.Node
		if gc.Widthptr == 8 {
			// size of arguments at 0(SP)
			ginscon(x86.AMOVQ, int64(gc.Argsize(f.Type)), &stk)

			// FuncVal* at 8(SP)
			stk.Xoffset = int64(gc.Widthptr)

			gc.Nodreg(&reg, gc.Types[gc.TINT64], x86.REG_AX)
			gmove(f, &reg)
			gins(x86.AMOVQ, &reg, &stk)
		} else {
			// size of arguments at 0(SP)
			ginscon(x86.AMOVL, int64(gc.Argsize(f.Type)), &stk)

			// FuncVal* at 4(SP)
			stk.Xoffset = int64(gc.Widthptr)

			gc.Nodreg(&reg, gc.Types[gc.TINT32], x86.REG_AX)
			gmove(f, &reg)
			gins(x86.AMOVL, &reg, &stk)
		}

		if proc == 1 {
			ginscall(gc.Newproc, 0)
		} else {
			if gc.Hasdefer == 0 {
				gc.Fatal("hasdefer=0 but has defer")
			}
			ginscall(gc.Deferproc, 0)
		}

		if proc == 2 {
			gc.Nodreg(&reg, gc.Types[gc.TINT32], x86.REG_AX)
			gins(x86.ATESTL, &reg, &reg)
			p := gc.Gbranch(x86.AJEQ, nil, +1)
			cgen_ret(nil)
			gc.Patch(p, gc.Pc)
		}
	}
}

/*
 * n is call to interface method.
 * generate res = n.
 */
func cgen_callinter(n *gc.Node, res *gc.Node, proc int) {
	i := n.Left
	if i.Op != gc.ODOTINTER {
		gc.Fatal("cgen_callinter: not ODOTINTER %v", gc.Oconv(int(i.Op), 0))
	}

	f := i.Right // field
	if f.Op != gc.ONAME {
		gc.Fatal("cgen_callinter: not ONAME %v", gc.Oconv(int(f.Op), 0))
	}

	i = i.Left // interface

	if i.Addable == 0 {
		var tmpi gc.Node
		gc.Tempname(&tmpi, i.Type)
		cgen(i, &tmpi)
		i = &tmpi
	}

	gc.Genlist(n.List) // assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
	var nodi gc.Node
	igen(i, &nodi, res) // REG = &inter

	var nodsp gc.Node
	gc.Nodindreg(&nodsp, gc.Types[gc.Tptr], x86.REG_SP)

	nodsp.Xoffset = 0
	if proc != 0 {
		nodsp.Xoffset += 2 * int64(gc.Widthptr) // leave room for size & fn
	}
	nodi.Type = gc.Types[gc.Tptr]
	nodi.Xoffset += int64(gc.Widthptr)
	cgen(&nodi, &nodsp) // {0, 8(nacl), or 16}(SP) = 8(REG) -- i.data

	var nodo gc.Node
	regalloc(&nodo, gc.Types[gc.Tptr], res)

	nodi.Type = gc.Types[gc.Tptr]
	nodi.Xoffset -= int64(gc.Widthptr)
	cgen(&nodi, &nodo) // REG = 0(REG) -- i.tab
	regfree(&nodi)

	var nodr gc.Node
	regalloc(&nodr, gc.Types[gc.Tptr], &nodo)
	if n.Left.Xoffset == gc.BADWIDTH {
		gc.Fatal("cgen_callinter: badwidth")
	}
	gc.Cgen_checknil(&nodo) // in case offset is huge
	nodo.Op = gc.OINDREG
	nodo.Xoffset = n.Left.Xoffset + 3*int64(gc.Widthptr) + 8
	if proc == 0 {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr) // REG = 32+offset(REG) -- i.tab->fun[f]
		proc = 3
	} else {
		// go/defer. generate go func value.
		gins(x86.ALEAQ, &nodo, &nodr) // REG = &(32+offset(REG)) -- i.tab->fun[f]
	}

	nodr.Type = n.Left.Type
	ginscall(&nodr, proc)

	regfree(&nodr)
	regfree(&nodo)
}

/*
 * generate function call;
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 */
func cgen_call(n *gc.Node, proc int) {
	if n == nil {
		return
	}

	var afun gc.Node
	if n.Left.Ullman >= gc.UINF {
		// if name involves a fn call
		// precompute the address of the fn
		gc.Tempname(&afun, gc.Types[gc.Tptr])

		cgen(n.Left, &afun)
	}

	gc.Genlist(n.List) // assign the args
	t := n.Left.Type

	// call tempname pointer
	if n.Left.Ullman >= gc.UINF {
		var nod gc.Node
		regalloc(&nod, gc.Types[gc.Tptr], nil)
		gc.Cgen_as(&nod, &afun)
		nod.Type = t
		ginscall(&nod, proc)
		regfree(&nod)
		return
	}

	// call pointer
	if n.Left.Op != gc.ONAME || n.Left.Class != gc.PFUNC {
		var nod gc.Node
		regalloc(&nod, gc.Types[gc.Tptr], nil)
		gc.Cgen_as(&nod, n.Left)
		nod.Type = t
		ginscall(&nod, proc)
		regfree(&nod)
		return
	}

	// call direct
	n.Left.Method = 1

	ginscall(n.Left, proc)
}

/*
 * call to n has already been generated.
 * generate:
 *	res = return value from call.
 */
func cgen_callret(n *gc.Node, res *gc.Node) {
	t := n.Left.Type
	if t.Etype == gc.TPTR32 || t.Etype == gc.TPTR64 {
		t = t.Type
	}

	var flist gc.Iter
	fp := gc.Structfirst(&flist, gc.Getoutarg(t))
	if fp == nil {
		gc.Fatal("cgen_callret: nil")
	}

	nod := gc.Node{}
	nod.Op = gc.OINDREG
	nod.Val.U.Reg = x86.REG_SP
	nod.Addable = 1

	nod.Xoffset = fp.Width
	nod.Type = fp.Type
	gc.Cgen_as(res, &nod)
}

/*
 * call to n has already been generated.
 * generate:
 *	res = &return value from call.
 */
func cgen_aret(n *gc.Node, res *gc.Node) {
	t := n.Left.Type
	if gc.Isptr[t.Etype] != 0 {
		t = t.Type
	}

	var flist gc.Iter
	fp := gc.Structfirst(&flist, gc.Getoutarg(t))
	if fp == nil {
		gc.Fatal("cgen_aret: nil")
	}

	nod1 := gc.Node{}
	nod1.Op = gc.OINDREG
	nod1.Val.U.Reg = x86.REG_SP
	nod1.Addable = 1

	nod1.Xoffset = fp.Width
	nod1.Type = fp.Type

	if res.Op != gc.OREGISTER {
		var nod2 gc.Node
		regalloc(&nod2, gc.Types[gc.Tptr], res)
		gins(leaptr, &nod1, &nod2)
		gins(movptr, &nod2, res)
		regfree(&nod2)
	} else {
		gins(leaptr, &nod1, res)
	}
}

/*
 * generate return.
 * n->left is assignments to return values.
 */
func cgen_ret(n *gc.Node) {
	if n != nil {
		gc.Genlist(n.List) // copy out args
	}
	if gc.Hasdefer != 0 {
		ginscall(gc.Deferreturn, 0)
	}
	gc.Genlist(gc.Curfn.Exit)
	p := gins(obj.ARET, nil, nil)
	if n != nil && n.Op == gc.ORETJMP {
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(n.Left.Sym)
	}
}

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := 0
	if gc.Issigned[t.Etype] != 0 {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -(1<<uint64(t.Width*8-1)) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 {
			check = 0
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] != 0 {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = 0
	}

	a := optoas(op, t)

	var n3 gc.Node
	regalloc(&n3, t0, nil)
	var ax gc.Node
	var oldax gc.Node
	if nl.Ullman >= nr.Ullman {
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		cgen(nl, &ax)
		regalloc(&ax, t0, &ax) // mark ax live during cgen
		cgen(nr, &n3)
		regfree(&ax)
	} else {
		cgen(nr, &n3)
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		cgen(nl, &ax)
	}

	if t != t0 {
		// Convert
		ax1 := ax

		n31 := n3
		ax.Type = t
		n3.Type = t
		gmove(&ax1, &ax)
		gmove(&n31, &n3)
	}

	p2 := (*obj.Prog)(nil)
	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

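	// check is set when the divisor might be -1: the most negative
	// value divided by -1 overflows and would trap in hardware, so
	// a/(-1) and a%(-1) are handled explicitly here.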
	if check != 0 {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &ax)

			gmove(&ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	if gc.Issigned[t.Etype] == 0 {
		gc.Nodconst(&n4, t, 0)
		gmove(&n4, &dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(a, &n3, nil)
	regfree(&n3)
	if op == gc.ODIV {
		gmove(&ax, res)
	} else {
		gmove(&dx, res)
	}
	restx(&dx, &olddx)
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
	restx(&ax, &oldax)
}

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := int(reg[dr])

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		oldx.Ostk = int32(r) // squirrel away old r value
		reg[dr] = 1
	}
}

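// restx undoes a previous savex: if oldx holds a saved value, it is
// moved back into the special register x and the saved register
// allocation count is restored.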
func restx(x *gc.Node, oldx *gc.Node) {
	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT64]
		reg[x.Val.U.Reg] = uint8(oldx.Ostk)
		gmove(oldx, x)
		regfree(oldx)
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
func cgen_div(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var w int

	if nr.Op != gc.OLITERAL {
		goto longdiv
	}
	w = int(nl.Type.Width * 8)

	// Front end handled 32-bit division. We only need to handle 64-bit.
	// try to do division via multiplication by (2^w)/d;
	// see Hacker's Delight, chapter 10.
	switch gc.Simtype[nl.Type.Etype] {
	default:
		goto longdiv

	case gc.TUINT64:
		var m gc.Magic
		m.W = w
		m.Ud = uint64(gc.Mpgetfix(nr.Val.U.Xval))
		gc.Umagic(&m)
		if m.Bad != 0 {
			break
		}
		if op == gc.OMOD {
			goto longmod
		}

		var n1 gc.Node
		cgenr(nl, &n1, nil)
		var n2 gc.Node
		gc.Nodconst(&n2, nl.Type, int64(m.Um))
		var n3 gc.Node
		regalloc(&n3, nl.Type, res)
		cgen_hmul(&n1, &n2, &n3)

		if m.Ua != 0 {
			// need to add numerator accounting for overflow
			gins(optoas(gc.OADD, nl.Type), &n1, &n3)

			gc.Nodconst(&n2, nl.Type, 1)
			gins(optoas(gc.ORROTC, nl.Type), &n2, &n3)
			gc.Nodconst(&n2, nl.Type, int64(m.S)-1)
			gins(optoas(gc.ORSH, nl.Type), &n2, &n3)
		} else {
			gc.Nodconst(&n2, nl.Type, int64(m.S))
			gins(optoas(gc.ORSH, nl.Type), &n2, &n3) // shift dx
		}

		gmove(&n3, res)
		regfree(&n1)
		regfree(&n3)
		return

	case gc.TINT64:
		var m gc.Magic
		m.W = w
		m.Sd = gc.Mpgetfix(nr.Val.U.Xval)
		gc.Smagic(&m)
		if m.Bad != 0 {
			break
		}
		if op == gc.OMOD {
			goto longmod
		}

		var n1 gc.Node
		cgenr(nl, &n1, res)
		var n2 gc.Node
		gc.Nodconst(&n2, nl.Type, m.Sm)
		var n3 gc.Node
		regalloc(&n3, nl.Type, nil)
		cgen_hmul(&n1, &n2, &n3)

		if m.Sm < 0 {
			// need to add numerator
			gins(optoas(gc.OADD, nl.Type), &n1, &n3)
		}

		gc.Nodconst(&n2, nl.Type, int64(m.S))
		gins(optoas(gc.ORSH, nl.Type), &n2, &n3) // shift n3

		gc.Nodconst(&n2, nl.Type, int64(w)-1)

		gins(optoas(gc.ORSH, nl.Type), &n2, &n1) // -1 iff num is neg
		gins(optoas(gc.OSUB, nl.Type), &n1, &n3) // added

		if m.Sd < 0 {
			// this could probably be removed
			// by factoring it into the multiplier
			gins(optoas(gc.OMINUS, nl.Type), nil, &n3)
		}

		gmove(&n3, res)
		regfree(&n1)
		regfree(&n3)
		return
	}

	goto longdiv

	// division and mod using (slow) hardware instruction
longdiv:
	dodiv(op, nl, nr, res)

	return

	// mod using formula A%B = A-(A/B*B) but
	// we know that there is a fast algorithm for A/B
longmod:
	var n1 gc.Node
	regalloc(&n1, nl.Type, res)

	cgen(nl, &n1)
	var n2 gc.Node
	regalloc(&n2, nl.Type, nil)
	cgen_div(gc.ODIV, &n1, nr, &n2)
	a := optoas(gc.OMUL, nl.Type)
	if w == 8 {
		// use 2-operand 16-bit multiply
		// because there is no 2-operand 8-bit multiply
		a = x86.AIMULW
	}

	if !gc.Smallintconst(nr) {
		var n3 gc.Node
		regalloc(&n3, nl.Type, nil)
		cgen(nr, &n3)
		gins(a, &n3, &n2)
		regfree(&n3)
	} else {
		gins(a, nr, &n2)
	}
	gins(optoas(gc.OSUB, nl.Type), &n2, &n1)
	gmove(&n1, res)
	regfree(&n1)
	regfree(&n2)
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type
	a := optoas(gc.OHMUL, t)
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	var n1 gc.Node
	cgenr(nl, &n1, res)
	var n2 gc.Node
	cgenr(nr, &n2, nil)
	var ax gc.Node
	gc.Nodreg(&ax, t, x86.REG_AX)
	gmove(&n1, &ax)
	gins(a, &n2, nil)
	regfree(&n2)
	regfree(&n1)

	var dx gc.Node
	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

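	// the high half of the product is left in DX by the multiply
	// (copied there from AH above in the byte case).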
	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node
	var n3 gc.Node
	var cx gc.Node
	var oldcx gc.Node
	var rcx int
	var tcount *gc.Type

	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		regalloc(&n1, nl.Type, res)
		cgen(nl, &n1)
		sc := uint64(gc.Mpgetfix(nr.Val.U.Xval))
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		regfree(&n1)
		goto ret
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		cgen(nr, &n5)
		nr = &n5
	}

	rcx = int(reg[x86.REG_CX])
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount = gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	regalloc(&n3, tcount, &n1)  // to clear high bits of CX

	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	oldcx = gc.Node{}
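	// Variable shift counts must be in CX on x86, so if CX is already
	// allocated and is not the destination, save its value and
	// restore it after the shift.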
	if rcx > 0 && !gc.Samereg(&cx, res) {
		regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	if gc.Samereg(&cx, res) {
		regalloc(&n2, nl.Type, nil)
	} else {
		regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		cgen(nl, &n2)
		cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		cgen(nr, &n1)
		gmove(&n1, &n3)
		cgen(nl, &n2)
	}

	regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] != 0 {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		regfree(&oldcx)
	}

	gmove(&n2, res)

	regfree(&n1)
	regfree(&n2)

ret:
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	regalloc(&n1b, nl.Type, res)

	cgen(nl, &n1b)
	var n2b gc.Node
	regalloc(&n2b, nr.Type, nil)
	cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if gc.Issigned[nl.Type.Etype] != 0 {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Val.U.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Val.U.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	regfree(&n1b)
	regfree(&n2b)
}

func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := nl.Type.Width

	// Avoid taking the address for simple enough types.
	if componentgen(nil, nl) {
		return
	}

	c := w % 8 // bytes
	q := w / 8 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSQ.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSQ loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		agenr(nl, &n1, nil)

		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for ; q > 0; q-- {
			n1.Type = z.Type
			gins(x86.AMOVQ, &z, &n1)
			n1.Xoffset += 8
		}

		if c >= 4 {
			gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
			c -= 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for ; c > 0; c-- {
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		regfree(&n1)
		return
	}

	var oldn1 gc.Node
	var n1 gc.Node
	savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
	agen(nl, &n1)

	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
	gconreg(x86.AMOVL, 0, x86.REG_AX)

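	// Very large blocks, and all blocks on NaCl, are cleared with
	// REP STOSQ; otherwise jump into runtime.duffzero at the
	// appropriate offset.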
	if q > 128 || gc.Nacl {
		gconreg(movptr, q, x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+
	} else {
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		// 2 and 128 = magic constants: see ../../runtime/asm_amd64.s
		p.To.Offset = 2 * (128 - q)
	}

	z := ax
	di := n1
	if w >= 8 && c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT64]
		di.Type = z.Type
		p := gins(x86.AMOVQ, &z, &di)
		p.To.Scale = 1
		p.To.Offset = c - 8
	} else if c >= 4 {
		di.Op = gc.OINDREG
		z.Type = gc.Types[gc.TINT32]
		di.Type = z.Type
		gins(x86.AMOVL, &z, &di)
		if c > 4 {
			p := gins(x86.AMOVL, &z, &di)
			p.To.Scale = 1
			p.To.Offset = c - 4
		}
	} else {
		for c > 0 {
			gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+
			c--
		}
	}

	restx(&n1, &oldn1)
	restx(&ax, &oldax)
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = int16(cmpptr)
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.U.Branch = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = x86.REG_NONE
		}

		p2.To.Offset = 0
	}
}