lex.c 34.3 KB
Newer Older
1 2 3 4 5 6 7
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#define		EXTERN
#include	"go.h"
#include	"y.tab.h"
8
#include	<ar.h>
9

Russ Cox's avatar
Russ Cox committed
10 11 12 13 14
#undef	getc
#undef	ungetc
#define	getc	ccgetc
#define	ungetc	ccungetc

15
extern int yychar;
Russ Cox's avatar
Russ Cox committed
16
int windows;
Russ Cox's avatar
Russ Cox committed
17 18
int yyprev;
int yylast;
Russ Cox's avatar
Russ Cox committed
19

Russ Cox's avatar
Russ Cox committed
20 21 22 23 24 25 26 27
static void	lexinit(void);
static void	lexfini(void);
static void	yytinit(void);
static int	getc(void);
static void	ungetc(int);
static int32	getr(void);
static int	escchar(int, int*, vlong*);
static void	addidir(char*);
28
static int	getlinepragma(void);
29
static char *goos, *goarch, *goroot;
30

Russ Cox's avatar
Russ Cox committed
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
// Our own isdigit, isspace, isalpha, isalnum that take care 
// of EOF and other out of range arguments.
// yy_isdigit reports (as 1 or 0) whether c lies in 0..0xFF and
// isdigit classifies it as a decimal digit.  Values outside that
// range, such as EOF, yield 0 without being passed to isdigit.
static int
yy_isdigit(int c)
{
	if(c < 0 || c > 0xFF)
		return 0;
	return isdigit(c) != 0;
}

// yy_isspace reports (as 1 or 0) whether c lies in 0..0xFF and
// isspace classifies it as white space.  Values outside that
// range, such as EOF, yield 0 without being passed to isspace.
static int
yy_isspace(int c)
{
	if(c < 0 || c > 0xFF)
		return 0;
	return isspace(c) != 0;
}

// yy_isalpha reports (as 1 or 0) whether c lies in 0..0xFF and
// isalpha classifies it as a letter.  Values outside that
// range, such as EOF, yield 0 without being passed to isalpha.
static int
yy_isalpha(int c)
{
	if(c < 0 || c > 0xFF)
		return 0;
	return isalpha(c) != 0;
}

// yy_isalnum reports (as 1 or 0) whether c lies in 0..0xFF and
// isalnum classifies it as a letter or digit.  Values outside that
// range, such as EOF, yield 0 without being passed to isalnum.
static int
yy_isalnum(int c)
{
	if(c < 0 || c > 0xFF)
		return 0;
	return isalnum(c) != 0;
}

// Disallow use of isdigit etc.
#undef isdigit
#undef isspace
#undef isalpha
#undef isalnum
#define isdigit use_yy_isdigit_instead_of_isdigit
#define isspace use_yy_isspace_instead_of_isspace
#define isalpha use_yy_isalpha_instead_of_isalpha
#define isalnum use_yy_isalnum_instead_of_isalnum

67 68 69 70 71 72
// DBG(...) behaves like print(...) but only when debug['x'] is set.
// The "if(...);else print" shape keeps the expansion a single
// statement whose if already owns an else, so DBG can be used as
// the body of an enclosing if/else without capturing that else.
#define	DBG	if(!debug['x']);else print
enum
{
	// end-of-input marker used by the character readers in this file
	EOF		= -1,
};

Russ Cox's avatar
Russ Cox committed
73 74 75
void
usage(void)
{
76
	print("gc: usage: %cg [flags] file.go...\n", thechar);
Russ Cox's avatar
Russ Cox committed
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
	print("flags:\n");
	// -A is allow use of "any" type, for bootstrapping
	print("  -I DIR search for packages in DIR\n");
	print("  -d print declarations\n");
	print("  -e no limit on number of errors printed\n");
	print("  -f print stack frame structure\n");
	print("  -h panic on an error\n");
	print("  -o file specify output file\n");
	print("  -S print the assembly language\n");
	print("  -V print the compiler version\n");
	print("  -u disable package unsafe\n");
	print("  -w print the parse tree after typing\n");
	print("  -x print lex tokens\n");
	exit(0);
}

Russ Cox's avatar
Russ Cox committed
93 94 95 96 97 98 99 100 101 102 103 104
// fault is installed by main as the handler for SIGBUS and SIGSEGV.
// s is the signal number; it is not used.
void
fault(int s)
{
	(void)s;

	// Errors have already been reported for this program, so
	// skip the complaint about the fault itself: the user can
	// fix the reported problems and run the compiler again.
	if(nerrors > 0) {
		errorexit();
	}
	fatal("fault");
}

105
int
106
main(int argc, char *argv[])
107
{
Russ Cox's avatar
Russ Cox committed
108 109
	int i, c;
	NodeList *l;
Russ Cox's avatar
Russ Cox committed
110
	char *p;
Russ Cox's avatar
Russ Cox committed
111 112 113
	
	signal(SIGBUS, fault);
	signal(SIGSEGV, fault);
114

115 116 117 118
	localpkg = mkpkg(strlit(""));
	localpkg->prefix = "\"\"";

	builtinpkg = mkpkg(strlit("go.builtin"));
119

120
	gostringpkg = mkpkg(strlit("go.string"));
121 122 123
	gostringpkg->name = "go.string";
	gostringpkg->prefix = "go.string";	// not go%2estring

124
	runtimepkg = mkpkg(strlit("runtime"));
125 126
	runtimepkg->name = "runtime";

127
	typepkg = mkpkg(strlit("type"));
128 129
	typepkg->name = "type";

130
	unsafepkg = mkpkg(strlit("unsafe"));
131
	unsafepkg->name = "unsafe";
132 133 134 135 136

	goroot = getgoroot();
	goos = getgoos();
	goarch = thestring;

137 138 139 140 141 142 143 144 145
	outfile = nil;
	ARGBEGIN {
	default:
		c = ARGC();
		if(c >= 0 && c < sizeof(debug))
			debug[c]++;
		break;

	case 'o':
Russ Cox's avatar
Russ Cox committed
146
		outfile = EARGF(usage());
147 148
		break;

149
	case 'I':
Russ Cox's avatar
Russ Cox committed
150 151 152 153 154
		addidir(EARGF(usage()));
		break;
	
	case 'u':
		safemode = 1;
155
		break;
Russ Cox's avatar
Russ Cox committed
156

157 158
	case 'V':
		print("%cg version %s\n", thechar, getgoversion());
Russ Cox's avatar
Russ Cox committed
159
		exit(0);
160 161
	} ARGEND

Russ Cox's avatar
Russ Cox committed
162
	if(argc < 1)
Russ Cox's avatar
Russ Cox committed
163
		usage();
164

165 166 167
	// special flag to detect compilation of package runtime
	compiling_runtime = debug['+'];

168 169
	pathname = mal(1000);
	if(getwd(pathname, 999) == 0)
170 171
		strcpy(pathname, "/???");

Russ Cox's avatar
Russ Cox committed
172
	if(yy_isalpha(pathname[0]) && pathname[1] == ':') {
Russ Cox's avatar
Russ Cox committed
173 174 175
		// On Windows.
		windows = 1;

Russ Cox's avatar
Russ Cox committed
176 177 178 179 180 181
		// Canonicalize path by converting \ to / (Windows accepts both).
		for(p=pathname; *p; p++)
			if(*p == '\\')
				*p = '/';
	}

182 183 184 185 186 187 188
	fmtinstall('O', Oconv);		// node opcodes
	fmtinstall('E', Econv);		// etype opcodes
	fmtinstall('J', Jconv);		// all the node flags
	fmtinstall('S', Sconv);		// sym pointer
	fmtinstall('T', Tconv);		// type pointer
	fmtinstall('N', Nconv);		// node pointer
	fmtinstall('Z', Zconv);		// escaped string
189
	fmtinstall('L', Lconv);		// line number
Ken Thompson's avatar
Ken Thompson committed
190
	fmtinstall('B', Bconv);		// big numbers
191
	fmtinstall('F', Fconv);		// big float numbers
Russ Cox's avatar
Russ Cox committed
192

193
	betypeinit();
Russ Cox's avatar
Russ Cox committed
194
	if(widthptr == 0)
195 196
		fatal("betypeinit failed");

197
	lexinit();
Russ Cox's avatar
Russ Cox committed
198
	typeinit();
Russ Cox's avatar
Russ Cox committed
199
	yytinit();
200

201
	blockgen = 1;
Russ Cox's avatar
Russ Cox committed
202 203
	dclcontext = PEXTERN;
	nerrors = 0;
204
	lexlineno = 1;
205

Russ Cox's avatar
Russ Cox committed
206 207 208 209 210 211
	for(i=0; i<argc; i++) {
		infile = argv[i];
		linehist(infile, 0, 0);

		curio.infile = infile;
		curio.bin = Bopen(infile, OREAD);
212 213 214 215
		if(curio.bin == nil) {
			print("open %s: %r\n", infile);
			errorexit();
		}
Russ Cox's avatar
Russ Cox committed
216 217
		curio.peekc = 0;
		curio.peekc1 = 0;
Russ Cox's avatar
Russ Cox committed
218
		curio.nlsemi = 0;
219

Russ Cox's avatar
Russ Cox committed
220
		block = 1;
221

Russ Cox's avatar
Russ Cox committed
222 223 224
		yyparse();
		if(nsyntaxerrors != 0)
			errorexit();
225

Russ Cox's avatar
Russ Cox committed
226 227 228 229 230
		linehist(nil, 0, 0);
		if(curio.bin != nil)
			Bterm(curio.bin);
	}
	testdclstack();
231
	mkpackage(localpkg->name);	// final import not used checks
232
	lexfini();
Russ Cox's avatar
Russ Cox committed
233 234 235 236

	typecheckok = 1;
	if(debug['f'])
		frame(1);
Russ Cox's avatar
Russ Cox committed
237

238
	// Process top-level declarations in four phases.
Russ Cox's avatar
Russ Cox committed
239 240 241 242 243
	// Phase 1: const, type, and names and types of funcs.
	//   This will gather all the information about types
	//   and methods but doesn't depend on any of it.
	// Phase 2: Variable assignments.
	//   To check interface assignments, depends on phase 1.
244 245
	// Phase 3: Type check function bodies.
	// Phase 4: Compile function bodies.
Russ Cox's avatar
Russ Cox committed
246
	defercheckwidth();
Russ Cox's avatar
Russ Cox committed
247 248 249 250 251 252
	for(l=xtop; l; l=l->next)
		if(l->n->op != ODCL && l->n->op != OAS)
			typecheck(&l->n, Etop);
	for(l=xtop; l; l=l->next)
		if(l->n->op == ODCL || l->n->op == OAS)
			typecheck(&l->n, Etop);
253
	resumetypecopy();
Russ Cox's avatar
Russ Cox committed
254
	resumecheckwidth();
255 256 257 258 259 260 261 262

	for(l=xtop; l; l=l->next)
		if(l->n->op == ODCLFUNC) {
			curfn = l->n;
			typechecklist(l->n->nbody, Etop);
		}
	curfn = nil;

Russ Cox's avatar
Russ Cox committed
263 264
	for(l=xtop; l; l=l->next)
		if(l->n->op == ODCLFUNC)
Russ Cox's avatar
Russ Cox committed
265
			funccompile(l->n, 0);
266

Russ Cox's avatar
Russ Cox committed
267 268
	if(nerrors == 0)
		fninit(xtop);
269

Russ Cox's avatar
Russ Cox committed
270 271 272 273
	while(closures) {
		l = closures;
		closures = nil;
		for(; l; l=l->next)
Russ Cox's avatar
Russ Cox committed
274
			funccompile(l->n, 1);
Russ Cox's avatar
Russ Cox committed
275
	}
276

277 278 279
	for(l=externdcl; l; l=l->next)
		if(l->n->op == ONAME)
			typecheck(&l->n, Erv);
280 281 282 283 284 285

	if(nerrors)
		errorexit();

	dumpobj();

Ken Thompson's avatar
Ken Thompson committed
286 287 288
	if(nerrors)
		errorexit();

Russ Cox's avatar
Russ Cox committed
289
	flusherrors();
290
	exit(0);
291 292 293
	return 0;
}

Russ Cox's avatar
Russ Cox committed
294
static int
295 296
arsize(Biobuf *b, char *name)
{
Rob Pike's avatar
Rob Pike committed
297 298 299
	struct ar_hdr *a;

	if((a = Brdline(b, '\n')) == nil)
300
		return -1;
Rob Pike's avatar
Rob Pike committed
301
	if(Blinelen(b) != sizeof(struct ar_hdr))
302
		return -1;
Rob Pike's avatar
Rob Pike committed
303
	if(strncmp(a->name, name, strlen(name)) != 0)
304
		return -1;
Rob Pike's avatar
Rob Pike committed
305 306 307
	return atoi(a->size);
}

Russ Cox's avatar
Russ Cox committed
308
static int
Rob Pike's avatar
Rob Pike committed
309 310 311 312 313 314 315 316 317 318 319 320 321 322
skiptopkgdef(Biobuf *b)
{
	char *p;
	int sz;

	/* archive header */
	if((p = Brdline(b, '\n')) == nil)
		return 0;
	if(Blinelen(b) != 8)
		return 0;
	if(memcmp(p, "!<arch>\n", 8) != 0)
		return 0;
	/* symbol table is first; skip it */
	sz = arsize(b, "__.SYMDEF");
323
	if(sz < 0)
Rob Pike's avatar
Rob Pike committed
324 325 326 327 328 329 330 331 332
		return 0;
	Bseek(b, sz, 1);
	/* package export block is second */
	sz = arsize(b, "__.PKGDEF");
	if(sz <= 0)
		return 0;
	return 1;
}

Russ Cox's avatar
Russ Cox committed
333
static void
334 335 336 337 338 339 340 341 342 343 344 345 346 347
addidir(char* dir)
{
	Idir** pp;

	if(dir == nil)
		return;

	for(pp = &idirs; *pp != nil; pp = &(*pp)->link)
		;
	*pp = mal(sizeof(Idir));
	(*pp)->link = nil;
	(*pp)->dir = dir;
}

348
// is this path a local name?  begins with ./ or ../ or /
Russ Cox's avatar
Russ Cox committed
349
static int
350 351
islocalname(Strlit *name)
{
Russ Cox's avatar
Russ Cox committed
352
	if(!windows && name->len >= 1 && name->s[0] == '/')
353
		return 1;
Russ Cox's avatar
Russ Cox committed
354
	if(windows && name->len >= 3 &&
Russ Cox's avatar
Russ Cox committed
355
	   yy_isalpha(name->s[0]) && name->s[1] == ':' && name->s[2] == '/')
Russ Cox's avatar
Russ Cox committed
356
	   	return 1;
357 358 359 360 361 362 363
	if(name->len >= 2 && strncmp(name->s, "./", 2) == 0)
		return 1;
	if(name->len >= 3 && strncmp(name->s, "../", 3) == 0)
		return 1;
	return 0;
}

Russ Cox's avatar
Russ Cox committed
364
static int
365
findpkg(Strlit *name)
Rob Pike's avatar
Rob Pike committed
366
{
Russ Cox's avatar
Russ Cox committed
367
	Idir *p;
368
	char *q;
Rob Pike's avatar
Rob Pike committed
369

370
	if(islocalname(name)) {
Russ Cox's avatar
Russ Cox committed
371
		if(safemode)
372
			return 0;
373 374 375 376 377 378 379 380 381 382 383 384
		// try .a before .6.  important for building libraries:
		// if there is an array.6 in the array.a library,
		// want to find all of array.a, not just array.6.
		snprint(namebuf, sizeof(namebuf), "%Z.a", name);
		if(access(namebuf, 0) >= 0)
			return 1;
		snprint(namebuf, sizeof(namebuf), "%Z.%c", name, thechar);
		if(access(namebuf, 0) >= 0)
			return 1;
		return 0;
	}

385 386 387 388 389 390 391 392
	// local imports should be canonicalized already.
	// don't want to see "container/../container/vector"
	// as different from "container/vector".
	q = mal(name->len+1);
	memmove(q, name->s, name->len);
	q[name->len] = '\0';
	cleanname(q);
	if(strlen(q) != name->len || memcmp(q, name->s, name->len) != 0) {
393
		yyerror("non-canonical import path %Z (should be %s)", name, q);
394 395 396
		return 0;
	}

397 398 399 400
	for(p = idirs; p != nil; p = p->link) {
		snprint(namebuf, sizeof(namebuf), "%s/%Z.a", p->dir, name);
		if(access(namebuf, 0) >= 0)
			return 1;
401
		snprint(namebuf, sizeof(namebuf), "%s/%Z.%c", p->dir, name, thechar);
402 403 404 405
		if(access(namebuf, 0) >= 0)
			return 1;
	}
	if(goroot != nil) {
Russ Cox's avatar
Russ Cox committed
406
		snprint(namebuf, sizeof(namebuf), "%s/pkg/%s_%s/%Z.a", goroot, goos, goarch, name);
407 408
		if(access(namebuf, 0) >= 0)
			return 1;
Russ Cox's avatar
Russ Cox committed
409
		snprint(namebuf, sizeof(namebuf), "%s/pkg/%s_%s/%Z.%c", goroot, goos, goarch, name, thechar);
410 411 412
		if(access(namebuf, 0) >= 0)
			return 1;
	}
Rob Pike's avatar
Rob Pike committed
413 414 415
	return 0;
}

416
void
Russ Cox's avatar
Russ Cox committed
417
importfile(Val *f, int line)
418 419
{
	Biobuf *imp;
420
	char *file, *p, *q;
421
	int32 c;
Rob Pike's avatar
Rob Pike committed
422
	int len;
423
	Strlit *path;
424
	char *cleanbuf;
425

426
	// TODO(rsc): don't bother reloading imports more than once?
427

428 429 430 431
	if(f->ctype != CTSTR) {
		yyerror("import statement not a string");
		return;
	}
Ken Thompson's avatar
Ken Thompson committed
432

433 434 435 436
	if(strlen(f->u.sval->s) != f->u.sval->len) {
		yyerror("import path contains NUL");
		errorexit();
	}
437

438 439 440 441 442 443 444 445 446
	// The package name main is no longer reserved,
	// but we reserve the import path "main" to identify
	// the main package, just as we reserve the import 
	// path "math" to identify the standard math package.
	if(strcmp(f->u.sval->s, "main") == 0) {
		yyerror("cannot import \"main\"");
		errorexit();
	}

Ken Thompson's avatar
Ken Thompson committed
447
	if(strcmp(f->u.sval->s, "unsafe") == 0) {
Russ Cox's avatar
Russ Cox committed
448 449 450 451
		if(safemode) {
			yyerror("cannot import package unsafe");
			errorexit();
		}
452
		importpkg = mkpkg(f->u.sval);
Ken Thompson's avatar
Ken Thompson committed
453 454 455
		cannedimports("unsafe.6", unsafeimport);
		return;
	}
456
	
457 458
	path = f->u.sval;
	if(islocalname(path)) {
459
		cleanbuf = mal(strlen(pathname) + strlen(path->s) + 2);
460 461 462
		strcpy(cleanbuf, pathname);
		strcat(cleanbuf, "/");
		strcat(cleanbuf, path->s);
463 464 465 466
		cleanname(cleanbuf);
		path = strlit(cleanbuf);
	}

467 468 469 470
	if(!findpkg(path)) {
		yyerror("can't find import: %Z", f->u.sval);
		errorexit();
	}
471 472
	importpkg = mkpkg(path);

Rob Pike's avatar
Rob Pike committed
473
	imp = Bopen(namebuf, OREAD);
474
	if(imp == nil) {
475
		yyerror("can't open import: %Z: %r", f->u.sval);
476 477
		errorexit();
	}
478
	file = strdup(namebuf);
479

Rob Pike's avatar
Rob Pike committed
480
	len = strlen(namebuf);
481
	if(len > 2 && namebuf[len-2] == '.' && namebuf[len-1] == 'a') {
482
		if(!skiptopkgdef(imp)) {
483
			yyerror("import %s: not a package file", file);
484 485
			errorexit();
		}
486
	}
487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
	
	// check object header
	p = Brdstr(imp, '\n', 1);
	if(strcmp(p, "empty archive") != 0) {
		if(strncmp(p, "go object ", 10) != 0) {
			yyerror("import %s: not a go object file", file);
			errorexit();
		}
		q = smprint("%s %s %s", getgoos(), thestring, getgoversion());
		if(strcmp(p+10, q) != 0) {
			yyerror("import %s: object is [%s] expected [%s]", file, p+10, q);
			errorexit();
		}
		free(q);
	}
502

503 504 505 506
	// assume files move (get installed)
	// so don't record the full path.
	linehist(file + len - path->len - 2, -1, 1);	// acts as #pragma lib

507 508
	/*
	 * position the input right
Russ Cox's avatar
Russ Cox committed
509
	 * after $$ and return
510 511 512 513
	 */
	pushedio = curio;
	curio.bin = imp;
	curio.peekc = 0;
Ken Thompson's avatar
Ken Thompson committed
514
	curio.peekc1 = 0;
515
	curio.infile = file;
Russ Cox's avatar
Russ Cox committed
516
	curio.nlsemi = 0;
517
	typecheckok = 1;
518

519 520 521 522
	for(;;) {
		c = getc();
		if(c == EOF)
			break;
Russ Cox's avatar
Russ Cox committed
523
		if(c != '$')
524 525 526 527
			continue;
		c = getc();
		if(c == EOF)
			break;
Russ Cox's avatar
Russ Cox committed
528
		if(c != '$')
529 530 531
			continue;
		return;
	}
Ken Thompson's avatar
Ken Thompson committed
532
	yyerror("no import in: %Z", f->u.sval);
533 534 535 536 537 538 539 540 541
	unimportfile();
}

void
unimportfile(void)
{
	if(curio.bin != nil) {
		Bterm(curio.bin);
		curio.bin = nil;
Ken Thompson's avatar
Ken Thompson committed
542
	} else
543
		lexlineno--;	// re correct sys.6 line number
Russ Cox's avatar
Russ Cox committed
544

545 546
	curio = pushedio;
	pushedio.bin = nil;
Russ Cox's avatar
Russ Cox committed
547
	incannedimport = 0;
548
	typecheckok = 0;
549 550 551
}

void
Ken Thompson's avatar
Ken Thompson committed
552
cannedimports(char *file, char *cp)
553
{
554
	lexlineno++;		// if sys.6 is included on line 1,
555

556 557 558
	pushedio = curio;
	curio.bin = nil;
	curio.peekc = 0;
Ken Thompson's avatar
Ken Thompson committed
559
	curio.peekc1 = 0;
560
	curio.infile = file;
Ken Thompson's avatar
Ken Thompson committed
561
	curio.cp = cp;
Russ Cox's avatar
Russ Cox committed
562
	curio.nlsemi = 0;
563
	curio.importsafe = 0;
564

565
	typecheckok = 1;
Russ Cox's avatar
Russ Cox committed
566
	incannedimport = 1;
567 568
}

Russ Cox's avatar
Russ Cox committed
569
// isfrog reports whether c is a character the lexer should
// reject as a token: any negative value, any control character
// below ' ' other than newline, carriage return and tab, and
// anything in the range 0x7f..0xa0.  It returns 1 for such
// characters and 0 otherwise.
static int
isfrog(int c)
{
	// complain about possibly invisible control characters
	if(c < 0)
		return 1;
	if(c < ' ') {
		if(c == '\n' || c== '\r' || c == '\t')	// good white space
			return 0;
		return 1;
	}
	if(0x7f <= c && c <= 0xa0)	// DEL, unicode block including unbreakable space.
		return 1;
	return 0;
}

Russ Cox's avatar
Russ Cox committed
585 586 587 588 589 590
// Loophack is one entry of the stack _yylex keeps for the
// loophack mechanism: on '(' or '[' the current loophack value
// is pushed, and on ')' or ']' it is popped and restored.
typedef struct Loophack Loophack;
struct Loophack {
	int v;			// saved value of loophack
	Loophack *next;		// next entry down the stack
};

591 592
static int32
_yylex(void)
593
{
594
	int c, c1, clen, escflag, ncp;
Ken Thompson's avatar
Ken Thompson committed
595
	vlong v;
Russ Cox's avatar
Russ Cox committed
596
	char *cp, *ep;
597 598
	Rune rune;
	Sym *s;
Russ Cox's avatar
Russ Cox committed
599 600
	static Loophack *lstk;
	Loophack *h;
601

602 603
	prevlineno = lineno;

604 605
l0:
	c = getc();
Russ Cox's avatar
Russ Cox committed
606
	if(yy_isspace(c)) {
Russ Cox's avatar
Russ Cox committed
607 608 609 610 611
		if(c == '\n' && curio.nlsemi) {
			ungetc(c);
			DBG("lex: implicit semi\n");
			return ';';
		}
612
		goto l0;
Russ Cox's avatar
Russ Cox committed
613
	}
614

615 616
	lineno = lexlineno;	/* start of token */

617 618
	if(c >= Runeself) {
		/* all multibyte runes are alpha */
619
		cp = lexbuf;
Russ Cox's avatar
Russ Cox committed
620
		ep = lexbuf+sizeof lexbuf;
621 622 623
		goto talph;
	}

Russ Cox's avatar
Russ Cox committed
624
	if(yy_isalpha(c)) {
625
		cp = lexbuf;
Russ Cox's avatar
Russ Cox committed
626
		ep = lexbuf+sizeof lexbuf;
627 628 629
		goto talph;
	}

Russ Cox's avatar
Russ Cox committed
630
	if(yy_isdigit(c))
631 632 633 634
		goto tnum;

	switch(c) {
	case EOF:
Russ Cox's avatar
Russ Cox committed
635
		lineno = prevlineno;
636 637 638 639
		ungetc(EOF);
		return -1;

	case '_':
640
		cp = lexbuf;
Russ Cox's avatar
Russ Cox committed
641
		ep = lexbuf+sizeof lexbuf;
642 643 644 645
		goto talph;

	case '.':
		c1 = getc();
Russ Cox's avatar
Russ Cox committed
646
		if(yy_isdigit(c1)) {
647
			cp = lexbuf;
Russ Cox's avatar
Russ Cox committed
648
			ep = lexbuf+sizeof lexbuf;
649 650 651 652 653
			*cp++ = c;
			c = c1;
			c1 = 0;
			goto casedot;
		}
Ken Thompson's avatar
Ken Thompson committed
654 655 656 657 658 659 660 661 662
		if(c1 == '.') {
			c1 = getc();
			if(c1 == '.') {
				c = LDDD;
				goto lx;
			}
			ungetc(c1);
			c1 = '.';
		}
663 664 665 666
		break;

	case '"':
		/* "..." */
667
		strcpy(lexbuf, "\"<string>\"");
668
		cp = mal(8);
Ken Thompson's avatar
Ken Thompson committed
669
		clen = sizeof(int32);
670
		ncp = 8;
671 672

		for(;;) {
673 674 675 676
			if(clen+UTFmax > ncp) {
				cp = remal(cp, ncp, ncp);
				ncp += ncp;
			}
Ken Thompson's avatar
Ken Thompson committed
677
			if(escchar('"', &escflag, &v))
678
				break;
Ken Thompson's avatar
Ken Thompson committed
679
			if(v < Runeself || escflag) {
Ken Thompson's avatar
Ken Thompson committed
680
				cp[clen++] = v;
Ken Thompson's avatar
Ken Thompson committed
681
			} else {
Ken Thompson's avatar
Ken Thompson committed
682
				rune = v;
683
				c = runelen(rune);
Ken Thompson's avatar
Ken Thompson committed
684 685
				runetochar(cp+clen, &rune);
				clen += c;
686 687
			}
		}
Russ Cox's avatar
Russ Cox committed
688 689
		goto strlit;
	
690 691
	case '`':
		/* `...` */
692
		strcpy(lexbuf, "`<string>`");
693
		cp = mal(8);
Ken Thompson's avatar
Ken Thompson committed
694
		clen = sizeof(int32);
695
		ncp = 8;
696 697

		for(;;) {
698 699
			if(clen+UTFmax > ncp) {
				cp = remal(cp, ncp, ncp);
700 701
				ncp += ncp;
			}
702
			c = getr();
703
			if(c == EOF) {
704
				yyerror("eof in string");
705 706 707
				break;
			}
			if(c == '`')
708
				break;
709 710
			rune = c;
			clen += runetochar(cp+clen, &rune);
711
		}
Ken Thompson's avatar
Ken Thompson committed
712

Russ Cox's avatar
Russ Cox committed
713
	strlit:
Ken Thompson's avatar
Ken Thompson committed
714
		*(int32*)cp = clen-sizeof(int32);	// length
715
		do {
Ken Thompson's avatar
Ken Thompson committed
716 717
			cp[clen++] = 0;
		} while(clen & MAXALIGN);
718
		yylval.val.u.sval = (Strlit*)cp;
719 720 721 722 723 724
		yylval.val.ctype = CTSTR;
		DBG("lex: string literal\n");
		return LLITERAL;

	case '\'':
		/* '.' */
Russ Cox's avatar
Russ Cox committed
725 726 727 728
		if(escchar('\'', &escflag, &v)) {
			yyerror("empty character literal or unescaped ' in character literal");
			v = '\'';
		}
Ken Thompson's avatar
Ken Thompson committed
729
		if(!escchar('\'', &escflag, &v)) {
730
			yyerror("missing '");
Ken Thompson's avatar
Ken Thompson committed
731
			ungetc(v);
732
		}
Ken Thompson's avatar
Ken Thompson committed
733 734
		yylval.val.u.xval = mal(sizeof(*yylval.val.u.xval));
		mpmovecfix(yylval.val.u.xval, v);
735 736 737 738 739 740 741
		yylval.val.ctype = CTINT;
		DBG("lex: codepoint literal\n");
		return LLITERAL;

	case '/':
		c1 = getc();
		if(c1 == '*') {
Russ Cox's avatar
Russ Cox committed
742 743 744
			int nl;
			
			nl = 0;
745 746
			for(;;) {
				c = getr();
Russ Cox's avatar
Russ Cox committed
747 748
				if(c == '\n')
					nl = 1;
749 750
				while(c == '*') {
					c = getr();
Russ Cox's avatar
Russ Cox committed
751 752 753
					if(c == '/') {
						if(nl)
							ungetc('\n');
754
						goto l0;
Russ Cox's avatar
Russ Cox committed
755 756 757
					}
					if(c == '\n')
						nl = 1;
758 759 760 761 762 763 764 765
				}
				if(c == EOF) {
					yyerror("eof in comment");
					errorexit();
				}
			}
		}
		if(c1 == '/') {
766
			c = getlinepragma();
767
			for(;;) {
Russ Cox's avatar
Russ Cox committed
768
				if(c == '\n' || c == EOF) {
Russ Cox's avatar
Russ Cox committed
769
					ungetc(c);
770
					goto l0;
Russ Cox's avatar
Russ Cox committed
771
				}
772
				c = getr();
773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
			}
		}
		if(c1 == '=') {
			c = ODIV;
			goto asop;
		}
		break;

	case ':':
		c1 = getc();
		if(c1 == '=') {
			c = LCOLAS;
			goto lx;
		}
		break;

	case '*':
		c1 = getc();
		if(c1 == '=') {
			c = OMUL;
			goto asop;
		}
		break;

	case '%':
		c1 = getc();
		if(c1 == '=') {
			c = OMOD;
			goto asop;
		}
		break;

	case '+':
		c1 = getc();
		if(c1 == '+') {
			c = LINC;
			goto lx;
		}
		if(c1 == '=') {
			c = OADD;
			goto asop;
		}
		break;

	case '-':
		c1 = getc();
		if(c1 == '-') {
			c = LDEC;
			goto lx;
		}
		if(c1 == '=') {
			c = OSUB;
			goto asop;
		}
		break;

	case '>':
		c1 = getc();
		if(c1 == '>') {
			c = LRSH;
			c1 = getc();
			if(c1 == '=') {
				c = ORSH;
				goto asop;
			}
			break;
		}
		if(c1 == '=') {
			c = LGE;
			goto lx;
		}
		c = LGT;
		break;

	case '<':
		c1 = getc();
		if(c1 == '<') {
			c = LLSH;
			c1 = getc();
			if(c1 == '=') {
				c = OLSH;
				goto asop;
			}
			break;
		}
		if(c1 == '=') {
			c = LLE;
			goto lx;
		}
Ken Thompson's avatar
Ken Thompson committed
862
		if(c1 == '-') {
Rob Pike's avatar
Rob Pike committed
863
			c = LCOMM;
Ken Thompson's avatar
Ken Thompson committed
864 865
			goto lx;
		}
866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890
		c = LLT;
		break;

	case '=':
		c1 = getc();
		if(c1 == '=') {
			c = LEQ;
			goto lx;
		}
		break;

	case '!':
		c1 = getc();
		if(c1 == '=') {
			c = LNE;
			goto lx;
		}
		break;

	case '&':
		c1 = getc();
		if(c1 == '&') {
			c = LANDAND;
			goto lx;
		}
891 892 893 894 895 896 897 898 899
		if(c1 == '^') {
			c = LANDNOT;
			c1 = getc();
			if(c1 == '=') {
				c = OANDNOT;
				goto asop;
			}
			break;
		}
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925
		if(c1 == '=') {
			c = OAND;
			goto asop;
		}
		break;

	case '|':
		c1 = getc();
		if(c1 == '|') {
			c = LOROR;
			goto lx;
		}
		if(c1 == '=') {
			c = OOR;
			goto asop;
		}
		break;

	case '^':
		c1 = getc();
		if(c1 == '=') {
			c = OXOR;
			goto asop;
		}
		break;

Russ Cox's avatar
Russ Cox committed
926 927 928 929 930 931 932 933 934 935 936 937 938
	/*
	 * clumsy dance:
	 * to implement rule that disallows
	 *	if T{1}[0] { ... }
	 * but allows
	 * 	if (T{1}[0]) { ... }
	 * the block bodies for if/for/switch/select
	 * begin with an LBODY token, not '{'.
	 *
	 * when we see the keyword, the next
	 * non-parenthesized '{' becomes an LBODY.
	 * loophack is normally 0.
	 * a keyword makes it go up to 1.
Russ Cox's avatar
Russ Cox committed
939
	 * parens push loophack onto a stack and go back to 0.
Russ Cox's avatar
Russ Cox committed
940 941 942 943 944
	 * a '{' with loophack == 1 becomes LBODY and disables loophack.
	 *
	 * i said it was clumsy.
	 */
	case '(':
Russ Cox's avatar
Russ Cox committed
945
	case '[':
Russ Cox's avatar
Russ Cox committed
946 947 948 949 950 951 952
		if(loophack || lstk != nil) {
			h = malloc(sizeof *h);
			h->v = loophack;
			h->next = lstk;
			lstk = h;
			loophack = 0;
		}
Russ Cox's avatar
Russ Cox committed
953 954
		goto lx;
	case ')':
Russ Cox's avatar
Russ Cox committed
955
	case ']':
Russ Cox's avatar
Russ Cox committed
956 957 958 959 960 961
		if(lstk != nil) {
			h = lstk;
			loophack = h->v;
			lstk = h->next;
			free(h);
		}
Russ Cox's avatar
Russ Cox committed
962 963 964
		goto lx;
	case '{':
		if(loophack == 1) {
965
			DBG("%L lex: LBODY\n", lexlineno);
Russ Cox's avatar
Russ Cox committed
966 967 968 969 970
			loophack = 0;
			return LBODY;
		}
		goto lx;

971 972 973 974 975 976 977
	default:
		goto lx;
	}
	ungetc(c1);

lx:
	if(c > 0xff)
978
		DBG("%L lex: TOKEN %s\n", lexlineno, lexname(c));
979
	else
980
		DBG("%L lex: TOKEN '%c'\n", lexlineno, c);
981 982 983 984
	if(isfrog(c)) {
		yyerror("illegal character 0x%ux", c);
		goto l0;
	}
985 986 987 988
	if(importpkg == nil && (c == '#' || c == '$' || c == '?' || c == '@' || c == '\\')) {
		yyerror("%s: unexpected %c", "syntax error", c);
		goto l0;
	}
989 990 991
	return c;

asop:
Ken Thompson's avatar
Ken Thompson committed
992
	yylval.lint = c;	// rathole to hold which asop
993 994 995 996 997
	DBG("lex: TOKEN ASOP %c\n", c);
	return LASOP;

talph:
	/*
998
	 * cp is set to lexbuf and some
999 1000 1001
	 * prefix has been stored
	 */
	for(;;) {
Russ Cox's avatar
Russ Cox committed
1002 1003 1004 1005
		if(cp+10 >= ep) {
			yyerror("identifier too long");
			errorexit();
		}
1006
		if(c >= Runeself) {
1007 1008 1009
			ungetc(c);
			rune = getr();
			// 0xb7 · is used for internal names
Russ Cox's avatar
Russ Cox committed
1010
			if(!isalpharune(rune) && !isdigitrune(rune) && (importpkg == nil || rune != 0xb7))
1011 1012
				yyerror("invalid identifier character 0x%ux", rune);
			cp += runetochar(cp, &rune);
Russ Cox's avatar
Russ Cox committed
1013
		} else if(!yy_isalnum(c) && c != '_')
1014
			break;
1015 1016
		else
			*cp++ = c;
1017 1018 1019 1020 1021
		c = getc();
	}
	*cp = 0;
	ungetc(c);

1022
	s = lookup(lexbuf);
Russ Cox's avatar
Russ Cox committed
1023 1024
	switch(s->lexical) {
	case LIGNORE:
1025 1026
		goto l0;

Russ Cox's avatar
Russ Cox committed
1027 1028 1029 1030 1031 1032
	case LFOR:
	case LIF:
	case LSWITCH:
	case LSELECT:
		loophack = 1;	// see comment about loophack above
		break;
1033 1034 1035 1036 1037 1038 1039 1040
	}

	DBG("lex: %S %s\n", s, lexname(s->lexical));
	yylval.sym = s;
	return s->lexical;

tnum:
	c1 = 0;
1041
	cp = lexbuf;
Russ Cox's avatar
Russ Cox committed
1042
	ep = lexbuf+sizeof lexbuf;
1043 1044
	if(c != '0') {
		for(;;) {
Russ Cox's avatar
Russ Cox committed
1045 1046 1047 1048
			if(cp+10 >= ep) {
				yyerror("identifier too long");
				errorexit();
			}
1049 1050
			*cp++ = c;
			c = getc();
Russ Cox's avatar
Russ Cox committed
1051
			if(yy_isdigit(c))
1052 1053 1054 1055 1056 1057
				continue;
			goto dc;
		}
	}
	*cp++ = c;
	c = getc();
Ken Thompson's avatar
Ken Thompson committed
1058
	if(c == 'x' || c == 'X') {
1059
		for(;;) {
Russ Cox's avatar
Russ Cox committed
1060 1061 1062 1063
			if(cp+10 >= ep) {
				yyerror("identifier too long");
				errorexit();
			}
1064 1065
			*cp++ = c;
			c = getc();
Russ Cox's avatar
Russ Cox committed
1066
			if(yy_isdigit(c))
1067 1068 1069 1070 1071
				continue;
			if(c >= 'a' && c <= 'f')
				continue;
			if(c >= 'A' && c <= 'F')
				continue;
1072
			if(cp == lexbuf+2)
1073 1074 1075
				yyerror("malformed hex constant");
			goto ncu;
		}
Ken Thompson's avatar
Ken Thompson committed
1076 1077
	}

1078 1079 1080
	if(c == 'p')	// 0p begins floating point zero
		goto casep;

Ken Thompson's avatar
Ken Thompson committed
1081
	c1 = 0;
1082
	for(;;) {
Russ Cox's avatar
Russ Cox committed
1083 1084 1085 1086
		if(cp+10 >= ep) {
			yyerror("identifier too long");
			errorexit();
		}
Russ Cox's avatar
Russ Cox committed
1087
		if(!yy_isdigit(c))
Ken Thompson's avatar
Ken Thompson committed
1088 1089 1090 1091 1092
			break;
		if(c < '0' || c > '7')
			c1 = 1;		// not octal
		*cp++ = c;
		c = getc();
1093
	}
Ken Thompson's avatar
Ken Thompson committed
1094 1095 1096 1097
	if(c == '.')
		goto casedot;
	if(c == 'e' || c == 'E')
		goto casee;
1098 1099
	if(c == 'i')
		goto casei;
Ken Thompson's avatar
Ken Thompson committed
1100 1101 1102
	if(c1)
		yyerror("malformed octal constant");
	goto ncu;
1103 1104 1105 1106 1107 1108

dc:
	if(c == '.')
		goto casedot;
	if(c == 'e' || c == 'E')
		goto casee;
1109 1110
	if(c == 'p' || c == 'P')
		goto casep;
1111 1112
	if(c == 'i')
		goto casei;
1113 1114 1115 1116

ncu:
	*cp = 0;
	ungetc(c);
Ken Thompson's avatar
Ken Thompson committed
1117 1118

	yylval.val.u.xval = mal(sizeof(*yylval.val.u.xval));
1119
	mpatofix(yylval.val.u.xval, lexbuf);
Ken Thompson's avatar
Ken Thompson committed
1120
	if(yylval.val.u.xval->ovf) {
1121
		yyerror("overflow in constant");
Ken Thompson's avatar
Ken Thompson committed
1122
		mpmovecfix(yylval.val.u.xval, 0);
1123 1124 1125 1126 1127 1128 1129
	}
	yylval.val.ctype = CTINT;
	DBG("lex: integer literal\n");
	return LLITERAL;

casedot:
	for(;;) {
Russ Cox's avatar
Russ Cox committed
1130 1131 1132 1133
		if(cp+10 >= ep) {
			yyerror("identifier too long");
			errorexit();
		}
1134 1135
		*cp++ = c;
		c = getc();
Russ Cox's avatar
Russ Cox committed
1136
		if(!yy_isdigit(c))
1137 1138
			break;
	}
1139 1140
	if(c == 'i')
		goto casei;
1141 1142 1143 1144 1145 1146 1147 1148 1149 1150
	if(c != 'e' && c != 'E')
		goto caseout;

casee:
	*cp++ = 'e';
	c = getc();
	if(c == '+' || c == '-') {
		*cp++ = c;
		c = getc();
	}
Russ Cox's avatar
Russ Cox committed
1151
	if(!yy_isdigit(c))
1152
		yyerror("malformed fp constant exponent");
Russ Cox's avatar
Russ Cox committed
1153
	while(yy_isdigit(c)) {
Russ Cox's avatar
Russ Cox committed
1154 1155 1156 1157
		if(cp+10 >= ep) {
			yyerror("identifier too long");
			errorexit();
		}
1158 1159 1160
		*cp++ = c;
		c = getc();
	}
1161 1162
	if(c == 'i')
		goto casei;
1163 1164 1165 1166 1167 1168 1169 1170 1171
	goto caseout;

casep:
	*cp++ = 'p';
	c = getc();
	if(c == '+' || c == '-') {
		*cp++ = c;
		c = getc();
	}
Russ Cox's avatar
Russ Cox committed
1172
	if(!yy_isdigit(c))
1173
		yyerror("malformed fp constant exponent");
Russ Cox's avatar
Russ Cox committed
1174
	while(yy_isdigit(c)) {
Russ Cox's avatar
Russ Cox committed
1175 1176 1177 1178
		if(cp+10 >= ep) {
			yyerror("identifier too long");
			errorexit();
		}
1179 1180 1181
		*cp++ = c;
		c = getc();
	}
1182 1183
	if(c == 'i')
		goto casei;
1184
	goto caseout;
1185

1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
casei:
	// imaginary constant
	*cp = 0;
	yylval.val.u.cval = mal(sizeof(*yylval.val.u.cval));
	mpmovecflt(&yylval.val.u.cval->real, 0.0);
	mpatoflt(&yylval.val.u.cval->imag, lexbuf);
	if(yylval.val.u.cval->imag.val.ovf) {
		yyerror("overflow in imaginary constant");
		mpmovecflt(&yylval.val.u.cval->real, 0.0);
	}
	yylval.val.ctype = CTCPLX;
	DBG("lex: imaginary literal\n");
	return LLITERAL;

1200 1201 1202
caseout:
	*cp = 0;
	ungetc(c);
Ken Thompson's avatar
Ken Thompson committed
1203 1204

	yylval.val.u.fval = mal(sizeof(*yylval.val.u.fval));
1205
	mpatoflt(yylval.val.u.fval, lexbuf);
1206
	if(yylval.val.u.fval->val.ovf) {
1207
		yyerror("overflow in float constant");
Ken Thompson's avatar
Ken Thompson committed
1208
		mpmovecflt(yylval.val.u.fval, 0.0);
1209 1210 1211 1212 1213 1214
	}
	yylval.val.ctype = CTFLT;
	DBG("lex: floating literal\n");
	return LLITERAL;
}

1215 1216
/*
 * read and interpret syntax that looks like
Ken Thompson's avatar
Ken Thompson committed
1217
 * //line parse.y:15
Rob Pike's avatar
Rob Pike committed
1218
 * as a discontinuity in sequential line numbers.
1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230
 * the next line of input comes from parse.y:15
 */
static int
getlinepragma(void)
{
	int i, c, n;
	char *cp, *ep;
	Hist *h;

	for(i=0; i<5; i++) {
		c = getr();
		if(c != "line "[i])
Ken Thompson's avatar
Ken Thompson committed
1231
			goto out;
1232 1233 1234 1235 1236 1237
	}

	cp = lexbuf;
	ep = lexbuf+sizeof(lexbuf)-5;
	for(;;) {
		c = getr();
Ken Thompson's avatar
Ken Thompson committed
1238 1239
		if(c == '\n' || c == EOF)
			goto out;
1240 1241
		if(c == ' ')
			continue;
Ken Thompson's avatar
Ken Thompson committed
1242
		if(c == ':')
1243
			break;
Ken Thompson's avatar
Ken Thompson committed
1244 1245
		if(cp < ep)
			*cp++ = c;
1246 1247
	}
	*cp = 0;
Ken Thompson's avatar
Ken Thompson committed
1248 1249 1250 1251

	n = 0;
	for(;;) {
		c = getr();
Russ Cox's avatar
Russ Cox committed
1252
		if(!yy_isdigit(c))
Ken Thompson's avatar
Ken Thompson committed
1253 1254
			break;
		n = n*10 + (c-'0');
Russ Cox's avatar
Russ Cox committed
1255 1256 1257 1258
		if(n > 1e8) {
			yyerror("line number out of range");
			errorexit();
		}
Ken Thompson's avatar
Ken Thompson committed
1259 1260 1261 1262 1263 1264 1265 1266 1267 1268
	}

	if(c != '\n' || n <= 0)
		goto out;

	// try to avoid allocating file name over and over
	for(h=hist; h!=H; h=h->link) {
		if(h->name != nil && strcmp(h->name, lexbuf) == 0) {
			linehist(h->name, n, 0);
			goto out;
1269 1270
		}
	}
Ken Thompson's avatar
Ken Thompson committed
1271 1272 1273
	linehist(strdup(lexbuf), n, 0);

out:
1274 1275 1276
	return c;
}

1277 1278 1279
int32
yylex(void)
{
Russ Cox's avatar
Russ Cox committed
1280 1281 1282 1283 1284
	int lx;
	
	lx = _yylex();
	
	if(curio.nlsemi && lx == EOF) {
1285 1286
		// Treat EOF as "end of line" for the purposes
		// of inserting a semicolon.
Russ Cox's avatar
Russ Cox committed
1287
		lx = ';';
1288 1289
	}

Russ Cox's avatar
Russ Cox committed
1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306
	switch(lx) {
	case LNAME:
	case LLITERAL:
	case LBREAK:
	case LCONTINUE:
	case LFALL:
	case LRETURN:
	case LINC:
	case LDEC:
	case ')':
	case '}':
	case ']':
		curio.nlsemi = 1;
		break;
	default:
		curio.nlsemi = 0;
		break;
1307
	}
Russ Cox's avatar
Russ Cox committed
1308 1309 1310 1311

	// Track last two tokens returned by yylex.
	yyprev = yylast;
	yylast = lx;
Ken Thompson's avatar
Ken Thompson committed
1312
 	return lx;
1313 1314
}

Russ Cox's avatar
Russ Cox committed
1315
static int
1316 1317 1318 1319 1320 1321
getc(void)
{
	int c;

	c = curio.peekc;
	if(c != 0) {
Ken Thompson's avatar
Ken Thompson committed
1322 1323
		curio.peekc = curio.peekc1;
		curio.peekc1 = 0;
1324
		if(c == '\n' && pushedio.bin == nil)
1325
			lexlineno++;
1326 1327
		return c;
	}
Russ Cox's avatar
Russ Cox committed
1328
	
1329 1330 1331 1332 1333 1334 1335 1336 1337
	if(curio.bin == nil) {
		c = *curio.cp & 0xff;
		if(c != 0)
			curio.cp++;
	} else
		c = Bgetc(curio.bin);

	switch(c) {
	case 0:
1338 1339
		if(curio.bin != nil) {
			yyerror("illegal NUL byte");
Rob Pike's avatar
Rob Pike committed
1340
			break;
1341
		}
1342
	case EOF:
Russ Cox's avatar
Russ Cox committed
1343 1344 1345 1346 1347
		// insert \n at EOF
		if(curio.eofnl)
			return EOF;
		curio.eofnl = 1;
		c = '\n';
1348
	case '\n':
1349 1350
		if(pushedio.bin == nil)
			lexlineno++;
1351 1352 1353 1354 1355
		break;
	}
	return c;
}

Russ Cox's avatar
Russ Cox committed
1356
static void
1357 1358
ungetc(int c)
{
Ken Thompson's avatar
Ken Thompson committed
1359
	curio.peekc1 = curio.peekc;
1360
	curio.peekc = c;
1361
	if(c == '\n' && pushedio.bin == nil)
1362
		lexlineno--;
1363 1364
}

Russ Cox's avatar
Russ Cox committed
1365
static int32
1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384
getr(void)
{
	int c, i;
	char str[UTFmax+1];
	Rune rune;

	c = getc();
	if(c < Runeself)
		return c;
	i = 0;
	str[i++] = c;

loop:
	c = getc();
	str[i++] = c;
	if(!fullrune(str, i))
		goto loop;
	c = chartorune(&rune, str);
	if(rune == Runeerror && c == 1) {
1385
		lineno = lexlineno;
1386
		yyerror("illegal UTF-8 sequence");
1387
		flusherrors();
1388
		print("\t");
1389
		for(c=0; c<i; c++)
1390
			print("%s%.2x", c > 0 ? " " : "", *(uchar*)(str+c));
1391 1392 1393 1394 1395
		print("\n");
	}
	return rune;
}

Russ Cox's avatar
Russ Cox committed
1396
static int
Ken Thompson's avatar
Ken Thompson committed
1397
escchar(int e, int *escflg, vlong *val)
1398
{
Russ Cox's avatar
Russ Cox committed
1399
	int i, u, c;
Ken Thompson's avatar
Ken Thompson committed
1400
	vlong l;
1401

Ken Thompson's avatar
Ken Thompson committed
1402 1403
	*escflg = 0;

1404
	c = getr();
1405 1406 1407 1408 1409
	switch(c) {
	case EOF:
		yyerror("eof in string");
		return 1;
	case '\n':
1410
		yyerror("newline in string");
Ken Thompson's avatar
Ken Thompson committed
1411
		return 1;
1412 1413 1414
	case '\\':
		break;
	default:
1415
		if(c == e)
Ken Thompson's avatar
Ken Thompson committed
1416 1417 1418
			return 1;
		*val = c;
		return 0;
1419
	}
Ken Thompson's avatar
Ken Thompson committed
1420

Russ Cox's avatar
Russ Cox committed
1421
	u = 0;
1422 1423 1424
	c = getr();
	switch(c) {
	case 'x':
Ken Thompson's avatar
Ken Thompson committed
1425
		*escflg = 1;	// it's a byte
1426 1427 1428 1429 1430
		i = 2;
		goto hex;

	case 'u':
		i = 4;
Russ Cox's avatar
Russ Cox committed
1431
		u = 1;
1432 1433 1434 1435
		goto hex;

	case 'U':
		i = 8;
Russ Cox's avatar
Russ Cox committed
1436
		u = 1;
1437 1438 1439 1440 1441 1442 1443 1444 1445 1446
		goto hex;

	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
Ken Thompson's avatar
Ken Thompson committed
1447
		*escflg = 1;	// it's a byte
1448 1449
		goto oct;

Ken Thompson's avatar
Ken Thompson committed
1450 1451 1452 1453 1454 1455 1456 1457
	case 'a': c = '\a'; break;
	case 'b': c = '\b'; break;
	case 'f': c = '\f'; break;
	case 'n': c = '\n'; break;
	case 'r': c = '\r'; break;
	case 't': c = '\t'; break;
	case 'v': c = '\v'; break;
	case '\\': c = '\\'; break;
1458 1459 1460

	default:
		if(c != e)
1461
			yyerror("unknown escape sequence: %c", c);
1462
	}
Ken Thompson's avatar
Ken Thompson committed
1463 1464
	*val = c;
	return 0;
1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481

hex:
	l = 0;
	for(; i>0; i--) {
		c = getc();
		if(c >= '0' && c <= '9') {
			l = l*16 + c-'0';
			continue;
		}
		if(c >= 'a' && c <= 'f') {
			l = l*16 + c-'a' + 10;
			continue;
		}
		if(c >= 'A' && c <= 'F') {
			l = l*16 + c-'A' + 10;
			continue;
		}
1482
		yyerror("non-hex character in escape sequence: %c", c);
1483 1484 1485
		ungetc(c);
		break;
	}
Russ Cox's avatar
Russ Cox committed
1486
	if(u && (l > Runemax || (0xd800 <= l && l < 0xe000))) {
Russ Cox's avatar
Russ Cox committed
1487 1488 1489
		yyerror("invalid Unicode code point in escape sequence: %#llx", l);
		l = Runeerror;
	}
Ken Thompson's avatar
Ken Thompson committed
1490 1491
	*val = l;
	return 0;
1492 1493 1494 1495 1496 1497 1498 1499 1500

oct:
	l = c - '0';
	for(i=2; i>0; i--) {
		c = getc();
		if(c >= '0' && c <= '7') {
			l = l*8 + c-'0';
			continue;
		}
1501
		yyerror("non-octal character in escape sequence: %c", c);
1502 1503 1504
		ungetc(c);
	}
	if(l > 255)
1505
		yyerror("octal escape value > 255: %d", l);
Ken Thompson's avatar
Ken Thompson committed
1506

Ken Thompson's avatar
Ken Thompson committed
1507 1508
	*val = l;
	return 0;
1509 1510 1511 1512 1513 1514 1515
}

static	struct
{
	char*	name;
	int	lexical;
	int	etype;
Russ Cox's avatar
Russ Cox committed
1516
	int	op;
1517 1518
} syms[] =
{
Russ Cox's avatar
Russ Cox committed
1519
/*	name		lexical		etype		op
1520 1521
 */
/* basic types */
Russ Cox's avatar
Russ Cox committed
1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534
	"int8",		LNAME,		TINT8,		OXXX,
	"int16",	LNAME,		TINT16,		OXXX,
	"int32",	LNAME,		TINT32,		OXXX,
	"int64",	LNAME,		TINT64,		OXXX,

	"uint8",	LNAME,		TUINT8,		OXXX,
	"uint16",	LNAME,		TUINT16,	OXXX,
	"uint32",	LNAME,		TUINT32,	OXXX,
	"uint64",	LNAME,		TUINT64,	OXXX,

	"float32",	LNAME,		TFLOAT32,	OXXX,
	"float64",	LNAME,		TFLOAT64,	OXXX,

1535 1536 1537
	"complex64",	LNAME,		TCOMPLEX64,	OXXX,
	"complex128",	LNAME,		TCOMPLEX128,	OXXX,

Russ Cox's avatar
Russ Cox committed
1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569
	"bool",		LNAME,		TBOOL,		OXXX,
	"byte",		LNAME,		TUINT8,		OXXX,
	"string",	LNAME,		TSTRING,	OXXX,

	"any",		LNAME,		TANY,		OXXX,

	"break",	LBREAK,		Txxx,		OXXX,
	"case",		LCASE,		Txxx,		OXXX,
	"chan",		LCHAN,		Txxx,		OXXX,
	"const",	LCONST,		Txxx,		OXXX,
	"continue",	LCONTINUE,	Txxx,		OXXX,
	"default",	LDEFAULT,	Txxx,		OXXX,
	"else",		LELSE,		Txxx,		OXXX,
	"defer",	LDEFER,		Txxx,		OXXX,
	"fallthrough",	LFALL,		Txxx,		OXXX,
	"for",		LFOR,		Txxx,		OXXX,
	"func",		LFUNC,		Txxx,		OXXX,
	"go",		LGO,		Txxx,		OXXX,
	"goto",		LGOTO,		Txxx,		OXXX,
	"if",		LIF,		Txxx,		OXXX,
	"import",	LIMPORT,	Txxx,		OXXX,
	"interface",	LINTERFACE,	Txxx,		OXXX,
	"map",		LMAP,		Txxx,		OXXX,
	"package",	LPACKAGE,	Txxx,		OXXX,
	"range",	LRANGE,		Txxx,		OXXX,
	"return",	LRETURN,	Txxx,		OXXX,
	"select",	LSELECT,	Txxx,		OXXX,
	"struct",	LSTRUCT,	Txxx,		OXXX,
	"switch",	LSWITCH,	Txxx,		OXXX,
	"type",		LTYPE,		Txxx,		OXXX,
	"var",		LVAR,		Txxx,		OXXX,

Russ Cox's avatar
Russ Cox committed
1570
	"append",		LNAME,		Txxx,		OAPPEND,
Russ Cox's avatar
Russ Cox committed
1571 1572
	"cap",		LNAME,		Txxx,		OCAP,
	"close",	LNAME,		Txxx,		OCLOSE,
Russ Cox's avatar
Russ Cox committed
1573
	"complex",	LNAME,		Txxx,		OCOMPLEX,
Ken Thompson's avatar
Ken Thompson committed
1574
	"copy",		LNAME,		Txxx,		OCOPY,
Ken Thompson's avatar
Ken Thompson committed
1575
	"imag",		LNAME,		Txxx,		OIMAG,
Russ Cox's avatar
Russ Cox committed
1576 1577 1578 1579 1580 1581
	"len",		LNAME,		Txxx,		OLEN,
	"make",		LNAME,		Txxx,		OMAKE,
	"new",		LNAME,		Txxx,		ONEW,
	"panic",	LNAME,		Txxx,		OPANIC,
	"print",	LNAME,		Txxx,		OPRINT,
	"println",	LNAME,		Txxx,		OPRINTN,
Ken Thompson's avatar
Ken Thompson committed
1582
	"real",		LNAME,		Txxx,		OREAL,
1583
	"recover",	LNAME,		Txxx,		ORECOVER,
Russ Cox's avatar
Russ Cox committed
1584 1585 1586 1587 1588 1589

	"notwithstanding",		LIGNORE,	Txxx,		OXXX,
	"thetruthofthematter",		LIGNORE,	Txxx,		OXXX,
	"despiteallobjections",		LIGNORE,	Txxx,		OXXX,
	"whereas",			LIGNORE,	Txxx,		OXXX,
	"insofaras",			LIGNORE,	Txxx,		OXXX,
1590 1591
};

Russ Cox's avatar
Russ Cox committed
1592
static void
1593 1594
lexinit(void)
{
1595
	int i, lex;
1596
	Sym *s, *s1;
1597 1598
	Type *t;
	int etype;
Ken Thompson's avatar
Ken Thompson committed
1599

1600 1601 1602 1603 1604 1605 1606 1607 1608
	/*
	 * initialize basic types array
	 * initialize known symbols
	 */
	for(i=0; i<nelem(syms); i++) {
		lex = syms[i].lexical;
		s = lookup(syms[i].name);
		s->lexical = lex;

1609
		etype = syms[i].etype;
Russ Cox's avatar
Russ Cox committed
1610 1611 1612 1613 1614 1615 1616 1617
		if(etype != Txxx) {
			if(etype < 0 || etype >= nelem(types))
				fatal("lexinit: %s bad etype", s->name);
			t = types[etype];
			if(t == T) {
				t = typ(etype);
				t->sym = s;

1618 1619
				if(etype != TANY && etype != TSTRING)
					dowidth(t);
Russ Cox's avatar
Russ Cox committed
1620 1621
				types[etype] = t;
			}
1622
			s1 = pkglookup(syms[i].name, builtinpkg);
1623 1624
			s1->lexical = LNAME;
			s1->def = typenod(t);
1625 1626 1627
			continue;
		}
	}
Russ Cox's avatar
6g:  
Russ Cox committed
1628 1629 1630 1631 1632 1633 1634

	// logically, the type of a string literal.
	// types[TSTRING] is the named type string
	// (the type of x in var x string or var x = "hello").
	// this is the ideal form
	// (the type of x in const x = "hello").
	idealstring = typ(TSTRING);
Russ Cox's avatar
Russ Cox committed
1635 1636
	idealbool = typ(TBOOL);

1637
	s = pkglookup("true", builtinpkg);
Russ Cox's avatar
Russ Cox committed
1638 1639 1640 1641
	s->def = nodbool(1);
	s->def->sym = lookup("true");
	s->def->type = idealbool;

1642
	s = pkglookup("false", builtinpkg);
Russ Cox's avatar
Russ Cox committed
1643 1644 1645
	s->def = nodbool(0);
	s->def->sym = lookup("false");
	s->def->type = idealbool;
Russ Cox's avatar
Russ Cox committed
1646 1647 1648 1649 1650 1651 1652

	s = lookup("_");
	s->block = -100;
	s->def = nod(ONAME, N, N);
	s->def->sym = s;
	types[TBLANK] = typ(TBLANK);
	s->def->type = types[TBLANK];
Russ Cox's avatar
Russ Cox committed
1653
	nblank = s->def;
1654 1655
}

Russ Cox's avatar
Russ Cox committed
1656
static void
1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670
lexfini(void)
{
	Sym *s;
	int lex, etype, i;
	Val v;

	for(i=0; i<nelem(syms); i++) {
		lex = syms[i].lexical;
		if(lex != LNAME)
			continue;
		s = lookup(syms[i].name);
		s->lexical = lex;

		etype = syms[i].etype;
1671 1672
		if(etype != Txxx && (etype != TANY || debug['A']) && s->def == N)
			s->def = typenod(types[etype]);
1673 1674

		etype = syms[i].op;
1675 1676
		if(etype != OXXX && s->def == N) {
			s->def = nod(ONAME, N, N);
1677 1678 1679 1680 1681 1682 1683 1684
			s->def->sym = s;
			s->def->etype = etype;
			s->def->builtin = 1;
		}
	}

	for(i=0; typedefs[i].name; i++) {
		s = lookup(typedefs[i].name);
1685 1686
		if(s->def == N)
			s->def = typenod(types[typedefs[i].etype]);
1687 1688 1689 1690 1691 1692
	}

	// there's only so much table-driven we can handle.
	// these are special cases.
	types[TNIL] = typ(TNIL);
	s = lookup("nil");
1693
	if(s->def == N) {
1694
		v.ctype = CTNIL;
1695 1696 1697 1698 1699 1700 1701
		s->def = nodlit(v);
		s->def->sym = s;
	}
	
	s = lookup("iota");
	if(s->def == N) {
		s->def = nod(OIOTA, N, N);
1702 1703 1704 1705
		s->def->sym = s;
	}

	s = lookup("true");
1706 1707
	if(s->def == N) {
		s->def = nodbool(1);
1708 1709 1710 1711
		s->def->sym = s;
	}

	s = lookup("false");
1712 1713
	if(s->def == N) {
		s->def = nodbool(0);
1714 1715
		s->def->sym = s;
	}
Russ Cox's avatar
Russ Cox committed
1716 1717 1718 1719 1720 1721 1722
	
	nodfp = nod(ONAME, N, N);
	nodfp->noescape = 1;
	nodfp->type = types[TINT32];
	nodfp->xoffset = 0;
	nodfp->class = PPARAM;
	nodfp->sym = lookup(".fp");
1723 1724
}

1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739
struct
{
	int	lex;
	char*	name;
} lexn[] =
{
	LANDAND,	"ANDAND",
	LASOP,		"ASOP",
	LBREAK,		"BREAK",
	LCASE,		"CASE",
	LCHAN,		"CHAN",
	LCOLAS,		"COLAS",
	LCONST,		"CONST",
	LCONTINUE,	"CONTINUE",
	LDEC,		"DEC",
Russ Cox's avatar
Russ Cox committed
1740
	LDEFER,		"DEFER",
1741 1742
	LELSE,		"ELSE",
	LEQ,		"EQ",
Russ Cox's avatar
Russ Cox committed
1743 1744
	LFALL,		"FALL",
	LFOR,		"FOR",
1745 1746 1747 1748 1749 1750
	LFUNC,		"FUNC",
	LGE,		"GE",
	LGO,		"GO",
	LGOTO,		"GOTO",
	LGT,		"GT",
	LIF,		"IF",
Russ Cox's avatar
Russ Cox committed
1751
	LIMPORT,	"IMPORT",
1752 1753 1754 1755 1756 1757 1758 1759 1760 1761
	LINC,		"INC",
	LINTERFACE,	"INTERFACE",
	LLE,		"LE",
	LLITERAL,	"LITERAL",
	LLSH,		"LSH",
	LLT,		"LT",
	LMAP,		"MAP",
	LNAME,		"NAME",
	LNE,		"NE",
	LOROR,		"OROR",
Russ Cox's avatar
Russ Cox committed
1762
	LPACKAGE,	"PACKAGE",
1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784
	LRANGE,		"RANGE",
	LRETURN,	"RETURN",
	LRSH,		"RSH",
	LSTRUCT,	"STRUCT",
	LSWITCH,	"SWITCH",
	LTYPE,		"TYPE",
	LVAR,		"VAR",
};

/*
 * return a printable name for token code lex.
 * codes missing from lexn are formatted as "LEX-<number>" into a
 * static buffer, which the next such call overwrites.
 */
char*
lexname(int lex)
{
	static char buf[100];
	int k;

	k = 0;
	while(k < nelem(lexn)) {
		if(lexn[k].lex == lex)
			return lexn[k].name;
		k++;
	}
	snprint(buf, sizeof(buf), "LEX-%d", lex);
	return buf;
}

Russ Cox's avatar
Russ Cox committed
1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835
/*
 * replacements applied to yytname by yytinit:
 * an entry spelled `have` is rewritten to `want`.
 */
struct
{
	char *have;
	char *want;
} yytfix[] =
{
	{"$end",	"EOF"},
	{"LLITERAL",	"literal"},
	{"LASOP",	"op="},
	{"LBREAK",	"break"},
	{"LCASE",	"case"},
	{"LCOLAS",	":="},
	{"LCONST",	"const"},
	{"LCONTINUE",	"continue"},
	{"LDDD",	"..."},
	{"LDEFAULT",	"default"},
	{"LDEFER",	"defer"},
	{"LELSE",	"else"},
	{"LFALL",	"fallthrough"},
	{"LFOR",	"for"},
	{"LFUNC",	"func"},
	{"LGO",		"go"},
	{"LGOTO",	"goto"},
	{"LIF",		"if"},
	{"LIMPORT",	"import"},
	{"LINTERFACE",	"interface"},
	{"LMAP",	"map"},
	{"LNAME",	"name"},
	{"LPACKAGE",	"package"},
	{"LRANGE",	"range"},
	{"LRETURN",	"return"},
	{"LSELECT",	"select"},
	{"LSTRUCT",	"struct"},
	{"LSWITCH",	"switch"},
	{"LTYPE",	"type"},
	{"LVAR",	"var"},
	{"LANDAND",	"&&"},
	{"LANDNOT",	"&^"},
	{"LBODY",	"{"},
	{"LCOMM",	"<-"},
	{"LDEC",	"--"},
	{"LINC",	"++"},
	{"LEQ",		"=="},
	{"LGE",		">="},
	{"LGT",		">"},
	{"LLE",		"<="},
	{"LLT",		"<"},
	{"LLSH",	"<<"},
	{"LRSH",	">>"},
	{"LOROR",	"||"},
	{"LNE",		"!="},

	// spell out to avoid confusion with punctuation in error messages
	{"';'",		"semicolon or newline"},
	{"','",		"comma"},
};

Russ Cox's avatar
Russ Cox committed
1842
static void
Russ Cox's avatar
Russ Cox committed
1843 1844 1845 1846 1847 1848 1849 1850 1851
yytinit(void)
{
	int i, j;
	extern char *yytname[];
	char *s, *t;

	for(i=0; yytname[i] != nil; i++) {
		s = yytname[i];
		
1852 1853 1854 1855 1856 1857 1858 1859
		// apply yytfix if possible
		for(j=0; j<nelem(yytfix); j++) {
			if(strcmp(s, yytfix[j].have) == 0) {
				yytname[i] = yytfix[j].want;
				goto loop;
			}
		}

Russ Cox's avatar
Russ Cox committed
1860 1861 1862 1863 1864 1865
		// turn 'x' into x.
		if(s[0] == '\'') {
			t = strdup(s+1);
			t[strlen(t)-1] = '\0';
			yytname[i] = t;
		}
1866
	loop:;
Russ Cox's avatar
Russ Cox committed
1867 1868 1869
	}		
}

1870
void
1871
mkpackage(char* pkgname)
1872 1873
{
	Sym *s;
1874
	int32 h;
Ken Thompson's avatar
Ken Thompson committed
1875
	char *p;
1876

1877 1878
	if(localpkg->name == nil) {
		if(strcmp(pkgname, "_") == 0)
Russ Cox's avatar
Russ Cox committed
1879
			yyerror("invalid package name _");
1880
		localpkg->name = pkgname;
Russ Cox's avatar
Russ Cox committed
1881
	} else {
1882 1883
		if(strcmp(pkgname, localpkg->name) != 0)
			yyerror("package %s; expected %s", pkgname, localpkg->name);
Russ Cox's avatar
Russ Cox committed
1884 1885
		for(h=0; h<NHASH; h++) {
			for(s = hash[h]; s != S; s = s->link) {
1886
				if(s->def == N || s->pkg != localpkg)
Russ Cox's avatar
Russ Cox committed
1887 1888 1889 1890
					continue;
				if(s->def->op == OPACK) {
					// throw away top-level package name leftover
					// from previous file.
1891 1892 1893
					// leave s->block set to cause redeclaration
					// errors if a conflicting top-level name is
					// introduced by a different file.
Russ Cox's avatar
Russ Cox committed
1894
					if(!s->def->used && !nsyntaxerrors)
1895
						yyerrorl(s->def->lineno, "imported and not used: %Z", s->def->pkg->path);
Russ Cox's avatar
Russ Cox committed
1896 1897 1898
					s->def = N;
					continue;
				}
1899
				if(s->def->sym != s) {
Russ Cox's avatar
Russ Cox committed
1900 1901
					// throw away top-level name left over
					// from previous import . "x"
Russ Cox's avatar
Russ Cox committed
1902
					if(s->def->pack != N && !s->def->pack->used && !nsyntaxerrors) {
1903
						yyerrorl(s->def->pack->lineno, "imported and not used: %Z", s->def->pack->pkg->path);
Russ Cox's avatar
Russ Cox committed
1904 1905
						s->def->pack->used = 1;
					}
Russ Cox's avatar
Russ Cox committed
1906 1907 1908 1909 1910
					s->def = N;
					continue;
				}
			}
		}
1911 1912 1913
	}

	if(outfile == nil) {
Ken Thompson's avatar
Ken Thompson committed
1914 1915 1916 1917 1918 1919 1920 1921 1922
		p = strrchr(infile, '/');
		if(p == nil)
			p = infile;
		else
			p = p+1;
		snprint(namebuf, sizeof(namebuf), "%s", p);
		p = strrchr(namebuf, '.');
		if(p != nil)
			*p = 0;
1923
		outfile = smprint("%s.%c", namebuf, thechar);
1924 1925
	}
}