runtime: implement string ops in Go

Also implement go:nosplit annotation. Not really needed for now, but we'll definitely need it for other conversions.

benchmark                old ns/op    new ns/op    delta
BenchmarkRuneIterate     534          474          -11.24%
BenchmarkRuneIterate2    535          470          -12.15%

LGTM=bradfitz
R=golang-codereviews, dave, bradfitz, minux
CC=golang-codereviews
https://golang.org/cl/93380044

runtime: implement string ops in Go
Also implement go:nosplit annotation. Not really needed for now, but we'll definitely need it for other conversions.

benchmark                old ns/op    new ns/op    delta
BenchmarkRuneIterate     534          474          -11.24%
BenchmarkRuneIterate2    535          470          -12.15%

LGTM=bradfitz
R=golang-codereviews, dave, bradfitz, minux
CC=golang-codereviews
https://golang.org/cl/93380044
61dca94e · Keith Randall · b36ed905 · 61dca94e · 61dca94e · 61dca94e
Commit 61dca94e authored Jun 16, 2014 by Keith Randall
19 changed files
--- a/src/cmd/gc/fmt.c
+++ b/src/cmd/gc/fmt.c
@@ -649,7 +649,7 @@ typefmt(Fmt *fp, Type *t)
 		if(t->funarg) {
 			fmtstrcpy(fp, "(");
-			if(fmtmode == FTypeId || fmtmode == FErr) {	// no argument names on function signature, and no "noescape" tags
+			if(fmtmode == FTypeId || fmtmode == FErr) {	// no argument names on function signature, and no "noescape"/"nosplit" tags
 				for(t1=t->type; t1!=T; t1=t1->down)
 					if(t1->down)
 						fmtprint(fp, "%hT, ", t1);

--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -269,6 +269,7 @@ struct	Node
 	uchar	colas;		// OAS resulting from :=
 	uchar	diag;		// already printed error about this
 	uchar	noescape;	// func arguments do not escape
+	uchar	nosplit;	// func should not execute on separate stack
 	uchar	builtin;	// built-in name, like len or close
 	uchar	walkdef;
 	uchar	typecheck;
@@ -980,6 +981,7 @@ EXTERN	char*	flag_installsuffix;
 EXTERN	int	flag_race;
 EXTERN	int	flag_largemodel;
 EXTERN	int	noescape;
+EXTERN	int	nosplit;
 EXTERN	int	debuglive;
 EXTERN	Link*	ctxt;

--- a/src/cmd/gc/go.y
+++ b/src/cmd/gc/go.y
@@ -1311,6 +1311,7 @@ xfndcl:
 		$$->nbody = $3;
 		$$->endlineno = lineno;
 		$$->noescape = noescape;
+		$$->nosplit = nosplit;
 		funcbody($$);
 	}
@@ -1495,6 +1496,7 @@ xdcl_list:
 			testdclstack();
 		nointerface = 0;
 		noescape = 0;
+		nosplit = 0;
 	}
 vardcl_list:

--- a/src/cmd/gc/lex.c
+++ b/src/cmd/gc/lex.c
@@ -1592,6 +1592,10 @@ go:
 		noescape = 1;
 		goto out;
 	}
+	if(strcmp(lexbuf, "go:nosplit") == 0) {
+		nosplit = 1;
+		goto out;
+	}
 out:
 	return c;

--- a/src/cmd/gc/pgen.c
+++ b/src/cmd/gc/pgen.c
@@ -229,6 +229,8 @@ compile(Node *fn)
 		ptxt->TEXTFLAG |= WRAPPER;
 	if(fn->needctxt)
 		ptxt->TEXTFLAG |= NEEDCTXT;
+	if(fn->nosplit)
+		ptxt->TEXTFLAG |= NOSPLIT;
 	// Clumsy but important.
 	// See test/recover.go for test cases and src/pkg/reflect/value.go

--- a/src/cmd/gc/y.tab.c
+++ b/src/cmd/gc/y.tab.c
@@ -3828,6 +3828,7 @@ yyreduce:
 		(yyval.node)->nbody = (yyvsp[(3) - (3)].list);
 		(yyval.node)->endlineno = lineno;
 		(yyval.node)->noescape = noescape;
+		(yyval.node)->nosplit = nosplit;
 		funcbody((yyval.node));
 	}
    break;
@@ -4037,6 +4038,7 @@ yyreduce:
 			testdclstack();
 		nointerface = 0;
 		noescape = 0;
+		nosplit = 0;
 	}
    break;

--- a/src/pkg/runtime/asm_386.s
+++ b/src/pkg/runtime/asm_386.s
@@ -781,6 +781,12 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$0-4
 	MOVL	-4(AX),AX		// get calling pc
 	RET
+// gogetcallerpc takes the address of its caller's first argument, loads the
+// word stored 4 bytes below that address, and returns it in the result slot
+// at 4(FP). It performs the same load as getcallerpc but stores the value
+// into a result slot instead of leaving it in AX.
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
+	MOVL	x+0(FP),AX		// addr of first arg
+	MOVL	-4(AX),AX		// get calling pc
+	MOVL	AX, r+4(FP)
+	RET
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
 	MOVL	x+0(FP),AX		// addr of first arg
 	MOVL	x+4(FP), BX

--- a/src/pkg/runtime/asm_amd64.s
+++ b/src/pkg/runtime/asm_amd64.s
@@ -858,6 +858,12 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
 	MOVQ	-8(AX),AX		// get calling pc
 	RET
+// gogetcallerpc takes the address of its caller's first argument, loads the
+// 8-byte word stored immediately below that address (the calling pc), and
+// returns it as a Go result.
+// The pointer argument occupies 0(FP)..7(FP), so the result slot is at 8(FP)
+// and the total argument+result size is 16 bytes.
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16
+	MOVQ	x+0(FP),AX		// addr of first arg
+	MOVQ	-8(AX),AX		// get calling pc
+	MOVQ	AX,r+8(FP)		// result follows the 8-byte argument
+	RET
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
 	MOVQ	x+0(FP),AX		// addr of first arg
 	MOVQ	x+8(FP), BX

--- a/src/pkg/runtime/asm_amd64p32.s
+++ b/src/pkg/runtime/asm_amd64p32.s
@@ -663,6 +663,12 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
 	MOVL	-8(AX),AX		// get calling pc
 	RET
+// gogetcallerpc takes the address of its caller's first argument, loads the
+// 32-bit word stored 8 bytes below that address, and stores it in the result
+// slot at 4(FP).
+// NOTE(review): the result offset (r+4) and the declared argument size (8)
+// assume the result is packed directly after a 4-byte pointer argument;
+// confirm this matches how the compiler lays out results on amd64p32.
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
+	MOVL	x+0(FP),AX		// addr of first arg
+	MOVL	-8(AX),AX		// get calling pc
+	MOVL	AX, r+4(FP)
+	RET
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
 	MOVL	x+0(FP),AX		// addr of first arg
 	MOVL	pc+4(FP), BX		// pc to set

--- a/src/pkg/runtime/asm_arm.s
+++ b/src/pkg/runtime/asm_arm.s
@@ -560,6 +560,10 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$-4-4
 	MOVW	0(SP), R0
 	RET
+// gogetcallerpc stores the current value of R14 into the result slot at
+// 4(FP) and returns; the pointer argument is not read.
+// NOTE(review): the neighbouring getcallerpc loads 0(SP) rather than R14 —
+// confirm that both are meant to yield the same pc.
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$-4-8
+	MOVW	R14, 4(FP)
+	RET
 TEXT runtime·setcallerpc(SB),NOSPLIT,$-4-8
 	MOVW	x+4(FP), R0
 	MOVW	R0, 0(SP)

--- a/src/pkg/runtime/error.go
+++ b/src/pkg/runtime/error.go
@@ -80,8 +80,6 @@ type errorCString struct{ cstr uintptr }
 func (e errorCString) RuntimeError() {}
-func cstringToGo(uintptr) string
 func (e errorCString) Error() string {
 	return "runtime error: " + cstringToGo(e.cstr)
 }

--- a/src/pkg/runtime/race.go
+++ b/src/pkg/runtime/race.go
@@ -29,3 +29,6 @@ func RaceWriteRange(addr unsafe.Pointer, len int)
 func RaceSemacquire(s *uint32)
 func RaceSemrelease(s *uint32)
+// private interface for the runtime
+// raceenabled is true in this file; race0.go (built with !race) defines it
+// as false, so runtime code can guard race-detector calls with a constant.
+const raceenabled = true
--- a/src/pkg/runtime/race0.go
+++ b/src/pkg/runtime/race0.go
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// +build !race
+// Dummy race detection API, used when not built with -race.
+package runtime
+// raceenabled is false in this !race build of the package, so code guarded
+// by "if raceenabled" is a constant-false branch.
+const raceenabled = false
--- a/src/pkg/runtime/rune.c
+++ b/src/pkg/runtime/rune.c
@@ -18,42 +18,41 @@
 * from a subset of ../lib9/utf/rune.c
 */
-#include "runtime.h"
+package runtime
-enum
+const (
-{
+	bit1 = 7
-	Bit1	= 7,
+	bitx = 6
-	Bitx	= 6,
+	bit2 = 5
-	Bit2	= 5,
+	bit3 = 4
-	Bit3	= 4,
+	bit4 = 3
-	Bit4	= 3,
+	bit5 = 2
-	Bit5	= 2,
-	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
+	t1 = ((1 << (bit1 + 1)) - 1) ^ 0xFF /* 0000 0000 */
-	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
+	tx = ((1 << (bitx + 1)) - 1) ^ 0xFF /* 1000 0000 */
-	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
+	t2 = ((1 << (bit2 + 1)) - 1) ^ 0xFF /* 1100 0000 */
-	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
+	t3 = ((1 << (bit3 + 1)) - 1) ^ 0xFF /* 1110 0000 */
-	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
+	t4 = ((1 << (bit4 + 1)) - 1) ^ 0xFF /* 1111 0000 */
-	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
+	t5 = ((1 << (bit5 + 1)) - 1) ^ 0xFF /* 1111 1000 */
-	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
+	rune1 = (1 << (bit1 + 0*bitx)) - 1 /* 0000 0000 0111 1111 */
-	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
+	rune2 = (1 << (bit2 + 1*bitx)) - 1 /* 0000 0111 1111 1111 */
-	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
+	rune3 = (1 << (bit3 + 2*bitx)) - 1 /* 1111 1111 1111 1111 */
-	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */
+	rune4 = (1 << (bit4 + 3*bitx)) - 1 /* 0001 1111 1111 1111 1111 1111 */
-	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
+	maskx = (1 << bitx) - 1 /* 0011 1111 */
-	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
+	testx = maskx ^ 0xFF    /* 1100 0000 */
-	Runeerror	= 0xFFFD,
+	runeerror = 0xFFFD
-	Runeself	= 0x80,
+	runeself  = 0x80
-	SurrogateMin = 0xD800,
+	surrogateMin = 0xD800
-	SurrogateMax = 0xDFFF,
+	surrogateMax = 0xDFFF
-	Bad	= Runeerror,
+	bad = runeerror
-	Runemax	= 0x10FFFF,	/* maximum rune value */
+	runemax = 0x10FFFF /* maximum rune value */
-};
+)
 /*
 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
@@ -73,159 +72,148 @@ enum
 * Note that if we have decoding problems for other
 * reasons, we return 1 instead of 0.
 */
-int32
+func charntorune(s string) (rune, int) {
-runtime·charntorune(int32 *rune, uint8 *str, int32 length)
-{
-	int32 c, c1, c2, c3, l;
 	/* When we're not allowed to read anything */
-	if(length <= 0) {
+	if len(s) <= 0 {
-		goto badlen;
+		return bad, 1
 	}
 	/*
 	 * one character sequence (7-bit value)
 	 *	00000-0007F => T1
 	 */
-	c = *(uint8*)str;
+	c := s[0]
-	if(c < Tx) {
+	if c < tx {
-		*rune = c;
+		return rune(c), 1
-		return 1;
 	}
 	// If we can't read more than one character we must stop
-	if(length <= 1) {
+	if len(s) <= 1 {
-		goto badlen;
+		return bad, 1
 	}
 	/*
 	 * two character sequence (11-bit value)
-	 *	0080-07FF => T2 Tx
+	 *	0080-07FF => t2 tx
 	 */
-	c1 = *(uint8*)(str+1) ^ Tx;
+	c1 := s[1] ^ tx
-	if(c1 & Testx)
+	if (c1 & testx) != 0 {
-		goto bad;
+		return bad, 1
-	if(c < T3) {
+	}
-		if(c < T2)
+	if c < t3 {
-			goto bad;
+		if c < t2 {
-		l = ((c << Bitx) | c1) & Rune2;
+			return bad, 1
-		if(l <= Rune1)
+		}
-			goto bad;
+		l := ((rune(c) << bitx) | rune(c1)) & rune2
-		*rune = l;
+		if l <= rune1 {
-		return 2;
+			return bad, 1
+		}
+		return l, 2
 	}
 	// If we can't read more than two characters we must stop
-	if(length <= 2) {
+	if len(s) <= 2 {
-		goto badlen;
+		return bad, 1
 	}
 	/*
 	 * three character sequence (16-bit value)
-	 *	0800-FFFF => T3 Tx Tx
+	 *	0800-FFFF => t3 tx tx
 	 */
-	c2 = *(uint8*)(str+2) ^ Tx;
+	c2 := s[2] ^ tx
-	if(c2 & Testx)
+	if (c2 & testx) != 0 {
-		goto bad;
+		return bad, 1
-	if(c < T4) {
+	}
-		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+	if c < t4 {
-		if(l <= Rune2)
+		l := ((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) & rune3
-			goto bad;
+		if l <= rune2 {
-		if (SurrogateMin <= l && l <= SurrogateMax)
+			return bad, 1
-			goto bad;
+		}
-		*rune = l;
+		if surrogateMin <= l && l <= surrogateMax {
-		return 3;
+			return bad, 1
-	}
+		}
+		return l, 3
-	if (length <= 3)
+	}
-		goto badlen;
+	if len(s) <= 3 {
+		return bad, 1
+	}
 	/*
 	 * four character sequence (21-bit value)
-	 *	10000-1FFFFF => T4 Tx Tx Tx
+	 *	10000-1FFFFF => t4 tx tx tx
 	 */
-	c3 = *(uint8*)(str+3) ^ Tx;
+	c3 := s[3] ^ tx
-	if (c3 & Testx)
+	if (c3 & testx) != 0 {
-		goto bad;
+		return bad, 1
-	if (c < T5) {
+	}
-		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+	if c < t5 {
-		if (l <= Rune3 || l > Runemax)
+		l := ((((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) << bitx) | rune(c3)) & rune4
-			goto bad;
+		if l <= rune3 || l > runemax {
-		*rune = l;
+			return bad, 1
-		return 4;
+		}
+		return l, 4
 	}
 	// Support for 5-byte or longer UTF-8 would go here, but
-	// since we don't have that, we'll just fall through to bad.
+	// since we don't have that, we'll just return bad.
+	return bad, 1
-	/*
-	 * bad decoding
-	 */
-bad:
-	*rune = Bad;
-	return 1;
-badlen:
-	// was return 0, but return 1 is more convenient for the runtime.
-	*rune = Bad;
-	return 1;
 }
-int32
+// runetochar converts r to bytes and writes the result to str.
-runtime·runetochar(byte *str, int32 rune)  /* note: in original, arg2 was pointer */
+// returns the number of bytes generated.
-{
+func runetochar(str []byte, r rune) int {
-	/* Runes are signed, so convert to unsigned for range check. */
+	/* runes are signed, so convert to unsigned for range check. */
-	uint32 c;
+	c := uint32(r)
 	/*
 	 * one character sequence
 	 *	00000-0007F => 00-7F
 	 */
-	c = rune;
+	if c <= rune1 {
-	if(c <= Rune1) {
+		str[0] = byte(c)
-		str[0] = c;
+		return 1
-		return 1;
 	}
 	/*
 	 * two character sequence
-	 *	0080-07FF => T2 Tx
+	 *	0080-07FF => t2 tx
 	 */
-	if(c <= Rune2) {
+	if c <= rune2 {
-		str[0] = T2 | (c >> 1*Bitx);
+		str[0] = byte(t2 | (c >> (1 * bitx)))
-		str[1] = Tx | (c & Maskx);
+		str[1] = byte(tx | (c & maskx))
-		return 2;
+		return 2
 	}
 	/*
-	 * If the Rune is out of range or a surrogate half, convert it to the error rune.
+	 * If the rune is out of range or a surrogate half, convert it to the error rune.
 	 * Do this test here because the error rune encodes to three bytes.
 	 * Doing it earlier would duplicate work, since an out of range
-	 * Rune wouldn't have fit in one or two bytes.
+	 * rune wouldn't have fit in one or two bytes.
 	 */
-	if (c > Runemax)
+	if c > runemax {
-		c = Runeerror;
+		c = runeerror
-	if (SurrogateMin <= c && c <= SurrogateMax)
+	}
-		c = Runeerror;
+	if surrogateMin <= c && c <= surrogateMax {
+		c = runeerror
+	}
 	/*
 	 * three character sequence
-	 *	0800-FFFF => T3 Tx Tx
+	 *	0800-FFFF => t3 tx tx
 	 */
-	if (c <= Rune3) {
+	if c <= rune3 {
-		str[0] = T3 |  (c >> 2*Bitx);
+		str[0] = byte(t3 | (c >> (2 * bitx)))
-		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+		str[1] = byte(tx | ((c >> (1 * bitx)) & maskx))
-		str[2] = Tx |  (c & Maskx);
+		str[2] = byte(tx | (c & maskx))
-		return 3;
+		return 3
 	}
 	/*
 	 * four character sequence (21-bit value)
-	 *     10000-1FFFFF => T4 Tx Tx Tx
+	 *     10000-1FFFFF => t4 tx tx tx
 	 */
-	str[0] = T4 | (c >> 3*Bitx);
+	str[0] = byte(t4 | (c >> (3 * bitx)))
-	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+	str[1] = byte(tx | ((c >> (2 * bitx)) & maskx))
-	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+	str[2] = byte(tx | ((c >> (1 * bitx)) & maskx))
-	str[3] = Tx | (c & Maskx);
+	str[3] = byte(tx | (c & maskx))
-	return 4;
+	return 4
 }
--- a/src/pkg/runtime/string.goc
+++ b/src/pkg/runtime/string.goc
@@ -2,7 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
-package runtime
 #include "runtime.h"
 #include "arch_GOARCH.h"
 #include "malloc.h"
@@ -101,10 +100,6 @@ runtime·gostringnocopy(byte *str)
 	return s;
 }
-func cstringToGo(str *byte) (s String) {
-	s = runtime·gostringnocopy(str);
-}
 String
 runtime·gostringw(uint16 *str)
 {
@@ -144,68 +139,6 @@ runtime·catstring(String s1, String s2)
 	return s3;
 }
-static String
-concatstring(intgo n, String *s)
-{
-	intgo i, l, count;
-	String out;
-	l = 0;
-	count = 0;
-	for(i=0; i<n; i++) {
-		if(l + s[i].len < l)
-			runtime·throw("string concatenation too long");
-		l += s[i].len;
-		if(s[i].len > 0) {
-			count++;
-			out = s[i];
-		}
-	}
-	if(count == 0)
-		return runtime·emptystring;
-	if(count == 1) // zero or one non-empty string in concatenation
-		return out;
-	out = gostringsize(l);
-	l = 0;
-	for(i=0; i<n; i++) {
-		runtime·memmove(out.str+l, s[i].str, s[i].len);
-		l += s[i].len;
-	}
-	return out;
-}
-#pragma textflag NOSPLIT
-func concatstring2(s1 String, s2 String) (res String) {
-	USED(&s2);
-	res = concatstring(2, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstring3(s1 String, s2 String, s3 String) (res String) {
-	USED(&s2);
-	USED(&s3);
-	res = concatstring(3, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstring4(s1 String, s2 String, s3 String, s4 String) (res String) {
-	USED(&s2);
-	USED(&s3);
-	USED(&s4);
-	res = concatstring(4, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstring5(s1 String, s2 String, s3 String, s4 String, s5 String) (res String) {
-	USED(&s2);
-	USED(&s3);
-	USED(&s4);
-	USED(&s5);
-	res = concatstring(5, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstrings(s Slice) (res String) {
-	res = concatstring(s.len, (String*)s.array);
-}
 int32
 runtime·strcmp(byte *s1, byte *s2)
 {
@@ -264,155 +197,3 @@ runtime·strstr(byte *s1, byte *s2)
 	}
 	return nil;
 }
-func intstring(v int64) (s String) {
-	s = gostringsize(8);
-	s.len = runtime·runetochar(s.str, v);
-	s.str[s.len] = 0;
-}
-func slicebytetostring(b Slice) (s String) {
-	void *pc;
-	if(raceenabled) {
-		pc = runtime·getcallerpc(&b);
-		runtime·racereadrangepc(b.array, b.len, pc, runtime·slicebytetostring);
-	}
-	s = gostringsize(b.len);
-	runtime·memmove(s.str, b.array, s.len);
-}
-func slicebytetostringtmp(b Slice) (s String) {
-	void *pc;
-	if(raceenabled) {
-		pc = runtime·getcallerpc(&b);
-		runtime·racereadrangepc(b.array, b.len, pc, runtime·slicebytetostringtmp);
-	}
-	// Return a "string" referring to the actual []byte bytes.
-	// This is only for use by internal compiler optimizations
-	// that know that the string form will be discarded before
-	// the calling goroutine could possibly modify the original
-	// slice or synchronize with another goroutine.
-	// Today, the only such case is a m[string(k)] lookup where
-	// m is a string-keyed map and k is a []byte.
-	s.str = b.array;
-	s.len = b.len;
-}
-func stringtoslicebyte(s String) (b Slice) {
-	uintptr cap;
-	cap = runtime·roundupsize(s.len);
-	b.array = runtime·mallocgc(cap, 0, FlagNoScan|FlagNoZero);
-	b.len = s.len;
-	b.cap = cap;
-	runtime·memmove(b.array, s.str, s.len);
-	if(cap != b.len)
-		runtime·memclr(b.array+b.len, cap-b.len);
-}
-func slicerunetostring(b Slice) (s String) {
-	intgo siz1, siz2, i;
-	int32 *a;
-	byte dum[8];
-	void *pc;
-	if(raceenabled) {
-		pc = runtime·getcallerpc(&b);
-		runtime·racereadrangepc(b.array, b.len*sizeof(*a), pc, runtime·slicerunetostring);
-	}
-	a = (int32*)b.array;
-	siz1 = 0;
-	for(i=0; i<b.len; i++) {
-		siz1 += runtime·runetochar(dum, a[i]);
-	}
-	s = gostringsize(siz1+4);
-	siz2 = 0;
-	for(i=0; i<b.len; i++) {
-		// check for race
-		if(siz2 >= siz1)
-			break;
-		siz2 += runtime·runetochar(s.str+siz2, a[i]);
-	}
-	s.len = siz2;
-	s.str[s.len] = 0;
-}
-func stringtoslicerune(s String) (b Slice) {
-	intgo n;
-	int32 dum, *r;
-	uint8 *p, *ep;
-	uintptr mem;
-	// two passes.
-	// unlike slicerunetostring, no race because strings are immutable.
-	p = s.str;
-	ep = s.str+s.len;
-	n = 0;
-	while(p < ep) {
-		p += runtime·charntorune(&dum, p, ep-p);
-		n++;
-	}
-	if(n > MaxMem/sizeof(r[0]))
-		runtime·throw("out of memory");
-	mem = runtime·roundupsize(n*sizeof(r[0]));
-	b.array = runtime·mallocgc(mem, 0, FlagNoScan|FlagNoZero);
-	b.len = n;
-	b.cap = mem/sizeof(r[0]);
-	p = s.str;
-	r = (int32*)b.array;
-	while(p < ep)
-		p += runtime·charntorune(r++, p, ep-p);
-	if(b.cap > b.len)
-		runtime·memclr(b.array+b.len*sizeof(r[0]), (b.cap-b.len)*sizeof(r[0]));
-}
-enum
-{
-	Runeself	= 0x80,
-};
-func stringiter(s String, k int) (retk int) {
-	int32 l;
-	if(k >= s.len) {
-		// retk=0 is end of iteration
-		retk = 0;
-		goto out;
-	}
-	l = s.str[k];
-	if(l < Runeself) {
-		retk = k+1;
-		goto out;
-	}
-	// multi-char rune
-	retk = k + runtime·charntorune(&l, s.str+k, s.len-k);
-out:
-}
-func stringiter2(s String, k int) (retk int, retv int32) {
-	if(k >= s.len) {
-		// retk=0 is end of iteration
-		retk = 0;
-		retv = 0;
-		goto out;
-	}
-	retv = s.str[k];
-	if(retv < Runeself) {
-		retk = k+1;
-		goto out;
-	}
-	// multi-char rune
-	retk = k + runtime·charntorune(&retv, s.str+k, s.len-k);
-out:
-}
--- a/src/pkg/runtime/string.go
+++ b/src/pkg/runtime/string.go
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package runtime
+import (
+	"unsafe"
+)
+// concatstrings returns the concatenation of all strings in a.
+// If every element is empty it returns ""; if exactly one element is
+// non-empty that element is returned directly. Otherwise the pieces are
+// copied into new storage obtained from rawstring.
+// It panics if the combined length overflows int.
+func concatstrings(a []string) string {
+	idx := 0
+	l := 0
+	count := 0
+	for i, x := range a {
+		n := len(x)
+		if n == 0 {
+			continue
+		}
+		// A running total that wraps around means the result length
+		// is not representable.
+		if l+n < l {
+			panic("string concatenation too long")
+		}
+		l += n
+		count++
+		idx = i // remember the last non-empty element for the count == 1 case
+	}
+	if count == 0 {
+		return ""
+	}
+	if count == 1 {
+		return a[idx]
+	}
+	// s and b share storage: fill it through b, return it as s.
+	s, b := rawstring(l)
+	l = 0
+	for _, x := range a {
+		copy(b[l:], x)
+		l += len(x)
+	}
+	return s
+}
+// concatstring2 concatenates the two strings in a by delegating to
+// concatstrings.
+//go:nosplit
+func concatstring2(a [2]string) string {
+	return concatstrings(a[:])
+}
+// concatstring3 concatenates the three strings in a by delegating to
+// concatstrings.
+//go:nosplit
+func concatstring3(a [3]string) string {
+	return concatstrings(a[:])
+}
+// concatstring4 concatenates the four strings in a by delegating to
+// concatstrings.
+//go:nosplit
+func concatstring4(a [4]string) string {
+	return concatstrings(a[:])
+}
+// concatstring5 concatenates the five strings in a by delegating to
+// concatstrings.
+//go:nosplit
+func concatstring5(a [5]string) string {
+	return concatstrings(a[:])
+}
+// slicebytetostring returns a new string holding a copy of the bytes in b.
+// When raceenabled is true and b is non-empty, it first reports a read of
+// b's bytes via racereadrangepc.
+func slicebytetostring(b []byte) string {
+	if raceenabled && len(b) > 0 {
+		fn := slicebytetostring
+		// Arguments: start of the data, its length in bytes, the pc
+		// obtained from gogetcallerpc using the address of our first
+		// argument, and the word that the func value fn points to
+		// (passed as racereadrangepc's pc parameter).
+		racereadrangepc(unsafe.Pointer(&b[0]),
+			len(b),
+			gogetcallerpc(unsafe.Pointer(&b)),
+			**(**uintptr)(unsafe.Pointer(&fn)))
+	}
+	// s and c share storage: fill it through c, return it as s.
+	s, c := rawstring(len(b))
+	copy(c, b)
+	return s
+}
+// slicebytetostringtmp returns a string that aliases b's storage instead of
+// copying it; see the restrictions described in the body.
+func slicebytetostringtmp(b []byte) string {
+	// Return a "string" referring to the actual []byte bytes.
+	// This is only for use by internal compiler optimizations
+	// that know that the string form will be discarded before
+	// the calling goroutine could possibly modify the original
+	// slice or synchronize with another goroutine.
+	// Today, the only such case is a m[string(k)] lookup where
+	// m is a string-keyed map and k is a []byte.
+	if raceenabled && len(b) > 0 {
+		fn := slicebytetostringtmp
+		// Report a read of b's bytes; the last argument is the word
+		// that the func value fn points to.
+		racereadrangepc(unsafe.Pointer(&b[0]),
+			len(b),
+			gogetcallerpc(unsafe.Pointer(&b)),
+			**(**uintptr)(unsafe.Pointer(&fn)))
+	}
+	// Reinterpret the slice header's leading words as a string header.
+	return *(*string)(unsafe.Pointer(&b))
+}
+// stringtoslicebyte returns a new byte slice of length len(s), allocated by
+// rawbyteslice, containing a copy of the bytes of s.
+func stringtoslicebyte(s string) []byte {
+	b := rawbyteslice(len(s))
+	copy(b, s)
+	return b
+}
+// stringtoslicerune decodes s with charntorune and returns the resulting
+// runes in a new slice allocated by rawruneslice.
+func stringtoslicerune(s string) []rune {
+	// two passes.
+	// unlike slicerunetostring, no race because strings are immutable.
+	n := 0
+	t := s // keep the full string for the second pass; s is consumed below
+	// First pass: count the runes.
+	for len(s) > 0 {
+		_, k := charntorune(s)
+		s = s[k:]
+		n++
+	}
+	a := rawruneslice(n)
+	n = 0
+	// Second pass: decode again and store each rune.
+	for len(t) > 0 {
+		r, k := charntorune(t)
+		t = t[k:]
+		a[n] = r
+		n++
+	}
+	return a
+}
+// slicerunetostring encodes the runes in a with runetochar and returns the
+// result as a new string.
+// When raceenabled is true and a is non-empty, it first reports a read of
+// a's storage via racereadrangepc.
+func slicerunetostring(a []rune) string {
+	if raceenabled && len(a) > 0 {
+		fn := slicerunetostring
+		racereadrangepc(unsafe.Pointer(&a[0]),
+			len(a)*int(unsafe.Sizeof(a[0])),
+			gogetcallerpc(unsafe.Pointer(&a)),
+			**(**uintptr)(unsafe.Pointer(&fn)))
+	}
+	// First pass: encode into a scratch buffer only to total the sizes.
+	var dum [4]byte
+	size1 := 0
+	for _, r := range a {
+		size1 += runetochar(dum[:], r)
+	}
+	// runetochar can write up to 4 bytes and the loop below may start a
+	// write at any offset below size1, so leave 3 bytes of slack.
+	s, b := rawstring(size1 + 3)
+	size2 := 0
+	for _, r := range a {
+		// check for race
+		// (stop once size1 bytes are produced, in case a changed
+		// between the two passes)
+		if size2 >= size1 {
+			break
+		}
+		size2 += runetochar(b[size2:], r)
+	}
+	return s[:size2]
+}
+// cstringToGo returns a string that refers to the bytes starting at address
+// str, up to but not including the first zero byte. The bytes are not
+// copied: the result's data pointer is str itself.
+func cstringToGo(str uintptr) (s string) {
+	// Note: we need i to be the same type as _string.len and to start at 0.
+	i := _string{}.len
+	// Scan forward for the terminating zero byte.
+	for ; ; i++ {
+		if *(*byte)(unsafe.Pointer(str + uintptr(i))) == 0 {
+			break
+		}
+	}
+	// Fill in the result's header in place through a *_string view of s.
+	t := (*_string)(unsafe.Pointer(&s))
+	t.str = (*byte)(unsafe.Pointer(str))
+	t.len = i
+	return
+}
+// intstring returns the UTF-8 encoding of v interpreted as a code point.
+// Values that are not valid code points produce the encoding of runeerror:
+// runetochar handles surrogates and values above runemax, and the check
+// below handles values that do not fit in a rune at all.
+func intstring(v int64) string {
+	// Converting straight to rune would drop the high 32 bits, so a value
+	// such as 1<<32 + 'A' would wrongly encode as "A".
+	if int64(rune(v)) != v {
+		v = runeerror
+	}
+	// s and b share storage; 4 bytes is the longest encoding runetochar emits.
+	s, b := rawstring(4)
+	n := runetochar(b, rune(v))
+	return s[:n]
+}
+// stringiter returns the index of the next
+// rune after the rune that starts at s[k].
+// It returns 0 when k is at or past the end of s.
+func stringiter(s string, k int) int {
+	if k >= len(s) {
+		// 0 is end of iteration
+		return 0
+	}
+	// Fast path: a byte below runeself is a complete one-byte rune.
+	c := s[k]
+	if c < runeself {
+		return k + 1
+	}
+	// multi-char rune
+	_, n := charntorune(s[k:])
+	return k + n
+}
+// stringiter2 returns the index where the next rune starts
+// and the rune that starts at s[k], in that order.
+// It returns 0, 0 when k is at or past the end of s.
+func stringiter2(s string, k int) (int, rune) {
+	if k >= len(s) {
+		// 0 is end of iteration
+		return 0, 0
+	}
+	// Fast path: a byte below runeself is a complete one-byte rune.
+	c := s[k]
+	if c < runeself {
+		return k + 1, rune(c)
+	}
+	// multi-char rune
+	r, n := charntorune(s[k:])
+	return k + n, r
+}
--- a/src/pkg/runtime/string_test.go
+++ b/src/pkg/runtime/string_test.go
@@ -75,3 +75,27 @@ func BenchmarkCompareStringBig(b *testing.B) {
 	}
 	b.SetBytes(int64(len(s1)))
 }
+// BenchmarkRuneIterate measures a single-variable range loop over a
+// 100-byte string consisting only of 'A'.
+func BenchmarkRuneIterate(b *testing.B) {
+	bytes := make([]byte, 100)
+	for i := range bytes {
+		bytes[i] = byte('A')
+	}
+	s := string(bytes)
+	for i := 0; i < b.N; i++ {
+		for _ = range s {
+		}
+	}
+}
+// BenchmarkRuneIterate2 measures a two-variable range loop over a
+// 100-byte string consisting only of 'A'.
+func BenchmarkRuneIterate2(b *testing.B) {
+	bytes := make([]byte, 100)
+	for i := range bytes {
+		bytes[i] = byte('A')
+	}
+	s := string(bytes)
+	for i := 0; i < b.N; i++ {
+		for _, _ = range s {
+		}
+	}
+}
--- a/src/pkg/runtime/stubs.go
+++ b/src/pkg/runtime/stubs.go
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package runtime
+import "unsafe"
+// Declarations for runtime services implemented in C or assembly.
+// C implementations of these functions are in stubs.goc.
+// Assembly implementations are in various files, see comments with
+// each function.
+// rawstring allocates storage for a new string. The returned
+// string and byte slice both refer to the same storage;
+// both have length size, and the slice's capacity is also size.
+// The storage is not zeroed. Callers should use
+// b to set the string contents and then drop b.
+func rawstring(size int) (string, []byte)
+// rawbyteslice allocates a new byte slice of length size.
+// The first size bytes are not zeroed. The capacity is size rounded up by
+// roundupsize, and the bytes between len and cap are cleared.
+func rawbyteslice(size int) []byte
+// rawruneslice allocates a new rune slice of length size.
+// The first size runes are not zeroed. The allocation is rounded up by
+// roundupsize, and any elements between len and cap are cleared.
+// It throws "out of memory" if size is too large.
+func rawruneslice(size int) []rune
+// gogetcallerpc is implemented in assembly (asm_*.s). Callers pass the
+// address of their own first argument and receive the calling pc.
+//go:noescape
+func gogetcallerpc(p unsafe.Pointer) uintptr
+// racereadrangepc is called by the string conversion routines when
+// raceenabled is true. They pass the start address and byte length of the
+// data being read, the value returned by gogetcallerpc as callpc, and a
+// code address taken from their own func value as pc.
+// NOTE(review): the implementation is not visible here — presumably it
+// reports the read to the race detector; confirm.
+//go:noescape
+func racereadrangepc(addr unsafe.Pointer, len int, callpc, pc uintptr)
--- a/src/pkg/runtime/stubs.goc
+++ b/src/pkg/runtime/stubs.goc
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package runtime
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "../../cmd/ld/textflag.h"
+// This file contains functions called by Go but written
+// in C.  These functions are problematic for the garbage
+// collector and stack copier because we don't have
+// stack maps for them.  So we must ensure that the
+// garbage collector and stack copier cannot see these
+// frames.  So we impose the following invariants:
+// 1) Functions should be marked NOSPLIT and call
+//    out to only NOSPLIT functions (recursively).
+// 2) Functions should not block.
+// During conversion, we can still call out to splittable
+// functions.  But once conversion is done the invariants
+// above should hold.
+// rawstring allocates size bytes with mallocgc (FlagNoScan|FlagNoZero, so
+// the memory is not zeroed) and returns a String and a Slice that both
+// point at that memory, each with length size; the slice's cap is size.
+#pragma textflag NOSPLIT
+func rawstring(size intgo) (s String, b Slice) {
+	byte *p;
+	p = runtime·mallocgc(size, 0, FlagNoScan|FlagNoZero);
+	s.str = p;
+	s.len = size;
+	b.array = p;
+	b.len = size;
+	b.cap = size;
+}
+// rawbyteslice allocates a byte slice of length size whose capacity is size
+// rounded up by roundupsize. The first size bytes are left uninitialised
+// (FlagNoZero); the bytes between len and cap are cleared.
+#pragma textflag NOSPLIT
+func rawbyteslice(size intgo) (b Slice) {
+	uintptr cap;
+	byte *p;
+	cap = runtime·roundupsize(size);
+	p = runtime·mallocgc(cap, 0, FlagNoScan|FlagNoZero);
+	// Clear only the tail beyond the requested length.
+	if(cap != size)
+		runtime·memclr(p + size, cap - size);
+	b.array = p;
+	b.len = size;
+	b.cap = cap;
+}
+// rawruneslice allocates a slice of size int32 elements. The byte size is
+// rounded up by roundupsize and the capacity is that rounded size divided
+// by sizeof(int32). The first size elements are left uninitialised
+// (FlagNoZero); any bytes beyond them are cleared.
+// Throws "out of memory" if size exceeds MaxMem/sizeof(int32).
+#pragma textflag NOSPLIT
+func rawruneslice(size intgo) (b Slice) {
+	uintptr mem;
+	byte *p;
+	if(size > MaxMem/sizeof(int32))
+		runtime·throw("out of memory");
+	mem = runtime·roundupsize(size*sizeof(int32));
+	p = runtime·mallocgc(mem, 0, FlagNoScan|FlagNoZero);
+	// Clear only the tail beyond the requested elements.
+	if(mem != size*sizeof(int32))
+		runtime·memclr(p + size*sizeof(int32), mem - size*sizeof(int32));
+	b.array = p;
+	b.len = size;
+	b.cap = mem/sizeof(int32);
+}