Commit 2b93c4dd authored by Rémy Oudompheng

cmd/6g: faster memmove/memset-like code using unaligned load/stores.

This change makes sgen and clearfat use unaligned instructions for
the trailing bytes, like the runtime memmove does, resulting in faster
code when manipulating types whose size is not a multiple of 8.

LGTM=khr
R=khr, iant, rsc
CC=golang-codereviews
https://golang.org/cl/51740044
parent d0591d5e
...@@ -1436,15 +1436,34 @@ sgen(Node *n, Node *ns, int64 w) ...@@ -1436,15 +1436,34 @@ sgen(Node *n, Node *ns, int64 w)
gins(AMOVSQ, N, N); // MOVQ *(SI)+,*(DI)+ gins(AMOVSQ, N, N); // MOVQ *(SI)+,*(DI)+
q--; q--;
} }
// copy the remaining c bytes
if(c >= 4) { if(w < 4 || c <= 1 || (odst < osrc && osrc < odst+w)) {
gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+
c -= 4;
}
while(c > 0) { while(c > 0) {
gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+ gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+
c--; c--;
} }
} else if(w < 8 || c <= 4) {
nodsi.op = OINDREG;
noddi.op = OINDREG;
nodsi.type = types[TINT32];
noddi.type = types[TINT32];
if(c > 4) {
nodsi.xoffset = 0;
noddi.xoffset = 0;
gmove(&nodsi, &noddi);
}
nodsi.xoffset = c-4;
noddi.xoffset = c-4;
gmove(&nodsi, &noddi);
} else {
nodsi.op = OINDREG;
noddi.op = OINDREG;
nodsi.type = types[TINT64];
noddi.type = types[TINT64];
nodsi.xoffset = c-8;
noddi.xoffset = c-8;
gmove(&nodsi, &noddi);
}
} }
restx(&cx, &oldcx); restx(&cx, &oldcx);
......
...@@ -1016,7 +1016,8 @@ void ...@@ -1016,7 +1016,8 @@ void
clearfat(Node *nl) clearfat(Node *nl)
{ {
int64 w, c, q; int64 w, c, q;
Node n1, oldn1, ax, oldax; Node n1, oldn1, ax, oldax, di, z;
Prog *p;
/* clear a fat object */ /* clear a fat object */
if(debug['g']) if(debug['g'])
...@@ -1048,10 +1049,23 @@ clearfat(Node *nl) ...@@ -1048,10 +1049,23 @@ clearfat(Node *nl)
q--; q--;
} }
if(c >= 4) { z = ax;
gconreg(AMOVQ, c, D_CX); di = n1;
gins(AREP, N, N); // repeat if(w >= 8 && c >= 4) {
gins(ASTOSB, N, N); // STOB AL,*(DI)+ di.op = OINDREG;
di.type = z.type = types[TINT64];
p = gins(AMOVQ, &z, &di);
p->to.scale = 1;
p->to.offset = c-8;
} else if(c >= 4) {
di.op = OINDREG;
di.type = z.type = types[TINT32];
p = gins(AMOVL, &z, &di);
if(c > 4) {
p = gins(AMOVL, &z, &di);
p->to.scale = 1;
p->to.offset = c-4;
}
} else } else
while(c > 0) { while(c > 0) {
gins(ASTOSB, N, N); // STOB AL,*(DI)+ gins(ASTOSB, N, N); // STOB AL,*(DI)+
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment