Commit fdf42896, authored by Ma, Ling; committed by H. Peter Anvin

x86, mem: Don't implement forward memmove() as memcpy()

memmove() allows the source and destination regions to overlap, but
memcpy() makes no such guarantee.  Therefore, explicitly implement
memmove() in both the forward and backward directions instead of
relying on memcpy(), which leaves us free to optimize memcpy().
Signed-off-by: Ma Ling <ling.ma@intel.com>
LKML-Reference: <C10D3FB0CD45994C8A51FEC1227CE22F0E483AD86A@shsmsx502.ccr.corp.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 76be97c1
...@@ -25,19 +25,35 @@ void *memmove(void *dest, const void *src, size_t n) ...@@ -25,19 +25,35 @@ void *memmove(void *dest, const void *src, size_t n)
int d0, d1, d2; int d0, d1, d2;
if (dest < src) { if (dest < src) {
memcpy(dest, src, n); if ((dest + n) < src)
return memcpy(dest, src, n);
else
__asm__ __volatile__(
"rep\n\t"
"movsb\n\t"
: "=&c" (d0), "=&S" (d1), "=&D" (d2)
:"0" (n),
"1" (src),
"2" (dest)
:"memory");
} else { } else {
__asm__ __volatile__(
"std\n\t" if((src + count) < dest)
"rep\n\t" return memcpy(dest, src, count);
"movsb\n\t" else
"cld" __asm__ __volatile__(
: "=&c" (d0), "=&S" (d1), "=&D" (d2) "std\n\t"
:"0" (n), "rep\n\t"
"1" (n-1+src), "movsb\n\t"
"2" (n-1+dest) "cld"
:"memory"); : "=&c" (d0), "=&S" (d1), "=&D" (d2)
:"0" (n),
"1" (n-1+src),
"2" (n-1+dest)
:"memory");
} }
return dest; return dest;
} }
EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memmove);
...@@ -8,13 +8,49 @@ ...@@ -8,13 +8,49 @@
#undef memmove #undef memmove
void *memmove(void *dest, const void *src, size_t count) void *memmove(void *dest, const void *src, size_t count)
{ {
unsigned long d0, d1, d2, d3;
if (dest < src) { if (dest < src) {
return memcpy(dest, src, count); if ((dest + count) < src)
return memcpy(dest, src, count);
else
__asm__ __volatile__(
"movq %0, %3\n\t"
"shr $3, %0\n\t"
"andq $7, %3\n\t"
"rep\n\t"
"movsq\n\t"
"movq %3, %0\n\t"
"rep\n\t"
"movsb"
: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3)
:"0" (count),
"1" (src),
"2" (dest)
:"memory");
} else { } else {
char *p = dest + count; if((src + count) < dest)
const char *s = src + count; return memcpy(dest, src, count);
while (count--) else
*--p = *--s; __asm__ __volatile__(
"movq %0, %3\n\t"
"lea -8(%1, %0), %1\n\t"
"lea -8(%2, %0), %2\n\t"
"shr $3, %0\n\t"
"andq $7, %3\n\t"
"std\n\t"
"rep\n\t"
"movsq\n\t"
"lea 7(%1), %1\n\t"
"lea 7(%2), %2\n\t"
"movq %3, %0\n\t"
"rep\n\t"
"movsb\n\t"
"cld"
: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3)
:"0" (count),
"1" (src),
"2" (dest)
:"memory");
} }
return dest; return dest;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment