Commit d405bee0 authored by Daniel Black's avatar Daniel Black Committed by Monty

mysys: disable "optimized" memcpy from 18 years ago

MDEV-15843 mysys: remove optimized memcpy from 18 years ago

While this code has remained dormant for 18 years, libc implementers
have used assembly to gain improvements from architecture-specific
features, with code paths optimized by buffer length, like:
* https://svnweb.freebsd.org/base/head/lib/libc/amd64/string/memcmp.S
* https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/aarch64/memcmp.S
* https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/powerpc/powerpc64/memcpy.S

From a sysbench-1.0.6 oltp_read_only test on a binary charset table:

x86_64:
Before: ptr_compare_0
perf report -g --no-children:
+    3.37%  mysqld   mysqld               [.] hp_rec_hashnr
+    3.15%  mysqld   libc-2.26.so         [.] __memmove_avx_unaligned_erms
+    2.73%  mysqld   mysqld               [.] row_search_mvcc
+    1.97%  mysqld   mysqld               [.] rec_get_offsets_func
+    1.24%  mysqld   mysqld               [.] ptr_compare_0
+    1.14%  mysqld   mysqld               [.] my_qsort2

After: __memcmp_avx2_movbe
+    3.42%  mysqld   mysqld               [.] hp_rec_hashnr
+    2.96%  mysqld   libc-2.26.so         [.] __memmove_avx_unaligned_erms
+    2.91%  mysqld   mysqld               [.] row_search_mvcc
+    2.13%  mysqld   mysqld               [.] rec_get_offsets_func
+    1.18%  mysqld   libc-2.26.so         [.] __memcmp_avx2_movbe
+    1.04%  mysqld   mysqld               [.] evaluate_join_record
+    1.02%  mysqld   mysqld               [.] my_qsort2

Power9:
Before: ptr_compare_0
+    4.24%  mysqld   mysqld               [.] _Z15row_search_mvccPh15page_cur_mode_tP14row_prebuilt_tmm
+    2.18%  mysqld   mysqld               [.] hp_rec_hashnr
+    2.07%  mysqld   mysqld               [.] _Z20rec_get_offsets_funcPKhPK12dict_index_tPmbmPP16mem_block_info_t
+    1.60%  mysqld   mysqld               [.] _ZL20evaluate_join_recordP4JOINP13st_join_tablei
+    1.20%  mysqld   mysqld               [.] _ZN11ha_innobase13general_fetchEPhjj
+    1.05%  mysqld   mysqld               [.] _ZN17Item_func_between15val_int_cmp_intEv
+    0.92%  mysqld   mysqld               [.] _Z40row_sel_field_store_in_mysql_format_funcPhPK17mysql_row_templ_tPKhm
+    0.91%  mysqld   mysqld               [.] _ZNK10Item_param6PValue7val_intEPK19Type_std_attributes
+    0.84%  mysqld   mysqld               [.] ptr_compare_0

After: __memcmp_power8
+    2.29%  mysqld           mysqld                  [.] _Z15row_search_mvccPh15page_cur_mode_tP14row_prebuilt_tmm
+    1.32%  mysqld           mysqld                  [.] hp_rec_hashnr
+    1.18%  swapper          [kernel.kallsyms]       [k] power_enter_stop
+    1.12%  mysqld           mysqld                  [.] _Z20rec_get_offsets_funcPKhPK12dict_index_tPmbmPP16mem_block_info_t
+    0.87%  mysqld           mysqld                  [.] _ZL20evaluate_join_recordP4JOINP13st_join_tablei
+    0.87%  mysqld           [kernel.kallsyms]       [k] ___bpf_prog_run
+    0.76%  mysqld           libc-2.26.so            [.] __memcmp_power8
+    0.68%  mysqld           mysqld                  [.] _ZN11ha_innobase13general_fetchEPhjj
+    0.58%  mysqld           mysqld                  [.] _ZN17Item_func_between15val_int_cmp_intEv
parent 1a4c355a
...@@ -27,14 +27,18 @@ ...@@ -27,14 +27,18 @@
* written in assembler. for example one in /usr/lib/libc/libc_hwcap*.so.1. * written in assembler. for example one in /usr/lib/libc/libc_hwcap*.so.1.
* on Solaris, or on Windows inside C runtime library. * on Solaris, or on Windows inside C runtime library.
* *
* On Solaris, native implementation is also usually faster than the * On Solaris, native implementation is also usually faster than the
* built-in memcmp supplied by GCC, so it is recommended to build * built-in memcmp supplied by GCC, so it is recommended to build
* with "-fno-builtin-memcmp"in CFLAGS if building with GCC on Solaris. * with "-fno-builtin-memcmp"in CFLAGS if building with GCC on Solaris.
*/ */
#if defined (__sun) || defined (_WIN32) /*
Daniel Black's tests show that libc memcmp is generally faster than
ptr_cmp(), at least on x86 and power8 platforms, so we use the libc
code as default for now
*/
#define USE_NATIVE_MEMCMP 1 #define USE_NATIVE_MEMCMP 1
#endif
#ifdef USE_NATIVE_MEMCMP #ifdef USE_NATIVE_MEMCMP
...@@ -45,23 +49,19 @@ static int native_compare(size_t *length, unsigned char **a, unsigned char **b) ...@@ -45,23 +49,19 @@ static int native_compare(size_t *length, unsigned char **a, unsigned char **b)
return memcmp(*a, *b, *length); return memcmp(*a, *b, *length);
} }
#else /* USE_NATIVE_MEMCMP */ qsort2_cmp get_ptr_compare (size_t size __attribute__((unused)))
{
return (qsort2_cmp) native_compare;
}
#else /* USE_NATIVE_MEMCMP */
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b); static int ptr_compare(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_0(size_t *compare_length, uchar **a, uchar **b); static int ptr_compare_0(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_1(size_t *compare_length, uchar **a, uchar **b); static int ptr_compare_1(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_2(size_t *compare_length, uchar **a, uchar **b); static int ptr_compare_2(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_3(size_t *compare_length, uchar **a, uchar **b); static int ptr_compare_3(size_t *compare_length, uchar **a, uchar **b);
#endif /* __sun */
/* Get a pointer to a optimal byte-compare function for a given size */
#ifdef USE_NATIVE_MEMCMP
qsort2_cmp get_ptr_compare (size_t size __attribute__((unused)))
{
return (qsort2_cmp) native_compare;
}
#else
qsort2_cmp get_ptr_compare (size_t size) qsort2_cmp get_ptr_compare (size_t size)
{ {
if (size < 4) if (size < 4)
...@@ -74,9 +74,6 @@ qsort2_cmp get_ptr_compare (size_t size) ...@@ -74,9 +74,6 @@ qsort2_cmp get_ptr_compare (size_t size)
} }
return 0; /* Impossible */ return 0; /* Impossible */
} }
#endif /* USE_NATIVE_MEMCMP */
/* /*
Compare two keys to see which is smaller. Compare two keys to see which is smaller.
Loop unrolled to make it quick !! Loop unrolled to make it quick !!
...@@ -84,8 +81,6 @@ qsort2_cmp get_ptr_compare (size_t size) ...@@ -84,8 +81,6 @@ qsort2_cmp get_ptr_compare (size_t size)
#define cmp(N) if (first[N] != last[N]) return (int) first[N] - (int) last[N] #define cmp(N) if (first[N] != last[N]) return (int) first[N] - (int) last[N]
#ifndef USE_NATIVE_MEMCMP
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b) static int ptr_compare(size_t *compare_length, uchar **a, uchar **b)
{ {
size_t length= *compare_length; size_t length= *compare_length;
...@@ -189,7 +184,7 @@ static int ptr_compare_3(size_t *compare_length,uchar **a, uchar **b) ...@@ -189,7 +184,7 @@ static int ptr_compare_3(size_t *compare_length,uchar **a, uchar **b)
return (0); return (0);
} }
#endif /* !__sun */ #endif /* USE_NATIVE_MEMCMP */
void my_store_ptr(uchar *buff, size_t pack_length, my_off_t pos) void my_store_ptr(uchar *buff, size_t pack_length, my_off_t pos)
{ {
...@@ -227,4 +222,3 @@ my_off_t my_get_ptr(uchar *ptr, size_t pack_length) ...@@ -227,4 +222,3 @@ my_off_t my_get_ptr(uchar *ptr, size_t pack_length)
} }
return pos; return pos;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment