Commit b3346c2f authored by Sergey Vojtovich, committed by Monty

Restore LF_BACKOFF

Moved InnoDB UT_RELAX_CPU() to server. Restored cross-platform LF_BACKOFF
implementation based on UT_RELAX_CPU().
parent 07e9ff1f
......@@ -90,37 +90,7 @@ C_MODE_END
ret= 0; /* avoid compiler warning */ \
ret= IL_COMP_EXCHG ## S (a, ret, ret);
#endif
/*
my_yield_processor (equivalent of x86 PAUSE instruction) should be used
to improve performance on hyperthreaded CPUs. Intel recommends using it in
spin loops also on non-HT machines to reduce power consumption (see e.g.
http://softwarecommunity.intel.com/articles/eng/2004.htm)
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
and YieldProcessor shows that much better performance is achieved by calling
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
loop count in the range 200-300 brought best results.
*/
#ifndef YIELD_LOOPS
#define YIELD_LOOPS 200
#endif
static __inline int my_yield_processor()
{
int i;
for(i=0; i<YIELD_LOOPS; i++)
{
#if (_MSC_VER <= 1310)
/* On older compilers YieldProcessor is not available, use inline assembly*/
__asm { rep nop }
#else
YieldProcessor();
#endif
}
return 1;
}
#define LF_BACKOFF my_yield_processor()
#else /* cleanup */
#undef IL_EXCHG_ADD32
......
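The removed comment above cites spinlock benchmarks built on
InterlockedCompareExchange plus a YieldProcessor loop. A minimal Windows-only
sketch of that pattern, with hypothetical names, purely for illustration:

/* Hypothetical spinlock illustrating "yielding longer": pause ~200
   times between lock attempts, per the benchmarks cited above. */
#include <windows.h>

static volatile LONG lock_word= 0;

static void spin_lock(void)
{
  while (InterlockedCompareExchange(&lock_word, 1, 0) != 0)
  {
    int i;
    for (i= 0; i < 200; i++)
      YieldProcessor();
  }
}

static void spin_unlock(void)
{
  InterlockedExchange(&lock_word, 0);
}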
......@@ -17,6 +17,7 @@
#define INCLUDE_LF_INCLUDED
#include <my_atomic.h>
#include <my_cpu.h>
C_MODE_START
......
......@@ -346,15 +346,6 @@ make_atomic_store(ptr)
#undef make_atomic_fas_body
#undef intptr
/*
the macro below defines (as an expression) the code that
will be run in spin-loops. Intel manuals recommend having PAUSE there.
It is expected to be defined in include/atomic/ *.h files
*/
#ifndef LF_BACKOFF
#define LF_BACKOFF (1)
#endif
#define MY_ATOMIC_OK 0
#define MY_ATOMIC_NOT_1CPU 1
extern int my_atomic_initialize();
......
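Note that both the old fallback macro, (1), and the new function evaluate to a
nonzero value; this is what lets call sites splice the backoff into a retry
condition with &&, pausing only on iterations that actually failed. A minimal
sketch of the idiom (shared and snapshot are hypothetical names):

do
{
  snapshot= shared->top;              /* optimistic read */
} while (snapshot != shared->top &&   /* inconsistent: retry... */
         LF_BACKOFF());               /* ...after a pause; always true */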
#ifndef MY_CPU_INCLUDED
#define MY_CPU_INCLUDED
/* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
This program is free software; you can redistribute it and/or modify
......@@ -42,3 +44,58 @@
#define HMT_medium_high()
#define HMT_high()
#endif
static inline void MY_RELAX_CPU(void)
{
#ifdef HAVE_PAUSE_INSTRUCTION
/*
According to the gcc info page, asm volatile means that the
instruction has important side-effects and must not be removed.
Also asm volatile may trigger a memory barrier (spilling all registers
to memory).
*/
#ifdef __SUNPRO_CC
asm ("pause" );
#else
__asm__ __volatile__ ("pause");
#endif
#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
__asm__ __volatile__ ("rep; nop");
#elif defined _WIN32
/*
In the Win32 API, the x86 PAUSE instruction is executed by calling
the YieldProcessor macro defined in WinNT.h. Using YieldProcessor is
a CPU architecture-independent way to issue the pause.
*/
YieldProcessor();
#elif defined(_ARCH_PWR8)
__ppc_get_timebase();
#else
int32 var, oldval = 0;
my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
MY_MEMORY_ORDER_RELAXED);
#endif
}
/*
LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
recommends using it in spin loops also on non-HT machines to reduce power
consumption (see e.g. http://softwarecommunity.intel.com/articles/eng/2004.htm)
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
and YieldProcessor shows that much better performance is achieved by calling
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
loop count in the range 200-300 brought best results.
*/
static inline int LF_BACKOFF(void)
{
int i;
for (i= 0; i < 200; i++)
MY_RELAX_CPU();
return 1;
}
#endif
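On platforms with no pause variant at all, MY_RELAX_CPU() falls back to a
relaxed compare-and-swap purely as a cheap, ordering-free delay. An analogous
standalone sketch in plain C11 atomics (not the my_atomic wrappers):

#include <stdatomic.h>
#include <stdint.h>

/* A relaxed CAS on a local atomic costs a few cycles and imposes no
   memory ordering, roughly approximating a PAUSE instruction. */
static inline void relax_fallback(void)
{
  _Atomic int32_t var= 0;
  int32_t oldval= 0;
  atomic_compare_exchange_strong_explicit(&var, &oldval, 1,
                                          memory_order_relaxed,
                                          memory_order_relaxed);
}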
......@@ -430,7 +430,7 @@ static void alloc_free(uchar *first,
{
anext_node(last)= tmp.node;
} while (!my_atomic_casptr((void **)(char *)&allocator->top,
(void **)&tmp.ptr, first) && LF_BACKOFF);
(void **)&tmp.ptr, first) && LF_BACKOFF());
}
/*
......@@ -501,7 +501,7 @@ void *lf_alloc_new(LF_PINS *pins)
{
node= allocator->top;
lf_pin(pins, 0, node);
} while (node != allocator->top && LF_BACKOFF);
} while (node != allocator->top && LF_BACKOFF());
if (!node)
{
node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
......
......@@ -102,7 +102,7 @@ static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
do { /* PTR() isn't necessary below, head is a dummy node */
cursor->curr= (LF_SLIST *)(*cursor->prev);
lf_pin(pins, 1, cursor->curr);
} while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
} while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF());
for (;;)
{
......@@ -117,7 +117,7 @@ static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
link= cursor->curr->link;
cursor->next= PTR(link);
lf_pin(pins, 0, cursor->next);
} while (link != cursor->curr->link && LF_BACKOFF);
} while (link != cursor->curr->link && LF_BACKOFF());
if (!DELETED(link))
{
......@@ -145,7 +145,7 @@ static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
and remove this deleted node
*/
if (my_atomic_casptr((void **) cursor->prev,
(void **) &cursor->curr, cursor->next) && LF_BACKOFF)
(void **) &cursor->curr, cursor->next) && LF_BACKOFF())
lf_alloc_free(pins, cursor->curr);
else
goto retry;
......
......@@ -617,7 +617,7 @@ static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
{
rc= *shared_ptr;
lf_pin(arg->thd->pins, 0, rc);
} while (rc != *shared_ptr && LF_BACKOFF);
} while (rc != *shared_ptr && LF_BACKOFF());
if (rc == 0)
{
......
......@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
#include "univ.i"
#include "ut0ut.h"
#include "my_cpu.h"
/** Execute a given function exactly once in a multi-threaded environment
or wait for the function to be executed by another thread.
......@@ -110,7 +111,7 @@ class os_once {
ut_error;
}
UT_RELAX_CPU();
MY_RELAX_CPU();
}
}
}
......
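The os_once wait loop patched above polls the once-state until the winning
thread finishes, relaxing the CPU between polls. A simplified sketch of that
pattern, assuming my_cpu.h is available and using hypothetical names:

#include "my_cpu.h"

enum once_state { NEVER_DONE, IN_PROGRESS, DONE };

/* Spin until another thread completes the one-time init; the pause
   keeps the polling loop polite on hyperthreaded CPUs. */
static void wait_until_done(volatile enum once_state *state)
{
  while (*state != DONE)
    MY_RELAX_CPU();
}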
......@@ -52,35 +52,6 @@ Created 1/20/1994 Heikki Tuuri
/** Time stamp */
typedef time_t ib_time_t;
#ifdef HAVE_PAUSE_INSTRUCTION
/* According to the gcc info page, asm volatile means that the
instruction has important side-effects and must not be removed.
Also asm volatile may trigger a memory barrier (spilling all registers
to memory). */
# ifdef __SUNPRO_CC
# define UT_RELAX_CPU() asm ("pause" )
# else
# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
# endif /* __SUNPRO_CC */
#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
#elif defined _WIN32
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
the YieldProcessor macro defined in WinNT.h. Using YieldProcessor is
a CPU architecture-independent way to issue the pause. */
# define UT_RELAX_CPU() YieldProcessor()
#elif defined(__powerpc__) && defined __GLIBC__
# include <sys/platform/ppc.h>
# define UT_RELAX_CPU() __ppc_get_timebase()
#else
# define UT_RELAX_CPU() do { \
volatile int32 volatile_var; \
int32 oldval= 0; \
my_atomic_cas32(&volatile_var, &oldval, 1); \
} while (0)
#endif
#if defined (__GNUC__)
# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
#elif defined (_MSC_VER)
......
......@@ -293,7 +293,7 @@ ut_delay(
UT_LOW_PRIORITY_CPU();
for (i = 0; i < delay * 50; i++) {
UT_RELAX_CPU();
MY_RELAX_CPU();
UT_COMPILER_BARRIER();
}
......
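The UT_COMPILER_BARRIER() after each relax presumably keeps the compiler from
reordering or collapsing the otherwise side-effect-free delay loop. A minimal
GCC-style sketch of the same shape (my_delay is a hypothetical name):

#include "my_cpu.h"

static void my_delay(unsigned long delay)
{
  unsigned long i;
  for (i= 0; i < delay * 50; i++)
  {
    MY_RELAX_CPU();
    /* empty asm with a "memory" clobber: the compiler must assume
       memory changed, so it cannot fuse or hoist iterations */
    __asm__ __volatile__ ("":::"memory");
  }
}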