Commit d167a518 authored by Gerd Hoffmann's avatar Gerd Hoffmann Committed by Linus Torvalds

[PATCH] x86_64: x86_64 version of the smp alternative patch.

Changes are largely identical to the i386 version:

 * alternative #define are moved to the new alternative.h file.
 * one new elf section with pointers to the lock prefixes which can be
   nop'ed out for non-smp.
 * two new elf sections simliar to the "classic" alternatives to
   replace SMP code with simpler UP code.
 * fixup headers to use alternative.h instead of defining their own
   LOCK / LOCK_PREFIX macros.

The patch reuses the i386 version of the alternatives code to avoid code
duplication.  The code in alternatives.c was shuffled around a bit to
reduce the number of #ifdefs needed.  It also got some tweaks needed for
x86_64 (vsyscall page handling) and new features (noreplacement option
which was x86_64 only up to now).  Debug printk's are changed from
compile-time to runtime.

Loosely based on a early version from Bastian Blank <waldi@debian.org>
Signed-off-by: default avatarGerd Hoffmann <kraxel@suse.de>
Signed-off-by: default avatarAndi Kleen <ak@suse.de>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 240cd6a8
......@@ -4,27 +4,41 @@
#include <asm/alternative.h>
#include <asm/sections.h>
#define DEBUG 0
#if DEBUG
# define DPRINTK(fmt, args...) printk(fmt, args)
#else
# define DPRINTK(fmt, args...)
#endif
static int no_replacement = 0;
static int smp_alt_once = 0;
static int debug_alternative = 0;
static int __init noreplacement_setup(char *s)
{
no_replacement = 1;
return 1;
}
static int __init bootonly(char *str)
{
smp_alt_once = 1;
return 1;
}
static int __init debug_alt(char *str)
{
debug_alternative = 1;
return 1;
}
__setup("noreplacement", noreplacement_setup);
__setup("smp-alt-boot", bootonly);
__setup("debug-alternative", debug_alt);
#define DPRINTK(fmt, args...) if (debug_alternative) \
printk(KERN_DEBUG fmt, args)
#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.data\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8);
asm("\t.data\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
K8_NOP7 K8_NOP8);
asm("\t.data\nk7nops: "
K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
K7_NOP7 K7_NOP8);
extern unsigned char intelnops[], k8nops[], k7nops[];
extern unsigned char intelnops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
......@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
intelnops + 1 + 2 + 3 + 4 + 5 + 6,
intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
#ifdef K8_NOP1
asm("\t.data\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
K8_NOP7 K8_NOP8);
extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
......@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
k8nops + 1 + 2 + 3 + 4 + 5 + 6,
k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
#ifdef K7_NOP1
asm("\t.data\nk7nops: "
K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
K7_NOP7 K7_NOP8);
extern unsigned char k7nops[];
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
......@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
k7nops + 1 + 2 + 3 + 4 + 5 + 6,
k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
#ifdef CONFIG_X86_64
extern char __vsyscall_0;
static inline unsigned char** find_nop_table(void)
{
return k8_nops;
}
#else /* CONFIG_X86_64 */
static struct nop {
int cpuid;
unsigned char **noptable;
......@@ -67,14 +107,6 @@ static struct nop {
{ -1, NULL }
};
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];
extern u8 __smp_alt_begin[], __smp_alt_end[];
static unsigned char** find_nop_table(void)
{
unsigned char **noptable = intel_nops;
......@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
return noptable;
}
#endif /* CONFIG_X86_64 */
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];
extern u8 __smp_alt_begin[], __smp_alt_end[];
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
......@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
unsigned char **noptable = find_nop_table();
struct alt_instr *a;
u8 *instr;
int diff, i, k;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
......@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
BUG_ON(a->replacementlen > a->instrlen);
if (!boot_cpu_has(a->cpuid))
continue;
memcpy(a->instr, a->replacement, a->replacementlen);
instr = a->instr;
#ifdef CONFIG_X86_64
/* vsyscall code is not mapped yet. resolve it manually. */
if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
DPRINTK("%s: vsyscall fixup: %p => %p\n",
__FUNCTION__, a->instr, instr);
}
#endif
memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
/* Pad the rest with nops */
for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
......@@ -186,14 +236,6 @@ struct smp_alt_module {
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
static int smp_alt_once = 0;
static int __init bootonly(char *str)
{
smp_alt_once = 1;
return 1;
}
__setup("smp-alt-boot", bootonly);
void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end)
......@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
struct smp_alt_module *smp;
unsigned long flags;
if (no_replacement)
return;
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
......@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
struct smp_alt_module *item;
unsigned long flags;
if (smp_alt_once)
if (no_replacement || smp_alt_once)
return;
spin_lock_irqsave(&smp_alt, flags);
......@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
struct smp_alt_module *mod;
unsigned long flags;
if (smp_alt_once)
if (no_replacement || smp_alt_once)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
......@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
void __init alternative_instructions(void)
{
if (no_replacement) {
printk(KERN_INFO "(SMP-)alternatives turned off\n");
free_init_pages("SMP alternatives",
(unsigned long)__smp_alt_begin,
(unsigned long)__smp_alt_end);
return;
}
apply_alternatives(__alt_instructions, __alt_instructions_end);
/* switch to patch-once-at-boottime-only mode and free the
......
......@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o
pci-dma.o pci-nommu.o alternative.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
......@@ -49,3 +49,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
alternative-y += ../../i386/kernel/alternative.o
......@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
return -ENOSYS;
}
extern void apply_alternatives(void *start, void *end);
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
const Elf_Shdr *sechdrs,
struct module *me)
{
const Elf_Shdr *s;
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
/* look for .altinstructions to patch */
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
void *seg;
if (strcmp(".altinstructions", secstrings + s->sh_name))
continue;
seg = (void *)s->sh_addr;
apply_alternatives(seg, seg + s->sh_size);
}
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".text", secstrings + s->sh_name))
text = s;
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks= s;
}
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
apply_alternatives(aseg, aseg + alt->sh_size);
}
if (locks && text) {
void *lseg = (void *)locks->sh_addr;
void *tseg = (void *)text->sh_addr;
alternatives_smp_module_add(me, me->name,
lseg, lseg + locks->sh_size,
tseg, tseg + text->sh_size);
}
return 0;
}
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
}
......@@ -473,80 +473,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
}
#endif
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.data\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
K8_NOP7 K8_NOP8);
extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
k8nops + 1,
k8nops + 1 + 2,
k8nops + 1 + 2 + 3,
k8nops + 1 + 2 + 3 + 4,
k8nops + 1 + 2 + 3 + 4 + 5,
k8nops + 1 + 2 + 3 + 4 + 5 + 6,
k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
extern char __vsyscall_0;
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
APs have less capabilities than the boot processor are not handled.
In this case boot with "noreplacement". */
void apply_alternatives(void *start, void *end)
{
struct alt_instr *a;
int diff, i, k;
for (a = start; (void *)a < end; a++) {
u8 *instr;
if (!boot_cpu_has(a->cpuid))
continue;
BUG_ON(a->replacementlen > a->instrlen);
instr = a->instr;
/* vsyscall code is not mapped yet. resolve it manually. */
if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
__inline_memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
/* Pad the rest with nops */
for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
k = diff;
if (k > ASM_NOP_MAX)
k = ASM_NOP_MAX;
__inline_memcpy(instr + i, k8_nops[k], k);
}
}
}
static int no_replacement __initdata = 0;
void __init alternative_instructions(void)
{
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
if (no_replacement)
return;
apply_alternatives(__alt_instructions, __alt_instructions_end);
}
static int __init noreplacement_setup(char *s)
{
no_replacement = 1;
return 1;
}
__setup("noreplacement", noreplacement_setup);
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
......@@ -1303,7 +1229,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* Other (Linux-defined) */
"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
"constant_tsc", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
"up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
......
......@@ -797,6 +797,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
}
alternatives_smp_switch(1);
c_idle.idle = get_idle_for_cpu(cpu);
if (c_idle.idle) {
......@@ -1259,6 +1261,8 @@ void __cpu_die(unsigned int cpu)
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
printk ("CPU %d is now offline\n", cpu);
if (1 == num_online_cpus())
alternatives_smp_switch(0);
return;
}
msleep(100);
......
......@@ -131,6 +131,26 @@ SECTIONS
*(.data.page_aligned)
}
/* might get freed after init */
. = ALIGN(4096);
__smp_alt_begin = .;
__smp_alt_instructions = .;
.smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
*(.smp_altinstructions)
}
__smp_alt_instructions_end = .;
. = ALIGN(8);
__smp_locks = .;
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
*(.smp_locks)
}
__smp_locks_end = .;
.smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
*(.smp_altinstr_replacement)
}
. = ALIGN(4096);
__smp_alt_end = .;
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
......
......@@ -644,20 +644,29 @@ void __init mem_init(void)
#endif
}
void free_initmem(void)
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
unsigned long addr;
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
if (begin >= end)
return;
printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
for (addr = begin; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
free_page(addr);
totalram_pages++;
}
}
void free_initmem(void)
{
memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
free_init_pages("unused kernel memory",
(unsigned long)(&__init_begin),
(unsigned long)(&__init_end));
}
#ifdef CONFIG_DEBUG_RODATA
......@@ -686,15 +695,7 @@ void mark_rodata_ro(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
if (start >= end)
return;
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
free_init_pages("initrd memory", start, end);
}
#endif
......
......@@ -5,6 +5,8 @@
#include <asm/types.h>
#include <linux/types.h>
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
......
#ifndef _X86_64_ALTERNATIVE_H
#define _X86_64_ALTERNATIVE_H
#ifdef __KERNEL__
#include <linux/types.h>
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
u8 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
u8 pad[5];
};
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
struct module;
extern void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end);
extern void alternatives_smp_module_del(struct module *mod);
extern void alternatives_smp_switch(int smp);
#endif
/*
* Alternative instructions for different CPU types or capabilities.
*
* This allows to use optimized instructions even on generic binary
* kernels.
*
* length of oldinstr must be longer or equal the length of newinstr
* It can be padded with nops as needed.
*
* For non barrier like inlines please define new variants
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, feature) \
asm volatile ("661:\n\t" oldinstr "\n662:\n" \
".section .altinstructions,\"a\"\n" \
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
".previous\n" \
".section .altinstr_replacement,\"ax\"\n" \
"663:\n\t" newinstr "\n664:\n" /* replacement */ \
".previous" :: "i" (feature) : "memory")
/*
* Alternative inline assembly with input.
*
* Pecularities:
* No memory clobber here.
* Argument numbers start with 1.
* Best is to use constraints that are fixed size (like (%1) ... "r")
* If you use variable sized constraints like "m" or "g" in the
* replacement make sure to pad to the worst case length.
*/
#define alternative_input(oldinstr, newinstr, feature, input...) \
asm volatile ("661:\n\t" oldinstr "\n662:\n" \
".section .altinstructions,\"a\"\n" \
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
".previous\n" \
".section .altinstr_replacement,\"ax\"\n" \
"663:\n\t" newinstr "\n664:\n" /* replacement */ \
".previous" :: "i" (feature), ##input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \
asm volatile ("661:\n\t" oldinstr "\n662:\n" \
".section .altinstructions,\"a\"\n" \
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
" .byte %c[feat]\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
".previous\n" \
".section .altinstr_replacement,\"ax\"\n" \
"663:\n\t" newinstr "\n664:\n" /* replacement */ \
".previous" : output : [feat] "i" (feature), ##input)
/*
* Alternative inline assembly for SMP.
*
* alternative_smp() takes two versions (SMP first, UP second) and is
* for more complex stuff such as spinlocks.
*
* The LOCK_PREFIX macro defined here replaces the LOCK and
* LOCK_PREFIX macros used everywhere in the source tree.
*
* SMP alternatives use the same data structures as the other
* alternatives and the X86_FEATURE_UP flag to indicate the case of a
* UP system running a SMP kernel. The existing apply_alternatives()
* works fine for patching a SMP kernel for UP.
*
* The SMP alternative tables can be kept after boot and contain both
* UP and SMP versions of the instructions to allow switching back to
* SMP at runtime, when hotplugging in a new CPU, which is especially
* useful in virtualized environments.
*
* The very common lock prefix is handled as special case in a
* separate table which is a pure address list without replacement ptr
* and size information. That keeps the table sizes small.
*/
#ifdef CONFIG_SMP
#define alternative_smp(smpinstr, upinstr, args...) \
asm volatile ("661:\n\t" smpinstr "\n662:\n" \
".section .smp_altinstructions,\"a\"\n" \
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
" .byte 0x66\n" /* X86_FEATURE_UP */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
".previous\n" \
".section .smp_altinstr_replacement,\"awx\"\n" \
"663:\n\t" upinstr "\n" /* replacement */ \
"664:\n\t.fill 662b-661b,1,0x42\n" /* space for original */ \
".previous" : args)
#define LOCK_PREFIX \
".section .smp_locks,\"a\"\n" \
" .align 8\n" \
" .quad 661f\n" /* address */ \
".previous\n" \
"661:\n\tlock; "
#else /* ! CONFIG_SMP */
#define alternative_smp(smpinstr, upinstr, args...) \
asm volatile (upinstr : args)
#define LOCK_PREFIX ""
#endif
#endif /* _X86_64_ALTERNATIVE_H */
#ifndef __ARCH_X86_64_ATOMIC__
#define __ARCH_X86_64_ATOMIC__
#include <asm/types.h>
#include <asm/alternative.h>
/* atomic_t should be 32 bit signed type */
......@@ -52,7 +52,7 @@ typedef struct { volatile int counter; } atomic_t;
static __inline__ void atomic_add(int i, atomic_t *v)
{
__asm__ __volatile__(
LOCK "addl %1,%0"
LOCK_PREFIX "addl %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
......@@ -67,7 +67,7 @@ static __inline__ void atomic_add(int i, atomic_t *v)
static __inline__ void atomic_sub(int i, atomic_t *v)
{
__asm__ __volatile__(
LOCK "subl %1,%0"
LOCK_PREFIX "subl %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
......@@ -86,7 +86,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "subl %2,%0; sete %1"
LOCK_PREFIX "subl %2,%0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
......@@ -101,7 +101,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
static __inline__ void atomic_inc(atomic_t *v)
{
__asm__ __volatile__(
LOCK "incl %0"
LOCK_PREFIX "incl %0"
:"=m" (v->counter)
:"m" (v->counter));
}
......@@ -115,7 +115,7 @@ static __inline__ void atomic_inc(atomic_t *v)
static __inline__ void atomic_dec(atomic_t *v)
{
__asm__ __volatile__(
LOCK "decl %0"
LOCK_PREFIX "decl %0"
:"=m" (v->counter)
:"m" (v->counter));
}
......@@ -133,7 +133,7 @@ static __inline__ int atomic_dec_and_test(atomic_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "decl %0; sete %1"
LOCK_PREFIX "decl %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
......@@ -152,7 +152,7 @@ static __inline__ int atomic_inc_and_test(atomic_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "incl %0; sete %1"
LOCK_PREFIX "incl %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
......@@ -172,7 +172,7 @@ static __inline__ int atomic_add_negative(int i, atomic_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "addl %2,%0; sets %1"
LOCK_PREFIX "addl %2,%0; sets %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
......@@ -189,7 +189,7 @@ static __inline__ int atomic_add_return(int i, atomic_t *v)
{
int __i = i;
__asm__ __volatile__(
LOCK "xaddl %0, %1;"
LOCK_PREFIX "xaddl %0, %1;"
:"=r"(i)
:"m"(v->counter), "0"(i));
return i + __i;
......@@ -237,7 +237,7 @@ typedef struct { volatile long counter; } atomic64_t;
static __inline__ void atomic64_add(long i, atomic64_t *v)
{
__asm__ __volatile__(
LOCK "addq %1,%0"
LOCK_PREFIX "addq %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
......@@ -252,7 +252,7 @@ static __inline__ void atomic64_add(long i, atomic64_t *v)
static __inline__ void atomic64_sub(long i, atomic64_t *v)
{
__asm__ __volatile__(
LOCK "subq %1,%0"
LOCK_PREFIX "subq %1,%0"
:"=m" (v->counter)
:"ir" (i), "m" (v->counter));
}
......@@ -271,7 +271,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "subq %2,%0; sete %1"
LOCK_PREFIX "subq %2,%0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
......@@ -286,7 +286,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v)
static __inline__ void atomic64_inc(atomic64_t *v)
{
__asm__ __volatile__(
LOCK "incq %0"
LOCK_PREFIX "incq %0"
:"=m" (v->counter)
:"m" (v->counter));
}
......@@ -300,7 +300,7 @@ static __inline__ void atomic64_inc(atomic64_t *v)
static __inline__ void atomic64_dec(atomic64_t *v)
{
__asm__ __volatile__(
LOCK "decq %0"
LOCK_PREFIX "decq %0"
:"=m" (v->counter)
:"m" (v->counter));
}
......@@ -318,7 +318,7 @@ static __inline__ int atomic64_dec_and_test(atomic64_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "decq %0; sete %1"
LOCK_PREFIX "decq %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
......@@ -337,7 +337,7 @@ static __inline__ int atomic64_inc_and_test(atomic64_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "incq %0; sete %1"
LOCK_PREFIX "incq %0; sete %1"
:"=m" (v->counter), "=qm" (c)
:"m" (v->counter) : "memory");
return c != 0;
......@@ -357,7 +357,7 @@ static __inline__ int atomic64_add_negative(long i, atomic64_t *v)
unsigned char c;
__asm__ __volatile__(
LOCK "addq %2,%0; sets %1"
LOCK_PREFIX "addq %2,%0; sets %1"
:"=m" (v->counter), "=qm" (c)
:"ir" (i), "m" (v->counter) : "memory");
return c;
......@@ -374,7 +374,7 @@ static __inline__ long atomic64_add_return(long i, atomic64_t *v)
{
long __i = i;
__asm__ __volatile__(
LOCK "xaddq %0, %1;"
LOCK_PREFIX "xaddq %0, %1;"
:"=r"(i)
:"m"(v->counter), "0"(i));
return i + __i;
......@@ -418,11 +418,11 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t *v)
/* These are x86-specific, used by some header files */
#define atomic_clear_mask(mask, addr) \
__asm__ __volatile__(LOCK "andl %0,%1" \
__asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \
: : "r" (~(mask)),"m" (*addr) : "memory")
#define atomic_set_mask(mask, addr) \
__asm__ __volatile__(LOCK "orl %0,%1" \
__asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \
: : "r" ((unsigned)mask),"m" (*(addr)) : "memory")
/* Atomic operations are already serializing on x86 */
......
......@@ -5,12 +5,7 @@
* Copyright 1992, Linus Torvalds.
*/
#ifdef CONFIG_SMP
#define LOCK_PREFIX "lock ; "
#else
#define LOCK_PREFIX ""
#endif
#include <asm/alternative.h>
#define ADDR (*(volatile long *) addr)
......
......@@ -65,6 +65,8 @@
#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */
#define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */
#define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
......
......@@ -24,7 +24,7 @@ do { \
typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
\
__asm__ __volatile__( \
LOCK " decl (%%rdi) \n" \
LOCK_PREFIX " decl (%%rdi) \n" \
" js 2f \n" \
"1: \n" \
\
......@@ -74,7 +74,7 @@ do { \
typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
\
__asm__ __volatile__( \
LOCK " incl (%%rdi) \n" \
LOCK_PREFIX " incl (%%rdi) \n" \
" jle 2f \n" \
"1: \n" \
\
......
......@@ -24,7 +24,7 @@
#define RW_LOCK_BIAS_STR "0x01000000"
#define __build_read_lock_ptr(rw, helper) \
asm volatile(LOCK "subl $1,(%0)\n\t" \
asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \
"js 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
......@@ -34,7 +34,7 @@
::"a" (rw) : "memory")
#define __build_read_lock_const(rw, helper) \
asm volatile(LOCK "subl $1,%0\n\t" \
asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \
"js 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
......@@ -54,7 +54,7 @@
} while (0)
#define __build_write_lock_ptr(rw, helper) \
asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
"jnz 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
......@@ -64,7 +64,7 @@
::"a" (rw) : "memory")
#define __build_write_lock_const(rw, helper) \
asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
"jnz 2f\n" \
"1:\n" \
LOCK_SECTION_START("") \
......
......@@ -106,7 +106,7 @@ static inline void down(struct semaphore * sem)
__asm__ __volatile__(
"# atomic down operation\n\t"
LOCK "decl %0\n\t" /* --sem->count */
LOCK_PREFIX "decl %0\n\t" /* --sem->count */
"js 2f\n"
"1:\n"
LOCK_SECTION_START("")
......@@ -130,7 +130,7 @@ static inline int down_interruptible(struct semaphore * sem)
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
LOCK "decl %1\n\t" /* --sem->count */
LOCK_PREFIX "decl %1\n\t" /* --sem->count */
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
......@@ -154,7 +154,7 @@ static inline int down_trylock(struct semaphore * sem)
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
LOCK "decl %1\n\t" /* --sem->count */
LOCK_PREFIX "decl %1\n\t" /* --sem->count */
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
......@@ -178,7 +178,7 @@ static inline void up(struct semaphore * sem)
{
__asm__ __volatile__(
"# atomic up operation\n\t"
LOCK "incl %0\n\t" /* ++sem->count */
LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
"jle 2f\n"
"1:\n"
LOCK_SECTION_START("")
......
......@@ -31,15 +31,19 @@
"jmp 1b\n" \
LOCK_SECTION_END
#define __raw_spin_lock_string_up \
"\n\tdecl %0"
#define __raw_spin_unlock_string \
"movl $1,%0" \
:"=m" (lock->slock) : : "memory"
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
__asm__ __volatile__(
__raw_spin_lock_string
:"=m" (lock->slock) : : "memory");
alternative_smp(
__raw_spin_lock_string,
__raw_spin_lock_string_up,
"=m" (lock->slock) : : "memory");
}
#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
......
......@@ -3,15 +3,10 @@
#include <linux/kernel.h>
#include <asm/segment.h>
#include <asm/alternative.h>
#ifdef __KERNEL__
#ifdef CONFIG_SMP
#define LOCK_PREFIX "lock ; "
#else
#define LOCK_PREFIX ""
#endif
#define __STR(x) #x
#define STR(x) __STR(x)
......@@ -34,7 +29,7 @@
"thread_return:\n\t" \
"movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
"jc ret_from_fork\n\t" \
RESTORE_CONTEXT \
......@@ -69,82 +64,6 @@ extern void load_gs_index(unsigned);
".previous" \
: :"r" (value), "r" (0))
#ifdef __KERNEL__
struct alt_instr {
__u8 *instr; /* original instruction */
__u8 *replacement;
__u8 cpuid; /* cpuid bit set for replacement */
__u8 instrlen; /* length of original instruction */
__u8 replacementlen; /* length of new instruction, <= instrlen */
__u8 pad[5];
};
#endif
/*
* Alternative instructions for different CPU types or capabilities.
*
* This allows to use optimized instructions even on generic binary
* kernels.
*
* length of oldinstr must be longer or equal the length of newinstr
* It can be padded with nops as needed.
*
* For non barrier like inlines please define new variants
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, feature) \
asm volatile ("661:\n\t" oldinstr "\n662:\n" \
".section .altinstructions,\"a\"\n" \
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
".previous\n" \
".section .altinstr_replacement,\"ax\"\n" \
"663:\n\t" newinstr "\n664:\n" /* replacement */ \
".previous" :: "i" (feature) : "memory")
/*
* Alternative inline assembly with input.
*
* Peculiarities:
* No memory clobber here.
* Argument numbers start with 1.
* Best is to use constraints that are fixed size (like (%1) ... "r")
* If you use variable sized constraints like "m" or "g" in the
* replacement make sure to pad to the worst case length.
*/
#define alternative_input(oldinstr, newinstr, feature, input...) \
asm volatile ("661:\n\t" oldinstr "\n662:\n" \
".section .altinstructions,\"a\"\n" \
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
" .byte %c0\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
".previous\n" \
".section .altinstr_replacement,\"ax\"\n" \
"663:\n\t" newinstr "\n664:\n" /* replacement */ \
".previous" :: "i" (feature), ##input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \
asm volatile ("661:\n\t" oldinstr "\n662:\n" \
".section .altinstructions,\"a\"\n" \
" .align 8\n" \
" .quad 661b\n" /* label */ \
" .quad 663f\n" /* new instruction */ \
" .byte %c[feat]\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
".previous\n" \
".section .altinstr_replacement,\"ax\"\n" \
"663:\n\t" newinstr "\n664:\n" /* replacement */ \
".previous" : output : [feat] "i" (feature), ##input)
/*
* Clear and set 'TS' bit respectively
*/
......@@ -366,5 +285,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
void cpu_idle_wait(void);
extern unsigned long arch_align_stack(unsigned long sp);
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment