Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
a31a4dea
Commit
a31a4dea
authored
Apr 29, 2003
by
Andi Kleen
Committed by
Christoph Hellwig
Apr 29, 2003
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[PATCH] Update alt_instr to handle SSE2 prefetch and better nops
parent
946f68b9
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
207 additions
and
48 deletions
+207
-48
arch/i386/Kconfig
arch/i386/Kconfig
+8
-11
arch/i386/kernel/cpu/amd.c
arch/i386/kernel/cpu/amd.c
+9
-0
arch/i386/kernel/cpu/intel.c
arch/i386/kernel/cpu/intel.c
+5
-0
arch/i386/kernel/setup.c
arch/i386/kernel/setup.c
+69
-19
arch/i386/vmlinux.lds.S
arch/i386/vmlinux.lds.S
+1
-0
include/asm-i386/cpufeature.h
include/asm-i386/cpufeature.h
+5
-0
include/asm-i386/processor.h
include/asm-i386/processor.h
+75
-13
include/asm-i386/system.h
include/asm-i386/system.h
+35
-5
No files found.
arch/i386/Kconfig
View file @
a31a4dea
...
@@ -273,6 +273,13 @@ config MVIAC3_2
...
@@ -273,6 +273,13 @@ config MVIAC3_2
endchoice
endchoice
config X86_GENERIC
bool "Generic x86 support"
help
Include some tuning for non-selected x86 CPUs too,
when it has moderate overhead. This is intended for generic
distribution kernels.
#
#
# Define implied options from the CPU selection here
# Define implied options from the CPU selection here
#
#
...
@@ -288,10 +295,10 @@ config X86_XADD
...
@@ -288,10 +295,10 @@ config X86_XADD
config X86_L1_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
int
default "7" if MPENTIUM4 || X86_GENERIC
default "4" if MELAN || M486 || M386
default "4" if MELAN || M486 || M386
default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
default "6" if MK7 || MK8
default "6" if MK7 || MK8
default "7" if MPENTIUM4
config RWSEM_GENERIC_SPINLOCK
config RWSEM_GENERIC_SPINLOCK
bool
bool
...
@@ -363,16 +370,6 @@ config X86_OOSTORE
...
@@ -363,16 +370,6 @@ config X86_OOSTORE
depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6
depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6
default y
default y
config X86_PREFETCH
bool
depends on MPENTIUMIII || MPENTIUM4 || MVIAC3_2
default y
config X86_SSE2
bool
depends on MK8 || MPENTIUM4
default y
config HUGETLB_PAGE
config HUGETLB_PAGE
bool "Huge TLB Page Support"
bool "Huge TLB Page Support"
help
help
...
...
arch/i386/kernel/cpu/amd.c
View file @
a31a4dea
...
@@ -178,6 +178,15 @@ static void __init init_amd(struct cpuinfo_x86 *c)
...
@@ -178,6 +178,15 @@ static void __init init_amd(struct cpuinfo_x86 *c)
break
;
break
;
}
}
switch
(
c
->
x86
)
{
case
15
:
set_bit
(
X86_FEATURE_K8
,
c
->
x86_capability
);
break
;
case
6
:
set_bit
(
X86_FEATURE_K7
,
c
->
x86_capability
);
break
;
}
display_cacheinfo
(
c
);
display_cacheinfo
(
c
);
}
}
...
...
arch/i386/kernel/cpu/intel.c
View file @
a31a4dea
...
@@ -353,6 +353,11 @@ static void __init init_intel(struct cpuinfo_x86 *c)
...
@@ -353,6 +353,11 @@ static void __init init_intel(struct cpuinfo_x86 *c)
break
;
break
;
}
}
#endif
#endif
if
(
c
->
x86
==
15
)
set_bit
(
X86_FEATURE_P4
,
c
->
x86_capability
);
if
(
c
->
x86
==
6
)
set_bit
(
X86_FEATURE_P3
,
c
->
x86_capability
);
}
}
...
...
arch/i386/kernel/setup.c
View file @
a31a4dea
...
@@ -795,41 +795,91 @@ static void __init register_memory(unsigned long max_low_pfn)
...
@@ -795,41 +795,91 @@ static void __init register_memory(unsigned long max_low_pfn)
pci_mem_start
=
low_mem_size
;
pci_mem_start
=
low_mem_size
;
}
}
/*
 * Raw nop byte tables, one per CPU family.  Behind each label the
 * 1-byte through 8-byte nop sequences are stored back to back in
 * increasing length.
 *
 * The tables are emitted with top-level inline assembly because the
 * *_NOPn definitions in the include files are assembler strings, which
 * cannot easily be turned into C string initializers.
 *
 * NOTE(review): no section directive is given here, so the bytes are
 * placed in whatever section the assembler is in at this point --
 * confirm that this is intended.
 */
asm("intelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4
	GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8);

asm("k8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4
	K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8);

asm("k7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4
	K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8);

/* C-visible names for the assembler labels defined just above. */
extern unsigned char intelnops[];
extern unsigned char k8nops[];
extern unsigned char k7nops[];
/*
 * Index table into intelnops[]: entry n points at the n-byte nop.
 * The sequences are stored back to back in increasing length, so the
 * n-byte one starts at offset 1 + 2 + ... + (n - 1).  Entry 0 is NULL
 * because there is no zero-length nop.
 */
static unsigned char *intel_nops[ASM_NOP_MAX + 1] = {
	NULL,
	intelnops,		/* 1-byte nop, offset 0 */
	intelnops + 1,		/* 2-byte nop */
	intelnops + 3,		/* 3-byte nop */
	intelnops + 6,		/* 4-byte nop */
	intelnops + 10,		/* 5-byte nop */
	intelnops + 15,		/* 6-byte nop */
	intelnops + 21,		/* 7-byte nop */
	intelnops + 28,		/* 8-byte nop */
};
/*
 * Index table into k8nops[]: entry n points at the n-byte nop.
 * The sequences are stored back to back in increasing length, so the
 * n-byte one starts at offset 1 + 2 + ... + (n - 1).  Entry 0 is NULL
 * because there is no zero-length nop.
 */
static unsigned char *k8_nops[ASM_NOP_MAX + 1] = {
	NULL,
	k8nops,			/* 1-byte nop, offset 0 */
	k8nops + 1,		/* 2-byte nop */
	k8nops + 3,		/* 3-byte nop */
	k8nops + 6,		/* 4-byte nop */
	k8nops + 10,		/* 5-byte nop */
	k8nops + 15,		/* 6-byte nop */
	k8nops + 21,		/* 7-byte nop */
	k8nops + 28,		/* 8-byte nop */
};
/*
 * Index table into k7nops[]: entry n points at the n-byte nop.
 * The sequences are stored back to back in increasing length, so the
 * n-byte one starts at offset 1 + 2 + ... + (n - 1).  Entry 0 is NULL
 * because there is no zero-length nop.
 */
static unsigned char *k7_nops[ASM_NOP_MAX + 1] = {
	NULL,
	k7nops,			/* 1-byte nop, offset 0 */
	k7nops + 1,		/* 2-byte nop */
	k7nops + 3,		/* 3-byte nop */
	k7nops + 6,		/* 4-byte nop */
	k7nops + 10,		/* 5-byte nop */
	k7nops + 15,		/* 6-byte nop */
	k7nops + 21,		/* 7-byte nop */
	k7nops + 28,		/* 8-byte nop */
};
/*
 * Associates a CPU-type feature bit with the nop table to use on that
 * CPU.  Entries are listed in lookup order; the list is terminated by
 * an entry whose cpuid is negative.
 */
static struct nop {
	int cpuid;			/* X86_FEATURE_* bit naming the CPU type */
	unsigned char **noptable;	/* nop index table for that CPU type */
} noptypes[] = {
	{ .cpuid = X86_FEATURE_K8, .noptable = k8_nops },
	{ .cpuid = X86_FEATURE_K7, .noptable = k7_nops },
	{ .cpuid = -1, .noptable = NULL },	/* end marker */
};
/* Replace instructions with better alternatives for this CPU type.
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that asymmetric systems where
self modifying code. This implies that asymmetric systems where
APs have fewer capabilities than the boot processor are not handled.
APs have fewer capabilities than the boot processor are not handled.
In this case boot with "noreplacement". */
In this case boot with "noreplacement". */
void
apply_alternatives
(
void
*
start
,
void
*
end
)
void
apply_alternatives
(
void
*
start
,
void
*
end
)
{
{
struct
alt_instr
*
a
;
struct
alt_instr
*
a
;
int
diff
,
i
,
k
;
int
diff
,
i
,
k
;
unsigned
char
**
noptable
=
intel_nops
;
for
(
a
=
start
;
a
<
(
struct
alt_instr
*
)
end
;
for
(
i
=
0
;
noptypes
[
i
].
cpuid
>=
0
;
i
++
)
{
a
=
(
void
*
)
ALIGN
((
unsigned
long
)(
a
+
1
)
+
a
->
instrlen
,
4
))
{
if
(
boot_cpu_has
(
noptypes
[
i
].
cpuid
))
{
noptable
=
noptypes
[
i
].
noptable
;
break
;
}
}
for
(
a
=
start
;
(
void
*
)
a
<
end
;
a
++
)
{
if
(
!
boot_cpu_has
(
a
->
cpuid
))
if
(
!
boot_cpu_has
(
a
->
cpuid
))
continue
;
continue
;
BUG_ON
(
a
->
replacementlen
>
a
->
instrlen
);
BUG_ON
(
a
->
replacementlen
>
a
->
instrlen
);
memcpy
(
a
->
instr
,
a
->
replacement
,
a
->
replacementlen
);
memcpy
(
a
->
instr
,
a
->
replacement
,
a
->
replacementlen
);
diff
=
a
->
instrlen
-
a
->
replacementlen
;
diff
=
a
->
instrlen
-
a
->
replacementlen
;
/* Pad the rest with nops */
for
(
i
=
a
->
replacementlen
;
diff
>
0
;
diff
-=
k
,
i
+=
k
)
{
for
(
i
=
a
->
replacementlen
;
diff
>
0
;
diff
-=
k
,
i
+=
k
)
{
static
const
char
*
nops
[]
=
{
k
=
diff
;
0
,
if
(
k
>
ASM_NOP_MAX
)
"
\x90
"
,
k
=
ASM_NOP_MAX
;
#if CONFIG_MK7 || CONFIG_MK8
memcpy
(
a
->
instr
+
i
,
noptable
[
k
],
k
);
"
\x66\x90
"
,
"
\x66\x66\x90
"
,
"
\x66\x66\x66\x90
"
,
#else
"
\x89\xf6
"
,
"
\x8d\x76\x00
"
,
"
\x8d\x74\x26\x00
"
,
#endif
};
k
=
min_t
(
int
,
diff
,
ARRAY_SIZE
(
nops
));
memcpy
(
a
->
instr
+
i
,
nops
[
k
],
k
);
}
}
}
}
}
}
...
...
arch/i386/vmlinux.lds.S
View file @
a31a4dea
...
@@ -85,6 +85,7 @@ SECTIONS
...
@@ -85,6 +85,7 @@ SECTIONS
__alt_instructions
=
.
;
__alt_instructions
=
.
;
.
altinstructions
:
{
*(
.
altinstructions
)
}
.
altinstructions
:
{
*(
.
altinstructions
)
}
__alt_instructions_end
=
.
;
__alt_instructions_end
=
.
;
.
altinstr_replacement
:
{
*(
.
altinstr_replacement
)
}
.
=
ALIGN
(
4096
)
;
.
=
ALIGN
(
4096
)
;
__initramfs_start
=
.
;
__initramfs_start
=
.
;
.
init.ramfs
:
{
*(
.
init
.
ramfs
)
}
.
init.ramfs
:
{
*(
.
init
.
ramfs
)
}
...
...
include/asm-i386/cpufeature.h
View file @
a31a4dea
...
@@ -63,6 +63,11 @@
...
@@ -63,6 +63,11 @@
#define X86_FEATURE_K6_MTRR (3*32+ 1)
/* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_K6_MTRR (3*32+ 1)
/* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR (3*32+ 2)
/* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CYRIX_ARR (3*32+ 2)
/* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR (3*32+ 3)
/* Centaur MCRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR (3*32+ 3)
/* Centaur MCRs (= MTRRs) */
/* cpu types for specific tunings: */
#define X86_FEATURE_K8 (3*32+ 4)
/* Opteron, Athlon64 */
#define X86_FEATURE_K7 (3*32+ 5)
/* Athlon */
#define X86_FEATURE_P3 (3*32+ 6)
/* P3 */
#define X86_FEATURE_P4 (3*32+ 7)
/* P4 */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
...
...
include/asm-i386/processor.h
View file @
a31a4dea
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#include <asm/sigcontext.h>
#include <asm/sigcontext.h>
#include <asm/cpufeature.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/msr.h>
#include <asm/system.h>
#include <linux/cache.h>
#include <linux/cache.h>
#include <linux/config.h>
#include <linux/config.h>
#include <linux/threads.h>
#include <linux/threads.h>
...
@@ -495,32 +496,93 @@ static inline void rep_nop(void)
...
@@ -495,32 +496,93 @@ static inline void rep_nop(void)
#define cpu_relax() rep_nop()
#define cpu_relax() rep_nop()
/* Prefetch instructions for Pentium III and AMD Athlon */
/* generic versions from gas */
#ifdef CONFIG_X86_PREFETCH
/*
 * Multi-byte nop encodings, spelled as assembler ".byte" strings so
 * they can be pasted into inline assembly.  <FAMILY>_NOPn expands to an
 * n-byte filler; sizes without a dedicated encoding are composed from
 * two shorter nops of the same family.
 */
#define GENERIC_NOP1	".byte 0x90\n"
#define GENERIC_NOP2	".byte 0x89,0xf6\n"
#define GENERIC_NOP3	".byte 0x8d,0x76,0x00\n"
#define GENERIC_NOP4	".byte 0x8d,0x74,0x26,0x00\n"
#define GENERIC_NOP5	GENERIC_NOP1 GENERIC_NOP4
#define GENERIC_NOP6	".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
#define GENERIC_NOP7	".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
#define GENERIC_NOP8	GENERIC_NOP1 GENERIC_NOP7

/* Opteron nops */
#define K8_NOP1		GENERIC_NOP1
#define K8_NOP2		".byte 0x66,0x90\n"
#define K8_NOP3		".byte 0x66,0x66,0x90\n"
#define K8_NOP4		".byte 0x66,0x66,0x66,0x90\n"
#define K8_NOP5		K8_NOP3 K8_NOP2
#define K8_NOP6		K8_NOP3 K8_NOP3
#define K8_NOP7		K8_NOP4 K8_NOP3
#define K8_NOP8		K8_NOP4 K8_NOP4

/* K7 nops */
/* uses eax dependencies (arbitrary choice) */
#define K7_NOP1		GENERIC_NOP1
#define K7_NOP2		".byte 0x8b,0xc0\n"
#define K7_NOP3		".byte 0x8d,0x04,0x20\n"
#define K7_NOP4		".byte 0x8d,0x44,0x20,0x00\n"
/*
 * The composed K7 sizes use K7_NOP1 so the K7 family does not depend on
 * ASM_NOP1, which is only selected further down.  Every family's NOP1
 * is GENERIC_NOP1, so the emitted bytes do not change.
 */
#define K7_NOP5		K7_NOP4 K7_NOP1
#define K7_NOP6		".byte 0x8d,0x80,0,0,0,0\n"
#define K7_NOP7		".byte 0x8D,0x04,0x05,0,0,0,0\n"
#define K7_NOP8		K7_NOP7 K7_NOP1

/*
 * ASM_NOPn: the nop family matching the CPU the kernel is built for.
 * Both branches test with defined() so an unset option is never
 * evaluated as an undefined identifier.
 */
#ifdef CONFIG_MK8
#define ASM_NOP1	K8_NOP1
#define ASM_NOP2	K8_NOP2
#define ASM_NOP3	K8_NOP3
#define ASM_NOP4	K8_NOP4
#define ASM_NOP5	K8_NOP5
#define ASM_NOP6	K8_NOP6
#define ASM_NOP7	K8_NOP7
#define ASM_NOP8	K8_NOP8
#elif defined(CONFIG_MK7)
#define ASM_NOP1	K7_NOP1
#define ASM_NOP2	K7_NOP2
#define ASM_NOP3	K7_NOP3
#define ASM_NOP4	K7_NOP4
#define ASM_NOP5	K7_NOP5
#define ASM_NOP6	K7_NOP6
#define ASM_NOP7	K7_NOP7
#define ASM_NOP8	K7_NOP8
#else
#define ASM_NOP1	GENERIC_NOP1
#define ASM_NOP2	GENERIC_NOP2
#define ASM_NOP3	GENERIC_NOP3
#define ASM_NOP4	GENERIC_NOP4
#define ASM_NOP5	GENERIC_NOP5
#define ASM_NOP6	GENERIC_NOP6
#define ASM_NOP7	GENERIC_NOP7
#define ASM_NOP8	GENERIC_NOP8
#endif

/* Longest nop sequence provided by the tables above, in bytes. */
#define ASM_NOP_MAX 8
/* Prefetch instructions for Pentium III and AMD Athlon */
/* It's not worth caring about 3DNow! prefetches on the K6
because they are microcoded there and very slow. */
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCH
extern
inline
void
prefetch
(
const
void
*
x
)
extern
inline
void
prefetch
(
const
void
*
x
)
{
{
__asm__
__volatile__
(
"prefetchnta (%0)"
:
:
"r"
(
x
));
alternative_input
(
ASM_NOP3
,
"prefetchnta (%1)"
,
X86_FEATURE_XMM
,
"r"
(
x
));
}
}
#elif defined CONFIG_X86_USE_3DNOW
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
#define ARCH_HAS_PREFETCHW
#define ARCH_HAS_SPINLOCK_PREFETCH
#define ARCH_HAS_SPINLOCK_PREFETCH
extern
inline
void
prefetch
(
const
void
*
x
)
/* 3dnow! prefetch to get an exclusive cache line. Useful for
{
spinlocks to avoid one state transition in the cache coherency protocol. */
__asm__
__volatile__
(
"prefetch (%0)"
:
:
"r"
(
x
));
}
extern
inline
void
prefetchw
(
const
void
*
x
)
extern
inline
void
prefetchw
(
const
void
*
x
)
{
{
__asm__
__volatile__
(
"prefetchw (%0)"
:
:
"r"
(
x
));
alternative_input
(
ASM_NOP3
,
"prefetchw (%1)"
,
X86_FEATURE_3DNOW
,
"r"
(
x
));
}
}
#define spin_lock_prefetch(x) prefetchw(x)
#define spin_lock_prefetch(x) prefetchw(x)
#endif
#endif
/* __ASM_I386_PROCESSOR_H */
#endif
/* __ASM_I386_PROCESSOR_H */
include/asm-i386/system.h
View file @
a31a4dea
...
@@ -277,13 +277,16 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
...
@@ -277,13 +277,16 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */
/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */
#endif
#endif
#ifdef __KERNEL__
struct
alt_instr
{
struct
alt_instr
{
u8
*
instr
;
/* original instruction */
__u8
*
instr
;
/* original instruction */
u8
cpuid
;
/* cpuid bit set for replacement */
__u8
*
replacement
;
u8
instrlen
;
/* length of original instruction */
__u8
cpuid
;
/* cpuid bit set for replacement */
u8
replacementlen
;
/* length of new instruction, <= instrlen */
__u8
instrlen
;
/* length of original instruction */
u8
replacement
[
0
];
/* new instruction */
__u8
replacementlen
;
/* length of new instruction, <= instrlen */
__u8
pad
;
};
};
#endif
/*
/*
* Alternative instructions for different CPU types or capabilities.
* Alternative instructions for different CPU types or capabilities.
...
@@ -302,12 +305,39 @@ struct alt_instr {
...
@@ -302,12 +305,39 @@ struct alt_instr {
".section .altinstructions,\"a\"\n" \
".section .altinstructions,\"a\"\n" \
" .align 4\n" \
" .align 4\n" \
" .long 661b\n"
/* label */
\
" .long 661b\n"
/* label */
\
" .long 663f\n"
/* new instruction */
\
" .byte %c0\n"
/* feature bit */
\
" .byte %c0\n"
/* feature bit */
\
" .byte 662b-661b\n"
/* sourcelen */
\
" .byte 662b-661b\n"
/* sourcelen */
\
" .byte 664f-663f\n"
/* replacementlen */
\
" .byte 664f-663f\n"
/* replacementlen */
\
".previous\n" \
".section .altinstr_replacement,\"ax\"\n" \
"663:\n\t" newinstr "\n664:\n"
/* replacement */
\
"663:\n\t" newinstr "\n664:\n"
/* replacement */
\
".previous" :: "i" (feature) : "memory")
".previous" :: "i" (feature) : "memory")
/*
 * Alternative inline assembly with input.
 *
 * Emits oldinstr in place and adds a record to .altinstructions holding
 * the address of oldinstr, the address of newinstr, the feature bit and
 * the two instruction lengths.  newinstr itself is assembled into
 * .altinstr_replacement.
 *
 * Peculiarities:
 * No memory clobber here.
 * Argument numbers start with 1 (operand 0 is the feature bit).
 * Best is to use constraints that are fixed size (like (%1) ... "r").
 * If you use variable sized constraints like "m" or "g" in the
 * replacement, make sure to pad to the worst case length.
 */
#define alternative_input(oldinstr, newinstr, feature, input)		\
	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
		      ".section .altinstructions,\"a\"\n"		\
		      " .align 4\n"					\
		      " .long 661b\n"	    /* label */			\
		      " .long 663f\n"	    /* new instruction */	\
		      " .byte %c0\n"	    /* feature bit */		\
		      " .byte 662b-661b\n"  /* sourcelen */		\
		      " .byte 664f-663f\n"  /* replacementlen */	\
		      ".previous\n"					\
		      ".section .altinstr_replacement,\"ax\"\n"		\
		      "663:\n\t" newinstr "\n664:\n" /* replacement */	\
		      ".previous" :: "i" (feature), input)
/*
/*
* Force strict CPU ordering.
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking
* And yes, this is required on UP too when we're talking
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment