Commit 36bc33ba authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] NX (No eXecute) support for x86

We'd like to announce the availability of the following kernel patch:

     http://redhat.com/~mingo/nx-patches/nx-2.6.7-rc2-bk2-AE

which makes use of the 'NX' x86 feature pioneered in AMD64 CPUs and for
which support has also been announced by Intel. (Other x86 CPU vendors,
Transmeta and VIA, have announced support as well, and Microsoft has
announced Windows support for NX in their next service pack.) The NX
feature is also being marketed as 'Enhanced Virus Protection'. This
patch makes sure Linux has full support for this hardware feature on
x86 too.

What does this patch do? The pagetable format of current x86 CPUs does
not have an 'execute' bit. This means that even if an application maps a
memory area without PROT_EXEC, the CPU will still allow code to be
executed in this memory. This property is often abused by exploits when
they manage to inject hostile code into this memory, for example via a
buffer overflow.
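
To make the problem concrete, here is a minimal user-space demo (not
part of the patch; written purely for illustration). It copies a
trivial function into an anonymous mapping created without PROT_EXEC
and then jumps into it. Without NX enforcement the call succeeds and
returns 42; with this patch on NX-capable hardware the process gets
SIGSEGV instead:

    /* demo.c - illustration only, not from the patch */
    #include <string.h>
    #include <sys/mman.h>

    static int return42(void) { return 42; }

    int main(void)
    {
            /* readable and writable, but deliberately NOT PROT_EXEC: */
            void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            memcpy(buf, return42, 64);      /* "inject" code into data  */
            return ((int (*)(void))buf)();  /* execute off a data page  */
    }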

The NX feature changes this by adding a 'don't execute' bit to the PAE
pagetable format. But since the bit defaults to zero (for compatibility
reasons), all pages are executable by default and the kernel has to be
taught to make use of this bit.
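
In the 64-bit PAE entry format this new bit is the topmost one, bit 63
(the patch calls it _PAGE_BIT_NX, see the pgtable.h hunk below). Here
is a minimal sketch of what 'marked no-exec' means at the PTE level;
the helper function is ours for illustration, not part of the patch:

    /* bit 63 of a 64-bit PAE pagetable entry is the NX bit;
     * 0 means executable, for backwards compatibility: */
    #define _PAGE_BIT_NX    63
    #define _PAGE_NX        (1ULL << _PAGE_BIT_NX)

    /* illustrative helper only: */
    static inline int pte_exec_disabled(unsigned long long pte)
    {
            return (pte & _PAGE_NX) != 0;
    }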

If the NX feature is supported by the CPU, the patched kernel turns NX
on and enforces userspace executability constraints such as a no-exec
stack and no-exec mmap and data areas. This means less chance for stack
overflows and buffer overflows to result in exploits.
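
Enabling the feature itself boils down to a single MSR write at boot.
Here is a condensed sketch of the sequence (the real code is the
set_nx() function and the head.S change further down in this patch;
the function name below is made up): check CPUID leaf 0x80000001, EDX
bit 20, and if the CPU reports Execute Disable, set bit 11 (NX enable)
in the EFER MSR:

    static void __init enable_nx_sketch(void)    /* hypothetical name */
    {
            unsigned int eax, ebx, ecx, edx, l, h;

            cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
            if (edx & (1 << 20)) {               /* Execute Disable bit */
                    rdmsr(MSR_EFER, l, h);
                    l |= EFER_NX;                /* bit 11: NX enable   */
                    wrmsr(MSR_EFER, l, h);
            }
    }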

Furthermore, the patch also implements 'NX protection' for kernelspace
code: only the kernel code and modules are executable - so even
kernel-space overflows are harder (in some cases, impossible) to
exploit. Here is how kernel code that tries to execute off the stack is
stopped:

 kernel tried to access NX-protected page - exploit attempt? (uid: 500)
 Unable to handle kernel paging request at virtual address f78d0f40
  printing eip:
 ...

The patch is based on a prototype NX patch written for 2.4 by Intel -
special thanks go to Suresh Siddha and Jun Nakajima @ Intel. The
existing NX support in the 64-bit x86_64 kernels has been written by
Andi Kleen and this patch is modeled after his code.

Arjan van de Ven has also provided lots of feedback and he has
integrated the patch into the Fedora Core 2 kernel. Test rpms are
available for download at:

    http://redhat.com/~arjanv/2.6/RPMS.kernel/

The kernel-2.6.6-1.411 rpms have the NX patch applied.

Here's a quickstart guide for recompiling the vanilla kernel from
source with the NX patch:

    http://redhat.com/~mingo/nx-patches/QuickStart-NX.txt

Update:

 - make the heap non-executable on PT_GNU_STACK binaries.

 - make all data mmap()s (and the heap) executable on !PT_GNU_STACK
   (legacy) binaries. This has no functional effect on non-NX CPUs
   (apart from the extra 'x' bit displayed in /proc/PID/maps), but
   should be much more compatible on NX CPUs. A condensed sketch of
   the loader logic follows right after this list.
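
The check lives in the ELF loader; simplified from the
load_elf_binary() change in this patch (variable names abbreviated), a
binary counts as 'legacy' when none of its program headers is
PT_GNU_STACK, and only then do its data mappings stay executable:

    int has_gnu_stack = 0;

    for (i = 0; i < elf_ex.e_phnum; i++) {
            if (elf_phdata[i].p_type != PT_GNU_STACK)
                    continue;
            has_gnu_stack = 1;
            executable_stack = (elf_phdata[i].p_flags & PF_X) ?
                    EXSTACK_ENABLE_X : EXSTACK_DISABLE_X;
            break;
    }
    if (!has_gnu_stack)     /* legacy binary: keep data/heap executable */
            def_flags |= VM_EXEC | VM_MAYEXEC;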
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent cfe7f4f6
@@ -27,7 +27,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
         /* AMD-defined */
         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
         NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-        NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL,
+        NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
         NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow",

         /* Transmeta-defined */
...
@@ -153,6 +153,32 @@ ENTRY(startup_32_smp)
         orl %edx,%eax
         movl %eax,%cr4

+        btl $5, %eax            # check if PAE is enabled
+        jnc 6f
+
+        /* Check if extended functions are implemented */
+        movl $0x80000000, %eax
+        cpuid
+        cmpl $0x80000000, %eax
+        jbe 6f
+        mov $0x80000001, %eax
+        cpuid
+        /* Execute Disable bit supported? */
+        btl $20, %edx
+        jnc 6f
+
+        /* Setup EFER (Extended Feature Enable Register) */
+        movl $0xc0000080, %ecx
+        rdmsr
+
+        btsl $11, %eax
+        /* Make changes effective */
+        wrmsr
+
+6:
+        /* cpuid clobbered ebx, set it up again: */
+        xorl %ebx,%ebx
+        incl %ebx
+
 3:
 #endif /* CONFIG_SMP */
...
@@ -32,7 +32,7 @@ void *module_alloc(unsigned long size)
 {
         if (size == 0)
                 return NULL;
-        return vmalloc(size);
+        return vmalloc_exec(size);
 }
...
@@ -45,7 +45,7 @@ static int __init sysenter_setup(void)
 {
         unsigned long page = get_zeroed_page(GFP_ATOMIC);

-        __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY);
+        __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);

         if (!boot_cpu_has(X86_FEATURE_SEP)) {
                 memcpy((void *) page,
...
@@ -405,6 +405,21 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
         bust_spinlocks(1);

+#ifdef CONFIG_X86_PAE
+        {
+                pgd_t *pgd;
+                pmd_t *pmd;
+
+                pgd = init_mm.pgd + pgd_index(address);
+                if (pgd_present(*pgd)) {
+                        pmd = pmd_offset(pgd, address);
+                        if (pmd_val(*pmd) & _PAGE_NX)
+                                printk(KERN_CRIT "kernel tried to access NX-protected page - exploit attempt? (uid: %d)\n", current->uid);
+                }
+        }
+#endif
+
         if (address < PAGE_SIZE)
                 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
         else
...
@@ -122,6 +122,13 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
         }
 }

+static inline int is_kernel_text(unsigned long addr)
+{
+        if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
+                return 1;
+        return 0;
+}
+
 /*
  * This maps the physical memory to kernel virtual address space, a total
  * of max_low_pfn pages, by creating page tables starting from address
@@ -144,18 +151,29 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
                 if (pfn >= max_low_pfn)
                         continue;
                 for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
+                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+
                         /* Map with big pages if possible, otherwise create normal page tables. */
                         if (cpu_has_pse) {
-                                set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+                                unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
+
+                                if (is_kernel_text(address) || is_kernel_text(address2))
+                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+                                else
+                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
                                 pfn += PTRS_PER_PTE;
                         } else {
                                 pte = one_page_table_init(pmd);

-                                for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++)
-                                        set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+                                for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+                                        if (is_kernel_text(address))
+                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+                                        else
+                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+                                }
                         }
                 }
         }
 }

 static inline int page_kills_ppro(unsigned long pagenr)
@@ -272,7 +290,8 @@ extern void set_highmem_pages_init(int);
 #define set_highmem_pages_init(bad_ppro) do { } while (0)
 #endif /* CONFIG_HIGHMEM */

-unsigned long __PAGE_KERNEL = _PAGE_KERNEL;
+unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;

 #ifndef CONFIG_DISCONTIGMEM
 #define remap_numa_kva() do {} while (0)
@@ -301,6 +320,7 @@ static void __init pagetable_init (void)
         if (cpu_has_pge) {
                 set_in_cr4(X86_CR4_PGE);
                 __PAGE_KERNEL |= _PAGE_GLOBAL;
+                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
         }

         kernel_physical_mapping_init(pgd_base);
@@ -391,6 +411,52 @@ void __init zone_sizes_init(void)
 extern void zone_sizes_init(void);
 #endif /* !CONFIG_DISCONTIGMEM */

+static int disable_nx __initdata = 0;
+u64 __supported_pte_mask = ~_PAGE_NX;
+
+/*
+ * noexec = on|off
+ *
+ * Control non executable mappings.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+        if (!strncmp(str, "on",2) && cpu_has_nx) {
+                __supported_pte_mask |= _PAGE_NX;
+                disable_nx = 0;
+        } else if (!strncmp(str,"off",3)) {
+                disable_nx = 1;
+                __supported_pte_mask &= ~_PAGE_NX;
+        }
+        return 1;
+}
+__setup("noexec=", noexec_setup);
+
+#ifdef CONFIG_X86_PAE
+static int use_nx = 0;
+
+static void __init set_nx(void)
+{
+        unsigned int v[4], l, h;
+
+        if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+                cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+                if ((v[3] & (1 << 20)) && !disable_nx) {
+                        rdmsr(MSR_EFER, l, h);
+                        l |= EFER_NX;
+                        wrmsr(MSR_EFER, l, h);
+                        use_nx = 1;
+                        __supported_pte_mask |= _PAGE_NX;
+                }
+        }
+}
+#endif
+
 /*
  * paging_init() sets up the page tables - note that the first 8MB are
  * already mapped by head.S.
@@ -400,6 +466,12 @@ extern void zone_sizes_init(void);
  */
 void __init paging_init(void)
 {
+#ifdef CONFIG_X86_PAE
+        set_nx();
+        if (use_nx)
+                printk("NX (Execute Disable) protection: active\n");
+#endif
+
         pagetable_init();

         load_cr3(swapper_pg_dir);
...
@@ -121,7 +121,7 @@ void *module_alloc(unsigned long size)
                         goto fail;
         }

-        if (map_vm_area(area, PAGE_KERNEL_EXECUTABLE, &pages))
+        if (map_vm_area(area, PAGE_KERNEL_EXEC, &pages))
                 goto fail;

         memset(addr, 0, size);
...
@@ -180,7 +180,7 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
                         unsigned long addr2;
                         addr2 = __START_KERNEL_map + page_to_phys(page);
                         err = __change_page_attr(addr2, page, prot,
-                                                 PAGE_KERNEL_EXECUTABLE);
+                                                 PAGE_KERNEL_EXEC);
                 }
         }
         up_write(&init_mm.mmap_sem);
...
@@ -490,6 +490,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
         char passed_fileno[6];
         struct files_struct *files;
         int executable_stack = EXSTACK_DEFAULT;
+        unsigned long def_flags = 0;

         /* Get the exec-header */
         elf_ex = *((struct elfhdr *) bprm->buf);
@@ -621,7 +622,10 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
                                 executable_stack = EXSTACK_ENABLE_X;
                         else
                                 executable_stack = EXSTACK_DISABLE_X;
+                        break;
                 }
+        if (i == elf_ex.e_phnum)
+                def_flags |= VM_EXEC | VM_MAYEXEC;

         /* Some simple consistency checks for the interpreter */
         if (elf_interpreter) {
@@ -689,6 +693,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
         current->mm->end_code = 0;
         current->mm->mmap = NULL;
         current->flags &= ~PF_FORKNOEXEC;
+        current->mm->def_flags = def_flags;

         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
            may depend on the personality. */
...
@@ -430,6 +430,7 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
                         mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
                 else
                         mpnt->vm_flags = VM_STACK_FLAGS;
+                mpnt->vm_flags |= mm->def_flags;
                 mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
                 insert_vm_struct(mm, mpnt);
                 mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
...
@@ -47,6 +47,7 @@
 /* Don't duplicate feature flags which are redundant with Intel! */
 #define X86_FEATURE_SYSCALL     (1*32+11) /* SYSCALL/SYSRET */
 #define X86_FEATURE_MP          (1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX          (1*32+20) /* Execute Disable */
 #define X86_FEATURE_MMXEXT      (1*32+22) /* AMD MMX extensions */
 #define X86_FEATURE_LM          (1*32+29) /* Long Mode (x86-64) */
 #define X86_FEATURE_3DNOWEXT    (1*32+30) /* AMD 3DNow! extensions */
@@ -100,6 +101,7 @@
 #define cpu_has_xmm             boot_cpu_has(X86_FEATURE_XMM)
 #define cpu_has_ht              boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp              boot_cpu_has(X86_FEATURE_MP)
+#define cpu_has_nx              boot_cpu_has(X86_FEATURE_NX)
 #define cpu_has_k6_mtrr         boot_cpu_has(X86_FEATURE_K6_MTRR)
 #define cpu_has_cyrix_arr       boot_cpu_has(X86_FEATURE_CYRIX_ARR)
 #define cpu_has_centaur_mcr     boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
...
@@ -217,6 +217,15 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val)
 #define MSR_K7_FID_VID_CTL      0xC0010041
 #define MSR_K7_FID_VID_STATUS   0xC0010042

+/* extended feature register */
+#define MSR_EFER                0xc0000080
+
+/* EFER bits: */
+
+/* Execute Disable enable */
+#define _EFER_NX                11
+#define EFER_NX                 (1<<_EFER_NX)
+
 /* Centaur-Hauls/IDT defined MSRs. */
 #define MSR_IDT_FCR1            0x107
 #define MSR_IDT_FCR2            0x108
...
@@ -40,15 +40,18 @@
  * These are used to make use of C type-checking..
  */
 #ifdef CONFIG_X86_PAE
+extern unsigned long long __supported_pte_mask;
 typedef struct { unsigned long pte_low, pte_high; } pte_t;
 typedef struct { unsigned long long pmd; } pmd_t;
 typedef struct { unsigned long long pgd; } pgd_t;
+typedef struct { unsigned long long pgprot; } pgprot_t;
 #define pte_val(x)      ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
 #define HPAGE_SHIFT     21
 #else
 typedef struct { unsigned long pte_low; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
 #define boot_pte_t pte_t /* or would you rather have a typedef */
 #define pte_val(x)      ((x).pte_low)
 #define HPAGE_SHIFT     22
@@ -61,7 +64,6 @@ typedef struct { unsigned long pgd; } pgd_t;
 #define HUGETLB_PAGE_ORDER      (HPAGE_SHIFT - PAGE_SHIFT)
 #endif

-typedef struct { unsigned long pgprot; } pgprot_t;
 #define pmd_val(x)      ((x).pmd)
 #define pgd_val(x)      ((x).pgd)
@@ -136,7 +138,7 @@ static __inline__ int get_order(unsigned long size)
 #define virt_addr_valid(kaddr)  pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

-#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_EXEC | \
+#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | \
                                  VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

 #endif /* __KERNEL__ */
...
@@ -101,18 +101,24 @@ static inline unsigned long pte_pfn(pte_t pte)
                 (pte.pte_high << (32 - PAGE_SHIFT));
 }

+extern unsigned long long __supported_pte_mask;
+
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
         pte_t pte;

-        pte.pte_high = page_nr >> (32 - PAGE_SHIFT);
-        pte.pte_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot);
+        pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \
+                        (pgprot_val(pgprot) >> 32);
+        pte.pte_high &= (__supported_pte_mask >> 32);
+        pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \
+                        __supported_pte_mask;
         return pte;
 }

 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-        return __pmd(((unsigned long long)page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
+        return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \
+                        pgprot_val(pgprot)) & __supported_pte_mask);
 }

 /*
...
@@ -110,6 +110,7 @@ void paging_init(void);
 #define _PAGE_BIT_UNUSED1       9       /* available for programmer */
 #define _PAGE_BIT_UNUSED2       10
 #define _PAGE_BIT_UNUSED3       11
+#define _PAGE_BIT_NX            63

 #define _PAGE_PRESENT   0x001
 #define _PAGE_RW        0x002
@@ -126,28 +127,51 @@ void paging_init(void);
 #define _PAGE_FILE      0x040   /* set:pagecache unset:swap */
 #define _PAGE_PROTNONE  0x080   /* If not present */

+#ifdef CONFIG_X86_PAE
+#define _PAGE_NX        (1ULL<<_PAGE_BIT_NX)
+#else
+#define _PAGE_NX        0
+#endif
+
 #define _PAGE_TABLE     (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _PAGE_CHG_MASK  (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)

-#define PAGE_NONE       __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED     __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY       __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_READONLY   __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_NONE \
+        __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED \
+        __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+
+#define PAGE_SHARED_EXEC \
+        __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC \
+        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY_EXEC \
+        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY \
+        PAGE_COPY_NOEXEC
+#define PAGE_READONLY \
+        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC \
+        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)

 #define _PAGE_KERNEL \
+        (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
+
+#define _PAGE_KERNEL_EXEC \
         (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)

-extern unsigned long __PAGE_KERNEL;
+extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define __PAGE_KERNEL_RO                (__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_NOCACHE           (__PAGE_KERNEL | _PAGE_PCD)
 #define __PAGE_KERNEL_LARGE             (__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC        (__PAGE_KERNEL_EXEC | _PAGE_PSE)

 #define PAGE_KERNEL             __pgprot(__PAGE_KERNEL)
 #define PAGE_KERNEL_RO          __pgprot(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC        __pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_NOCACHE     __pgprot(__PAGE_KERNEL_NOCACHE)
 #define PAGE_KERNEL_LARGE       __pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_EXEC  __pgprot(__PAGE_KERNEL_LARGE_EXEC)

 /*
  * The i386 can't do page protection for execute, and considers that
@@ -158,19 +182,19 @@ extern unsigned long __PAGE_KERNEL;
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_COPY
 #define __P011  PAGE_COPY
-#define __P100  PAGE_READONLY
-#define __P101  PAGE_READONLY
-#define __P110  PAGE_COPY
-#define __P111  PAGE_COPY
+#define __P100  PAGE_READONLY_EXEC
+#define __P101  PAGE_READONLY_EXEC
+#define __P110  PAGE_COPY_EXEC
+#define __P111  PAGE_COPY_EXEC

 #define __S000  PAGE_NONE
 #define __S001  PAGE_READONLY
 #define __S010  PAGE_SHARED
 #define __S011  PAGE_SHARED
-#define __S100  PAGE_READONLY
-#define __S101  PAGE_READONLY
-#define __S110  PAGE_SHARED
-#define __S111  PAGE_SHARED
+#define __S100  PAGE_READONLY_EXEC
+#define __S101  PAGE_READONLY_EXEC
+#define __S110  PAGE_SHARED_EXEC
+#define __S111  PAGE_SHARED_EXEC

 /*
  * Define this if things work differently on an i386 and an i486:
@@ -256,6 +280,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
         pte.pte_low &= _PAGE_CHG_MASK;
         pte.pte_low |= pgprot_val(newprot);
+#ifdef CONFIG_X86_PAE
+        /*
+         * Chop off the NX bit (if present), and add the NX portion of
+         * the newprot (if present):
+         */
+        pte.pte_high &= -1 ^ (1 << (_PAGE_BIT_NX - 32));
+        pte.pte_high |= (pgprot_val(newprot) >> 32) & \
+                        (__supported_pte_mask >> 32);
+#endif
         return pte;
 }
...
@@ -172,7 +172,7 @@ static inline void set_pml4(pml4_t *dst, pml4_t val)
 #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define __PAGE_KERNEL \
         (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
-#define __PAGE_KERNEL_EXECUTABLE \
+#define __PAGE_KERNEL_EXEC \
         (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define __PAGE_KERNEL_NOCACHE \
         (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
@@ -188,7 +188,7 @@ static inline void set_pml4(pml4_t *dst, pml4_t val)
 #define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)

 #define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
-#define PAGE_KERNEL_EXECUTABLE MAKE_GLOBAL(__PAGE_KERNEL_EXECUTABLE)
+#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
 #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
 #define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
...
@@ -23,6 +23,7 @@ struct vm_struct {
  * Highlevel APIs for driver use
  */
 extern void *vmalloc(unsigned long size);
+extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
 extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot);
 extern void vfree(void *addr);
...
@@ -454,6 +454,28 @@ void *vmalloc(unsigned long size)

 EXPORT_SYMBOL(vmalloc);

+/**
+ *      vmalloc_exec  -  allocate virtually contiguous, executable memory
+ *
+ *      @size:          allocation size
+ *
+ *      Kernel-internal function to allocate enough pages to cover @size
+ *      the page level allocator and map them into contiguous and
+ *      executable kernel virtual space.
+ *
+ *      For tight cotrol over page level allocator and protection flags
+ *      use __vmalloc() instead.
+ */
+
+#ifndef PAGE_KERNEL_EXEC
+# define PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
+void *vmalloc_exec(unsigned long size)
+{
+        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
+}
+
 /**
  * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
  *
...