Commit c9d3808f authored by Andrew Morton's avatar Andrew Morton Committed by Christoph Hellwig

[PATCH] hugetlb pages

Rohit Seth's ia32 huge tlb pages patch.

Anton Blanchard took a look at this today; he seemed happy
with it and said he could borrow bits.
parent fca174cc
...@@ -25,6 +25,15 @@ CONFIG_SMP ...@@ -25,6 +25,15 @@ CONFIG_SMP
If you don't know what to do here, say N. If you don't know what to do here, say N.
CONFIG_HUGETLB_PAGE
This enables support for huge pages. User space applications
can make use of this support with the sys_alloc_hugepages and
sys_free_hugepages system calls. If your applications are
huge page aware and your processor (Pentium or later for x86)
supports this, then say Y here.
Otherwise, say N.
CONFIG_PREEMPT CONFIG_PREEMPT
This option reduces the latency of the kernel when reacting to This option reduces the latency of the kernel when reacting to
real-time or interactive events by allowing a low priority process to real-time or interactive events by allowing a low priority process to
......
...@@ -154,6 +154,8 @@ if [ "$CONFIG_MWINCHIP3D" = "y" ]; then ...@@ -154,6 +154,8 @@ if [ "$CONFIG_MWINCHIP3D" = "y" ]; then
define_bool CONFIG_X86_OOSTORE y define_bool CONFIG_X86_OOSTORE y
fi fi
bool 'IA-32 Huge TLB Page Support (if available on processor)' CONFIG_HUGETLB_PAGE
bool 'Symmetric multi-processing support' CONFIG_SMP bool 'Symmetric multi-processing support' CONFIG_SMP
bool 'Preemptible Kernel' CONFIG_PREEMPT bool 'Preemptible Kernel' CONFIG_PREEMPT
if [ "$CONFIG_SMP" != "y" ]; then if [ "$CONFIG_SMP" != "y" ]; then
......
...@@ -759,8 +759,8 @@ ENTRY(sys_call_table) ...@@ -759,8 +759,8 @@ ENTRY(sys_call_table)
.long sys_io_getevents .long sys_io_getevents
.long sys_io_submit .long sys_io_submit
.long sys_io_cancel .long sys_io_cancel
.long sys_ni_syscall /* 250 */ /* sys_alloc_hugepages */ .long sys_alloc_hugepages /* 250 */
.long sys_ni_syscall /* sys_free_hugepages */ .long sys_free_hugepages
.long sys_exit_group .long sys_exit_group
.rept NR_syscalls-(.-sys_call_table)/4 .rept NR_syscalls-(.-sys_call_table)/4
......
...@@ -246,3 +246,94 @@ asmlinkage int sys_olduname(struct oldold_utsname * name) ...@@ -246,3 +246,94 @@ asmlinkage int sys_olduname(struct oldold_utsname * name)
return error; return error;
} }
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_ALIGN(x) (((unsigned long)x + (HPAGE_SIZE -1)) & HPAGE_MASK)
extern long sys_munmap(unsigned long, size_t);
/* get_addr function gets the currently unused virtaul range in
* current process's address space. It returns the LARGE_PAGE_SIZE
* aligned address (in cases of success). Other kernel generic
* routines only could gurantee that allocated address is PAGE_SIZSE aligned.
*/
static unsigned long
get_addr(unsigned long addr, unsigned long len)
{
struct vm_area_struct *vma;
if (addr) {
addr = HPAGE_ALIGN(addr);
vma = find_vma(current->mm, addr);
if (((TASK_SIZE - len) >= addr) &&
(!vma || addr + len <= vma->vm_start))
goto found_addr;
}
addr = HPAGE_ALIGN(TASK_UNMAPPED_BASE);
for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
if (TASK_SIZE - len < addr)
return -ENOMEM;
if (!vma || ((addr + len) < vma->vm_start))
goto found_addr;
addr = vma->vm_end;
}
found_addr:
addr = HPAGE_ALIGN(addr);
return addr;
}
asmlinkage unsigned long
sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag)
{
struct mm_struct *mm = current->mm;
unsigned long raddr;
int retval = 0;
extern int alloc_hugetlb_pages(int, unsigned long, unsigned long, int, int);
if (!(cpu_has_pse))
return -EINVAL;
if (key < 0)
return -EINVAL;
if (len & (HPAGE_SIZE - 1))
return -EINVAL;
down_write(&mm->mmap_sem);
raddr = get_addr(addr, len);
if (raddr == -ENOMEM)
goto raddr_out;
retval = alloc_hugetlb_pages(key, raddr, len, prot, flag);
raddr_out: up_write(&mm->mmap_sem);
if (retval < 0)
return (unsigned long) retval;
return raddr;
}
asmlinkage int
sys_free_hugepages(unsigned long addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int retval;
extern int free_hugepages(struct vm_area_struct *);
vma = find_vma(current->mm, addr);
if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr))
return -EINVAL;
down_write(&mm->mmap_sem);
spin_lock(&mm->page_table_lock);
retval = free_hugepages(vma);
spin_unlock(&mm->page_table_lock);
up_write(&mm->mmap_sem);
return retval;
}
#else
asmlinkage unsigned long
sys_alloc_hugepages(int key, unsigned long addr, size_t len, int prot, int flag)
{
return -ENOSYS;
}
asmlinkage int
sys_free_hugepages(unsigned long addr)
{
return -ENOSYS;
}
#endif
...@@ -12,5 +12,6 @@ O_TARGET := mm.o ...@@ -12,5 +12,6 @@ O_TARGET := mm.o
obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o
obj-$(CONFIG_DISCONTIGMEM) += discontig.o obj-$(CONFIG_DISCONTIGMEM) += discontig.o
export-objs := pageattr.o export-objs := pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
include $(TOPDIR)/Rules.make include $(TOPDIR)/Rules.make
/*
* IA-32 Huge TLB Page Support for Kernel.
*
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
static struct vm_operations_struct hugetlb_vm_ops;
struct list_head htlbpage_freelist;
spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
extern long htlbpagemem;
void zap_hugetlb_resources(struct vm_area_struct *);
#define MAX_ID 32
struct htlbpagekey {
struct inode *in;
int key;
} htlbpagek[MAX_ID];
static struct inode *
find_key_inode(int key)
{
int i;
for (i = 0; i < MAX_ID; i++) {
if (htlbpagek[i].key == key)
return (htlbpagek[i].in);
}
return NULL;
}
static struct page *
alloc_hugetlb_page(void)
{
struct list_head *curr, *head;
struct page *page;
spin_lock(&htlbpage_lock);
head = &htlbpage_freelist;
curr = head->next;
if (curr == head) {
spin_unlock(&htlbpage_lock);
return NULL;
}
page = list_entry(curr, struct page, list);
list_del(curr);
htlbpagemem--;
spin_unlock(&htlbpage_lock);
set_page_count(page, 1);
memset(page_address(page), 0, HPAGE_SIZE);
return page;
}
static void
free_hugetlb_page(struct page *page)
{
spin_lock(&htlbpage_lock);
if ((page->mapping != NULL) && (page_count(page) == 2)) {
struct inode *inode = page->mapping->host;
int i;
ClearPageDirty(page);
remove_from_page_cache(page);
set_page_count(page, 1);
if ((inode->i_size -= HPAGE_SIZE) == 0) {
for (i = 0; i < MAX_ID; i++)
if (htlbpagek[i].key == inode->i_ino) {
htlbpagek[i].key = 0;
htlbpagek[i].in = NULL;
break;
}
kfree(inode);
}
}
if (put_page_testzero(page)) {
list_add(&page->list, &htlbpage_freelist);
htlbpagemem++;
}
spin_unlock(&htlbpage_lock);
}
static pte_t *
huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
pmd = pmd_alloc(mm, pgd, addr);
return (pte_t *) pmd;
}
static pte_t *
huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
pmd = pmd_offset(pgd, addr);
return (pte_t *) pmd;
}
#define mk_pte_huge(entry) {entry.pte_low |= (_PAGE_PRESENT | _PAGE_PSE);}
static void
set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page, pte_t * page_table, int write_access)
{
pte_t entry;
mm->rss += (HPAGE_SIZE / PAGE_SIZE);
if (write_access) {
entry =
pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
} else
entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
entry = pte_mkyoung(entry);
mk_pte_huge(entry);
set_pte(page_table, entry);
return;
}
static int
anon_get_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
int write_access, pte_t * page_table)
{
struct page *page;
page = alloc_hugetlb_page();
if (page == NULL)
return -1;
set_huge_pte(mm, vma, page, page_table, write_access);
return 1;
}
int
make_hugetlb_pages_present(unsigned long addr, unsigned long end, int flags)
{
int write;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
pte_t *pte;
vma = find_vma(mm, addr);
if (!vma)
goto out_error1;
write = (vma->vm_flags & VM_WRITE) != 0;
if ((vma->vm_end - vma->vm_start) & (HPAGE_SIZE - 1))
goto out_error1;
spin_lock(&mm->page_table_lock);
do {
pte = huge_pte_alloc(mm, addr);
if ((pte) && (pte_none(*pte))) {
if (anon_get_hugetlb_page(mm, vma,
write ? VM_WRITE : VM_READ,
pte) == -1)
goto out_error;
} else
goto out_error;
addr += HPAGE_SIZE;
} while (addr < end);
spin_unlock(&mm->page_table_lock);
vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
if (flags & MAP_PRIVATE)
vma->vm_flags |= VM_DONTCOPY;
vma->vm_ops = &hugetlb_vm_ops;
return 0;
out_error: /* Error case, remove the partial lp_resources. */
if (addr > vma->vm_start) {
vma->vm_end = addr;
zap_hugetlb_resources(vma);
vma->vm_end = end;
}
spin_unlock(&mm->page_table_lock);
out_error1:
return -1;
}
int
copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
{
pte_t *src_pte, *dst_pte, entry;
struct page *ptepage;
unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end;
while (addr < end) {
dst_pte = huge_pte_alloc(dst, addr);
if (!dst_pte)
goto nomem;
src_pte = huge_pte_offset(src, addr);
entry = *src_pte;
ptepage = pte_page(entry);
get_page(ptepage);
set_pte(dst_pte, entry);
dst->rss += (HPAGE_SIZE / PAGE_SIZE);
addr += HPAGE_SIZE;
}
return 0;
nomem:
return -ENOMEM;
}
int
follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *st, int *length, int i)
{
pte_t *ptep, pte;
unsigned long start = *st;
unsigned long pstart;
int len = *length;
struct page *page;
do {
pstart = start;
ptep = huge_pte_offset(mm, start);
pte = *ptep;
back1:
page = pte_page(pte);
if (pages) {
page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
pages[i] = page;
}
if (vmas)
vmas[i] = vma;
i++;
len--;
start += PAGE_SIZE;
if (((start & HPAGE_MASK) == pstart) && len &&
(start < vma->vm_end))
goto back1;
} while (len && start < vma->vm_end);
*length = len;
*st = start;
return i;
}
void
zap_hugetlb_resources(struct vm_area_struct *mpnt)
{
struct mm_struct *mm = mpnt->vm_mm;
unsigned long len, addr, end;
pte_t *ptep;
struct page *page;
addr = mpnt->vm_start;
end = mpnt->vm_end;
len = end - addr;
do {
ptep = huge_pte_offset(mm, addr);
page = pte_page(*ptep);
pte_clear(ptep);
free_hugetlb_page(page);
addr += HPAGE_SIZE;
} while (addr < end);
mm->rss -= (len >> PAGE_SHIFT);
mpnt->vm_ops = NULL;
flush_tlb_range(mpnt, end - len, end);
}
static void
unlink_vma(struct vm_area_struct *mpnt)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
vma = mm->mmap;
if (vma == mpnt) {
mm->mmap = vma->vm_next;
} else {
while (vma->vm_next != mpnt) {
vma = vma->vm_next;
}
vma->vm_next = mpnt->vm_next;
}
rb_erase(&mpnt->vm_rb, &mm->mm_rb);
mm->mmap_cache = NULL;
mm->map_count--;
}
int
free_hugepages(struct vm_area_struct *mpnt)
{
unlink_vma(mpnt);
zap_hugetlb_resources(mpnt);
kmem_cache_free(vm_area_cachep, mpnt);
return 1;
}
static struct inode *
set_new_inode(unsigned long len, int prot, int flag, int key)
{
struct inode *inode;
int i;
for (i = 0; i < MAX_ID; i++) {
if (htlbpagek[i].key == 0)
break;
}
if (i == MAX_ID)
return NULL;
inode = kmalloc(sizeof (struct inode), GFP_KERNEL);
if (inode == NULL)
return NULL;
inode_init_once(inode);
atomic_inc(&inode->i_writecount);
inode->i_mapping = &inode->i_data;
inode->i_mapping->host = inode;
inode->i_ino = (unsigned long)key;
htlbpagek[i].key = key;
htlbpagek[i].in = inode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
inode->i_mode = prot;
inode->i_size = len;
return inode;
}
static int
check_size_prot(struct inode *inode, unsigned long len, int prot, int flag)
{
if (inode->i_uid != current->fsuid)
return -1;
if (inode->i_gid != current->fsgid)
return -1;
if (inode->i_size != len)
return -1;
return 0;
}
static int
alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long len,
int prot, int flag)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct inode *inode;
struct address_space *mapping;
struct page *page;
int idx;
int retval = -ENOMEM;
int newalloc = 0;
try_again:
spin_lock(&htlbpage_lock);
inode = find_key_inode(key);
if (inode == NULL) {
if (!capable(CAP_SYS_ADMIN)) {
if (!in_group_p(0)) {
retval = -EPERM;
goto out_err;
}
}
if (!(flag & IPC_CREAT)) {
retval = -ENOENT;
goto out_err;
}
inode = set_new_inode(len, prot, flag, key);
if (inode == NULL)
goto out_err;
newalloc = 1;
} else {
if (check_size_prot(inode, len, prot, flag) < 0) {
retval = -EINVAL;
goto out_err;
}
else if (atomic_read(&inode->i_writecount)) {
spin_unlock(&htlbpage_lock);
goto try_again;
}
}
spin_unlock(&htlbpage_lock);
mapping = inode->i_mapping;
addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot,
MAP_NORESERVE|MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0);
if (IS_ERR((void *) addr))
goto freeinode;
vma = find_vma(mm, addr);
if (!vma) {
retval = -EINVAL;
goto freeinode;
}
spin_lock(&mm->page_table_lock);
do {
pte_t *pte = huge_pte_alloc(mm, addr);
if ((pte) && (pte_none(*pte))) {
idx = (addr - vma->vm_start) >> HPAGE_SHIFT;
page = find_get_page(mapping, idx);
if (page == NULL) {
page = alloc_hugetlb_page();
if (page == NULL)
goto out;
add_to_page_cache(page, mapping, idx);
}
set_huge_pte(mm, vma, page, pte,
(vma->vm_flags & VM_WRITE));
} else
goto out;
addr += HPAGE_SIZE;
} while (addr < vma->vm_end);
retval = 0;
vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
vma->vm_ops = &hugetlb_vm_ops;
spin_unlock(&mm->page_table_lock);
spin_lock(&htlbpage_lock);
atomic_set(&inode->i_writecount, 0);
spin_unlock(&htlbpage_lock);
return retval;
out:
if (addr > vma->vm_start) {
unsigned long raddr;
raddr = vma->vm_end;
vma->vm_end = addr;
zap_hugetlb_resources(vma);
vma->vm_end = raddr;
}
spin_unlock(&mm->page_table_lock);
do_munmap(mm, vma->vm_start, len);
if (newalloc)
goto freeinode;
return retval;
out_err: spin_unlock(&htlbpage_lock);
freeinode:
if (newalloc) {
for(idx=0;idx<MAX_ID;idx++)
if (htlbpagek[idx].key == inode->i_ino) {
htlbpagek[idx].key = 0;
htlbpagek[idx].in = NULL;
break;
}
kfree(inode);
}
return retval;
}
static int
alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len,
int prot, int flag)
{
if (!capable(CAP_SYS_ADMIN)) {
if (!in_group_p(0))
return -EPERM;
}
addr = do_mmap_pgoff(NULL, addr, len, prot,
MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0);
if (IS_ERR((void *) addr))
return -ENOMEM;
if (make_hugetlb_pages_present(addr, (addr + len), flag) < 0) {
do_munmap(current->mm, addr, len);
return -ENOMEM;
}
return 0;
}
int
alloc_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot,
int flag)
{
if (key > 0)
return alloc_shared_hugetlb_pages(key, addr, len, prot, flag);
return alloc_private_hugetlb_pages(key, addr, len, prot, flag);
}
int
set_hugetlb_mem_size(int count)
{
int j, lcount;
struct page *page, *map;
extern long htlbzone_pages;
extern struct list_head htlbpage_freelist;
if (count < 0)
lcount = count;
else
lcount = count - htlbzone_pages;
if (lcount > 0) { /* Increase the mem size. */
while (lcount--) {
page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER);
if (page == NULL)
break;
map = page;
for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
SetPageReserved(map);
map++;
}
spin_lock(&htlbpage_lock);
list_add(&page->list, &htlbpage_freelist);
htlbpagemem++;
htlbzone_pages++;
spin_unlock(&htlbpage_lock);
}
return (int) htlbzone_pages;
}
/* Shrink the memory size. */
while (lcount++) {
page = alloc_hugetlb_page();
if (page == NULL)
break;
spin_lock(&htlbpage_lock);
htlbzone_pages--;
spin_unlock(&htlbpage_lock);
map = page;
for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
1 << PG_private | 1<< PG_writeback);
set_page_count(page, 0);
map++;
}
set_page_count(page, 1);
__free_pages(page, HUGETLB_PAGE_ORDER);
}
return (int) htlbzone_pages;
}
static struct vm_operations_struct hugetlb_vm_ops = {
.close = zap_hugetlb_resources,
};
...@@ -431,6 +431,13 @@ static void __init set_max_mapnr_init(void) ...@@ -431,6 +431,13 @@ static void __init set_max_mapnr_init(void)
extern void set_max_mapnr_init(void); extern void set_max_mapnr_init(void);
#endif /* !CONFIG_DISCONTIGMEM */ #endif /* !CONFIG_DISCONTIGMEM */
#ifdef CONFIG_HUGETLB_PAGE
long htlbpagemem = 0;
int htlbpage_max;
long htlbzone_pages;
extern struct list_head htlbpage_freelist;
#endif
void __init mem_init(void) void __init mem_init(void)
{ {
extern int ppro_with_ram_bug(void); extern int ppro_with_ram_bug(void);
...@@ -493,6 +500,30 @@ void __init mem_init(void) ...@@ -493,6 +500,30 @@ void __init mem_init(void)
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
zap_low_mappings(); zap_low_mappings();
#endif #endif
#ifdef CONFIG_HUGETLB_PAGE
{
long i, j;
struct page *page, *map;
/*For now reserve quarter for hugetlb_pages.*/
htlbzone_pages = (max_low_pfn >> ((HPAGE_SHIFT - PAGE_SHIFT) + 2)) ;
/*Will make this kernel command line. */
INIT_LIST_HEAD(&htlbpage_freelist);
for (i=0; i<htlbzone_pages; i++) {
page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER);
if (page == NULL)
break;
map = page;
for (j=0; j<(HPAGE_SIZE/PAGE_SIZE); j++) {
SetPageReserved(map);
map++;
}
list_add(&page->list, &htlbpage_freelist);
}
printk("Total Huge_TLB_Page memory pages allocated %ld\n", i);
htlbzone_pages = htlbpagemem = i;
htlbpage_max = i;
}
#endif
} }
#if CONFIG_X86_PAE #if CONFIG_X86_PAE
......
...@@ -487,7 +487,18 @@ int proc_pid_statm(struct task_struct *task, char * buffer) ...@@ -487,7 +487,18 @@ int proc_pid_statm(struct task_struct *task, char * buffer)
while (vma) { while (vma) {
pgd_t *pgd = pgd_offset(mm, vma->vm_start); pgd_t *pgd = pgd_offset(mm, vma->vm_start);
int pages = 0, shared = 0, dirty = 0, total = 0; int pages = 0, shared = 0, dirty = 0, total = 0;
if (is_vm_hugetlb_page(vma)) {
int num_pages = ((vma->vm_end - vma->vm_start)/PAGE_SIZE);
resident += num_pages;
if (!(vma->vm_flags & VM_DONTCOPY))
share += num_pages;
if (vma->vm_flags & VM_WRITE)
dt += num_pages;
drs += num_pages;
vma = vma->vm_next;
continue;
}
statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total); statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
resident += pages; resident += pages;
share += shared; share += shared;
......
...@@ -206,6 +206,19 @@ static int meminfo_read_proc(char *page, char **start, off_t off, ...@@ -206,6 +206,19 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
non_flushes non_flushes
); );
#ifdef CONFIG_HUGETLB_PAGE
{
extern unsigned long htlbpagemem, htlbzone_pages;
len += sprintf(page + len,
"HugePages: %8lu\n"
"Available: %8lu\n"
"Size: %8lu kB\n",
htlbzone_pages,
htlbpagemem,
HPAGE_SIZE/1024);
}
#endif
return proc_calc_metrics(page, start, off, count, eof, len); return proc_calc_metrics(page, start, off, count, eof, len);
#undef K #undef K
} }
......
...@@ -44,14 +44,22 @@ typedef struct { unsigned long pte_low, pte_high; } pte_t; ...@@ -44,14 +44,22 @@ typedef struct { unsigned long pte_low, pte_high; } pte_t;
typedef struct { unsigned long long pmd; } pmd_t; typedef struct { unsigned long long pmd; } pmd_t;
typedef struct { unsigned long long pgd; } pgd_t; typedef struct { unsigned long long pgd; } pgd_t;
#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
#define HPAGE_SHIFT 21
#else #else
typedef struct { unsigned long pte_low; } pte_t; typedef struct { unsigned long pte_low; } pte_t;
typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgd; } pgd_t;
#define pte_val(x) ((x).pte_low) #define pte_val(x) ((x).pte_low)
#define HPAGE_SHIFT 22
#endif #endif
#define PTE_MASK PAGE_MASK #define PTE_MASK PAGE_MASK
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#endif
typedef struct { unsigned long pgprot; } pgprot_t; typedef struct { unsigned long pgprot; } pgprot_t;
#define pmd_val(x) ((x).pmd) #define pmd_val(x) ((x).pmd)
......
...@@ -104,6 +104,7 @@ struct vm_area_struct { ...@@ -104,6 +104,7 @@ struct vm_area_struct {
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ #define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_STACK_FLAGS (0x00000100 | VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT) #define VM_STACK_FLAGS (0x00000100 | VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT)
...@@ -377,6 +378,20 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long ...@@ -377,6 +378,20 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
int __set_page_dirty_buffers(struct page *page); int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_nobuffers(struct page *page);
#ifdef CONFIG_HUGETLB_PAGE
#define is_vm_hugetlb_page(vma) (vma->vm_flags & VM_HUGETLB)
extern int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
extern int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
extern int free_hugepages(struct vm_area_struct *);
#else
#define is_vm_hugetlb_page(vma) (0)
#define follow_hugetlb_page(mm, vma, pages, vmas, start, len, i) (0)
#define copy_hugetlb_page_range(dst, src, vma) (0)
#define free_hugepages(mpnt) do { } while(0)
#endif
/* /*
* If the mapping doesn't provide a set_page_dirty a_op, then * If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads. * just fall through and assume that it wants buffer_heads.
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
*/ */
#ifndef CONFIG_FORCE_MAX_ZONEORDER #ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 10 #define MAX_ORDER 11
#else #else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif #endif
......
...@@ -128,6 +128,7 @@ enum ...@@ -128,6 +128,7 @@ enum
KERN_TAINTED=53, /* int: various kernel tainted flags */ KERN_TAINTED=53, /* int: various kernel tainted flags */
KERN_CADPID=54, /* int: PID of the process to notify on CAD */ KERN_CADPID=54, /* int: PID of the process to notify on CAD */
KERN_PIDMAX=55, /* int: PID # limit */ KERN_PIDMAX=55, /* int: PID # limit */
KERN_HUGETLB_PAGE_NUM=56, /* int: Number of available Huge Pages */
}; };
......
...@@ -98,6 +98,11 @@ int proc_dol2crvec(ctl_table *table, int write, struct file *filp, ...@@ -98,6 +98,11 @@ int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
extern int acct_parm[]; extern int acct_parm[];
#endif #endif
#ifdef CONFIG_HUGETLB_PAGE
extern int htlbpage_max;
extern int set_hugetlb_mem_size(int);
#endif
static int parse_table(int *, int, void *, size_t *, void *, size_t, static int parse_table(int *, int, void *, size_t *, void *, size_t,
ctl_table *, void **); ctl_table *, void **);
static int proc_doutsstring(ctl_table *table, int write, struct file *filp, static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
...@@ -258,6 +263,10 @@ static ctl_table kern_table[] = { ...@@ -258,6 +263,10 @@ static ctl_table kern_table[] = {
#endif #endif
{KERN_PIDMAX, "pid_max", &pid_max, sizeof (int), {KERN_PIDMAX, "pid_max", &pid_max, sizeof (int),
0600, NULL, &proc_dointvec}, 0600, NULL, &proc_dointvec},
#ifdef CONFIG_HUGETLB_PAGE
{KERN_HUGETLB_PAGE_NUM, "numhugepages", &htlbpage_max, sizeof(int), 0644, NULL,
&proc_dointvec},
#endif
{0} {0}
}; };
...@@ -897,6 +906,10 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, ...@@ -897,6 +906,10 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
val = -val; val = -val;
buffer += len; buffer += len;
left -= len; left -= len;
#ifdef CONFIG_HUGETLB_PAGE
if (i == &htlbpage_max)
val = set_hugetlb_mem_size(val);
#endif
switch(op) { switch(op) {
case OP_SET: *i = val; break; case OP_SET: *i = val; break;
case OP_AND: *i &= val; break; case OP_AND: *i &= val; break;
......
...@@ -208,6 +208,9 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -208,6 +208,9 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
unsigned long end = vma->vm_end; unsigned long end = vma->vm_end;
unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst, src, vma);
src_pgd = pgd_offset(src, address)-1; src_pgd = pgd_offset(src, address)-1;
dst_pgd = pgd_offset(dst, address)-1; dst_pgd = pgd_offset(dst, address)-1;
...@@ -530,6 +533,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -530,6 +533,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|| !(flags & vma->vm_flags)) || !(flags & vma->vm_flags))
return i ? : -EFAULT; return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &len, i);
continue;
}
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
do { do {
struct page *map; struct page *map;
......
...@@ -1031,10 +1031,14 @@ static struct vm_area_struct *touched_by_munmap(struct mm_struct *mm, ...@@ -1031,10 +1031,14 @@ static struct vm_area_struct *touched_by_munmap(struct mm_struct *mm,
touched = NULL; touched = NULL;
do { do {
struct vm_area_struct *next = mpnt->vm_next; struct vm_area_struct *next = mpnt->vm_next;
mpnt->vm_next = touched; if (!(is_vm_hugetlb_page(mpnt))) {
touched = mpnt; mpnt->vm_next = touched;
mm->map_count--; touched = mpnt;
rb_erase(&mpnt->vm_rb, &mm->mm_rb); rb_erase(&mpnt->vm_rb, &mm->mm_rb);
mm->map_count--;
}
else
free_hugepages(mpnt);
mpnt = next; mpnt = next;
} while (mpnt && mpnt->vm_start < end); } while (mpnt && mpnt->vm_start < end);
*npp = mpnt; *npp = mpnt;
...@@ -1273,7 +1277,10 @@ void exit_mmap(struct mm_struct * mm) ...@@ -1273,7 +1277,10 @@ void exit_mmap(struct mm_struct * mm)
vm_unacct_memory((end - start) >> PAGE_SHIFT); vm_unacct_memory((end - start) >> PAGE_SHIFT);
mm->map_count--; mm->map_count--;
unmap_page_range(tlb, mpnt, start, end); if (!(is_vm_hugetlb_page(mpnt)))
unmap_page_range(tlb, mpnt, start, end);
else
mpnt->vm_ops->close(mpnt);
mpnt = mpnt->vm_next; mpnt = mpnt->vm_next;
} }
......
...@@ -321,6 +321,11 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot ...@@ -321,6 +321,11 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */ /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
if (is_vm_hugetlb_page(vma)) {
error = -EACCES;
goto out;
}
newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC)); newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC));
if ((newflags & ~(newflags >> 4)) & 0xf) { if ((newflags & ~(newflags >> 4)) & 0xf) {
error = -EACCES; error = -EACCES;
......
...@@ -311,6 +311,10 @@ unsigned long do_mremap(unsigned long addr, ...@@ -311,6 +311,10 @@ unsigned long do_mremap(unsigned long addr,
vma = find_vma(current->mm, addr); vma = find_vma(current->mm, addr);
if (!vma || vma->vm_start > addr) if (!vma || vma->vm_start > addr)
goto out; goto out;
if (is_vm_hugetlb_page(vma)) {
ret = -EINVAL;
goto out;
}
/* We can't remap across vm area boundaries */ /* We can't remap across vm area boundaries */
if (old_len > vma->vm_end - addr) if (old_len > vma->vm_end - addr)
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment