Commit 402b0862 authored by Carsten Otte, committed by Avi Kivity

s390: KVM preparation: provide hook to enable pgstes in user pagetable

The SIE instruction on s390 uses the second half of the page table page to
virtualize the storage keys of a guest. This patch offers the s390_enable_sie
function, which reorganizes the page tables of a single-threaded process to
reserve space in the page table:
s390_enable_sie makes sure that the process is single-threaded and then uses
dup_mm to create a new mm with reorganized page tables. The old mm is freed
and the process now has a page status extended field after every page table.
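
To make the layout concrete: with pgstes enabled, each page table is followed in
the same page by an equally sized block of page status entries, so the pgste that
belongs to a pte sits exactly 256 unsigned longs above it (this mirrors the
table + 256 arithmetic in clear_table_pgstes() in the diff below). A minimal
illustrative helper, not part of this patch:

	/* hypothetical helper, illustration only: locate the page status
	 * entry that belongs to a pte when pgstes are enabled */
	static inline unsigned long *pgste_of(unsigned long *pte)
	{
		return pte + 256;	/* second half of the page table */
	}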

Code that wants to exploit pgstes should SELECT CONFIG_PGSTE.

This patch has a small common-code impact, namely making dup_mm non-static.

Edit (Carsten): I've modified Martin's patch, following Jeremy Fitzhardinge's
review feedback. The prototype for dup_mm now lives in include/linux/sched.h.
Following Martin's suggestion, s390_enable_sie() now calls task_lock() to
prevent a race against ptrace modification of mm_users.
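
Because s390_enable_sie() only succeeds while the process is still single
threaded (checked under task_lock()), a user of this hook would call it once,
early, before creating further threads or touching guest memory, and simply
propagate the error code. A minimal sketch of such a call site; the surrounding
function is hypothetical and not part of this patch:

	extern int s390_enable_sie(void);	/* declared by this patch */

	/* hypothetical caller, e.g. early in a VM-creation path */
	static int example_create_vm(void)
	{
		int rc;

		rc = s390_enable_sie();	/* 0, -EINVAL or -ENOMEM */
		if (rc)
			return rc;	/* e.g. caller already has threads */
		/* ... set up guest memory and SIE control blocks ... */
		return 0;
	}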
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
parent 37817f29

@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
+config PGSTE
+	bool
+	default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390

@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,

@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE	4
 #define FRAG_MASK	15UL
 #define SECOND_HALVES	10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 256, 0, PAGE_SIZE/4);
+	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER	2
 #define TABLES_PER_PAGE	2
 #define FRAG_MASK	3UL
 #define SECOND_HALVES	2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)

@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	unsigned long *table;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	spin_lock(&mm->page_table_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {

@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		pgtable_page_ctor(page);
 		page->flags &= ~FRAG_MASK;
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		if (mm->context.pgstes)
+			clear_table_pgstes(table);
+		else
+			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock(&mm->page_table_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}

@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock(&mm->page_table_lock);

@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+	int rc;
+
+	task_lock(tsk);
+
+	rc = 0;
+	if (tsk->mm->context.pgstes)
+		goto unlock;
+
+	rc = -EINVAL;
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+		goto unlock;
+
+	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	mm = dup_mm(tsk);
+	tsk->mm->context.pgstes = 0;
+	rc = -ENOMEM;
+	if (!mm)
+		goto unlock;
+	mmput(tsk->mm);
+	tsk->mm = tsk->active_mm = mm;
+	preempt_disable();
+	update_mm(mm, tsk);
+	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+	preempt_enable();
+	rc = 0;
+
+unlock:
+	task_unlock(tsk);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);

@@ -7,6 +7,7 @@ typedef struct {
 	unsigned long asce_bits;
 	unsigned long asce_limit;
 	int noexec;
+	int pgstes;
 } mm_context_t;
 
 #endif

@@ -20,7 +20,13 @@ static inline int init_new_context(struct task_struct *tsk,
 #ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
-	mm->context.noexec = s390_noexec;
+	if (current->mm->context.pgstes) {
+		mm->context.noexec = 0;
+		mm->context.pgstes = 1;
+	} else {
+		mm->context.noexec = s390_noexec;
+		mm->context.pgstes = 0;
+	}
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;

@@ -966,6 +966,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 extern int add_shared_memory(unsigned long start, unsigned long size);
 extern int remove_shared_memory(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
 
 /*
  * No page table caches to initialise

@@ -1798,6 +1798,8 @@ extern void mmput(struct mm_struct *);
 extern struct mm_struct *get_task_mm(struct task_struct *task);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Allocate a new mm structure and copy contents from tsk->mm */
+extern struct mm_struct *dup_mm(struct task_struct *tsk);
 
 extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
 extern void flush_thread(void);

@@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  * Allocate a new mm structure and copy contents from the
  * mm structure of the passed in task structure.
  */
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
 {
 	struct mm_struct *mm, *oldmm = current->mm;
 	int err;