Commit 42aef4b0 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] s390: system call speedup part 1.

From: Martin Schwidefsky <schwidefsky@de.ibm.com>

The purpose of this is to speed up system calls on s390.  I managed to
squeeze about 65 cycles from each system call.  This improved e.g.  getpid()
from 232 to 157 cycles.  As a nice side-effect it simplified the uaccess
functions considerably.
parent ff3d0776
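The heart of the patch is that the user-space access registers no longer live in struct pt_regs, so they no longer have to be saved and restored on every system call entry and exit; they are kept in the thread structure and touched only where actually needed (signal delivery, ptrace, access-register page faults, context switch). As a minimal sketch of the two helpers this hinges on, assuming they wrap the s390 STAM/LAM instructions; the inline-assembly constraints below are illustrative, not copied from the patch:

static inline void save_access_regs(unsigned int *acrs)
{
	/* store access registers 0-15 to memory at acrs */
	asm volatile("stam 0,15,0(%0)" : : "a" (acrs) : "memory");
}

static inline void restore_access_regs(unsigned int *acrs)
{
	/* load access registers 0-15 from memory at acrs */
	asm volatile("lam 0,15,0(%0)" : : "a" (acrs));
}

Every place below that used to read or write regs->acrs now goes through current->thread.acrs, bracketed by these two helpers where the real register contents are needed.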
@@ -17,10 +17,10 @@
int main(void)
{
DEFINE(__THREAD_info, offsetof(struct task_struct, thread_info),);
DEFINE(__THREAD_ar2, offsetof(struct task_struct, thread.ar2),);
DEFINE(__THREAD_ar4, offsetof(struct task_struct, thread.ar4),);
DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),);
DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),);
DEFINE(__THREAD_mm_segment,
offsetof(struct task_struct, thread.mm_segment),);
BLANK();
DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid),);
DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address),);
@@ -31,5 +31,12 @@ int main(void)
DEFINE(__TI_flags, offsetof(struct thread_info, flags),);
DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),);
DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),);
BLANK();
DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),);
DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),);
DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),);
DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc),);
DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap),);
DEFINE(__PT_SIZE, sizeof(struct pt_regs),);
return 0;
}
@@ -57,18 +57,7 @@
/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
now struct_user_regs, they are different) */
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
{ \
int i; \
memcpy(&pr_reg.psw.mask, &regs->psw.mask, 4); \
memcpy(&pr_reg.psw.addr, ((char*)&regs->psw.addr)+4, 4); \
for(i=0; i<NUM_GPRS; i++) \
pr_reg.gprs[i] = regs->gprs[i]; \
for(i=0; i<NUM_ACRS; i++) \
pr_reg.acrs[i] = regs->acrs[i]; \
pr_reg.orig_gpr2 = regs->orig_gpr2; \
}
#define ELF_CORE_COPY_REGS(pr_reg, regs) dump_regs32(regs, &pr_reg);
/* This yields a mask that user programs can use to figure out what
@@ -107,6 +96,18 @@ typedef struct
} s390_regs32;
typedef s390_regs32 elf_gregset_t;
static inline int dump_regs32(struct pt_regs *ptregs, elf_gregset_t *regs)
{
int i;
memcpy(&regs->psw.mask, &ptregs->psw.mask, 4);
memcpy(&regs->psw.addr, &ptregs->psw.addr, 4);
for (i = 0; i < NUM_GPRS; i++)
regs->gprs[i] = ptregs->gprs[i];
regs->orig_gpr2 = ptregs->orig_gpr2;
return 1;
}
#include <asm/processor.h>
#include <linux/module.h>
#include <linux/config.h>
......
@@ -297,7 +297,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 *sregs)
regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr;
for (i = 0; i < NUM_GPRS; i++)
regs32.gprs[i] = (__u32) regs->gprs[i];
memcpy(regs32.acrs, regs->acrs, sizeof(regs32.acrs));
save_access_regs(current->thread.acrs);
memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs));
err = __copy_to_user(&sregs->regs, &regs32, sizeof(regs32));
if (err)
return err;
@@ -323,7 +324,8 @@ static int restore_sigregs32(struct pt_regs *regs, _sigregs32 *sregs)
regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
for (i = 0; i < NUM_GPRS; i++)
regs->gprs[i] = (__u64) regs32.gprs[i];
memcpy(regs->acrs, regs32.acrs, sizeof(regs32.acrs));
memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs));
restore_access_regs(current->thread.acrs);
err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs,
sizeof(_s390_fp_regs32));
......
@@ -179,7 +179,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
memset(&regs, 0, sizeof(regs));
regs.psw.mask = PSW_KERNEL_BITS;
regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
regs.gprs[7] = STACK_FRAME_OVERHEAD;
regs.gprs[7] = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
regs.gprs[8] = __LC_KERNEL_STACK;
regs.gprs[9] = (unsigned long) fn;
regs.gprs[10] = (unsigned long) arg;
@@ -230,6 +230,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
(THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
p->thread.ksp = (unsigned long) frame;
p->set_child_tid = p->clear_child_tid = NULL;
/* Store access registers to kernel stack of new process. */
frame->childregs = *regs;
frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
frame->childregs.gprs[15] = new_stackp;
@@ -240,6 +241,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
/* fake return stack for resume(), don't go back to schedule */
frame->gprs[9] = (unsigned long) frame;
/* Save access registers to new thread structure. */
save_access_regs(&p->thread.acrs[0]);
#ifndef CONFIG_ARCH_S390X
/*
* save fprs to current->thread.fp_regs to merge them with
@@ -251,7 +256,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _SEGMENT_TABLE;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS)
frame->childregs.acrs[0] = regs->gprs[6];
p->thread.acrs[0] = regs->gprs[6];
#else /* CONFIG_ARCH_S390X */
/* Save the fpu registers to new thread structure. */
save_fp_regs(&p->thread.fp_regs);
@@ -259,18 +264,15 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
if (test_thread_flag(TIF_31BIT)) {
frame->childregs.acrs[0] =
(unsigned int) regs->gprs[6];
p->thread.acrs[0] = (unsigned int) regs->gprs[6];
} else {
frame->childregs.acrs[0] =
(unsigned int)(regs->gprs[6] >> 32);
frame->childregs.acrs[1] =
(unsigned int) regs->gprs[6];
p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32);
p->thread.acrs[1] = (unsigned int) regs->gprs[6];
}
}
#endif /* CONFIG_ARCH_S390X */
/* start new process with ar4 pointing to the correct address space */
p->thread.ar4 = get_fs().ar4;
p->thread.mm_segment = get_fs();
/* Don't copy debug registers */
memset(&p->thread.per_info,0,sizeof(p->thread.per_info));
......
@@ -137,25 +137,36 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
if ((addr & 3) || addr > sizeof(struct user) - __ADDR_MASK)
return -EIO;
if (addr <= (addr_t) &dummy->regs.orig_gpr2) {
if (addr < (addr_t) &dummy->regs.acrs) {
/*
* psw, gprs, acrs and orig_gpr2 are stored on the stack
* psw and gprs are stored on the stack
*/
tmp = *(addr_t *)((addr_t) __KSTK_PTREGS(child) + addr);
if (addr == (addr_t) &dummy->regs.psw.mask)
/* Remove per bit from user psw. */
tmp &= ~PSW_MASK_PER;
} else if (addr >= (addr_t) &dummy->regs.fp_regs &&
addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
} else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
/*
* access registers are stored in the thread structure
*/
offset = addr - (addr_t) &dummy->regs.acrs;
tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
} else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
/*
* orig_gpr2 is stored on the kernel stack
*/
tmp = (addr_t) __KSTK_PTREGS(child)->orig_gpr2;
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
*/
offset = addr - (addr_t) &dummy->regs.fp_regs;
tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset);
} else if (addr >= (addr_t) &dummy->regs.per_info &&
addr < (addr_t) (&dummy->regs.per_info + 1)) {
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
* per_info is found in the thread structure
*/
@@ -187,9 +198,9 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
if ((addr & 3) || addr > sizeof(struct user) - __ADDR_MASK)
return -EIO;
if (addr <= (addr_t) &dummy->regs.orig_gpr2) {
if (addr < (addr_t) &dummy->regs.acrs) {
/*
* psw, gprs, acrs and orig_gpr2 are stored on the stack
* psw and gprs are stored on the stack
*/
if (addr == (addr_t) &dummy->regs.psw.mask &&
#ifdef CONFIG_S390_SUPPORT
@@ -206,8 +217,20 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
#endif
*(addr_t *)((addr_t) __KSTK_PTREGS(child) + addr) = data;
} else if (addr >= (addr_t) &dummy->regs.fp_regs &&
addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
offset = addr - (addr_t) &dummy->regs.acrs;
*(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
} else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
/*
* orig_gpr2 is stored on the kernel stack
*/
__KSTK_PTREGS(child)->orig_gpr2 = data;
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
*/
@@ -217,8 +240,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
offset = addr - (addr_t) &dummy->regs.fp_regs;
*(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data;
} else if (addr >= (addr_t) &dummy->regs.per_info &&
addr < (addr_t) (&dummy->regs.per_info + 1)) {
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
* per_info is found in the thread structure
*/
@@ -324,9 +346,9 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
(addr & 3) || addr > sizeof(struct user) - 3)
return -EIO;
if (addr <= (addr_t) &dummy32->regs.orig_gpr2) {
if (addr < (addr_t) &dummy32->regs.acrs) {
/*
* psw, gprs, acrs and orig_gpr2 are stored on the stack
* psw and gprs are stored on the stack
*/
if (addr == (addr_t) &dummy32->regs.psw.mask) {
/* Fake a 31 bit psw mask. */
@@ -336,28 +358,32 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
/* Fake a 31 bit psw address. */
tmp = (__u32) __KSTK_PTREGS(child)->psw.addr |
PSW32_ADDR_AMODE31;
} else if (addr < (addr_t) &dummy32->regs.acrs[0]) {
} else {
/* gpr 0-15 */
tmp = *(__u32 *)((addr_t) __KSTK_PTREGS(child) +
addr*2 + 4);
} else if (addr < (addr_t) &dummy32->regs.orig_gpr2) {
offset = PT_ACR0 + addr - (addr_t) &dummy32->regs.acrs;
tmp = *(__u32*)((addr_t) __KSTK_PTREGS(child) + offset);
} else {
/* orig gpr 2 */
offset = PT_ORIGGPR2 + 4;
tmp = *(__u32*)((addr_t) __KSTK_PTREGS(child) + offset);
}
} else if (addr >= (addr_t) &dummy32->regs.fp_regs &&
addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
offset = addr - (addr_t) &dummy32->regs.acrs;
tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
} else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
tmp = *(__u32*)((addr_t) &__KSTK_PTREGS(child)->orig_gpr2 + 4);
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
*/
offset = addr - (addr_t) &dummy32->regs.fp_regs;
tmp = *(__u32 *)((addr_t) &child->thread.fp_regs + offset);
} else if (addr >= (addr_t) &dummy32->regs.per_info &&
addr < (addr_t) (&dummy32->regs.per_info + 1)) {
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
* per_info is found in the thread structure
*/
@@ -396,7 +422,7 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
tmp = (__u32) data;
if (addr <= (addr_t) &dummy32->regs.orig_gpr2) {
if (addr < (addr_t) &dummy32->regs.acrs) {
/*
* psw, gprs, acrs and orig_gpr2 are stored on the stack
*/
@@ -411,19 +437,25 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
/* Build a 64 bit psw address from 31 bit address. */
__KSTK_PTREGS(child)->psw.addr =
(__u64) tmp & PSW32_ADDR_INSN;
} else if (addr < (addr_t) &dummy32->regs.acrs[0]) {
} else {
/* gpr 0-15 */
*(__u32*)((addr_t) __KSTK_PTREGS(child) + addr*2 + 4) =
tmp;
} else if (addr < (addr_t) &dummy32->regs.orig_gpr2) {
offset = PT_ACR0 + addr - (addr_t) &dummy32->regs.acrs;
*(__u32*)((addr_t) __KSTK_PTREGS(child) + offset) = tmp;
} else {
offset = PT_ORIGGPR2 + 4;
*(__u32*)((addr_t) __KSTK_PTREGS(child) + offset) = tmp;
}
} else if (addr >= (addr_t) &dummy32->regs.fp_regs &&
addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
offset = addr - (addr_t) &dummy32->regs.acrs;
*(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
} else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
*(__u32*)((addr_t) &__KSTK_PTREGS(child)->orig_gpr2 + 4) = tmp;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
*/
@@ -434,8 +466,7 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
offset = addr - (addr_t) &dummy32->regs.fp_regs;
*(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp;
} else if (addr >= (addr_t) &dummy32->regs.per_info &&
addr < (addr_t) (&dummy32->regs.per_info + 1)) {
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
* per_info is found in the thread structure.
*/
......
@@ -33,6 +33,8 @@ EXPORT_SYMBOL_NOVERS(_sb_findmap);
EXPORT_SYMBOL_NOVERS(__copy_from_user_asm);
EXPORT_SYMBOL_NOVERS(__copy_to_user_asm);
EXPORT_SYMBOL_NOVERS(__clear_user_asm);
EXPORT_SYMBOL_NOVERS(__strncpy_from_user_asm);
EXPORT_SYMBOL_NOVERS(__strnlen_user_asm);
EXPORT_SYMBOL(diag10);
/*
......
@@ -496,7 +496,7 @@ void __init setup_arch(char **cmdline_p)
lc->external_new_psw.mask = PSW_KERNEL_BITS;
lc->external_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
lc->svc_new_psw.mask = PSW_KERNEL_BITS;
lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
lc->program_new_psw.mask = PSW_KERNEL_BITS;
lc->program_new_psw.addr =
@@ -512,6 +512,7 @@ void __init setup_arch(char **cmdline_p)
lc->async_stack = (unsigned long)
__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
#ifdef CONFIG_ARCH_S390X
if (MACHINE_HAS_DIAG44)
lc->diag44_opcode = 0x83000044;
......
@@ -151,11 +151,18 @@ static int save_sigregs(struct pt_regs *regs, _sigregs *sregs)
unsigned long old_mask = regs->psw.mask;
int err;
save_access_regs(current->thread.acrs);
/* Copy a 'clean' PSW mask to the user to avoid leaking
information about whether PER is currently on. */
regs->psw.mask = PSW_MASK_MERGE(PSW_USER_BITS, regs->psw.mask);
err = __copy_to_user(&sregs->regs, regs, sizeof(_s390_regs_common));
err = __copy_to_user(&sregs->regs.psw, &regs->psw,
sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs));
regs->psw.mask = old_mask;
if (err != 0)
return err;
err = __copy_to_user(&sregs->regs.acrs, current->thread.acrs,
sizeof(sregs->regs.acrs));
if (err != 0)
return err;
/*
@@ -176,11 +183,17 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs *sregs)
/* Always make any pending restarted system call return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
err = __copy_from_user(regs, &sregs->regs, sizeof(_s390_regs_common));
err = __copy_from_user(&regs->psw, &sregs->regs.psw,
sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs));
regs->psw.mask = PSW_MASK_MERGE(old_mask, regs->psw.mask);
regs->psw.addr |= PSW_ADDR_AMODE;
if (err)
return err;
err = __copy_from_user(&current->thread.acrs, &sregs->regs.acrs,
sizeof(sregs->regs.acrs));
if (err)
return err;
restore_access_regs(current->thread.acrs);
err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs,
sizeof(s390_fp_regs));
......
@@ -173,6 +173,10 @@ void show_registers(struct pt_regs *regs)
printk(" " FOURLONG,
regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
#if 0
/* FIXME: this isn't needed any more but it changes the ksymoops
* input. To remove or not to remove ... */
save_access_regs(regs->acrs);
printk("%s ACRS: %08x %08x %08x %08x\n", mode,
regs->acrs[0], regs->acrs[1], regs->acrs[2], regs->acrs[3]);
printk(" %08x %08x %08x %08x\n",
@@ -181,6 +185,7 @@ void show_registers(struct pt_regs *regs)
regs->acrs[8], regs->acrs[9], regs->acrs[10], regs->acrs[11]);
printk(" %08x %08x %08x %08x\n",
regs->acrs[12], regs->acrs[13], regs->acrs[14], regs->acrs[15]);
#endif
/*
* Print the first 20 byte of the instruction stream at the
@@ -229,17 +234,17 @@ char *task_show_regs(struct task_struct *task, char *buffer)
regs->gprs[12], regs->gprs[13],
regs->gprs[14], regs->gprs[15]);
buffer += sprintf(buffer, "User ACRS: %08x %08x %08x %08x\n",
regs->acrs[0], regs->acrs[1],
regs->acrs[2], regs->acrs[3]);
task->thread.acrs[0], task->thread.acrs[1],
task->thread.acrs[2], task->thread.acrs[3]);
buffer += sprintf(buffer, " %08x %08x %08x %08x\n",
regs->acrs[4], regs->acrs[5],
regs->acrs[6], regs->acrs[7]);
task->thread.acrs[4], task->thread.acrs[5],
task->thread.acrs[6], task->thread.acrs[7]);
buffer += sprintf(buffer, " %08x %08x %08x %08x\n",
regs->acrs[8], regs->acrs[9],
regs->acrs[10], regs->acrs[11]);
task->thread.acrs[8], task->thread.acrs[9],
task->thread.acrs[10], task->thread.acrs[11]);
buffer += sprintf(buffer, " %08x %08x %08x %08x\n",
regs->acrs[12], regs->acrs[13],
regs->acrs[14], regs->acrs[15]);
task->thread.acrs[12], task->thread.acrs[13],
task->thread.acrs[14], task->thread.acrs[15]);
return buffer;
}
......
@@ -9,135 +9,191 @@
* These functions have standard call interface
*/
#include <linux/errno.h>
#include <asm/lowcore.h>
#include <asm/offsets.h>
.text
.align 4
.globl __copy_from_user_asm
# %r2 = to, %r3 = n, %r4 = from
__copy_from_user_asm:
lr %r5,%r3
sacf 512
0: mvcle %r2,%r4,0
jo 0b
1: sacf 0
lr %r2,%r5
slr %r0,%r0
0: mvcp 0(%r3,%r2),0(%r4),%r0
jnz 1f
slr %r2,%r2
br %r14
1: la %r2,256(%r2)
la %r4,256(%r4)
ahi %r3,-256
2: mvcp 0(%r3,%r2),0(%r4),%r0
jnz 1b
3: slr %r2,%r2
br %r14
4: lhi %r0,-4096
lr %r5,%r4
slr %r5,%r0
nr %r5,%r0 # %r5 = (%r4 + 4096) & -4096
slr %r5,%r4 # %r5 = #bytes to next user page boundary
clr %r3,%r5 # copy crosses next page boundary ?
jnh 6f # no, the current page faulted
# move with the reduced length which is < 256
5: mvcp 0(%r5,%r2),0(%r4),%r0
slr %r3,%r5
6: lr %r2,%r3
br %r14
2: lhi %r1,-4096
lr %r3,%r4
slr %r3,%r1 # %r3 = %r4 + 4096
nr %r3,%r1 # %r3 = (%r4 + 4096) & -4096
slr %r3,%r4 # %r3 = #bytes to next user page boundary
clr %r5,%r3 # copy crosses next page boundary ?
jnh 1b # no, this page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
3: mvcle %r2,%r4,0
jo 3b
j 1b
.section __ex_table,"a"
.long 0b,2b
.long 3b,1b
.long 0b,4b
.long 2b,4b
.long 5b,6b
.previous
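The fixup path above (labels 4 through 6) replaces the old mvcle retry. Rendered as a hedged C sketch, with mvcp_chunk() as a hypothetical stand-in for a single mvcp and its exception handling: on a fault, compute the bytes left up to the next 4K user page boundary; if the outstanding length fits inside the current page, the current page itself faulted and the whole remainder is reported as uncopied; otherwise the page after the boundary may have faulted, so the copy is retried with the length clipped to the boundary, which is necessarily less than 256 bytes because the fault happened inside one 256-byte mvcp:

/*
 * C sketch of the fault fixup in __copy_from_user_asm; returns the
 * number of bytes that could not be copied (the asm returns it in %r2).
 */
static unsigned long copy_from_fixup(void *to, const void *from,
				     unsigned long n)
{
	unsigned long addr = (unsigned long) from;
	/* bytes from the faulting source address to the next page */
	unsigned long boundary = ((addr + 4096) & ~4095UL) - addr;

	if (n <= boundary)
		return n;		/* the current page faulted */
	if (mvcp_chunk(to, from, boundary))	/* hypothetical helper */
		return n;		/* the retry faulted as well */
	return n - boundary;		/* only the tail is uncopied */
}

The same pattern recurs in __copy_to_user_asm and __clear_user_asm below, with mvcs instead of mvcp.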
.align 4
.text
.globl __copy_to_user_asm
# %r2 = from, %r3 = n, %r4 = to
__copy_to_user_asm:
lr %r5,%r3
sacf 512
0: mvcle %r4,%r2,0
jo 0b
1: sacf 0
lr %r2,%r3
slr %r0,%r0
0: mvcs 0(%r3,%r4),0(%r2),%r0
jnz 1f
slr %r2,%r2
br %r14
2: lhi %r1,-4096
1: la %r2,256(%r2)
la %r4,256(%r4)
ahi %r3,-256
2: mvcs 0(%r3,%r4),0(%r2),%r0
jnz 1b
3: slr %r2,%r2
br %r14
4: lhi %r0,-4096
lr %r5,%r4
slr %r5,%r1 # %r5 = %r4 + 4096
nr %r5,%r1 # %r5 = (%r4 + 4096) & -4096
slr %r5,%r4 # %r5 = #bytes to next user page boundary
clr %r3,%r5 # copy crosses next page boundary ?
jnh 1b # no, the current page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
3: mvcle %r4,%r2,0
jo 3b
j 1b
slr %r5,%r0
nr %r5,%r0 # %r5 = (%r4 + 4096) & -4096
slr %r5,%r4 # %r5 = #bytes to next user page boundary
clr %r3,%r5 # copy crosses next page boundary ?
jnh 6f # no, the current page faulted
# move with the reduced length which is < 256
5: mvcs 0(%r5,%r4),0(%r2),%r0
slr %r3,%r5
6: lr %r2,%r3
br %r14
.section __ex_table,"a"
.long 0b,2b
.long 3b,1b
.long 0b,4b
.long 2b,4b
.long 5b,6b
.previous
.align 4
.text
.globl __copy_in_user_asm
# %r2 = from, %r3 = n, %r4 = to
__copy_in_user_asm:
stm %r6,%r15,24(%r15)
lr %r5,%r3
lr %r7,%r3
lr %r6,%r2
cpya 6,4 # ar6 = ar4
sacf 512
0: mvcle %r4,%r6,0
jo 0b
1: sacf 0
lr %r2,%r7
lm %r6,%r15,24(%r15)
sacf 256
bras %r1,1f
mvc 0(1,%r4),0(%r2)
0: mvc 0(256,%r4),0(%r2)
la %r2,256(%r2)
la %r4,256(%r4)
1: ahi %r3,-256
jnm 0b
2: ex %r3,0(%r1)
sacf 0
slr %r2,%r2
br %r14
3: mvc 0(1,%r4),0(%r2)
la %r2,1(%r2)
la %r4,1(%r4)
ahi %r3,-1
jnm 3b
4: lr %r2,%r3
sacf 0
br %r14
2: lhi %r1,-4096
lr %r5,%r4
slr %r5,%r1 # %r5 = %r4 + 4096
nr %r5,%r1 # %r5 = (%r4 + 4096) & -4096
slr %r5,%r4 # %r5 = #bytes to next user page boundary
clr %r7,%r5 # copy crosses next page boundary ?
jnh 1b # no, the current page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
3: mvcle %r4,%r6,0
jo 3b
j 1b
.section __ex_table,"a"
.long 0b,2b
.long 3b,1b
.long 0b,3b
.long 2b,3b
.long 3b,4b
.previous
.align 4
.text
.globl __clear_user_asm
# %r2 = to, %r3 = n
__clear_user_asm:
bras %r5,0f
.long empty_zero_page
0: l %r5,0(%r5)
slr %r0,%r0
1: mvcs 0(%r3,%r2),0(%r5),%r0
jnz 2f
slr %r2,%r2
br %r14
2: la %r2,256(%r2)
ahi %r3,-256
3: mvcs 0(%r3,%r2),0(%r5),%r0
jnz 2b
4: slr %r2,%r2
br %r14
5: lhi %r0,-4096
lr %r4,%r2
lr %r5,%r3
sr %r2,%r2
sr %r3,%r3
sacf 512
0: mvcle %r4,%r2,0
jo 0b
1: sacf 0
slr %r4,%r0
nr %r4,%r0 # %r4 = (%r2 + 4096) & -4096
slr %r4,%r2 # %r4 = #bytes to next user page boundary
clr %r3,%r4 # clear crosses next page boundary ?
jnh 7f # no, the current page faulted
# clear with the reduced length which is < 256
6: mvcs 0(%r4,%r2),0(%r5),%r0
slr %r3,%r4
7: lr %r2,%r3
br %r14
2: lr %r2,%r5
lhi %r1,-4096
slr %r5,%r1 # %r5 = %r4 + 4096
nr %r5,%r1 # %r5 = (%r4 + 4096) & -4096
slr %r5,%r4 # %r5 = #bytes to next user page boundary
clr %r2,%r5 # copy crosses next page boundary ?
jnh 1b # no, the current page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
slr %r2,%r5
3: mvcle %r4,%r2,0
jo 3b
j 1b
4: alr %r2,%r5
j 1b
.section __ex_table,"a"
.long 0b,2b
.long 3b,4b
.long 1b,5b
.long 3b,5b
.long 6b,7b
.previous
.align 4
.text
.globl __strncpy_from_user_asm
# %r2 = dst, %r3 = src, %r4 = count
__strncpy_from_user_asm:
lhi %r0,0
lhi %r1,1
lhi %r5,0
0: mvcp 0(%r1,%r2),0(%r3),%r0
tm 0(%r2),0xff
jz 1f
la %r2,1(%r2)
la %r3,1(%r3)
ahi %r5,1
clr %r5,%r4
jl 0b
1: lr %r2,%r5
br %r14
2: lhi %r2,-EFAULT
br %r14
.section __ex_table,"a"
.long 0b,2b
.previous
.align 4
.text
.globl __strnlen_user_asm
# %r2 = src, %r3 = count
__strnlen_user_asm:
lhi %r0,0
lhi %r1,1
lhi %r5,0
0: mvcp 24(%r1,%r15),0(%r2),%r0
ahi %r5,1
tm 24(%r15),0xff
jz 1f
la %r2,1(%r2)
clr %r5,%r3
jl 0b
1: lr %r2,%r5
br %r14
2: lhi %r2,-EFAULT
br %r14
.section __ex_table,"a"
.long 0b,2b
.previous
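The byte-wise loops of __strncpy_from_user_asm and __strnlen_user_asm above correspond roughly to the following C sketch of the strnlen case; get_user_byte() is a hypothetical stand-in for the single-byte mvcp through the scratch slot at 24(%r15), and the returned length includes the terminating zero byte, capped at count:

static long strnlen_user_sketch(const char *src, unsigned long count)
{
	unsigned long len = 0;
	unsigned char c;

	do {
		if (get_user_byte(&c, src + len))
			return -EFAULT;	/* the ex_table fixup path */
		len++;			/* length includes the NUL */
	} while (c != 0 && len < count);
	return len;
}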
@@ -9,134 +9,189 @@
* These functions have standard call interface
*/
#include <linux/errno.h>
#include <asm/lowcore.h>
#include <asm/offsets.h>
.text
.align 4
.globl __copy_from_user_asm
# %r2 = to, %r3 = n, %r4 = from
__copy_from_user_asm:
lgr %r5,%r3
sacf 512
0: mvcle %r2,%r4,0
jo 0b
1: sacf 0
lgr %r2,%r5
slgr %r0,%r0
0: mvcp 0(%r3,%r2),0(%r4),%r0
jnz 1f
slgr %r2,%r2
br %r14
1: la %r2,256(%r2)
la %r4,256(%r4)
aghi %r3,-256
2: mvcp 0(%r3,%r2),0(%r4),%r0
jnz 1b
3: slgr %r2,%r2
br %r14
4: lghi %r0,-4096
lgr %r5,%r4
slgr %r5,%r0
ngr %r5,%r0 # %r5 = (%r4 + 4096) & -4096
slgr %r5,%r4 # %r5 = #bytes to next user page boundary
clgr %r3,%r5 # copy crosses next page boundary ?
jnh 6f # no, the current page faulted
# move with the reduced length which is < 256
5: mvcp 0(%r5,%r2),0(%r4),%r0
slgr %r3,%r5
6: lgr %r2,%r3
br %r14
2: lghi %r1,-4096
lgr %r3,%r4
slgr %r3,%r1 # %r3 = %r4 + 4096
ngr %r3,%r1 # %r3 = (%r4 + 4096) & -4096
slgr %r3,%r4 # %r3 = #bytes to next user page boundary
clgr %r5,%r3 # copy crosses next page boundary ?
jnh 1b # no, this page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
3: mvcle %r2,%r4,0
jo 3b
j 1b
.section __ex_table,"a"
.quad 0b,2b
.quad 3b,1b
.quad 0b,4b
.quad 2b,4b
.quad 5b,6b
.previous
.align 4
.text
.globl __copy_to_user_asm
# %r2 = from, %r3 = n, %r4 = to
__copy_to_user_asm:
lgr %r5,%r3
sacf 512
0: mvcle %r4,%r2,0
jo 0b
1: sacf 0
lgr %r2,%r3
slgr %r0,%r0
0: mvcs 0(%r3,%r4),0(%r2),%r0
jnz 1f
slgr %r2,%r2
br %r14
2: lghi %r1,-4096
1: la %r2,256(%r2)
la %r4,256(%r4)
aghi %r3,-256
2: mvcs 0(%r3,%r4),0(%r2),%r0
jnz 1b
3: slgr %r2,%r2
br %r14
4: lghi %r0,-4096
lgr %r5,%r4
slgr %r5,%r1 # %r5 = %r4 + 4096
ngr %r5,%r1 # %r5 = (%r4 + 4096) & -4096
slgr %r5,%r4 # %r5 = #bytes to next user page boundary
clgr %r3,%r5 # copy crosses next page boundary ?
jnh 1b # no, the current page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
3: mvcle %r4,%r2,0
jo 3b
j 1b
slgr %r5,%r0
ngr %r5,%r0 # %r5 = (%r4 + 4096) & -4096
slgr %r5,%r4 # %r5 = #bytes to next user page boundary
clgr %r3,%r5 # copy crosses next page boundary ?
jnh 6f # no, the current page faulted
# move with the reduced length which is < 256
5: mvcs 0(%r5,%r4),0(%r2),%r0
slgr %r3,%r5
6: lgr %r2,%r3
br %r14
.section __ex_table,"a"
.quad 0b,2b
.quad 3b,1b
.quad 0b,4b
.quad 2b,4b
.quad 5b,6b
.previous
.align 4
.text
.globl __copy_in_user_asm
# %r2 = from, %r3 = n, %r4 = to
__copy_in_user_asm:
stmg %r6,%r15,48(%r15)
lgr %r5,%r3
lgr %r7,%r5
lgr %r6,%r2
cpya 6,4 # ar6 = ar4
sacf 512
0: mvcle %r4,%r6,0
jo 0b
1: sacf 0
lgr %r2,%r7
lmg %r6,%r15,48(%r15)
sacf 256
bras %r1,1f
mvc 0(1,%r4),0(%r2)
0: mvc 0(256,%r4),0(%r2)
la %r2,256(%r2)
la %r4,256(%r4)
1: aghi %r3,-256
jnm 0b
2: ex %r3,0(%r1)
sacf 0
slgr %r2,%r2
br %r14
3: mvc 0(1,%r4),0(%r2)
la %r2,1(%r2)
la %r4,1(%r4)
aghi %r3,-1
jnm 3b
4: lgr %r2,%r3
sacf 0
br %r14
2: lghi %r1,-4096
lgr %r5,%r4
slgr %r5,%r1 # %r5 = %r4 + 4096
ngr %r5,%r1 # %r5 = (%r4 + 4096) & -4096
slgr %r5,%r4 # %r5 = #bytes to next user page boundary
clgr %r7,%r5 # copy crosses next page boundary ?
jnh 1b # no, the current page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
3: mvcle %r4,%r6,0
jo 3b
j 1b
.section __ex_table,"a"
.quad 0b,2b
.quad 3b,1b
.quad 0b,3b
.quad 2b,3b
.quad 3b,4b
.previous
.align 4
.text
.globl __clear_user_asm
# %r2 = to, %r3 = n
__clear_user_asm:
slgr %r0,%r0
larl %r5,empty_zero_page
1: mvcs 0(%r3,%r2),0(%r5),%r0
jnz 2f
slgr %r2,%r2
br %r14
2: la %r2,256(%r2)
aghi %r3,-256
3: mvcs 0(%r3,%r2),0(%r5),%r0
jnz 2b
4: slgr %r2,%r2
br %r14
5: lghi %r0,-4096
lgr %r4,%r2
lgr %r5,%r3
sgr %r2,%r2
sgr %r3,%r3
sacf 512
0: mvcle %r4,%r2,0
jo 0b
1: sacf 0
slgr %r4,%r0
ngr %r4,%r0 # %r4 = (%r2 + 4096) & -4096
slgr %r4,%r2 # %r4 = #bytes to next user page boundary
clgr %r3,%r4 # clear crosses next page boundary ?
jnh 7f # no, the current page faulted
# clear with the reduced length which is < 256
6: mvcs 0(%r4,%r2),0(%r5),%r0
slgr %r3,%r4
7: lgr %r2,%r3
br %r14
2: lgr %r2,%r5
lghi %r1,-4096
slgr %r5,%r1 # %r5 = %r4 + 4096
ngr %r5,%r1 # %r5 = (%r4 + 4096) & -4096
slgr %r5,%r4 # %r5 = #bytes to next user page boundary
clgr %r2,%r5 # copy crosses next page boundary ?
jnh 1b # no, the current page faulted
# The page after the current user page might have faulted.
# We can't find out which page because the program check handler
# might have called schedule, destroying all lowcore information.
# We retry with the shortened length.
slgr %r2,%r5
3: mvcle %r4,%r2,0
jo 3b
j 1b
4: algr %r2,%r5
j 1b
.section __ex_table,"a"
.quad 0b,2b
.quad 3b,4b
.quad 1b,5b
.quad 3b,5b
.quad 6b,7b
.previous
.align 4
.text
.globl __strncpy_from_user_asm
# %r2 = dst, %r3 = src, %r4 = count
__strncpy_from_user_asm:
lghi %r0,0
lghi %r1,1
lghi %r5,0
0: mvcp 0(%r1,%r2),0(%r3),%r0
tm 0(%r2),0xff
jz 1f
la %r2,1(%r2)
la %r3,1(%r3)
aghi %r5,1
clgr %r5,%r4
jl 0b
1: lgr %r2,%r5
br %r14
2: lghi %r2,-EFAULT
br %r14
.section __ex_table,"a"
.quad 0b,2b
.previous
.align 4
.text
.globl __strnlen_user_asm
# %r2 = src, %r3 = count
__strnlen_user_asm:
lghi %r0,0
lghi %r1,1
lghi %r5,0
0: mvcp 24(%r1,%r15),0(%r2),%r0
aghi %r5,1
tm 24(%r15),0xff
jz 1f
la %r2,1(%r2)
clgr %r5,%r3
jl 0b
1: lgr %r2,%r5
br %r14
2: lghi %r2,-EFAULT
br %r14
.section __ex_table,"a"
.quad 0b,2b
.previous
@@ -87,12 +87,12 @@ static int __check_access_register(struct pt_regs *regs, int error_code)
if (areg == 0)
/* Access via access register 0 -> kernel address */
return 0;
if (regs && areg < NUM_ACRS && regs->acrs[areg] <= 1)
if (regs && areg < NUM_ACRS && current->thread.acrs[areg] <= 1)
/*
* access register contains 0 -> kernel address,
* access register contains 1 -> user space address
*/
return regs->acrs[areg];
return current->thread.acrs[areg];
/* Something unhealthy was done with the access registers... */
die("page fault via unknown access register", regs, error_code);
@@ -115,8 +115,10 @@ static inline int check_user_space(struct pt_regs *regs, int error_code)
* 3: Home Segment Table Descriptor
*/
int descriptor = S390_lowcore.trans_exc_code & 3;
if (descriptor == 1)
if (descriptor == 1) {
save_access_regs(current->thread.acrs);
return __check_access_register(regs, error_code);
}
return descriptor >> 1;
}
......
@@ -138,6 +138,8 @@ void __init paging_init(void)
}
}
S390_lowcore.kernel_asce = pgdir_k;
/* enable virtual mapping in kernel mode */
__asm__ __volatile__(" LCTL 1,1,%0\n"
" LCTL 7,7,%0\n"
@@ -223,6 +225,8 @@ void __init paging_init(void)
}
}
S390_lowcore.kernel_asce = pgdir_k;
/* enable virtual mapping in kernel mode */
__asm__ __volatile__("lctlg 1,1,%0\n\t"
"lctlg 7,7,%0\n\t"
......