Commit f244d910 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvm-s390-20140130' of...

Merge tag 'kvm-s390-20140130' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

Two new features are added by this patch set:
- The floating interrupt controller (flic) that allows us to inject,
  clear and inspect non-vcpu local interrupts. This also gives us an
  opportunity to fix deficiencies in our existing interrupt definitions.
- Support for asynchronous page faults via the pfault mechanism. Testing
  show significant guest performance improvements under host swap.
parents 4f34d683 536336c2
FLIC (floating interrupt controller)
====================================
FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some
machine check interruptions. All interrupts are stored in a per-vm list of
pending interrupts. FLIC performs operations on this list.
Only one FLIC instance may be instantiated.
FLIC provides support to
- add interrupts (KVM_DEV_FLIC_ENQUEUE)
- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
- enable/disable for the guest transparent async page faults
Groups:
KVM_DEV_FLIC_ENQUEUE
Passes a buffer and length into the kernel which are then injected into
the list of pending interrupts.
attr->addr contains the pointer to the buffer and attr->attr contains
the length of the buffer.
The format of the data structure kvm_s390_irq as it is copied from userspace
is defined in usr/include/linux/kvm.h.
KVM_DEV_FLIC_GET_ALL_IRQS
Copies all floating interrupts into a buffer provided by userspace.
When the buffer is too small it returns -ENOMEM, which is the indication
for userspace to try again with a bigger buffer.
All interrupts remain pending, i.e. are not deleted from the list of
currently pending interrupts.
attr->addr contains the userspace address of the buffer into which all
interrupt data will be copied.
attr->attr contains the size of the buffer in bytes.
KVM_DEV_FLIC_CLEAR_IRQS
Simply deletes all elements from the list of currently pending floating
interrupts. No interrupts are injected into the guest.
KVM_DEV_FLIC_APF_ENABLE
Enables async page faults for the guest. So in case of a major page fault
the host is allowed to handle this async and continues the guest.
KVM_DEV_FLIC_APF_DISABLE_WAIT
Disables async page faults for the guest and waits until already pending
async page faults are done. This is necessary to trigger a completion interrupt
for every init interrupt before migrating the interrupt list.
......@@ -16,6 +16,7 @@
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
......@@ -168,18 +169,6 @@ struct kvm_vcpu_stat {
u32 diagnose_9c;
};
struct kvm_s390_io_info {
__u16 subchannel_id; /* 0x0b8 */
__u16 subchannel_nr; /* 0x0ba */
__u32 io_int_parm; /* 0x0bc */
__u32 io_int_word; /* 0x0c0 */
};
struct kvm_s390_ext_info {
__u32 ext_params;
__u64 ext_params2;
};
#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
......@@ -188,27 +177,6 @@ struct kvm_s390_ext_info {
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
struct kvm_s390_pgm_info {
__u16 code;
};
struct kvm_s390_prefix_info {
__u32 address;
};
struct kvm_s390_extcall_info {
__u16 code;
};
struct kvm_s390_emerg_info {
__u16 code;
};
struct kvm_s390_mchk_info {
__u64 cr14;
__u64 mcic;
};
struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
......@@ -246,6 +214,7 @@ struct kvm_s390_float_interrupt {
unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
/ sizeof(long)];
struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
unsigned int irq_count;
};
......@@ -262,6 +231,10 @@ struct kvm_vcpu_arch {
u64 stidp_data;
};
struct gmap *gmap;
#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
};
struct kvm_vm_stat {
......@@ -275,6 +248,7 @@ struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
struct gmap *gmap;
int css_support;
};
......@@ -287,6 +261,24 @@ static inline bool kvm_is_error_hva(unsigned long addr)
return IS_ERR_VALUE(addr);
}
#define ASYNC_PF_PER_VCPU 64
struct kvm_vcpu;
struct kvm_async_pf;
struct kvm_arch_async_pf {
unsigned long pfault_token;
};
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
#endif
......@@ -767,6 +767,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @crst_list: list of all crst tables used in the guest address space
* @pfault_enabled: defines if pfaults are applicable for the guest
*/
struct gmap {
struct list_head list;
......@@ -775,6 +776,7 @@ struct gmap {
unsigned long asce;
void *private;
struct list_head crst_list;
bool pfault_enabled;
};
/**
......
......@@ -79,6 +79,7 @@ struct thread_struct {
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
......
......@@ -16,6 +16,22 @@
#define __KVM_S390
/* Device control API: s390-specific devices */
#define KVM_DEV_FLIC_GET_ALL_IRQS 1
#define KVM_DEV_FLIC_ENQUEUE 2
#define KVM_DEV_FLIC_CLEAR_IRQS 3
#define KVM_DEV_FLIC_APF_ENABLE 4
#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
* There are also sclp and machine checks. This gives us
* sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
* Lets round up to 8192 pages.
*/
#define KVM_S390_MAX_FLOAT_IRQS 266250
#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
......@@ -57,4 +73,7 @@ struct kvm_sync_regs {
#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
#endif
......@@ -23,6 +23,8 @@ config KVM
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
......
......@@ -7,7 +7,7 @@
# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
......
......@@ -17,6 +17,7 @@
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
#include "gaccess.h"
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
......@@ -46,6 +47,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
return 0;
}
static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
{
struct prs_parm {
u16 code;
u16 subcode;
u16 parm_len;
u16 parm_version;
u64 token_addr;
u64 select_mask;
u64 compare_mask;
u64 zarch;
};
struct prs_parm parm;
int rc;
u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
unsigned long hva_token = KVM_HVA_ERR_BAD;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
switch (parm.subcode) {
case 0: /* TOKEN */
if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
/*
* If the pagefault handshake is already activated,
* the token must not be changed. We have to return
* decimal 8 instead, as mandated in SC24-6084.
*/
vcpu->run->s.regs.gprs[ry] = 8;
return 0;
}
if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
if (kvm_is_error_hva(hva_token))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.pfault_token = parm.token_addr;
vcpu->arch.pfault_select = parm.select_mask;
vcpu->arch.pfault_compare = parm.compare_mask;
vcpu->run->s.regs.gprs[ry] = 0;
rc = 0;
break;
case 1: /*
* CANCEL
* Specification allows to let already pending tokens survive
* the cancel, therefore to reduce code complexity, we assume
* all outstanding tokens are already pending.
*/
if (parm.token_addr || parm.select_mask ||
parm.compare_mask || parm.zarch)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
vcpu->run->s.regs.gprs[ry] = 0;
/*
* If the pfault handling was not established or is already
* canceled SC24-6084 requests to return decimal 4.
*/
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
vcpu->run->s.regs.gprs[ry] = 4;
else
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
rc = 0;
break;
default:
rc = -EOPNOTSUPP;
break;
}
return rc;
}
static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
......@@ -150,6 +232,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
return __diag_time_slice_end(vcpu);
case 0x9c:
return __diag_time_slice_end_directed(vcpu);
case 0x258:
return __diag_page_ref_service(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
case 0x500:
......
This diff is collapsed.
......@@ -152,11 +152,13 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
#endif
case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
......@@ -254,6 +256,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.gmap)
goto out_nogmap;
kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;
}
kvm->arch.css_support = 0;
......@@ -271,6 +274,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
......@@ -320,6 +324,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap)
......@@ -380,6 +386,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}
......@@ -553,6 +561,18 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFTOKEN:
r = put_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFCOMPARE:
r = put_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFSELECT:
r = put_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
break;
default:
break;
}
......@@ -582,6 +602,18 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFSELECT:
r = get_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
break;
default:
break;
}
......@@ -700,10 +732,100 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0;
}
static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
{
long rc;
hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
struct mm_struct *mm = current->mm;
down_read(&mm->mmap_sem);
rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
return rc;
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
unsigned long token)
{
struct kvm_s390_interrupt inti;
inti.parm64 = token;
if (start_token) {
inti.type = KVM_S390_INT_PFAULT_INIT;
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
} else {
inti.type = KVM_S390_INT_PFAULT_DONE;
WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
/* s390 will always inject the page directly */
}
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
/*
* s390 will always inject the page directly,
* but we still want check_async_completion to cleanup
*/
return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
hva_t hva;
struct kvm_arch_async_pf arch;
int rc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
return 0;
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
return 0;
if (psw_extint_disabled(vcpu))
return 0;
if (kvm_cpu_has_interrupt(vcpu))
return 0;
if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
return 0;
if (!vcpu->arch.gmap->pfault_enabled)
return 0;
hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
return 0;
rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
int rc, cpuflags;
/*
* On s390 notifications for arriving pages will be delivered directly
* to the guest but the house keeping for completed pfaults is
* handled outside the worker.
*/
kvm_check_async_pf_completion(vcpu);
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
if (need_resched())
......@@ -729,7 +851,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
int rc;
int rc = -1;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
......@@ -743,7 +865,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
rc = -EREMOTE;
} else {
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
if (kvm_arch_setup_async_pf(vcpu) ||
(kvm_arch_fault_in_sync(vcpu) >= 0))
rc = 0;
}
if (rc == -1) {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
......
......@@ -159,4 +159,8 @@ void exit_sie_sync(struct kvm_vcpu *vcpu);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
/* implemented in interrupt.c */
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
#endif
......@@ -224,6 +224,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
{
int rc;
unsigned int i;
struct kvm_vcpu *v;
switch (parameter & 0xff) {
case 0:
......@@ -231,6 +233,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
break;
case 1:
case 2:
kvm_for_each_vcpu(i, v, vcpu->kvm) {
v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(v);
}
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
break;
default:
......
......@@ -30,6 +30,52 @@
TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
__entry->pswmask, __entry->pswaddr, p_args)
TRACE_EVENT(kvm_s390_major_guest_pfault,
TP_PROTO(VCPU_PROTO_COMMON),
TP_ARGS(VCPU_ARGS_COMMON),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
),
VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
);
TRACE_EVENT(kvm_s390_pfault_init,
TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
__field(long, pfault_token)
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
__entry->pfault_token = pfault_token;
),
VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
);
TRACE_EVENT(kvm_s390_pfault_done,
TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
__field(long, pfault_token)
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
__entry->pfault_token = pfault_token;
),
VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
);
/*
* Tracepoints for SIE entry and exit.
*/
......
......@@ -50,6 +50,7 @@
#define VM_FAULT_BADMAP 0x020000
#define VM_FAULT_BADACCESS 0x040000
#define VM_FAULT_SIGNAL 0x080000
#define VM_FAULT_PFAULT 0x100000
static unsigned long store_indication __read_mostly;
......@@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
return;
}
case VM_FAULT_BADCONTEXT:
case VM_FAULT_PFAULT:
do_no_context(regs);
break;
case VM_FAULT_SIGNAL:
......@@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
*/
static inline int do_exception(struct pt_regs *regs, int access)
{
#ifdef CONFIG_PGSTE
struct gmap *gmap;
#endif
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
......@@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
address = __gmap_fault(address,
(struct gmap *) S390_lowcore.gmap);
gmap = (struct gmap *)
((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
if (gmap) {
address = __gmap_fault(address, gmap);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
goto out_up;
......@@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
fault = VM_FAULT_OOM;
goto out_up;
}
if (gmap->pfault_enabled)
flags |= FAULT_FLAG_RETRY_NOWAIT;
}
#endif
......@@ -371,9 +379,19 @@ static inline int do_exception(struct pt_regs *regs, int access)
regs, address);
}
if (fault & VM_FAULT_RETRY) {
#ifdef CONFIG_PGSTE
if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* FAULT_FLAG_RETRY_NOWAIT has been set,
* mmap_sem has not been released */
current->thread.gmap_pfault = 1;
fault = VM_FAULT_PFAULT;
goto out_up;
}
#endif
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags &= ~(FAULT_FLAG_ALLOW_RETRY |
FAULT_FLAG_RETRY_NOWAIT);
flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem);
goto retry;
......
......@@ -3328,7 +3328,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
}
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
......
......@@ -192,7 +192,7 @@ struct kvm_async_pf {
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
......@@ -1064,6 +1064,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
extern struct kvm_device_ops kvm_vfio_ops;
extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
extern struct kvm_device_ops kvm_flic_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
......
......@@ -413,6 +413,8 @@ struct kvm_s390_psw {
#define KVM_S390_PROGRAM_INT 0xfffe0001u
#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
#define KVM_S390_RESTART 0xfffe0003u
#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
#define KVM_S390_MCHK 0xfffe1000u
#define KVM_S390_INT_VIRTIO 0xffff2603u
#define KVM_S390_INT_SERVICE 0xffff2401u
......@@ -434,6 +436,69 @@ struct kvm_s390_interrupt {
__u64 parm64;
};
struct kvm_s390_io_info {
__u16 subchannel_id;
__u16 subchannel_nr;
__u32 io_int_parm;
__u32 io_int_word;
};
struct kvm_s390_ext_info {
__u32 ext_params;
__u32 pad;
__u64 ext_params2;
};
struct kvm_s390_pgm_info {
__u64 trans_exc_code;
__u64 mon_code;
__u64 per_address;
__u32 data_exc_code;
__u16 code;
__u16 mon_class_nr;
__u8 per_code;
__u8 per_atmid;
__u8 exc_access_id;
__u8 per_access_id;
__u8 op_access_id;
__u8 pad[3];
};
struct kvm_s390_prefix_info {
__u32 address;
};
struct kvm_s390_extcall_info {
__u16 code;
};
struct kvm_s390_emerg_info {
__u16 code;
};
struct kvm_s390_mchk_info {
__u64 cr14;
__u64 mcic;
__u64 failing_storage_address;
__u32 ext_damage_code;
__u32 pad;
__u8 fixed_logout[16];
};
struct kvm_s390_irq {
__u64 type;
union {
struct kvm_s390_io_info io;
struct kvm_s390_ext_info ext;
struct kvm_s390_pgm_info pgm;
struct kvm_s390_emerg_info emerg;
struct kvm_s390_extcall_info extcall;
struct kvm_s390_prefix_info prefix;
struct kvm_s390_mchk_info mchk;
char reserved[64];
} u;
};
/* for KVM_SET_GUEST_DEBUG */
#define KVM_GUESTDBG_ENABLE 0x00000001
......@@ -855,6 +920,7 @@ struct kvm_device_attr {
#define KVM_DEV_VFIO_GROUP_ADD 1
#define KVM_DEV_VFIO_GROUP_DEL 2
#define KVM_DEV_TYPE_ARM_VGIC_V2 5
#define KVM_DEV_TYPE_FLIC 6
/*
* ioctls for VM fds
......
......@@ -22,6 +22,10 @@ config KVM_MMIO
config KVM_ASYNC_PF
bool
# Toggle to switch between direct notification and batch job
config KVM_ASYNC_PF_SYNC
bool
config HAVE_KVM_MSI
bool
......
......@@ -28,6 +28,21 @@
#include "async_pf.h"
#include <trace/events/kvm.h>
static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
kvm_arch_async_page_present(vcpu, work);
#endif
}
static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
#ifndef CONFIG_KVM_ASYNC_PF_SYNC
kvm_arch_async_page_present(vcpu, work);
#endif
}
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
......@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
down_read(&mm->mmap_sem);
get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
kvm_async_page_present_sync(vcpu, apf);
unuse_mm(mm);
spin_lock(&vcpu->async_pf.lock);
......@@ -97,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
list_del(&work->queue);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
flush_work(&work->work);
#else
if (cancel_work_sync(&work->work)) {
mmdrop(work->mm);
kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work);
}
#endif
}
spin_lock(&vcpu->async_pf.lock);
......@@ -138,7 +159,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
}
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
......@@ -159,7 +180,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
work->wakeup_all = false;
work->vcpu = vcpu;
work->gva = gva;
work->addr = gfn_to_hva(vcpu->kvm, gfn);
work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
atomic_inc(&work->mm->mm_count);
......
......@@ -2283,6 +2283,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
case KVM_DEV_TYPE_ARM_VGIC_V2:
ops = &kvm_arm_vgic_v2_ops;
break;
#endif
#ifdef CONFIG_S390
case KVM_DEV_TYPE_FLIC:
ops = &kvm_flic_ops;
break;
#endif
default:
return -ENODEV;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment