Commit a3508fbe authored by David Hildenbrand's avatar David Hildenbrand Committed by Christian Borntraeger

KVM: s390: vsie: initial support for nested virtualization

This patch adds basic support for nested virtualization on s390x, called
VSIE (virtual SIE) and allows it to be used by the guest if the necessary
facilities are supported by the hardware and enabled for the guest.

In order to make this work, we have to shadow the sie control block
provided by guest 2. In order to gain some performance, we have to
reuse the same shadow blocks as good as possible. For now, we allow
as many shadow blocks as we have VCPUs (that way, every VCPU can run the
VSIE concurrently).

We have to watch out for the prefix getting unmapped out of our shadow
gmap and properly get the VCPU out of VSIE in that case, to fault the
prefix pages back in. We use the PROG_REQUEST bit for that purpose.

This patch is based on an initial prototype by Tobias Elpelt.
Acked-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: default avatarDavid Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
parent df9b2b4a
......@@ -145,7 +145,7 @@ struct kvm_s390_sie_block {
__u64 cputm; /* 0x0028 */
__u64 ckc; /* 0x0030 */
__u64 epoch; /* 0x0038 */
__u8 reserved40[4]; /* 0x0040 */
__u32 svcc; /* 0x0040 */
#define LCTL_CR0 0x8000
#define LCTL_CR6 0x0200
#define LCTL_CR9 0x0040
......@@ -167,6 +167,9 @@ struct kvm_s390_sie_block {
#define ICPT_INST 0x04
#define ICPT_PROGI 0x08
#define ICPT_INSTPROGI 0x0C
#define ICPT_EXTINT 0x14
#define ICPT_VALIDITY 0x20
#define ICPT_STOP 0x28
#define ICPT_OPEREXC 0x2C
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
......@@ -281,6 +284,7 @@ struct kvm_vcpu_stat {
u32 instruction_stsi;
u32 instruction_stfl;
u32 instruction_tprot;
u32 instruction_sie;
u32 instruction_essa;
u32 instruction_sthyi;
u32 instruction_sigp_sense;
......@@ -637,6 +641,14 @@ struct sie_page2 {
u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
} __packed;
struct kvm_s390_vsie {
struct mutex mutex;
struct radix_tree_root addr_to_page;
int page_count;
int next;
struct page *pages[KVM_MAX_VCPUS];
};
struct kvm_arch{
void *sca;
int use_esca;
......@@ -661,6 +673,7 @@ struct kvm_arch{
struct sie_page2 *sie_page2;
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
struct kvm_s390_vsie vsie;
u64 epoch;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
......
......@@ -98,6 +98,7 @@ struct kvm_s390_vm_cpu_machine {
#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024
#define KVM_S390_VM_CPU_FEAT_ESOP 0
#define KVM_S390_VM_CPU_FEAT_SIEF2 1
struct kvm_s390_vm_cpu_feat {
__u64 feat[16];
};
......
......@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o
kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o
obj-$(CONFIG_KVM) += kvm.o
......@@ -99,6 +99,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
{ "instruction_sie", VCPU_STAT(instruction_sie) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
......@@ -142,6 +143,7 @@ static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS)
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
......@@ -187,6 +189,8 @@ int kvm_arch_hardware_setup(void)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_pte_notifier(&gmap_notifier);
vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
gmap_register_pte_notifier(&vsie_gmap_notifier);
atomic_notifier_chain_register(&s390_epoch_delta_notifier,
&kvm_clock_notifier);
return 0;
......@@ -195,6 +199,7 @@ int kvm_arch_hardware_setup(void)
void kvm_arch_hardware_unsetup(void)
{
gmap_unregister_pte_notifier(&gmap_notifier);
gmap_unregister_pte_notifier(&vsie_gmap_notifier);
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
&kvm_clock_notifier);
}
......@@ -252,6 +257,14 @@ static void kvm_s390_cpu_feat_init(void)
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
*/
if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
!test_facility(3))
return;
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
}
int kvm_arch_init(void *opaque)
......@@ -1406,6 +1419,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.epoch = 0;
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
......@@ -1463,6 +1477,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
gmap_remove(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
......
......@@ -252,6 +252,13 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
/* implemented in vsie.c */
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
void kvm_s390_vsie_init(struct kvm *kvm);
void kvm_s390_vsie_destroy(struct kvm *kvm);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
......
......@@ -719,6 +719,7 @@ static const intercept_handler_t b2_handlers[256] = {
[0x10] = handle_set_prefix,
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
[0x14] = kvm_s390_handle_vsie,
[0x21] = handle_ipte_interlock,
[0x29] = handle_iske,
[0x2a] = handle_rrbe,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment