Commit fb491d55 authored by Claudio Imbrenda's avatar Claudio Imbrenda Committed by Janosch Frank

KVM: s390: pv: asynchronous destroy for reboot

Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.

This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.

This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.

The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.

Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:

KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.

KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.
Signed-off-by: default avatarClaudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: default avatarNico Boehr <nrb@linux.ibm.com>
Reviewed-by: default avatarJanosch Frank <frankja@linux.ibm.com>
Reviewed-by: default avatarSteffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: default avatarJanosch Frank <frankja@linux.ibm.com>
parent 58635d66
...@@ -942,6 +942,8 @@ struct kvm_s390_pv { ...@@ -942,6 +942,8 @@ struct kvm_s390_pv {
unsigned long stor_base; unsigned long stor_base;
void *stor_var; void *stor_var;
bool dumping; bool dumping;
void *set_aside;
struct list_head need_cleanup;
struct mmu_notifier mmu_notifier; struct mmu_notifier mmu_notifier;
}; };
......
...@@ -209,6 +209,8 @@ unsigned int diag9c_forwarding_hz; ...@@ -209,6 +209,8 @@ unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644); module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
static int async_destroy;
/* /*
* For now we handle at most 16 double words as this is what the s390 base * For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond * kernel handles and stores in the prefix page. If we ever need to go beyond
...@@ -2504,9 +2506,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, ...@@ -2504,9 +2506,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{ {
const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
void __user *argp = (void __user *)cmd->data;
int r = 0; int r = 0;
u16 dummy; u16 dummy;
void __user *argp = (void __user *)cmd->data;
if (need_lock)
mutex_lock(&kvm->lock);
switch (cmd->cmd) { switch (cmd->cmd) {
case KVM_PV_ENABLE: { case KVM_PV_ENABLE: {
...@@ -2540,6 +2546,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) ...@@ -2540,6 +2546,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
break; break;
} }
case KVM_PV_ASYNC_CLEANUP_PREPARE:
r = -EINVAL;
if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
break;
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
/*
* If a CPU could not be destroyed, destroy VM will also fail.
* There is no point in trying to destroy it. Instead return
* the rc and rrc from the first CPU that failed destroying.
*/
if (r)
break;
r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
/* no need to block service interrupts any more */
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
break;
case KVM_PV_ASYNC_CLEANUP_PERFORM:
r = -EINVAL;
if (!async_destroy)
break;
/* kvm->lock must not be held; this is asserted inside the function. */
r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
break;
case KVM_PV_DISABLE: { case KVM_PV_DISABLE: {
r = -EINVAL; r = -EINVAL;
if (!kvm_s390_pv_is_protected(kvm)) if (!kvm_s390_pv_is_protected(kvm))
...@@ -2553,7 +2584,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) ...@@ -2553,7 +2584,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
*/ */
if (r) if (r)
break; break;
r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
/* no need to block service interrupts any more */ /* no need to block service interrupts any more */
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
...@@ -2703,6 +2734,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) ...@@ -2703,6 +2734,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
default: default:
r = -ENOTTY; r = -ENOTTY;
} }
if (need_lock)
mutex_unlock(&kvm->lock);
return r; return r;
} }
...@@ -2907,9 +2941,8 @@ long kvm_arch_vm_ioctl(struct file *filp, ...@@ -2907,9 +2941,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EINVAL; r = -EINVAL;
break; break;
} }
mutex_lock(&kvm->lock); /* must be called without kvm->lock */
r = kvm_s390_handle_pv(kvm, &args); r = kvm_s390_handle_pv(kvm, &args);
mutex_unlock(&kvm->lock);
if (copy_to_user(argp, &args, sizeof(args))) { if (copy_to_user(argp, &args, sizeof(args))) {
r = -EFAULT; r = -EFAULT;
break; break;
...@@ -3228,6 +3261,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -3228,6 +3261,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_vsie_init(kvm); kvm_s390_vsie_init(kvm);
if (use_gisa) if (use_gisa)
kvm_s390_gisa_init(kvm); kvm_s390_gisa_init(kvm);
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
kvm->arch.pv.set_aside = NULL;
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0; return 0;
...@@ -3272,11 +3307,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) ...@@ -3272,11 +3307,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
/* /*
* We are already at the end of life and kvm->lock is not taken. * We are already at the end of life and kvm->lock is not taken.
* This is ok as the file descriptor is closed by now and nobody * This is ok as the file descriptor is closed by now and nobody
* can mess with the pv state. To avoid lockdep_assert_held from * can mess with the pv state.
* complaining we do not use kvm_s390_pv_is_protected.
*/ */
if (kvm_s390_pv_get_handle(kvm)) kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
/* /*
* Remove the mmu notifier only when the whole KVM VM is torn down, * Remove the mmu notifier only when the whole KVM VM is torn down,
* and only if one was registered to begin with. If the VM is * and only if one was registered to begin with. If the VM is
......
...@@ -244,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) ...@@ -244,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
/* implemented in pv.c */ /* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
......
This diff is collapsed.
...@@ -1740,6 +1740,8 @@ enum pv_cmd_id { ...@@ -1740,6 +1740,8 @@ enum pv_cmd_id {
KVM_PV_UNSHARE_ALL, KVM_PV_UNSHARE_ALL,
KVM_PV_INFO, KVM_PV_INFO,
KVM_PV_DUMP, KVM_PV_DUMP,
KVM_PV_ASYNC_CLEANUP_PREPARE,
KVM_PV_ASYNC_CLEANUP_PERFORM,
}; };
struct kvm_pv_cmd { struct kvm_pv_cmd {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment