// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *     Anup Patel <anup.patel@wdc.com>
 */

#include <linux/bitops.h>
#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/cacheflush.h>
#include <asm/kvm_vcpu_vector.h>

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
	bool loaded;

	/*
	 * Preemption must be disabled here because this races with
	 * kvm_sched_out()/kvm_sched_in() (called from preempt notifiers),
	 * which also call vcpu_load()/vcpu_put().
	 */
	get_cpu();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	vcpu->arch.last_exit_cpu = -1;

	memcpy(csr, reset_csr, sizeof(*csr));

	memcpy(cntx, reset_cntx, sizeof(*cntx));

	kvm_riscv_vcpu_fp_reset(vcpu);

	kvm_riscv_vcpu_vector_reset(vcpu);

	kvm_riscv_vcpu_timer_reset(vcpu);

	kvm_riscv_vcpu_aia_reset(vcpu);

	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);

	kvm_riscv_vcpu_pmu_reset(vcpu);

	vcpu->arch.hfence_head = 0;
	vcpu->arch.hfence_tail = 0;
	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));

	/* Reset the guest CSRs for the hotplug use case */
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	put_cpu();
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}

int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int rc;
	struct kvm_cpu_context *cntx;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;

	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;
	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);

	/* Setup ISA features available to VCPU */
	kvm_riscv_vcpu_setup_isa(vcpu);

	/* Setup vendor, arch, and implementation details */
	vcpu->arch.mvendorid = sbi_get_mvendorid();
	vcpu->arch.marchid = sbi_get_marchid();
	vcpu->arch.mimpid = sbi_get_mimpid();

	/* Setup VCPU hfence queue */
	spin_lock_init(&vcpu->arch.hfence_lock);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
	cntx = &vcpu->arch.guest_reset_context;
	cntx->sstatus = SR_SPP | SR_SPIE;
	cntx->hstatus = 0;
	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;

	if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
		return -ENOMEM;

	/* By default, make CY, TM, and IR counters accessible in VU mode */
	reset_csr->scounteren = 0x7;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

	/* Setup performance monitoring */
	kvm_riscv_vcpu_pmu_init(vcpu);

	/* Setup VCPU AIA */
	rc = kvm_riscv_vcpu_aia_init(vcpu);
	if (rc)
		return rc;

	/* Reset VCPU */
	kvm_riscv_reset_vcpu(vcpu);

	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	/*
	 * The vcpu with id 0 is the designated boot cpu.
	 * Keep all vcpus with non-zero id in power-off state so that
	 * they can be brought up using the SBI HSM extension.
	 */
	if (vcpu->vcpu_idx != 0)
		kvm_riscv_vcpu_power_off(vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Cleanup VCPU AIA context */
	kvm_riscv_vcpu_aia_deinit(vcpu);

	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	kvm_riscv_vcpu_pmu_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

	/* Free vector context space for host and guest kernel */
	kvm_riscv_vcpu_free_vector_context(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
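	/* A guest timer that has already expired counts as a pending event */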
	return kvm_riscv_vcpu_timer_pending(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
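	/* Let host guest-external interrupts wake this VCPU while it is blocked */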
	kvm_riscv_aia_wakeon_hgei(vcpu, true);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
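	/* Stop waking on host guest-external interrupts once the VCPU unblocks */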
	kvm_riscv_aia_wakeon_hgei(vcpu, false);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
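	/* Runnable if an interrupt is pending and the VCPU is neither powered off nor paused */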
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
		!vcpu->arch.power_off && !vcpu->arch.pause);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
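	/* SR_SPP in the saved guest sstatus means the guest was executing in VS-mode (kernel) */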
	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	if (ioctl == KVM_INTERRUPT) {
		struct kvm_interrupt irq;

		if (copy_from_user(&irq, argp, sizeof(irq)))
			return -EFAULT;

		if (irq.irq == KVM_INTERRUPT_SET)
			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
		else
			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
	}

	return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
		else
			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
		break;
	}
	default:
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	unsigned long mask, val;

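	/* Fold pending-bit updates recorded in software into the shadow HVIP CSR */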
	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;

		csr->hvip &= ~mask;
		csr->hvip |= val;
	}

	/* Flush AIA high interrupts */
	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
}

void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
{
	unsigned long hvip;
	struct kvm_vcpu_arch *v = &vcpu->arch;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Read current HVIP and VSIE CSRs */
	csr->vsie = csr_read(CSR_VSIE);

	/* Sync-up HVIP.VSSIP bit changes done by the Guest */
	hvip = csr_read(CSR_HVIP);
	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
		if (hvip & (1UL << IRQ_VS_SOFT)) {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				set_bit(IRQ_VS_SOFT, v->irqs_pending);
		} else {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
		}
	}

	/* Sync-up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

	/* Sync-up timer CSRs */
	kvm_riscv_vcpu_timer_sync(vcpu);
}

int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, and external
	 * interrupts when irq is one of the local interrupts
	 * defined by RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT)
		return -EINVAL;

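	/*
	 * Set the pending bit first, then publish the change via
	 * irqs_pending_mask; the barrier orders the two updates for
	 * kvm_riscv_vcpu_flush_interrupts().
	 */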
	set_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, and external
	 * interrupts when irq is one of the local interrupts
	 * defined by RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT)
		return -EINVAL;

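	/* Clear the pending bit first, then publish the change via irqs_pending_mask */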
	clear_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	return 0;
}

bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
	unsigned long ie;

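	/* Select the requested interrupts that are enabled by the guest's VSIE */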
	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
		<< VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
		(unsigned long)mask;
	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
		return true;

	/* Check AIA high interrupts */
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}

void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
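	/* Mark the VCPU stopped and kick it so the run loop puts it to sleep */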
	vcpu->arch.power_off = true;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
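	/* Clear the stopped state and wake the VCPU if it is sleeping */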
	vcpu->arch.power_off = false;
	kvm_vcpu_wake_up(vcpu);
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	if (vcpu->arch.power_off)
		mp_state->mp_state = KVM_MP_STATE_STOPPED;
	else
		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		vcpu->arch.power_off = false;
		break;
	case KVM_MP_STATE_STOPPED:
		kvm_riscv_vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	/* TODO: To be implemented later. */
	return -EINVAL;
}

static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
{
	u64 henvcfg = 0;

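	/* Expose optional henvcfg features only when the matching ISA extension is available */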
	if (riscv_isa_extension_available(isa, SVPBMT))
		henvcfg |= ENVCFG_PBMTE;

	if (riscv_isa_extension_available(isa, SSTC))
		henvcfg |= ENVCFG_STCE;

	if (riscv_isa_extension_available(isa, ZICBOM))
		henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);

	if (riscv_isa_extension_available(isa, ZICBOZ))
		henvcfg |= ENVCFG_CBZE;

	csr_write(CSR_HENVCFG, henvcfg);
#ifdef CONFIG_32BIT
	csr_write(CSR_HENVCFGH, henvcfg >> 32);
#endif
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

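	/* Restore guest VS-level CSR state on this host CPU */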
	csr_write(CSR_VSSTATUS, csr->vsstatus);
	csr_write(CSR_VSIE, csr->vsie);
	csr_write(CSR_VSTVEC, csr->vstvec);
	csr_write(CSR_VSSCRATCH, csr->vsscratch);
	csr_write(CSR_VSEPC, csr->vsepc);
	csr_write(CSR_VSCAUSE, csr->vscause);
	csr_write(CSR_VSTVAL, csr->vstval);
	csr_write(CSR_HVIP, csr->hvip);
	csr_write(CSR_VSATP, csr->vsatp);

	kvm_riscv_vcpu_update_config(vcpu->arch.isa);

	kvm_riscv_gstage_update_hgatp(vcpu);

	kvm_riscv_vcpu_timer_restore(vcpu);

	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
					    vcpu->arch.isa);

	kvm_riscv_vcpu_aia_load(vcpu, cpu);

	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->cpu = -1;

	kvm_riscv_vcpu_aia_put(vcpu);

	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);
	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
					 vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);

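	/* Save guest VS-level CSR state */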
	csr->vsstatus = csr_read(CSR_VSSTATUS);
	csr->vsie = csr_read(CSR_VSIE);
	csr->vstvec = csr_read(CSR_VSTVEC);
	csr->vsscratch = csr_read(CSR_VSSCRATCH);
	csr->vsepc = csr_read(CSR_VSEPC);
	csr->vscause = csr_read(CSR_VSCAUSE);
	csr->vstval = csr_read(CSR_VSTVAL);
	csr->hvip = csr_read(CSR_HVIP);
	csr->vsatp = csr_read(CSR_VSATP);
}

static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
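			/*
			 * Drop the vCPU SRCU read lock while sleeping so that
			 * memslot updates are not blocked, then wait until the
			 * VCPU is powered on and no longer paused.
			 */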
			kvm_vcpu_srcu_read_unlock(vcpu);
			rcuwait_wait_event(wait,
				(!vcpu->arch.power_off) && (!vcpu->arch.pause),
				TASK_INTERRUPTIBLE);
			kvm_vcpu_srcu_read_lock(vcpu);

			if (vcpu->arch.power_off || vcpu->arch.pause) {
				/*
				 * Awaken to handle a signal, request to
				 * sleep again later.
				 */
				kvm_make_request(KVM_REQ_SLEEP, vcpu);
			}
		}

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_riscv_reset_vcpu(vcpu);

		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
			kvm_riscv_gstage_update_hgatp(vcpu);

		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
			kvm_riscv_fence_i_process(vcpu);

		/*
		 * The generic KVM_REQ_TLB_FLUSH is the same as
		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
		 */
		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
			kvm_riscv_hfence_vvma_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
			kvm_riscv_hfence_process(vcpu);
	}
}

static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

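	/* Push the shadow HVIP (plus AIA high bits) into hardware */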
	csr_write(CSR_HVIP, csr->hvip);
	kvm_riscv_vcpu_aia_update_hvip(vcpu);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	guest_state_enter_irqoff();
	__kvm_riscv_switch_to(&vcpu->arch);
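	/* Remember the host CPU we ran on; used to detect migration and flush stale TLB state */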
	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (run->immediate_exit) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			preempt_enable();
			continue;
		}

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * VCPU interrupts might have been updated asynchronously,
		 * so update them in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		if (ret <= 0 ||
		    kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Cleanup stale TLB entries
		 *
		 * Note: This should be done after the G-stage VMID has been
		 * updated by kvm_riscv_gstage_vmid_update()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/*
		 * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
		 * get an interrupt between __kvm_riscv_switch_to() and
		 * local_irq_enable() which can potentially change CSRs.
		 */
		trap.sepc = vcpu->arch.guest_context.sepc;
		trap.scause = csr_read(CSR_SCAUSE);
		trap.stval = csr_read(CSR_STVAL);
		trap.htval = csr_read(CSR_HTVAL);
		trap.htinst = csr_read(CSR_HTINST);

		/* Sync up interrupt state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}