Commit da46b58f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'hyperv-next-signed-20230424' of...

Merge tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv updates from Wei Liu:

 - PCI passthrough for Hyper-V confidential VMs (Michael Kelley)

 - Hyper-V VTL mode support (Saurabh Sengar)

 - Move panic report initialization code earlier (Long Li)

 - Various improvements and bug fixes (Dexuan Cui and Michael Kelley)

* tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (22 commits)
  PCI: hv: Replace retarget_msi_interrupt_params with hyperv_pcpu_input_arg
  Drivers: hv: move panic report code from vmbus to hv early init code
  x86/hyperv: VTL support for Hyper-V
  Drivers: hv: Kconfig: Add HYPERV_VTL_MODE
  x86/hyperv: Make hv_get_nmi_reason public
  x86/hyperv: Add VTL specific structs and hypercalls
  x86/init: Make get/set_rtc_noop() public
  x86/hyperv: Exclude lazy TLB mode CPUs from enlightened TLB flushes
  x86/hyperv: Add callback filter to cpumask_to_vpset()
  Drivers: hv: vmbus: Remove the per-CPU post_msg_page
  clocksource: hyper-v: make sure Invariant-TSC is used if it is available
  PCI: hv: Enable PCI pass-thru devices in Confidential VMs
  Drivers: hv: Don't remap addresses that are above shared_gpa_boundary
  hv_netvsc: Remove second mapping of send and recv buffers
  Drivers: hv: vmbus: Remove second way of mapping ring buffers
  Drivers: hv: vmbus: Remove second mapping of VMBus monitor pages
  swiotlb: Remove bounce buffer remapping for Hyper-V
  Driver: VMBus: Add Devicetree support
  dt-bindings: bus: Add Hyper-V VMBus
  Drivers: hv: vmbus: Convert acpi_device to more generic platform_device
  ...
parents 8ccd54fe a494aef2
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/bus/microsoft,vmbus.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microsoft Hyper-V VMBus
maintainers:
- Saurabh Sengar <ssengar@linux.microsoft.com>
description:
VMBus is a software bus that implement the protocols for communication
between the root or host OS and guest OSs (virtual machines).
properties:
compatible:
const: microsoft,vmbus
ranges: true
'#address-cells':
const: 2
'#size-cells':
const: 1
required:
- compatible
- ranges
- '#address-cells'
- '#size-cells'
additionalProperties: false
examples:
- |
soc {
#address-cells = <2>;
#size-cells = <1>;
bus {
compatible = "simple-bus";
#address-cells = <2>;
#size-cells = <1>;
ranges;
vmbus@ff0000000 {
compatible = "microsoft,vmbus";
#address-cells = <2>;
#size-cells = <1>;
ranges = <0x0f 0xf0000000 0x0f 0xf0000000 0x10000000>;
};
};
};
......@@ -9588,6 +9588,7 @@ S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
F: Documentation/ABI/stable/sysfs-bus-vmbus
F: Documentation/ABI/testing/debugfs-hyperv
F: Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml
F: Documentation/virt/hyperv
F: Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
F: arch/arm64/hyperv
......
# SPDX-License-Identifier: GPL-2.0-only
obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o
obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o
obj-$(CONFIG_HYPERV_VTL_MODE) += hv_vtl.o
ifdef CONFIG_X86_64
obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o
......
......@@ -96,6 +96,11 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
wrmsr(HV_X64_MSR_EOI, val, 0);
}
static bool cpu_is_self(int cpu)
{
return cpu == smp_processor_id();
}
/*
* IPI implementation on Hyper-V.
*/
......@@ -128,10 +133,9 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
*/
if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
if (exclude_self)
nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
else
nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
exclude_self ? cpu_is_self : NULL);
/*
* 'nr_bank <= 0' means some CPUs in cpumask can't be
......
......@@ -63,7 +63,10 @@ static int hyperv_init_ghcb(void)
* memory boundary and map it here.
*/
rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB);
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary;
ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
if (!ghcb_va)
return -ENOMEM;
......@@ -217,7 +220,7 @@ static int hv_cpu_die(unsigned int cpu)
if (hv_ghcb_pg) {
ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg);
if (*ghcb_va)
memunmap(*ghcb_va);
iounmap(*ghcb_va);
*ghcb_va = NULL;
}
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2023, Microsoft Corporation.
*
* Author:
* Saurabh Sengar <ssengar@microsoft.com>
*/
#include <asm/apic.h>
#include <asm/boot.h>
#include <asm/desc.h>
#include <asm/i8259.h>
#include <asm/mshyperv.h>
#include <asm/realmode.h>
extern struct boot_params boot_params;
static struct real_mode_header hv_vtl_real_mode_header;
void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
x86_init.irqs.pre_vector_init = x86_init_noop;
x86_init.timers.timer_init = x86_init_noop;
x86_platform.get_wallclock = get_rtc_noop;
x86_platform.set_wallclock = set_rtc_noop;
x86_platform.get_nmi_reason = hv_get_nmi_reason;
x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
x86_platform.legacy.rtc = 0;
x86_platform.legacy.warm_reset = 0;
x86_platform.legacy.reserve_bios_regions = 0;
x86_platform.legacy.devices.pnpbios = 0;
}
static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc)
{
return ((u64)desc->base3 << 32) | ((u64)desc->base2 << 24) |
(desc->base1 << 16) | desc->base0;
}
static inline u32 hv_vtl_system_desc_limit(struct ldttss_desc *desc)
{
return ((u32)desc->limit1 << 16) | (u32)desc->limit0;
}
typedef void (*secondary_startup_64_fn)(void*, void*);
static void hv_vtl_ap_entry(void)
{
((secondary_startup_64_fn)secondary_startup_64)(&boot_params, &boot_params);
}
static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored)
{
u64 status;
int ret = 0;
struct hv_enable_vp_vtl *input;
unsigned long irq_flags;
struct desc_ptr gdt_ptr;
struct desc_ptr idt_ptr;
struct ldttss_desc *tss;
struct ldttss_desc *ldt;
struct desc_struct *gdt;
u64 rsp = current->thread.sp;
u64 rip = (u64)&hv_vtl_ap_entry;
native_store_gdt(&gdt_ptr);
store_idt(&idt_ptr);
gdt = (struct desc_struct *)((void *)(gdt_ptr.address));
tss = (struct ldttss_desc *)(gdt + GDT_ENTRY_TSS);
ldt = (struct ldttss_desc *)(gdt + GDT_ENTRY_LDT);
local_irq_save(irq_flags);
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
memset(input, 0, sizeof(*input));
input->partition_id = HV_PARTITION_ID_SELF;
input->vp_index = target_vp_index;
input->target_vtl.target_vtl = HV_VTL_MGMT;
/*
* The x86_64 Linux kernel follows the 16-bit -> 32-bit -> 64-bit
* mode transition sequence after waking up an AP with SIPI whose
* vector points to the 16-bit AP startup trampoline code. Here in
* VTL2, we can't perform that sequence as the AP has to start in
* the 64-bit mode.
*
* To make this happen, we tell the hypervisor to load a valid 64-bit
* context (most of which is just magic numbers from the CPU manual)
* so that AP jumps right to the 64-bit entry of the kernel, and the
* control registers are loaded with values that let the AP fetch the
* code and data and carry on with work it gets assigned.
*/
input->vp_context.rip = rip;
input->vp_context.rsp = rsp;
input->vp_context.rflags = 0x0000000000000002;
input->vp_context.efer = __rdmsr(MSR_EFER);
input->vp_context.cr0 = native_read_cr0();
input->vp_context.cr3 = __native_read_cr3();
input->vp_context.cr4 = native_read_cr4();
input->vp_context.msr_cr_pat = __rdmsr(MSR_IA32_CR_PAT);
input->vp_context.idtr.limit = idt_ptr.size;
input->vp_context.idtr.base = idt_ptr.address;
input->vp_context.gdtr.limit = gdt_ptr.size;
input->vp_context.gdtr.base = gdt_ptr.address;
/* Non-system desc (64bit), long, code, present */
input->vp_context.cs.selector = __KERNEL_CS;
input->vp_context.cs.base = 0;
input->vp_context.cs.limit = 0xffffffff;
input->vp_context.cs.attributes = 0xa09b;
/* Non-system desc (64bit), data, present, granularity, default */
input->vp_context.ss.selector = __KERNEL_DS;
input->vp_context.ss.base = 0;
input->vp_context.ss.limit = 0xffffffff;
input->vp_context.ss.attributes = 0xc093;
/* System desc (128bit), present, LDT */
input->vp_context.ldtr.selector = GDT_ENTRY_LDT * 8;
input->vp_context.ldtr.base = hv_vtl_system_desc_base(ldt);
input->vp_context.ldtr.limit = hv_vtl_system_desc_limit(ldt);
input->vp_context.ldtr.attributes = 0x82;
/* System desc (128bit), present, TSS, 0x8b - busy, 0x89 -- default */
input->vp_context.tr.selector = GDT_ENTRY_TSS * 8;
input->vp_context.tr.base = hv_vtl_system_desc_base(tss);
input->vp_context.tr.limit = hv_vtl_system_desc_limit(tss);
input->vp_context.tr.attributes = 0x8b;
status = hv_do_hypercall(HVCALL_ENABLE_VP_VTL, input, NULL);
if (!hv_result_success(status) &&
hv_result(status) != HV_STATUS_VTL_ALREADY_ENABLED) {
pr_err("HVCALL_ENABLE_VP_VTL failed for VP : %d ! [Err: %#llx\n]",
target_vp_index, status);
ret = -EINVAL;
goto free_lock;
}
status = hv_do_hypercall(HVCALL_START_VP, input, NULL);
if (!hv_result_success(status)) {
pr_err("HVCALL_START_VP failed for VP : %d ! [Err: %#llx]\n",
target_vp_index, status);
ret = -EINVAL;
}
free_lock:
local_irq_restore(irq_flags);
return ret;
}
static int hv_vtl_apicid_to_vp_id(u32 apic_id)
{
u64 control;
u64 status;
unsigned long irq_flags;
struct hv_get_vp_from_apic_id_in *input;
u32 *output, ret;
local_irq_save(irq_flags);
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
memset(input, 0, sizeof(*input));
input->partition_id = HV_PARTITION_ID_SELF;
input->apic_ids[0] = apic_id;
output = (u32 *)input;
control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID;
status = hv_do_hypercall(control, input, output);
ret = output[0];
local_irq_restore(irq_flags);
if (!hv_result_success(status)) {
pr_err("failed to get vp id from apic id %d, status %#llx\n",
apic_id, status);
return -EINVAL;
}
return ret;
}
static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
{
int vp_id;
pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid);
vp_id = hv_vtl_apicid_to_vp_id(apicid);
if (vp_id < 0) {
pr_err("Couldn't find CPU with APIC ID %d\n", apicid);
return -EINVAL;
}
if (vp_id > ms_hyperv.max_vp_index) {
pr_err("Invalid CPU id %d for APIC ID %d\n", vp_id, apicid);
return -EINVAL;
}
return hv_vtl_bringup_vcpu(vp_id, start_eip);
}
static int __init hv_vtl_early_init(void)
{
/*
* `boot_cpu_has` returns the runtime feature support,
* and here is the earliest it can be used.
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
panic("XSAVE has to be disabled as it is not supported by this module.\n"
"Please add 'noxsave' to the kernel command line.\n");
real_mode_header = &hv_vtl_real_mode_header;
apic->wakeup_secondary_cpu_64 = hv_vtl_wakeup_secondary_cpu;
return 0;
}
early_initcall(hv_vtl_early_init);
......@@ -376,34 +376,6 @@ void __init hv_vtom_init(void)
#endif /* CONFIG_AMD_MEM_ENCRYPT */
/*
* hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
*/
void *hv_map_memory(void *addr, unsigned long size)
{
unsigned long *pfns = kcalloc(size / PAGE_SIZE,
sizeof(unsigned long), GFP_KERNEL);
void *vaddr;
int i;
if (!pfns)
return NULL;
for (i = 0; i < size / PAGE_SIZE; i++)
pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) +
(ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT);
vaddr = vmap_pfn(pfns, size / PAGE_SIZE, pgprot_decrypted(PAGE_KERNEL));
kfree(pfns);
return vaddr;
}
void hv_unmap_memory(void *addr)
{
vunmap(addr);
}
enum hv_isolation_type hv_get_isolation_type(void)
{
if (!(ms_hyperv.priv_high & HV_ISOLATION))
......
......@@ -52,6 +52,11 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
return gva_n - offset;
}
static bool cpu_is_lazy(int cpu)
{
return per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}
static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
......@@ -60,6 +65,7 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
struct hv_tlb_flush *flush;
u64 status;
unsigned long flags;
bool do_lazy = !info->freed_tables;
trace_hyperv_mmu_flush_tlb_multi(cpus, info);
......@@ -112,6 +118,8 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
goto do_ex_hypercall;
for_each_cpu(cpu, cpus) {
if (do_lazy && cpu_is_lazy(cpu))
continue;
vcpu = hv_cpu_number_to_vp_number(cpu);
if (vcpu == VP_INVAL) {
local_irq_restore(flags);
......@@ -198,7 +206,8 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
flush->hv_vp_set.valid_bank_mask = 0;
flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus,
info->freed_tables ? NULL : cpu_is_lazy);
if (nr_bank < 0)
return HV_STATUS_INVALID_PARAMETER;
......
......@@ -122,6 +122,9 @@
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
/* Use hypercalls for MMIO config space access */
#define HV_X64_USE_MMIO_HYPERCALLS BIT(21)
/*
* CPU management features identification.
* These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits.
......@@ -713,6 +716,81 @@ union hv_msi_entry {
} __packed;
};
struct hv_x64_segment_register {
u64 base;
u32 limit;
u16 selector;
union {
struct {
u16 segment_type : 4;
u16 non_system_segment : 1;
u16 descriptor_privilege_level : 2;
u16 present : 1;
u16 reserved : 4;
u16 available : 1;
u16 _long : 1;
u16 _default : 1;
u16 granularity : 1;
} __packed;
u16 attributes;
};
} __packed;
struct hv_x64_table_register {
u16 pad[3];
u16 limit;
u64 base;
} __packed;
struct hv_init_vp_context {
u64 rip;
u64 rsp;
u64 rflags;
struct hv_x64_segment_register cs;
struct hv_x64_segment_register ds;
struct hv_x64_segment_register es;
struct hv_x64_segment_register fs;
struct hv_x64_segment_register gs;
struct hv_x64_segment_register ss;
struct hv_x64_segment_register tr;
struct hv_x64_segment_register ldtr;
struct hv_x64_table_register idtr;
struct hv_x64_table_register gdtr;
u64 efer;
u64 cr0;
u64 cr3;
u64 cr4;
u64 msr_cr_pat;
} __packed;
union hv_input_vtl {
u8 as_uint8;
struct {
u8 target_vtl: 4;
u8 use_target_vtl: 1;
u8 reserved_z: 3;
};
} __packed;
struct hv_enable_vp_vtl {
u64 partition_id;
u32 vp_index;
union hv_input_vtl target_vtl;
u8 mbz0;
u16 mbz1;
struct hv_init_vp_context vp_context;
} __packed;
struct hv_get_vp_from_apic_id_in {
u64 partition_id;
union hv_input_vtl target_vtl;
u8 res[7];
u32 apic_ids[];
} __packed;
#include <asm-generic/hyperv-tlfs.h>
#endif
......@@ -19,6 +19,10 @@
*/
#define HV_IOAPIC_BASE_ADDRESS 0xfec00000
#define HV_VTL_NORMAL 0x0
#define HV_VTL_SECURE 0x1
#define HV_VTL_MGMT 0x2
union hv_ghcb;
DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
......@@ -29,6 +33,11 @@ typedef int (*hyperv_fill_flush_list_func)(
void hyperv_vector_handler(struct pt_regs *regs);
static inline unsigned char hv_get_nmi_reason(void)
{
return 0;
}
#if IS_ENABLED(CONFIG_HYPERV)
extern int hyperv_init_cpuhp;
......@@ -271,6 +280,12 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_VTL_MODE
void __init hv_vtl_init_platform(void);
#else
static inline void __init hv_vtl_init_platform(void) {}
#endif
#include <asm-generic/mshyperv.h>
#endif
......@@ -330,5 +330,7 @@ extern void x86_init_uint_noop(unsigned int unused);
extern bool bool_x86_init_noop(void);
extern void x86_op_int_noop(int cpu);
extern bool x86_pnpbios_disabled(void);
extern int set_rtc_noop(const struct timespec64 *now);
extern void get_rtc_noop(struct timespec64 *now);
#endif
......@@ -18,7 +18,6 @@
#include <linux/kexec.h>
#include <linux/i8253.h>
#include <linux/random.h>
#include <linux/swiotlb.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
......@@ -249,11 +248,6 @@ static uint32_t __init ms_hyperv_platform(void)
return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
}
static unsigned char hv_get_nmi_reason(void)
{
return 0;
}
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
......@@ -408,12 +402,8 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
static_branch_enable(&isolation_type_snp);
#ifdef CONFIG_SWIOTLB
swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
#endif
}
}
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
......@@ -524,6 +514,7 @@ static void __init ms_hyperv_init_platform(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
hv_vtl_init_platform();
#endif
/*
* TSC should be marked as unstable only after Hyper-V
......
......@@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; }
static void iommu_shutdown_noop(void) { }
bool __init bool_x86_init_noop(void) { return false; }
void x86_op_int_noop(int cpu) { }
static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
static void get_rtc_noop(struct timespec64 *now) { }
int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
void get_rtc_noop(struct timespec64 *now) { }
static __initconst const struct of_device_id of_cmos_match[] = {
{ .compatible = "motorola,mc146818" },
......
......@@ -49,7 +49,7 @@ static bool direct_mode_enabled;
static int stimer0_irq = -1;
static int stimer0_message_sint;
static DEFINE_PER_CPU(long, stimer0_evt);
static __maybe_unused DEFINE_PER_CPU(long, stimer0_evt);
/*
* Common code for stimer0 interrupts coming via Direct Mode or
......@@ -68,7 +68,7 @@ EXPORT_SYMBOL_GPL(hv_stimer0_isr);
* stimer0 interrupt handler for architectures that support
* per-cpu interrupts, which also implies Direct Mode.
*/
static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id)
static irqreturn_t __maybe_unused hv_stimer0_percpu_isr(int irq, void *dev_id)
{
hv_stimer0_isr();
return IRQ_HANDLED;
......@@ -196,6 +196,7 @@ void __weak hv_remove_stimer0_handler(void)
{
};
#ifdef CONFIG_ACPI
/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */
static int hv_setup_stimer0_irq(void)
{
......@@ -230,6 +231,16 @@ static void hv_remove_stimer0_irq(void)
stimer0_irq = -1;
}
}
#else
static int hv_setup_stimer0_irq(void)
{
return 0;
}
static void hv_remove_stimer0_irq(void)
{
}
#endif
/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
int hv_stimer_alloc(bool have_percpu_irqs)
......@@ -506,9 +517,6 @@ static bool __init hv_init_tsc_clocksource(void)
{
union hv_reference_tsc_msr tsc_msr;
if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
return false;
/*
* If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
* handles frequency and offset changes due to live migration,
......@@ -525,6 +533,9 @@ static bool __init hv_init_tsc_clocksource(void)
hyperv_cs_msr.rating = 250;
}
if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
return false;
hv_read_reference_counter = read_hv_clock_tsc;
/*
......
......@@ -4,15 +4,39 @@ menu "Microsoft Hyper-V guest support"
config HYPERV
tristate "Microsoft Hyper-V client drivers"
depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \
|| (ARM64 && !CPU_BIG_ENDIAN))
depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \
|| (ACPI && ARM64 && !CPU_BIG_ENDIAN)
select PARAVIRT
select X86_HV_CALLBACK_VECTOR if X86
select VMAP_PFN
select OF_EARLY_FLATTREE if OF
help
Select this option to run Linux as a Hyper-V client operating
system.
config HYPERV_VTL_MODE
bool "Enable Linux to boot in VTL context"
depends on X86_64 && HYPERV
default n
help
Virtual Secure Mode (VSM) is a set of hypervisor capabilities and
enlightenments offered to host and guest partitions which enables
the creation and management of new security boundaries within
operating system software.
VSM achieves and maintains isolation through Virtual Trust Levels
(VTLs). Virtual Trust Levels are hierarchical, with higher levels
being more privileged than lower levels. VTL0 is the least privileged
level, and currently only other level supported is VTL2.
Select this option to build a Linux kernel to run at a VTL other than
the normal VTL0, which currently is only VTL2. This option
initializes the x86 platform for VTL2, and adds the ability to boot
secondary CPUs directly into 64-bit context as required for VTLs other
than 0. A kernel built with this option must run at VTL2, and will
not run as a normal guest.
If unsure, say N
config HYPERV_TIMER
def_bool HYPERV && X86
......
......@@ -67,7 +67,7 @@ const struct vmbus_device vmbus_devs[] = {
{ .dev_type = HV_PCIE,
HV_PCIE_GUID,
.perf_device = false,
.allowed_in_isolated = false,
.allowed_in_isolated = true,
},
/* Synthetic Frame Buffer */
......
......@@ -104,8 +104,14 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
}
msg->monitor_page1 = vmbus_connection.monitor_pages_pa[0];
msg->monitor_page2 = vmbus_connection.monitor_pages_pa[1];
/*
* shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always
* bitwise OR it
*/
msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) |
ms_hyperv.shared_gpa_boundary;
msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) |
ms_hyperv.shared_gpa_boundary;
msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
......@@ -219,72 +225,27 @@ int vmbus_connect(void)
* Setup the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page();
vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page();
vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_page();
vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_page();
if ((vmbus_connection.monitor_pages[0] == NULL) ||
(vmbus_connection.monitor_pages[1] == NULL)) {
ret = -ENOMEM;
goto cleanup;
}
vmbus_connection.monitor_pages_original[0]
= vmbus_connection.monitor_pages[0];
vmbus_connection.monitor_pages_original[1]
= vmbus_connection.monitor_pages[1];
vmbus_connection.monitor_pages_pa[0]
= virt_to_phys(vmbus_connection.monitor_pages[0]);
vmbus_connection.monitor_pages_pa[1]
= virt_to_phys(vmbus_connection.monitor_pages[1]);
if (hv_is_isolation_supported()) {
ret = set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[0],
1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1],
1);
if (ret)
goto cleanup;
/*
* Isolation VM with AMD SNP needs to access monitor page via
* address space above shared gpa boundary.
*/
if (hv_isolation_type_snp()) {
vmbus_connection.monitor_pages_pa[0] +=
ms_hyperv.shared_gpa_boundary;
vmbus_connection.monitor_pages_pa[1] +=
ms_hyperv.shared_gpa_boundary;
vmbus_connection.monitor_pages[0]
= memremap(vmbus_connection.monitor_pages_pa[0],
HV_HYP_PAGE_SIZE,
MEMREMAP_WB);
if (!vmbus_connection.monitor_pages[0]) {
ret = -ENOMEM;
goto cleanup;
}
vmbus_connection.monitor_pages[1]
= memremap(vmbus_connection.monitor_pages_pa[1],
HV_HYP_PAGE_SIZE,
MEMREMAP_WB);
if (!vmbus_connection.monitor_pages[1]) {
ret = -ENOMEM;
goto cleanup;
}
}
/*
* Set memory host visibility hvcall smears memory
* and so zero monitor pages here.
*/
memset(vmbus_connection.monitor_pages[0], 0x00,
HV_HYP_PAGE_SIZE);
memset(vmbus_connection.monitor_pages[1], 0x00,
HV_HYP_PAGE_SIZE);
ret = set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[0], 1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1], 1);
if (ret)
goto cleanup;
}
/*
* Set_memory_decrypted() will change the memory contents if
* decryption occurs, so zero monitor pages here.
*/
memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE);
memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE);
msginfo = kzalloc(sizeof(*msginfo) +
sizeof(struct vmbus_channel_initiate_contact),
......@@ -376,31 +337,13 @@ void vmbus_disconnect(void)
vmbus_connection.int_page = NULL;
}
if (hv_is_isolation_supported()) {
/*
* memunmap() checks input address is ioremap address or not
* inside. It doesn't unmap any thing in the non-SNP CVM and
* so not check CVM type here.
*/
memunmap(vmbus_connection.monitor_pages[0]);
memunmap(vmbus_connection.monitor_pages[1]);
set_memory_encrypted((unsigned long)
vmbus_connection.monitor_pages_original[0],
1);
set_memory_encrypted((unsigned long)
vmbus_connection.monitor_pages_original[1],
1);
}
set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
hv_free_hyperv_page((unsigned long)
vmbus_connection.monitor_pages_original[0]);
hv_free_hyperv_page((unsigned long)
vmbus_connection.monitor_pages_original[1]);
vmbus_connection.monitor_pages_original[0] =
vmbus_connection.monitor_pages[0] = NULL;
vmbus_connection.monitor_pages_original[1] =
vmbus_connection.monitor_pages[1] = NULL;
hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
vmbus_connection.monitor_pages[0] = NULL;
vmbus_connection.monitor_pages[1] = NULL;
}
/*
......
......@@ -38,42 +38,6 @@ int hv_init(void)
return 0;
}
/*
* Functions for allocating and freeing memory with size and
* alignment HV_HYP_PAGE_SIZE. These functions are needed because
* the guest page size may not be the same as the Hyper-V page
* size. We depend upon kmalloc() aligning power-of-two size
* allocations to the allocation size boundary, so that the
* allocated memory appears to Hyper-V as a page of the size
* it expects.
*/
void *hv_alloc_hyperv_page(void)
{
BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL);
else
return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
void *hv_alloc_hyperv_zeroed_page(void)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
else
return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
void hv_free_hyperv_page(unsigned long addr)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
free_page(addr);
else
kfree((void *)addr);
}
/*
* hv_post_message - Post a message using the hypervisor message IPC.
*
......@@ -84,14 +48,15 @@ int hv_post_message(union hv_connection_id connection_id,
void *payload, size_t payload_size)
{
struct hv_input_post_message *aligned_msg;
struct hv_per_cpu_context *hv_cpu;
unsigned long flags;
u64 status;
if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
return -EMSGSIZE;
hv_cpu = get_cpu_ptr(hv_context.cpu_context);
aligned_msg = hv_cpu->post_msg_page;
local_irq_save(flags);
aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
aligned_msg->connectionid = connection_id;
aligned_msg->reserved = 0;
aligned_msg->message_type = message_type;
......@@ -106,11 +71,7 @@ int hv_post_message(union hv_connection_id connection_id,
status = hv_do_hypercall(HVCALL_POST_MESSAGE,
aligned_msg, NULL);
/* Preemption must remain disabled until after the hypercall
* so some other thread can't get scheduled onto this cpu and
* corrupt the per-cpu post_msg_page
*/
put_cpu_ptr(hv_cpu);
local_irq_restore(flags);
return hv_result(status);
}
......@@ -162,12 +123,6 @@ int hv_synic_alloc(void)
goto err;
}
}
hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->post_msg_page == NULL) {
pr_err("Unable to allocate post msg page\n");
goto err;
}
}
return 0;
......@@ -190,7 +145,6 @@ void hv_synic_free(void)
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
free_page((unsigned long)hv_cpu->post_msg_page);
}
kfree(hv_context.hv_numa_map);
......@@ -217,11 +171,13 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.simp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_message_page)
pr_err("Fail to map syinc message page.\n");
pr_err("Fail to map synic message page.\n");
} else {
simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
>> HV_HYP_PAGE_SHIFT;
......@@ -234,12 +190,13 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.siefp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->synic_event_page
= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_event_page)
pr_err("Fail to map syinc event page.\n");
pr_err("Fail to map synic event page.\n");
} else {
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
>> HV_HYP_PAGE_SHIFT;
......@@ -316,7 +273,7 @@ void hv_synic_disable_regs(unsigned int cpu)
*/
simp.simp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
memunmap(hv_cpu->synic_message_page);
iounmap(hv_cpu->synic_message_page);
hv_cpu->synic_message_page = NULL;
} else {
simp.base_simp_gpa = 0;
......@@ -328,7 +285,7 @@ void hv_synic_disable_regs(unsigned int cpu)
siefp.siefp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
memunmap(hv_cpu->synic_event_page);
iounmap(hv_cpu->synic_event_page);
hv_cpu->synic_event_page = NULL;
} else {
siefp.base_siefp_gpa = 0;
......
......@@ -17,8 +17,11 @@
#include <linux/export.h>
#include <linux/bitfield.h>
#include <linux/cpumask.h>
#include <linux/sched/task_stack.h>
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/kdebug.h>
#include <linux/kmsg_dump.h>
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
#include <asm/hyperv-tlfs.h>
......@@ -54,6 +57,10 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
void * __percpu *hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
static void hv_kmsg_dump_unregister(void);
static struct ctl_table_header *hv_ctl_table_hdr;
/*
* Hyper-V specific initialization and shutdown code that is
* common across all architectures. Called from architecture
......@@ -62,6 +69,12 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
void __init hv_common_free(void)
{
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
hv_kmsg_dump_unregister();
kfree(hv_vp_index);
hv_vp_index = NULL;
......@@ -72,10 +85,203 @@ void __init hv_common_free(void)
hyperv_pcpu_input_arg = NULL;
}
/*
* Functions for allocating and freeing memory with size and
* alignment HV_HYP_PAGE_SIZE. These functions are needed because
* the guest page size may not be the same as the Hyper-V page
* size. We depend upon kmalloc() aligning power-of-two size
* allocations to the allocation size boundary, so that the
* allocated memory appears to Hyper-V as a page of the size
* it expects.
*/
void *hv_alloc_hyperv_page(void)
{
BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL);
else
return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
void *hv_alloc_hyperv_zeroed_page(void)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
else
return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
void hv_free_hyperv_page(unsigned long addr)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
free_page(addr);
else
kfree((void *)addr);
}
EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
static void *hv_panic_page;
/*
* Boolean to control whether to report panic messages over Hyper-V.
*
* It can be set via /proc/sys/kernel/hyperv_record_panic_msg
*/
static int sysctl_record_panic_msg = 1;
/*
* sysctl option to allow the user to control whether kmsg data should be
* reported to Hyper-V on panic.
*/
static struct ctl_table hv_ctl_table[] = {
{
.procname = "hyperv_record_panic_msg",
.data = &sysctl_record_panic_msg,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{}
};
static int hv_die_panic_notify_crash(struct notifier_block *self,
unsigned long val, void *args);
static struct notifier_block hyperv_die_report_block = {
.notifier_call = hv_die_panic_notify_crash,
};
static struct notifier_block hyperv_panic_report_block = {
.notifier_call = hv_die_panic_notify_crash,
};
/*
* The following callback works both as die and panic notifier; its
* goal is to provide panic information to the hypervisor unless the
* kmsg dumper is used [see hv_kmsg_dump()], which provides more
* information but isn't always available.
*
* Notice that both the panic/die report notifiers are registered only
* if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
*/
static int hv_die_panic_notify_crash(struct notifier_block *self,
unsigned long val, void *args)
{
struct pt_regs *regs;
bool is_die;
/* Don't notify Hyper-V unless we have a die oops event or panic. */
if (self == &hyperv_panic_report_block) {
is_die = false;
regs = current_pt_regs();
} else { /* die event */
if (val != DIE_OOPS)
return NOTIFY_DONE;
is_die = true;
regs = ((struct die_args *)args)->regs;
}
/*
* Hyper-V should be notified only once about a panic/die. If we will
* be calling hv_kmsg_dump() later with kmsg data, don't do the
* notification here.
*/
if (!sysctl_record_panic_msg || !hv_panic_page)
hyperv_report_panic(regs, val, is_die);
return NOTIFY_DONE;
}
/*
* Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
* buffer and call into Hyper-V to transfer the data.
*/
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
struct kmsg_dump_iter iter;
size_t bytes_written;
/* We are only interested in panics. */
if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
return;
/*
* Write dump contents to the page. No need to synchronize; panic should
* be single-threaded.
*/
kmsg_dump_rewind(&iter);
kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (!bytes_written)
return;
/*
* P3 to contain the physical address of the panic page & P4 to
* contain the size of the panic data in that page. Rest of the
* registers are no-op when the NOTIFY_MSG flag is set.
*/
hv_set_register(HV_REGISTER_CRASH_P0, 0);
hv_set_register(HV_REGISTER_CRASH_P1, 0);
hv_set_register(HV_REGISTER_CRASH_P2, 0);
hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
/*
* Let Hyper-V know there is crash data available along with
* the panic message.
*/
hv_set_register(HV_REGISTER_CRASH_CTL,
(HV_CRASH_CTL_CRASH_NOTIFY |
HV_CRASH_CTL_CRASH_NOTIFY_MSG));
}
static struct kmsg_dumper hv_kmsg_dumper = {
.dump = hv_kmsg_dump,
};
static void hv_kmsg_dump_unregister(void)
{
kmsg_dump_unregister(&hv_kmsg_dumper);
unregister_die_notifier(&hyperv_die_report_block);
atomic_notifier_chain_unregister(&panic_notifier_list,
&hyperv_panic_report_block);
hv_free_hyperv_page((unsigned long)hv_panic_page);
hv_panic_page = NULL;
}
static void hv_kmsg_dump_register(void)
{
int ret;
hv_panic_page = hv_alloc_hyperv_zeroed_page();
if (!hv_panic_page) {
pr_err("Hyper-V: panic message page memory allocation failed\n");
return;
}
ret = kmsg_dump_register(&hv_kmsg_dumper);
if (ret) {
pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
hv_free_hyperv_page((unsigned long)hv_panic_page);
hv_panic_page = NULL;
}
}
int __init hv_common_init(void)
{
int i;
if (hv_is_isolation_supported())
sysctl_record_panic_msg = 0;
/*
* Hyper-V expects to get crash register data or kmsg when
* crash enlightment is available and system crashes. Set
......@@ -84,8 +290,33 @@ int __init hv_common_init(void)
* kernel.
*/
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
u64 hyperv_crash_ctl;
crash_kexec_post_notifiers = true;
pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
/*
* Panic message recording (sysctl_record_panic_msg)
* is enabled by default in non-isolated guests and
* disabled by default in isolated guests; the panic
* message recording won't be available in isolated
* guests should the following registration fail.
*/
hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
if (!hv_ctl_table_hdr)
pr_err("Hyper-V: sysctl table register error");
/*
* Register for panic kmsg callback only if the right
* capability is supported by the hypervisor.
*/
hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
hv_kmsg_dump_register();
register_die_notifier(&hyperv_die_report_block);
atomic_notifier_chain_register(&panic_notifier_list,
&hyperv_panic_report_block);
}
/*
......@@ -311,14 +542,3 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s
return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
void __weak *hv_map_memory(void *addr, unsigned long size)
{
return NULL;
}
EXPORT_SYMBOL_GPL(hv_map_memory);
void __weak hv_unmap_memory(void *addr)
{
}
EXPORT_SYMBOL_GPL(hv_unmap_memory);
......@@ -122,10 +122,6 @@ enum {
struct hv_per_cpu_context {
void *synic_message_page;
void *synic_event_page;
/*
* buffer to post messages to the host.
*/
void *post_msg_page;
/*
* Starting with win8, we can take channel interrupts on any CPU;
......@@ -241,8 +237,6 @@ struct vmbus_connection {
* is child->parent notification
*/
struct hv_monitor_page *monitor_pages[2];
void *monitor_pages_original[2];
phys_addr_t monitor_pages_pa[2];
struct list_head chn_msg_list;
spinlock_t channelmsg_lock;
......
......@@ -186,8 +186,6 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
struct page *pages, u32 page_cnt, u32 max_pkt_size)
{
struct page **pages_wraparound;
unsigned long *pfns_wraparound;
u64 pfn;
int i;
BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
......@@ -196,50 +194,30 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
* First page holds struct hv_ring_buffer, do wraparound mapping for
* the rest.
*/
if (hv_isolation_type_snp()) {
pfn = page_to_pfn(pages) +
PFN_DOWN(ms_hyperv.shared_gpa_boundary);
pages_wraparound = kcalloc(page_cnt * 2 - 1,
sizeof(struct page *),
GFP_KERNEL);
if (!pages_wraparound)
return -ENOMEM;
pfns_wraparound = kcalloc(page_cnt * 2 - 1,
sizeof(unsigned long), GFP_KERNEL);
if (!pfns_wraparound)
return -ENOMEM;
pfns_wraparound[0] = pfn;
for (i = 0; i < 2 * (page_cnt - 1); i++)
pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1;
ring_info->ring_buffer = (struct hv_ring_buffer *)
vmap_pfn(pfns_wraparound, page_cnt * 2 - 1,
pgprot_decrypted(PAGE_KERNEL));
kfree(pfns_wraparound);
if (!ring_info->ring_buffer)
return -ENOMEM;
/* Zero ring buffer after setting memory host visibility. */
memset(ring_info->ring_buffer, 0x00, PAGE_SIZE * page_cnt);
} else {
pages_wraparound = kcalloc(page_cnt * 2 - 1,
sizeof(struct page *),
GFP_KERNEL);
if (!pages_wraparound)
return -ENOMEM;
pages_wraparound[0] = pages;
for (i = 0; i < 2 * (page_cnt - 1); i++)
pages_wraparound[i + 1] =
&pages[i % (page_cnt - 1) + 1];
pages_wraparound[0] = pages;
for (i = 0; i < 2 * (page_cnt - 1); i++)
pages_wraparound[i + 1] =
&pages[i % (page_cnt - 1) + 1];
ring_info->ring_buffer = (struct hv_ring_buffer *)
vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
PAGE_KERNEL);
ring_info->ring_buffer = (struct hv_ring_buffer *)
vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
pgprot_decrypted(PAGE_KERNEL));
kfree(pages_wraparound);
if (!ring_info->ring_buffer)
return -ENOMEM;
}
kfree(pages_wraparound);
if (!ring_info->ring_buffer)
return -ENOMEM;
/*
* Ensure the header page is zero'ed since
* encryption status may have changed.
*/
memset(ring_info->ring_buffer, 0, HV_HYP_PAGE_SIZE);
ring_info->ring_buffer->read_index =
ring_info->ring_buffer->write_index = 0;
......
This diff is collapsed.
......@@ -1139,7 +1139,6 @@ struct netvsc_device {
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
void *recv_original_buf;
u32 recv_buf_size; /* allocated bytes */
struct vmbus_gpadl recv_buf_gpadl_handle;
u32 recv_section_cnt;
......@@ -1148,7 +1147,6 @@ struct netvsc_device {
/* Send buffer allocated by us */
void *send_buf;
void *send_original_buf;
u32 send_buf_size;
struct vmbus_gpadl send_buf_gpadl_handle;
u32 send_section_cnt;
......
......@@ -154,17 +154,8 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
kfree(nvdev->extension);
if (nvdev->recv_original_buf)
vfree(nvdev->recv_original_buf);
else
vfree(nvdev->recv_buf);
if (nvdev->send_original_buf)
vfree(nvdev->send_original_buf);
else
vfree(nvdev->send_buf);
vfree(nvdev->recv_buf);
vfree(nvdev->send_buf);
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
......@@ -347,7 +338,6 @@ static int netvsc_init_buf(struct hv_device *device,
struct nvsp_message *init_packet;
unsigned int buf_size;
int i, ret = 0;
void *vaddr;
/* Get receive buffer area. */
buf_size = device_info->recv_sections * device_info->recv_section_size;
......@@ -383,17 +373,6 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
if (hv_isolation_type_snp()) {
vaddr = hv_map_memory(net_device->recv_buf, buf_size);
if (!vaddr) {
ret = -ENOMEM;
goto cleanup;
}
net_device->recv_original_buf = net_device->recv_buf;
net_device->recv_buf = vaddr;
}
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
......@@ -497,17 +476,6 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
if (hv_isolation_type_snp()) {
vaddr = hv_map_memory(net_device->send_buf, buf_size);
if (!vaddr) {
ret = -ENOMEM;
goto cleanup;
}
net_device->send_original_buf = net_device->send_buf;
net_device->send_buf = vaddr;
}
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
......@@ -762,12 +730,6 @@ void netvsc_device_remove(struct hv_device *device)
netvsc_teardown_send_gpadl(device, net_device, ndev);
}
if (net_device->recv_original_buf)
hv_unmap_memory(net_device->recv_buf);
if (net_device->send_original_buf)
hv_unmap_memory(net_device->send_buf);
/* Release all resources */
free_netvsc_device_rcu(net_device);
}
......@@ -1844,12 +1806,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
netif_napi_del(&net_device->chan_table[0].napi);
cleanup2:
if (net_device->recv_original_buf)
hv_unmap_memory(net_device->recv_buf);
if (net_device->send_original_buf)
hv_unmap_memory(net_device->send_buf);
free_netvsc_device(&net_device->rcu);
return ERR_PTR(ret);
......
This diff is collapsed.
......@@ -146,6 +146,7 @@ union hv_reference_tsc_msr {
/* Declare the various hypercall operations. */
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
#define HVCALL_ENABLE_VP_VTL 0x000f
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
#define HVCALL_SEND_IPI 0x000b
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
......@@ -165,9 +166,13 @@ union hv_reference_tsc_msr {
#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
#define HVCALL_RETARGET_INTERRUPT 0x007e
#define HVCALL_START_VP 0x0099
#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
#define HVCALL_MMIO_READ 0x0106
#define HVCALL_MMIO_WRITE 0x0107
/* Extended hypercalls */
#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
......@@ -218,6 +223,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_STATUS_INVALID_PORT_ID 17
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
#define HV_STATUS_VTL_ALREADY_ENABLED 134
/*
* The Hyper-V TimeRefCount register and the TSC
......@@ -796,4 +802,24 @@ struct hv_memory_hint {
union hv_gpa_page_range ranges[];
} __packed;
/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */
#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64
struct hv_mmio_read_input {
u64 gpa;
u32 size;
u32 reserved;
} __packed;
struct hv_mmio_read_output {
u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH];
} __packed;
struct hv_mmio_write_input {
u64 gpa;
u32 size;
u32 reserved;
u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH];
} __packed;
#endif
......@@ -210,10 +210,9 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number)
static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
const struct cpumask *cpus,
bool exclude_self)
bool (*func)(int cpu))
{
int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
int this_cpu = smp_processor_id();
int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK;
/* vpset.valid_bank_mask can represent up to HV_MAX_SPARSE_VCPU_BANKS banks */
......@@ -232,7 +231,7 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
* Some banks may end up being empty but this is acceptable.
*/
for_each_cpu(cpu, cpus) {
if (exclude_self && cpu == this_cpu)
if (func && func(cpu))
continue;
vcpu = hv_cpu_number_to_vp_number(cpu);
if (vcpu == VP_INVAL)
......@@ -248,17 +247,24 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
return nr_bank;
}
/*
* Convert a Linux cpumask into a Hyper-V VPset. In the _skip variant,
* 'func' is called for each CPU present in cpumask. If 'func' returns
* true, that CPU is skipped -- i.e., that CPU from cpumask is *not*
* added to the Hyper-V VPset. If 'func' is NULL, no CPUs are
* skipped.
*/
static inline int cpumask_to_vpset(struct hv_vpset *vpset,
const struct cpumask *cpus)
{
return __cpumask_to_vpset(vpset, cpus, false);
return __cpumask_to_vpset(vpset, cpus, NULL);
}
static inline int cpumask_to_vpset_noself(struct hv_vpset *vpset,
const struct cpumask *cpus)
static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
const struct cpumask *cpus,
bool (*func)(int cpu))
{
WARN_ON_ONCE(preemptible());
return __cpumask_to_vpset(vpset, cpus, true);
return __cpumask_to_vpset(vpset, cpus, func);
}
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
......@@ -271,8 +277,6 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
void *hv_map_memory(void *addr, unsigned long size);
void hv_unmap_memory(void *addr);
#else /* CONFIG_HYPERV */
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
......
......@@ -1077,6 +1077,11 @@ static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context)
return 0;
}
static inline bool acpi_sleep_state_supported(u8 sleep_state)
{
return false;
}
#endif /* !CONFIG_ACPI */
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
......
......@@ -180,6 +180,4 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
}
#endif /* CONFIG_DMA_RESTRICTED_POOL */
extern phys_addr_t swiotlb_unencrypted_base;
#endif /* __LINUX_SWIOTLB_H */
......@@ -73,8 +73,6 @@ static bool swiotlb_force_disable;
struct io_tlb_mem io_tlb_default_mem;
phys_addr_t swiotlb_unencrypted_base;
static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
static unsigned long default_nareas;
......@@ -201,34 +199,6 @@ static inline unsigned long nr_slots(u64 val)
return DIV_ROUND_UP(val, IO_TLB_SIZE);
}
/*
* Remap swioltb memory in the unencrypted physical address space
* when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
* Isolation VMs).
*/
#ifdef CONFIG_HAS_IOMEM
static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
{
void *vaddr = NULL;
if (swiotlb_unencrypted_base) {
phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
vaddr = memremap(paddr, bytes, MEMREMAP_WB);
if (!vaddr)
pr_err("Failed to map the unencrypted memory %pa size %lx.\n",
&paddr, bytes);
}
return vaddr;
}
#else
static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
{
return NULL;
}
#endif
/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
......@@ -238,18 +208,12 @@ static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
void __init swiotlb_update_mem_attributes(void)
{
struct io_tlb_mem *mem = &io_tlb_default_mem;
void *vaddr;
unsigned long bytes;
if (!mem->nslabs || mem->late_alloc)
return;
vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
mem->vaddr = swiotlb_mem_remap(mem, bytes);
if (!mem->vaddr)
mem->vaddr = vaddr;
set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
}
static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
......@@ -280,13 +244,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
mem->slots[i].alloc_size = 0;
}
/*
* If swiotlb_unencrypted_base is set, the bounce buffer memory will
* be remapped and cleared in swiotlb_update_mem_attributes.
*/
if (swiotlb_unencrypted_base)
return;
memset(vaddr, 0, bytes);
mem->vaddr = vaddr;
return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment