Commit 2c57ee6f authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (249 commits)
  KVM: Move apic timer migration away from critical section
  KVM: Put kvm_para.h include outside __KERNEL__
  KVM: Fix unbounded preemption latency
  KVM: Initialize the mmu caches only after verifying cpu support
  KVM: MMU: Fix dirty page setting for pages removed from rmap
  KVM: Portability: Move kvm_fpu to asm-x86/kvm.h
  KVM: x86 emulator: Only allow VMCALL/VMMCALL trapped by #UD
  KVM: MMU: Merge shadow level check in FNAME(fetch)
  KVM: MMU: Move kvm_free_some_pages() into critical section
  KVM: MMU: Switch to mmu spinlock
  KVM: MMU: Avoid calling gfn_to_page() in mmu_set_spte()
  KVM: Add kvm_read_guest_atomic()
  KVM: MMU: Concurrent guest walkers
  KVM: Disable vapic support on Intel machines with FlexPriority
  KVM: Accelerated apic support
  KVM: local APIC TPR access reporting facility
  KVM: Print data for unimplemented wrmsr
  KVM: MMU: Add cache miss statistic
  KVM: MMU: Coalesce remote tlb flushes
  KVM: Expose ioapic to ia64 save/restore APIs
  ...
parents f389e9fc 2f52d58c
......@@ -107,6 +107,7 @@ config ARCH_SUPPORTS_OPROFILE
bool
default y
select HAVE_KVM
config ZONE_DMA32
bool
......@@ -1598,4 +1599,6 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "arch/x86/kvm/Kconfig"
source "lib/Kconfig"
......@@ -7,6 +7,8 @@ else
KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif
core-$(CONFIG_KVM) += arch/x86/kvm/
# BITS is used as extension for files which are available in a 32 bit
# and a 64 bit version to simplify shared Makefiles.
# e.g.: obj-y += foo_$(BITS).o
......
#
# KVM configuration
#
config HAVE_KVM
bool
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on X86
depends on HAVE_KVM || X86
default y
---help---
Say Y here to get to see options for using your Linux host to run other
......@@ -16,7 +19,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on X86 && EXPERIMENTAL
depends on HAVE_KVM && EXPERIMENTAL
select PREEMPT_NOTIFIERS
select ANON_INODES
---help---
......
......@@ -2,7 +2,11 @@
# Makefile for Kernel-based Virtual Machine module
#
kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
......
......@@ -28,6 +28,8 @@
#include <linux/mm.h>
#include "irq.h"
#include <linux/kvm_host.h>
/*
* set irq level. If an edge is detected, then the IRR is set to 1
*/
......@@ -181,10 +183,8 @@ int kvm_pic_read_irq(struct kvm_pic *s)
return intno;
}
static void pic_reset(void *opaque)
void kvm_pic_reset(struct kvm_kpic_state *s)
{
struct kvm_kpic_state *s = opaque;
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
......@@ -209,7 +209,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
addr &= 1;
if (addr == 0) {
if (val & 0x10) {
pic_reset(s); /* init */
kvm_pic_reset(s); /* init */
/*
* deassert a pending interrupt
*/
......
......@@ -20,8 +20,8 @@
*/
#include <linux/module.h>
#include <linux/kvm_host.h>
#include "kvm.h"
#include "irq.h"
/*
......@@ -63,26 +63,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
static void vcpu_kick_intr(void *info)
{
#ifdef DEBUG
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
#endif
}
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
int ipi_pcpu = vcpu->cpu;
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
++vcpu->stat.halt_wakeup;
}
if (vcpu->guest_mode)
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
}
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
......
......@@ -22,7 +22,16 @@
#ifndef __IRQ_H
#define __IRQ_H
#include "kvm.h"
#include <linux/mm_types.h>
#include <linux/hrtimer.h>
#include <linux/kvm_host.h>
#include "iodev.h"
#include "ioapic.h"
#include "lapic.h"
struct kvm;
struct kvm_vcpu;
typedef void irq_request_func(void *opaque, int level);
......@@ -57,109 +66,23 @@ struct kvm_pic {
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
void kvm_pic_update_irq(struct kvm_pic *s);
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
#define IOAPIC_MEM_LENGTH 0x100
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
return kvm->arch.vpic;
}
/* Direct registers. */
#define IOAPIC_REG_SELECT 0x00
#define IOAPIC_REG_WINDOW 0x10
#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
return pic_irqchip(kvm) != NULL;
}
/* Indirect registers. */
#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
#define IOAPIC_REG_VERSION 0x01
#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
struct kvm_ioapic {
u64 base_address;
u32 ioregsel;
u32 id;
u32 irr;
u32 pad;
union ioapic_redir_entry {
u64 bits;
struct {
u8 vector;
u8 delivery_mode:3;
u8 dest_mode:1;
u8 delivery_status:1;
u8 polarity:1;
u8 remote_irr:1;
u8 trig_mode:1;
u8 mask:1;
u8 reserve:7;
u8 reserved[4];
u8 dest_id;
} fields;
} redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
};
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
struct {
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
struct page *regs_page;
void *regs;
};
#ifdef DEBUG
#define ASSERT(x) \
do { \
if (!(x)) { \
printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
__FILE__, __LINE__, #x); \
BUG(); \
} \
} while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
void kvm_free_apic(struct kvm_lapic *apic);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
unsigned long bitmap);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
#endif
......@@ -4,10 +4,10 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kvm_host.h>
#include <asm/msr.h>
#include "svm.h"
#include "kvm.h"
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
......
#ifndef __KVM_X86_LAPIC_H
#define __KVM_X86_LAPIC_H
#include "iodev.h"
#include <linux/kvm_host.h>
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
struct {
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
struct page *regs_page;
void *regs;
gpa_t vapic_addr;
struct page *vapic_page;
};
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
#endif
This diff is collapsed.
#ifndef __KVM_X86_MMU_H
#define __KVM_X86_MMU_H
#include <linux/kvm_host.h>
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
__kvm_mmu_free_some_pages(vcpu);
}
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
return 0;
return kvm_mmu_load(vcpu);
}
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
return vcpu->arch.shadow_efer & EFER_LME;
#else
return 0;
#endif
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr4 & X86_CR4_PAE;
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr4 & X86_CR4_PSE;
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr0 & X86_CR0_PG;
}
#endif
#ifndef __SEGMENT_DESCRIPTOR_H
#define __SEGMENT_DESCRIPTOR_H
struct segment_descriptor {
u16 limit_low;
u16 base_low;
......@@ -14,4 +17,13 @@ struct segment_descriptor {
u8 base_high;
} __attribute__((packed));
#ifdef CONFIG_X86_64
/* LDT or TSS descriptor in the GDT. 16 bytes. */
struct segment_descriptor_64 {
struct segment_descriptor s;
u32 base_higher;
u32 pad_zero;
};
#endif
#endif
This diff is collapsed.
......@@ -204,6 +204,7 @@ struct __attribute__ ((__packed__)) vmcb {
#define INTERCEPT_CR0_MASK 1
#define INTERCEPT_CR3_MASK (1 << 3)
#define INTERCEPT_CR4_MASK (1 << 4)
#define INTERCEPT_CR8_MASK (1 << 8)
#define INTERCEPT_DR0_MASK 1
#define INTERCEPT_DR1_MASK (1 << 1)
......@@ -311,7 +312,7 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXIT_ERR -1
#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) // TS and MP
#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
......
This diff is collapsed.
......@@ -25,6 +25,9 @@
*
*/
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
*/
#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
#define CPU_BASED_HLT_EXITING 0x00000080
......@@ -42,6 +45,12 @@
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
/*
* Definitions of Secondary Processor-Based VM-Execution Controls.
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008
......@@ -54,8 +63,6 @@
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
/* VMCS Encodings */
enum vmcs_field {
GUEST_ES_SELECTOR = 0x00000800,
......@@ -89,6 +96,8 @@ enum vmcs_field {
TSC_OFFSET_HIGH = 0x00002011,
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
APIC_ACCESS_ADDR = 0x00002014,
APIC_ACCESS_ADDR_HIGH = 0x00002015,
VMCS_LINK_POINTER = 0x00002800,
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
......@@ -214,6 +223,8 @@ enum vmcs_field {
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_WBINVD 54
/*
* Interruption-information format
......@@ -230,13 +241,14 @@ enum vmcs_field {
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
/*
* Exit Qualifications for MOV for Control Register Access
*/
#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control register */
#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control reg.*/
#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */
#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose register */
#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose reg. */
#define LMSW_SOURCE_DATA_SHIFT 16
#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
#define REG_EAX (0 << 8)
......@@ -259,11 +271,11 @@ enum vmcs_field {
/*
* Exit Qualifications for MOV for Debug Register Access
*/
#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug register */
#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug reg. */
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4)
#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose register */
#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */
/* segment AR */
......@@ -307,4 +319,6 @@ enum vmcs_field {
#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
#endif
......@@ -90,8 +90,6 @@ source "drivers/dca/Kconfig"
source "drivers/auxdisplay/Kconfig"
source "drivers/kvm/Kconfig"
source "drivers/uio/Kconfig"
source "drivers/virtio/Kconfig"
......
......@@ -47,7 +47,6 @@ obj-$(CONFIG_SPI) += spi/
obj-$(CONFIG_PCCARD) += pcmcia/
obj-$(CONFIG_DIO) += dio/
obj-$(CONFIG_SBUS) += sbus/
obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_ZORRO) += zorro/
obj-$(CONFIG_MAC) += macintosh/
obj-$(CONFIG_ATA_OVER_ETH) += block/aoe/
......
......@@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm
header-y += boot.h
header-y += bootparam.h
header-y += debugreg.h
header-y += kvm.h
header-y += ldt.h
header-y += msr-index.h
header-y += prctl.h
......
#ifndef __LINUX_KVM_X86_H
#define __LINUX_KVM_X86_H
/*
* KVM x86 specific structures and definitions
*
*/
#include <asm/types.h>
#include <linux/ioctl.h>
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
struct kvm_memory_alias {
__u32 slot; /* this has a different namespace than memory slots */
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 target_phys_addr;
};
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
struct kvm_pic_state {
__u8 last_irr; /* edge detection */
__u8 irr; /* interrupt request register */
__u8 imr; /* interrupt mask register */
__u8 isr; /* interrupt service register */
__u8 priority_add; /* highest irq priority */
__u8 irq_base;
__u8 read_reg_select;
__u8 poll;
__u8 special_mask;
__u8 init_state;
__u8 auto_eoi;
__u8 rotate_on_auto_eoi;
__u8 special_fully_nested_mode;
__u8 init4; /* true if 4 byte init */
__u8 elcr; /* PIIX edge/trigger selection */
__u8 elcr_mask;
};
#define KVM_IOAPIC_NUM_PINS 24
struct kvm_ioapic_state {
__u64 base_address;
__u32 ioregsel;
__u32 id;
__u32 irr;
__u32 pad;
union {
__u64 bits;
struct {
__u8 vector;
__u8 delivery_mode:3;
__u8 dest_mode:1;
__u8 delivery_status:1;
__u8 polarity:1;
__u8 remote_irr:1;
__u8 trig_mode:1;
__u8 mask:1;
__u8 reserve:7;
__u8 reserved[4];
__u8 dest_id;
} fields;
} redirtbl[KVM_IOAPIC_NUM_PINS];
};
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
__u64 rax, rbx, rcx, rdx;
__u64 rsi, rdi, rsp, rbp;
__u64 r8, r9, r10, r11;
__u64 r12, r13, r14, r15;
__u64 rip, rflags;
};
/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
#define KVM_APIC_REG_SIZE 0x400
struct kvm_lapic_state {
char regs[KVM_APIC_REG_SIZE];
};
struct kvm_segment {
__u64 base;
__u32 limit;
__u16 selector;
__u8 type;
__u8 present, dpl, db, s, l, g, avl;
__u8 unusable;
__u8 padding;
};
struct kvm_dtable {
__u64 base;
__u16 limit;
__u16 padding[3];
};
/* for KVM_GET_SREGS and KVM_SET_SREGS */
struct kvm_sregs {
/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
struct kvm_segment cs, ds, es, fs, gs, ss;
struct kvm_segment tr, ldt;
struct kvm_dtable gdt, idt;
__u64 cr0, cr2, cr3, cr4, cr8;
__u64 efer;
__u64 apic_base;
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
__u8 ftwx; /* in fxsave format */
__u8 pad1;
__u16 last_opcode;
__u64 last_ip;
__u64 last_dp;
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
};
struct kvm_msr_entry {
__u32 index;
__u32 reserved;
__u64 data;
};
/* for KVM_GET_MSRS and KVM_SET_MSRS */
struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
struct kvm_msr_entry entries[0];
};
/* for KVM_GET_MSR_INDEX_LIST */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
__u32 indices[0];
};
struct kvm_cpuid_entry {
__u32 function;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding;
};
/* for KVM_SET_CPUID */
struct kvm_cpuid {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry entries[0];
};
struct kvm_cpuid_entry2 {
__u32 function;
__u32 index;
__u32 flags;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding[3];
};
#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
/* for KVM_SET_CPUID2 */
struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry2 entries[0];
};
#endif
#ifndef __X86_KVM_PARA_H
#define __X86_KVM_PARA_H
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
/* This CPUID returns a feature bitmap in eax. Before enabling a particular
* paravirtualization, the appropriate feature bit should be checked.
*/
#define KVM_CPUID_FEATURES 0x40000001
#ifdef __KERNEL__
#include <asm/processor.h>
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
* instruction. The hypervisor may replace it with something else but only the
* instructions are guaranteed to be supported.
*
* Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
* The hypercall number should be placed in rax and the return value will be
* placed in rax. No other registers will be clobbered unless explicited
* noted by the particular hypercall.
*/
static inline long kvm_hypercall0(unsigned int nr)
{
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr));
return ret;
}
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1));
return ret;
}
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
unsigned long p2)
{
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2));
return ret;
}
static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3));
return ret;
}
static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3,
unsigned long p4)
{
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
return ret;
}
static inline int kvm_para_available(void)
{
unsigned int eax, ebx, ecx, edx;
char signature[13];
cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
memcpy(signature + 0, &ebx, 4);
memcpy(signature + 4, &ecx, 4);
memcpy(signature + 8, &edx, 4);
signature[12] = 0;
if (strcmp(signature, "KVMKVMKVM") == 0)
return 1;
return 0;
}
static inline unsigned int kvm_arch_para_features(void)
{
return cpuid_eax(KVM_CPUID_FEATURES);
}
#endif
#endif
......@@ -62,17 +62,6 @@ struct x86_emulate_ops {
int (*read_std)(unsigned long addr, void *val,
unsigned int bytes, struct kvm_vcpu *vcpu);
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
* Used for stack operations, and others.
* @addr: [IN ] Linear address to which to write.
* @val: [IN ] Value to write to memory (low-order bytes used as
* required).
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_std)(unsigned long addr, const void *val,
unsigned int bytes, struct kvm_vcpu *vcpu);
/*
* read_emulated: Read bytes from emulated/special memory area.
* @addr: [IN ] Linear address from which to read.
......@@ -112,13 +101,50 @@ struct x86_emulate_ops {
};
/* Type, address-of, and value of an instruction's operand. */
struct operand {
enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
unsigned int bytes;
unsigned long val, orig_val, *ptr;
};
struct fetch_cache {
u8 data[15];
unsigned long start;
unsigned long end;
};
struct decode_cache {
u8 twobyte;
u8 b;
u8 lock_prefix;
u8 rep_prefix;
u8 op_bytes;
u8 ad_bytes;
u8 rex_prefix;
struct operand src;
struct operand dst;
unsigned long *override_base;
unsigned int d;
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
/* modrm */
u8 modrm;
u8 modrm_mod;
u8 modrm_reg;
u8 modrm_rm;
u8 use_modrm_ea;
unsigned long modrm_ea;
unsigned long modrm_val;
struct fetch_cache fetch;
};
struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
/* Linear faulting address (if emulating a page-faulting instruction). */
unsigned long eflags;
unsigned long cr2;
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode;
......@@ -129,8 +155,16 @@ struct x86_emulate_ctxt {
unsigned long ss_base;
unsigned long gs_base;
unsigned long fs_base;
/* decode cache */
struct decode_cache decode;
};
/* Repeat String Operation Prefix */
#define REPE_PREFIX 1
#define REPNE_PREFIX 2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
......@@ -144,12 +178,9 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif
/*
* x86_emulate_memop: Emulate an instruction that faulted attempting to
* read/write a 'special' memory area.
* Returns -1 on failure, 0 on success.
*/
int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops);
int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops);
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops);
#endif /* __X86_EMULATE_H__ */
......@@ -100,7 +100,6 @@ header-y += iso_fs.h
header-y += ixjuser.h
header-y += jffs2.h
header-y += keyctl.h
header-y += kvm.h
header-y += limits.h
header-y += lock_dlm_plock.h
header-y += magic.h
......@@ -256,6 +255,7 @@ unifdef-y += kd.h
unifdef-y += kernelcapi.h
unifdef-y += kernel.h
unifdef-y += keyboard.h
unifdef-$(CONFIG_HAVE_KVM) += kvm.h
unifdef-y += llc.h
unifdef-y += loop.h
unifdef-y += lp.h
......
......@@ -9,12 +9,10 @@
#include <asm/types.h>
#include <linux/ioctl.h>
#include <asm/kvm.h>
#define KVM_API_VERSION 12
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
/* for KVM_CREATE_MEMORY_REGION */
struct kvm_memory_region {
__u32 slot;
......@@ -23,17 +21,19 @@ struct kvm_memory_region {
__u64 memory_size; /* bytes */
};
/* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL
struct kvm_memory_alias {
__u32 slot; /* this has a different namespace than memory slots */
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 target_phys_addr;
__u64 memory_size; /* bytes */
__u64 userspace_addr; /* start of the userspace allocated memory */
};
/* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
/*
......@@ -45,62 +45,18 @@ struct kvm_irq_level {
__u32 level;
};
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
struct kvm_pic_state {
__u8 last_irr; /* edge detection */
__u8 irr; /* interrupt request register */
__u8 imr; /* interrupt mask register */
__u8 isr; /* interrupt service register */
__u8 priority_add; /* highest irq priority */
__u8 irq_base;
__u8 read_reg_select;
__u8 poll;
__u8 special_mask;
__u8 init_state;
__u8 auto_eoi;
__u8 rotate_on_auto_eoi;
__u8 special_fully_nested_mode;
__u8 init4; /* true if 4 byte init */
__u8 elcr; /* PIIX edge/trigger selection */
__u8 elcr_mask;
};
#define KVM_IOAPIC_NUM_PINS 24
struct kvm_ioapic_state {
__u64 base_address;
__u32 ioregsel;
__u32 id;
__u32 irr;
__u32 pad;
union {
__u64 bits;
struct {
__u8 vector;
__u8 delivery_mode:3;
__u8 dest_mode:1;
__u8 delivery_status:1;
__u8 polarity:1;
__u8 remote_irr:1;
__u8 trig_mode:1;
__u8 mask:1;
__u8 reserve:7;
__u8 reserved[4];
__u8 dest_id;
} fields;
} redirtbl[KVM_IOAPIC_NUM_PINS];
};
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
struct kvm_irqchip {
__u32 chip_id;
__u32 pad;
union {
char dummy[512]; /* reserving space */
#ifdef CONFIG_X86
struct kvm_pic_state pic;
#endif
#if defined(CONFIG_X86) || defined(CONFIG_IA64)
struct kvm_ioapic_state ioapic;
#endif
} chip;
};
......@@ -116,6 +72,7 @@ struct kvm_irqchip {
#define KVM_EXIT_FAIL_ENTRY 9
#define KVM_EXIT_INTR 10
#define KVM_EXIT_SET_TPR 11
#define KVM_EXIT_TPR_ACCESS 12
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
......@@ -174,90 +131,17 @@ struct kvm_run {
__u32 longmode;
__u32 pad;
} hypercall;
/* KVM_EXIT_TPR_ACCESS */
struct {
__u64 rip;
__u32 is_write;
__u32 pad;
} tpr_access;
/* Fix the size of the union. */
char padding[256];
};
};
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
__u64 rax, rbx, rcx, rdx;
__u64 rsi, rdi, rsp, rbp;
__u64 r8, r9, r10, r11;
__u64 r12, r13, r14, r15;
__u64 rip, rflags;
};
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
__u8 ftwx; /* in fxsave format */
__u8 pad1;
__u16 last_opcode;
__u64 last_ip;
__u64 last_dp;
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
};
/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
#define KVM_APIC_REG_SIZE 0x400
struct kvm_lapic_state {
char regs[KVM_APIC_REG_SIZE];
};
struct kvm_segment {
__u64 base;
__u32 limit;
__u16 selector;
__u8 type;
__u8 present, dpl, db, s, l, g, avl;
__u8 unusable;
__u8 padding;
};
struct kvm_dtable {
__u64 base;
__u16 limit;
__u16 padding[3];
};
/* for KVM_GET_SREGS and KVM_SET_SREGS */
struct kvm_sregs {
/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
struct kvm_segment cs, ds, es, fs, gs, ss;
struct kvm_segment tr, ldt;
struct kvm_dtable gdt, idt;
__u64 cr0, cr2, cr3, cr4, cr8;
__u64 efer;
__u64 apic_base;
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
struct kvm_msr_entry {
__u32 index;
__u32 reserved;
__u64 data;
};
/* for KVM_GET_MSRS and KVM_SET_MSRS */
struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
struct kvm_msr_entry entries[0];
};
/* for KVM_GET_MSR_INDEX_LIST */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
__u32 indices[0];
};
/* for KVM_TRANSLATE */
struct kvm_translation {
/* in */
......@@ -302,28 +186,24 @@ struct kvm_dirty_log {
};
};
struct kvm_cpuid_entry {
__u32 function;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding;
};
/* for KVM_SET_CPUID */
struct kvm_cpuid {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry entries[0];
};
/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
__u32 len;
__u8 sigset[0];
};
/* for KVM_TPR_ACCESS_REPORTING */
struct kvm_tpr_access_ctl {
__u32 enabled;
__u32 flags;
__u32 reserved[8];
};
/* for KVM_SET_VAPIC_ADDR */
struct kvm_vapic_addr {
__u64 vapic_addr;
};
#define KVMIO 0xAE
/*
......@@ -347,11 +227,21 @@ struct kvm_signal_mask {
*/
#define KVM_CAP_IRQCHIP 0
#define KVM_CAP_HLT 1
#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
#define KVM_CAP_USER_MEMORY 3
#define KVM_CAP_SET_TSS_ADDR 4
#define KVM_CAP_EXT_CPUID 5
#define KVM_CAP_VAPIC 6
/*
* ioctls for VM fds
*/
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
/*
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
* a vcpu fd.
......@@ -359,6 +249,7 @@ struct kvm_signal_mask {
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x48, struct kvm_cpuid2)
/* Device model IOC */
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
......@@ -384,5 +275,11 @@ struct kvm_signal_mask {
#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
#define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state)
#define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state)
#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2)
#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
/* Available with KVM_CAP_VAPIC */
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
/* Available with KVM_CAP_VAPIC */
#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
#endif
This diff is collapsed.
......@@ -2,72 +2,30 @@
#define __LINUX_KVM_PARA_H
/*
* Guest OS interface for KVM paravirtualization
*
* Note: this interface is totally experimental, and is certain to change
* as we make progress.
* This header file provides a method for making a hypercall to the host
* Architectures should define:
* - kvm_hypercall0, kvm_hypercall1...
* - kvm_arch_para_features
* - kvm_para_available
*/
/*
* Per-VCPU descriptor area shared between guest and host. Writable to
* both guest and host. Registered with the host by the guest when
* a guest acknowledges paravirtual mode.
*
* NOTE: all addresses are guest-physical addresses (gpa), to make it
* easier for the hypervisor to map between the various addresses.
*/
struct kvm_vcpu_para_state {
/*
* API version information for compatibility. If there's any support
* mismatch (too old host trying to execute too new guest) then
* the host will deny entry into paravirtual mode. Any other
* combination (new host + old guest and new host + new guest)
* is supposed to work - new host versions will support all old
* guest API versions.
*/
u32 guest_version;
u32 host_version;
u32 size;
u32 ret;
/*
* The address of the vm exit instruction (VMCALL or VMMCALL),
* which the host will patch according to the CPU model the
* VM runs on:
*/
u64 hypercall_gpa;
} __attribute__ ((aligned(PAGE_SIZE)));
#define KVM_PARA_API_VERSION 1
/*
* This is used for an RDMSR's ECX parameter to probe for a KVM host.
* Hopefully no CPU vendor will use up this number. This is placed well
* out of way of the typical space occupied by CPU vendors' MSR indices,
* and we think (or at least hope) it wont be occupied in the future
* either.
*/
#define MSR_KVM_API_MAGIC 0x87655678
/* Return values for hypercalls */
#define KVM_ENOSYS 1000
#define KVM_EINVAL 1
#define KVM_HC_VAPIC_POLL_IRQ 1
/*
* Hypercall calling convention:
*
* Each hypercall may have 0-6 parameters.
*
* 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
*
* 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
* order: RDI, RSI, RDX, RCX, R8, R9.
*
* 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
* (the first 3 are according to the gcc regparm calling convention)
*
* No registers are clobbered by the hypercall, except that the
* return value is in RAX.
* hypercalls use architecture specific
*/
#define __NR_hypercalls 0
#include <asm/kvm_para.h>
#ifdef __KERNEL__
static inline int kvm_para_has_feature(unsigned int feature)
{
if (kvm_arch_para_features() & (1UL << feature))
return 1;
return 0;
}
#endif /* __KERNEL__ */
#endif /* __LINUX_KVM_PARA_H */
#endif
This diff is collapsed.
......@@ -393,6 +393,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
destroy_context(mm);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
/*
* Decrement the use count and release all resources for an mm.
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment