Commit 93b1eab3 authored by Jeremy Fitzhardinge, committed by Jeremy Fitzhardinge

paravirt: refactor struct paravirt_ops into smaller pv_*_ops

This patch refactors the paravirt_ops structure into groups of
functionally related ops:

pv_info - random info, rather than function entrypoints
pv_init_ops - functions used at boot time (some for module_init too)
pv_misc_ops - lazy mode, which didn't fit well anywhere else
pv_time_ops - time-related functions
pv_cpu_ops - various privileged instruction ops
pv_irq_ops - operations for managing interrupt state
pv_apic_ops - APIC operations
pv_mmu_ops - operations for managing pagetables

There are several motivations for this:

1. Some of these ops will be general to all x86, and some will be
   i386/x86-64 specific.  This makes it easier to share common stuff
   while allowing separate implementations where needed.

2. At the moment we must export all of paravirt_ops, but modules only
   need selected parts of it.  This allows us to export on a case by case
   basis (and also choose which export license we want to apply).

3. Functional groupings make things a bit more readable.

Struct paravirt_ops is now only used as a template to generate
patch-site identifiers, and to extract function pointers for inserting
into jmp/calls when patching.  It is only instantiated when needed.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Zach Amsden <zach@vmware.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: "Glauber de Oliveira Costa" <glommer@gmail.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
parent ab9c2322
...@@ -368,8 +368,8 @@ void apply_paravirt(struct paravirt_patch_site *start, ...@@ -368,8 +368,8 @@ void apply_paravirt(struct paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN); BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */ /* prep the buffer with the original instructions */
memcpy(insnbuf, p->instr, p->len); memcpy(insnbuf, p->instr, p->len);
used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
(unsigned long)p->instr, p->len); (unsigned long)p->instr, p->len);
BUG_ON(used > p->len); BUG_ON(used > p->len);
......
...@@ -116,12 +116,14 @@ void foo(void) ...@@ -116,12 +116,14 @@ void foo(void)
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
BLANK(); BLANK();
OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PARAVIRT_iret, paravirt_ops, iret); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif #endif
#ifdef CONFIG_XEN #ifdef CONFIG_XEN
......
...@@ -434,7 +434,7 @@ ldt_ss: ...@@ -434,7 +434,7 @@ ldt_ss:
* is still available to implement the setting of the high * is still available to implement the setting of the high
* 16-bits in the INTERRUPT_RETURN paravirt-op. * 16-bits in the INTERRUPT_RETURN paravirt-op.
*/ */
cmpl $0, paravirt_ops+PARAVIRT_enabled cmpl $0, pv_info+PARAVIRT_enabled
jne restore_nocheck jne restore_nocheck
#endif #endif
......
...@@ -42,32 +42,33 @@ void _paravirt_nop(void) ...@@ -42,32 +42,33 @@ void _paravirt_nop(void)
static void __init default_banner(void) static void __init default_banner(void)
{ {
printk(KERN_INFO "Booting paravirtualized kernel on %s\n", printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
paravirt_ops.name); pv_info.name);
} }
char *memory_setup(void) char *memory_setup(void)
{ {
return paravirt_ops.memory_setup(); return pv_init_ops.memory_setup();
} }
/* Simple instruction patching code. */ /* Simple instruction patching code. */
#define DEF_NATIVE(name, code) \ #define DEF_NATIVE(ops, name, code) \
extern const char start_##name[], end_##name[]; \ extern const char start_##ops##_##name[], end_##ops##_##name[]; \
asm("start_" #name ": " code "; end_" #name ":") asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
DEF_NATIVE(irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(iret, "iret"); DEF_NATIVE(pv_cpu_ops, iret, "iret");
DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(clts, "clts"); DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(read_tsc, "rdtsc"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
DEF_NATIVE(ud2a, "ud2a"); /* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len) unsigned long addr, unsigned len)
...@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, ...@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned ret; unsigned ret;
switch(type) { switch(type) {
#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site #define SITE(ops, x) \
SITE(irq_disable); case PARAVIRT_PATCH(ops.x): \
SITE(irq_enable); start = start_##ops##_##x; \
SITE(restore_fl); end = end_##ops##_##x; \
SITE(save_fl); goto patch_site
SITE(iret);
SITE(irq_enable_sysexit); SITE(pv_irq_ops, irq_disable);
SITE(read_cr2); SITE(pv_irq_ops, irq_enable);
SITE(read_cr3); SITE(pv_irq_ops, restore_fl);
SITE(write_cr3); SITE(pv_irq_ops, save_fl);
SITE(clts); SITE(pv_cpu_ops, iret);
SITE(read_tsc); SITE(pv_cpu_ops, irq_enable_sysexit);
SITE(pv_mmu_ops, read_cr2);
SITE(pv_mmu_ops, read_cr3);
SITE(pv_mmu_ops, write_cr3);
SITE(pv_cpu_ops, clts);
SITE(pv_cpu_ops, read_tsc);
#undef SITE #undef SITE
patch_site: patch_site:
ret = paravirt_patch_insns(ibuf, len, start, end); ret = paravirt_patch_insns(ibuf, len, start, end);
break; break;
case PARAVIRT_PATCH(make_pgd):
case PARAVIRT_PATCH(make_pte):
case PARAVIRT_PATCH(pgd_val):
case PARAVIRT_PATCH(pte_val):
#ifdef CONFIG_X86_PAE
case PARAVIRT_PATCH(make_pmd):
case PARAVIRT_PATCH(pmd_val):
#endif
/* These functions end up returning exactly what
they're passed, in the same registers. */
ret = paravirt_patch_nop();
break;
default: default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break; break;
...@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf, ...@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf,
return 5; return 5;
} }
unsigned paravirt_patch_jmp(const void *target, void *insnbuf, unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
unsigned long addr, unsigned len) unsigned long addr, unsigned len)
{ {
struct branch *b = insnbuf; struct branch *b = insnbuf;
...@@ -165,22 +158,38 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf, ...@@ -165,22 +158,38 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
return 5; return 5;
} }
/* Neat trick to map patch type back to the call within the
* corresponding structure. */
static void *get_call_destination(u8 type)
{
struct paravirt_patch_template tmpl = {
.pv_init_ops = pv_init_ops,
.pv_misc_ops = pv_misc_ops,
.pv_time_ops = pv_time_ops,
.pv_cpu_ops = pv_cpu_ops,
.pv_irq_ops = pv_irq_ops,
.pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
};
return *((void **)&tmpl + type);
}
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned long addr, unsigned len) unsigned long addr, unsigned len)
{ {
void *opfunc = *((void **)&paravirt_ops + type); void *opfunc = get_call_destination(type);
unsigned ret; unsigned ret;
if (opfunc == NULL) if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */ /* If there's no function, patch it with a ud2a (BUG) */
ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == paravirt_nop) else if (opfunc == paravirt_nop)
/* If the operation is a nop, then nop the callsite */ /* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop(); ret = paravirt_patch_nop();
else if (type == PARAVIRT_PATCH(iret) || else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
type == PARAVIRT_PATCH(irq_enable_sysexit)) type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
/* If operation requires a jmp, then jmp */ /* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
else else
/* Otherwise call the function; assume target could /* Otherwise call the function; assume target could
clobber any caller-save reg */ clobber any caller-save reg */
...@@ -205,7 +214,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, ...@@ -205,7 +214,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
void init_IRQ(void) void init_IRQ(void)
{ {
paravirt_ops.init_IRQ(); pv_irq_ops.init_IRQ();
} }
static void native_flush_tlb(void) static void native_flush_tlb(void)
...@@ -233,7 +242,7 @@ extern void native_irq_enable_sysexit(void); ...@@ -233,7 +242,7 @@ extern void native_irq_enable_sysexit(void);
static int __init print_banner(void) static int __init print_banner(void)
{ {
paravirt_ops.banner(); pv_init_ops.banner();
return 0; return 0;
} }
core_initcall(print_banner); core_initcall(print_banner);
...@@ -273,47 +282,53 @@ int paravirt_disable_iospace(void) ...@@ -273,47 +282,53 @@ int paravirt_disable_iospace(void)
return ret; return ret;
} }
struct paravirt_ops paravirt_ops = { struct pv_info pv_info = {
.name = "bare hardware", .name = "bare hardware",
.paravirt_enabled = 0, .paravirt_enabled = 0,
.kernel_rpl = 0, .kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
};
.patch = native_patch, struct pv_init_ops pv_init_ops = {
.patch = native_patch,
.banner = default_banner, .banner = default_banner,
.arch_setup = paravirt_nop, .arch_setup = paravirt_nop,
.memory_setup = machine_specific_memory_setup, .memory_setup = machine_specific_memory_setup,
};
struct pv_time_ops pv_time_ops = {
.time_init = hpet_time_init,
.get_wallclock = native_get_wallclock, .get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock, .set_wallclock = native_set_wallclock,
.time_init = hpet_time_init, .sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
};
struct pv_irq_ops pv_irq_ops = {
.init_IRQ = native_init_IRQ, .init_IRQ = native_init_IRQ,
.save_fl = native_save_fl,
.restore_fl = native_restore_fl,
.irq_disable = native_irq_disable,
.irq_enable = native_irq_enable,
.safe_halt = native_safe_halt,
.halt = native_halt,
};
struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid, .cpuid = native_cpuid,
.get_debugreg = native_get_debugreg, .get_debugreg = native_get_debugreg,
.set_debugreg = native_set_debugreg, .set_debugreg = native_set_debugreg,
.clts = native_clts, .clts = native_clts,
.read_cr0 = native_read_cr0, .read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0, .write_cr0 = native_write_cr0,
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
.read_cr3 = native_read_cr3,
.write_cr3 = native_write_cr3,
.read_cr4 = native_read_cr4, .read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe, .read_cr4_safe = native_read_cr4_safe,
.write_cr4 = native_write_cr4, .write_cr4 = native_write_cr4,
.save_fl = native_save_fl,
.restore_fl = native_restore_fl,
.irq_disable = native_irq_disable,
.irq_enable = native_irq_enable,
.safe_halt = native_safe_halt,
.halt = native_halt,
.wbinvd = native_wbinvd, .wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe, .read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe, .write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc, .read_tsc = native_read_tsc,
.read_pmc = native_read_pmc, .read_pmc = native_read_pmc,
.sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc, .load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt, .set_ldt = native_set_ldt,
.load_gdt = native_load_gdt, .load_gdt = native_load_gdt,
...@@ -327,9 +342,14 @@ struct paravirt_ops paravirt_ops = { ...@@ -327,9 +342,14 @@ struct paravirt_ops paravirt_ops = {
.write_idt_entry = write_dt_entry, .write_idt_entry = write_dt_entry,
.load_esp0 = native_load_esp0, .load_esp0 = native_load_esp0,
.irq_enable_sysexit = native_irq_enable_sysexit,
.iret = native_iret,
.set_iopl_mask = native_set_iopl_mask, .set_iopl_mask = native_set_iopl_mask,
.io_delay = native_io_delay, .io_delay = native_io_delay,
};
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
.apic_write = native_apic_write, .apic_write = native_apic_write,
.apic_write_atomic = native_apic_write_atomic, .apic_write_atomic = native_apic_write_atomic,
...@@ -338,11 +358,21 @@ struct paravirt_ops paravirt_ops = { ...@@ -338,11 +358,21 @@ struct paravirt_ops paravirt_ops = {
.setup_secondary_clock = setup_secondary_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop, .startup_ipi_hook = paravirt_nop,
#endif #endif
};
struct pv_misc_ops pv_misc_ops = {
.set_lazy_mode = paravirt_nop, .set_lazy_mode = paravirt_nop,
};
struct pv_mmu_ops pv_mmu_ops = {
.pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done, .pagetable_setup_done = native_pagetable_setup_done,
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
.read_cr3 = native_read_cr3,
.write_cr3 = native_write_cr3,
.flush_tlb_user = native_flush_tlb, .flush_tlb_user = native_flush_tlb,
.flush_tlb_kernel = native_flush_tlb_global, .flush_tlb_kernel = native_flush_tlb_global,
.flush_tlb_single = native_flush_tlb_single, .flush_tlb_single = native_flush_tlb_single,
...@@ -381,12 +411,14 @@ struct paravirt_ops paravirt_ops = { ...@@ -381,12 +411,14 @@ struct paravirt_ops paravirt_ops = {
.make_pte = native_make_pte, .make_pte = native_make_pte,
.make_pgd = native_make_pgd, .make_pgd = native_make_pgd,
.irq_enable_sysexit = native_irq_enable_sysexit,
.iret = native_iret,
.dup_mmap = paravirt_nop, .dup_mmap = paravirt_nop,
.exit_mmap = paravirt_nop, .exit_mmap = paravirt_nop,
.activate_mm = paravirt_nop, .activate_mm = paravirt_nop,
}; };
EXPORT_SYMBOL(paravirt_ops); EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL_GPL(pv_cpu_ops);
EXPORT_SYMBOL_GPL(pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL (pv_irq_ops);
...@@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, ...@@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
unsigned long eip, unsigned len) unsigned long eip, unsigned len)
{ {
switch (type) { switch (type) {
case PARAVIRT_PATCH(irq_disable): case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
return patch_internal(VMI_CALL_DisableInterrupts, len, return patch_internal(VMI_CALL_DisableInterrupts, len,
insns, eip); insns, eip);
case PARAVIRT_PATCH(irq_enable): case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
return patch_internal(VMI_CALL_EnableInterrupts, len, return patch_internal(VMI_CALL_EnableInterrupts, len,
insns, eip); insns, eip);
case PARAVIRT_PATCH(restore_fl): case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
return patch_internal(VMI_CALL_SetInterruptMask, len, return patch_internal(VMI_CALL_SetInterruptMask, len,
insns, eip); insns, eip);
case PARAVIRT_PATCH(save_fl): case PARAVIRT_PATCH(pv_irq_ops.save_fl):
return patch_internal(VMI_CALL_GetInterruptMask, len, return patch_internal(VMI_CALL_GetInterruptMask, len,
insns, eip); insns, eip);
case PARAVIRT_PATCH(iret): case PARAVIRT_PATCH(pv_cpu_ops.iret):
return patch_internal(VMI_CALL_IRET, len, insns, eip); return patch_internal(VMI_CALL_IRET, len, insns, eip);
case PARAVIRT_PATCH(irq_enable_sysexit): case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
default: default:
break; break;
...@@ -690,9 +690,9 @@ do { \ ...@@ -690,9 +690,9 @@ do { \
reloc = call_vrom_long_func(vmi_rom, get_reloc, \ reloc = call_vrom_long_func(vmi_rom, get_reloc, \
VMI_CALL_##vmicall); \ VMI_CALL_##vmicall); \
if (rel->type == VMI_RELOCATION_CALL_REL) \ if (rel->type == VMI_RELOCATION_CALL_REL) \
paravirt_ops.opname = (void *)rel->eip; \ opname = (void *)rel->eip; \
else if (rel->type == VMI_RELOCATION_NOP) \ else if (rel->type == VMI_RELOCATION_NOP) \
paravirt_ops.opname = (void *)vmi_nop; \ opname = (void *)vmi_nop; \
else if (rel->type != VMI_RELOCATION_NONE) \ else if (rel->type != VMI_RELOCATION_NONE) \
printk(KERN_WARNING "VMI: Unknown relocation " \ printk(KERN_WARNING "VMI: Unknown relocation " \
"type %d for " #vmicall"\n",\ "type %d for " #vmicall"\n",\
...@@ -712,7 +712,7 @@ do { \ ...@@ -712,7 +712,7 @@ do { \
VMI_CALL_##vmicall); \ VMI_CALL_##vmicall); \
BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
if (rel->type == VMI_RELOCATION_CALL_REL) { \ if (rel->type == VMI_RELOCATION_CALL_REL) { \
paravirt_ops.opname = wrapper; \ opname = wrapper; \
vmi_ops.cache = (void *)rel->eip; \ vmi_ops.cache = (void *)rel->eip; \
} \ } \
} while (0) } while (0)
...@@ -732,11 +732,11 @@ static inline int __init activate_vmi(void) ...@@ -732,11 +732,11 @@ static inline int __init activate_vmi(void)
} }
savesegment(cs, kernel_cs); savesegment(cs, kernel_cs);
paravirt_ops.paravirt_enabled = 1; pv_info.paravirt_enabled = 1;
paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
pv_info.name = "vmi";
paravirt_ops.patch = vmi_patch; pv_init_ops.patch = vmi_patch;
paravirt_ops.name = "vmi";
/* /*
* Many of these operations are ABI compatible with VMI. * Many of these operations are ABI compatible with VMI.
...@@ -754,26 +754,26 @@ static inline int __init activate_vmi(void) ...@@ -754,26 +754,26 @@ static inline int __init activate_vmi(void)
*/ */
/* CPUID is special, so very special it gets wrapped like a present */ /* CPUID is special, so very special it gets wrapped like a present */
para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);
para_fill(clts, CLTS); para_fill(pv_cpu_ops.clts, CLTS);
para_fill(get_debugreg, GetDR); para_fill(pv_cpu_ops.get_debugreg, GetDR);
para_fill(set_debugreg, SetDR); para_fill(pv_cpu_ops.set_debugreg, SetDR);
para_fill(read_cr0, GetCR0); para_fill(pv_cpu_ops.read_cr0, GetCR0);
para_fill(read_cr2, GetCR2); para_fill(pv_mmu_ops.read_cr2, GetCR2);
para_fill(read_cr3, GetCR3); para_fill(pv_mmu_ops.read_cr3, GetCR3);
para_fill(read_cr4, GetCR4); para_fill(pv_cpu_ops.read_cr4, GetCR4);
para_fill(write_cr0, SetCR0); para_fill(pv_cpu_ops.write_cr0, SetCR0);
para_fill(write_cr2, SetCR2); para_fill(pv_mmu_ops.write_cr2, SetCR2);
para_fill(write_cr3, SetCR3); para_fill(pv_mmu_ops.write_cr3, SetCR3);
para_fill(write_cr4, SetCR4); para_fill(pv_cpu_ops.write_cr4, SetCR4);
para_fill(save_fl, GetInterruptMask); para_fill(pv_irq_ops.save_fl, GetInterruptMask);
para_fill(restore_fl, SetInterruptMask); para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
para_fill(irq_disable, DisableInterrupts); para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
para_fill(irq_enable, EnableInterrupts); para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
para_fill(wbinvd, WBINVD); para_fill(pv_cpu_ops.wbinvd, WBINVD);
para_fill(read_tsc, RDTSC); para_fill(pv_cpu_ops.read_tsc, RDTSC);
/* The following we emulate with trap and emulate for now */ /* The following we emulate with trap and emulate for now */
/* paravirt_ops.read_msr = vmi_rdmsr */ /* paravirt_ops.read_msr = vmi_rdmsr */
...@@ -781,29 +781,29 @@ static inline int __init activate_vmi(void) ...@@ -781,29 +781,29 @@ static inline int __init activate_vmi(void)
/* paravirt_ops.rdpmc = vmi_rdpmc */ /* paravirt_ops.rdpmc = vmi_rdpmc */
/* TR interface doesn't pass TR value, wrap */ /* TR interface doesn't pass TR value, wrap */
para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);
/* LDT is special, too */ /* LDT is special, too */
para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
para_fill(load_gdt, SetGDT); para_fill(pv_cpu_ops.load_gdt, SetGDT);
para_fill(load_idt, SetIDT); para_fill(pv_cpu_ops.load_idt, SetIDT);
para_fill(store_gdt, GetGDT); para_fill(pv_cpu_ops.store_gdt, GetGDT);
para_fill(store_idt, GetIDT); para_fill(pv_cpu_ops.store_idt, GetIDT);
para_fill(store_tr, GetTR); para_fill(pv_cpu_ops.store_tr, GetTR);
paravirt_ops.load_tls = vmi_load_tls; pv_cpu_ops.load_tls = vmi_load_tls;
para_fill(write_ldt_entry, WriteLDTEntry); para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
para_fill(write_gdt_entry, WriteGDTEntry); para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
para_fill(write_idt_entry, WriteIDTEntry); para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
para_fill(set_iopl_mask, SetIOPLMask); para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
para_fill(io_delay, IODelay); para_fill(pv_cpu_ops.io_delay, IODelay);
para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); para_wrap(pv_misc_ops.set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
/* user and kernel flush are just handled with different flags to FlushTLB */ /* user and kernel flush are just handled with different flags to FlushTLB */
para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
para_fill(flush_tlb_single, InvalPage); para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);
/* /*
* Until a standard flag format can be agreed on, we need to * Until a standard flag format can be agreed on, we need to
...@@ -819,41 +819,41 @@ static inline int __init activate_vmi(void) ...@@ -819,41 +819,41 @@ static inline int __init activate_vmi(void)
#endif #endif
if (vmi_ops.set_pte) { if (vmi_ops.set_pte) {
paravirt_ops.set_pte = vmi_set_pte; pv_mmu_ops.set_pte = vmi_set_pte;
paravirt_ops.set_pte_at = vmi_set_pte_at; pv_mmu_ops.set_pte_at = vmi_set_pte_at;
paravirt_ops.set_pmd = vmi_set_pmd; pv_mmu_ops.set_pmd = vmi_set_pmd;
#ifdef CONFIG_X86_PAE #ifdef CONFIG_X86_PAE
paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
paravirt_ops.set_pte_present = vmi_set_pte_present; pv_mmu_ops.set_pte_present = vmi_set_pte_present;
paravirt_ops.set_pud = vmi_set_pud; pv_mmu_ops.set_pud = vmi_set_pud;
paravirt_ops.pte_clear = vmi_pte_clear; pv_mmu_ops.pte_clear = vmi_pte_clear;
paravirt_ops.pmd_clear = vmi_pmd_clear; pv_mmu_ops.pmd_clear = vmi_pmd_clear;
#endif #endif
} }
if (vmi_ops.update_pte) { if (vmi_ops.update_pte) {
paravirt_ops.pte_update = vmi_update_pte; pv_mmu_ops.pte_update = vmi_update_pte;
paravirt_ops.pte_update_defer = vmi_update_pte_defer; pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
} }
vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
if (vmi_ops.allocate_page) { if (vmi_ops.allocate_page) {
paravirt_ops.alloc_pt = vmi_allocate_pt; pv_mmu_ops.alloc_pt = vmi_allocate_pt;
paravirt_ops.alloc_pd = vmi_allocate_pd; pv_mmu_ops.alloc_pd = vmi_allocate_pd;
paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
} }
vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
if (vmi_ops.release_page) { if (vmi_ops.release_page) {
paravirt_ops.release_pt = vmi_release_pt; pv_mmu_ops.release_pt = vmi_release_pt;
paravirt_ops.release_pd = vmi_release_pd; pv_mmu_ops.release_pd = vmi_release_pd;
} }
/* Set linear is needed in all cases */ /* Set linear is needed in all cases */
vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
#ifdef CONFIG_HIGHPTE #ifdef CONFIG_HIGHPTE
if (vmi_ops.set_linear_mapping) if (vmi_ops.set_linear_mapping)
paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
#endif #endif
/* /*
...@@ -863,17 +863,17 @@ static inline int __init activate_vmi(void) ...@@ -863,17 +863,17 @@ static inline int __init activate_vmi(void)
* the backend. They are performance critical anyway, so requiring * the backend. They are performance critical anyway, so requiring
* a patch is not a big problem. * a patch is not a big problem.
*/ */
paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0; pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
paravirt_ops.iret = (void *)0xbadbab0; pv_cpu_ops.iret = (void *)0xbadbab0;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
#endif #endif
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
para_fill(apic_read, APICRead); para_fill(pv_apic_ops.apic_read, APICRead);
para_fill(apic_write, APICWrite); para_fill(pv_apic_ops.apic_write, APICWrite);
para_fill(apic_write_atomic, APICWrite); para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
#endif #endif
/* /*
...@@ -891,15 +891,15 @@ static inline int __init activate_vmi(void) ...@@ -891,15 +891,15 @@ static inline int __init activate_vmi(void)
vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
vmi_timer_ops.cancel_alarm = vmi_timer_ops.cancel_alarm =
vmi_get_function(VMI_CALL_CancelAlarm); vmi_get_function(VMI_CALL_CancelAlarm);
paravirt_ops.time_init = vmi_time_init; pv_time_ops.time_init = vmi_time_init;
paravirt_ops.get_wallclock = vmi_get_wallclock; pv_time_ops.get_wallclock = vmi_get_wallclock;
paravirt_ops.set_wallclock = vmi_set_wallclock; pv_time_ops.set_wallclock = vmi_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
paravirt_ops.setup_boot_clock = vmi_time_bsp_init; pv_apic_ops.setup_boot_clock = vmi_time_bsp_init;
paravirt_ops.setup_secondary_clock = vmi_time_ap_init; pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
#endif #endif
paravirt_ops.sched_clock = vmi_sched_clock; pv_time_ops.sched_clock = vmi_sched_clock;
paravirt_ops.get_cpu_khz = vmi_cpu_khz; pv_time_ops.get_cpu_khz = vmi_cpu_khz;
/* We have true wallclock functions; disable CMOS clock sync */ /* We have true wallclock functions; disable CMOS clock sync */
no_sync_cmos_clock = 1; no_sync_cmos_clock = 1;
...@@ -908,7 +908,7 @@ static inline int __init activate_vmi(void) ...@@ -908,7 +908,7 @@ static inline int __init activate_vmi(void)
disable_vmi_timer = 1; disable_vmi_timer = 1;
} }
para_fill(safe_halt, Halt); para_fill(pv_irq_ops.safe_halt, Halt);
/* /*
* Alternative instruction rewriting doesn't happen soon enough * Alternative instruction rewriting doesn't happen soon enough
......
...@@ -124,7 +124,7 @@ static void __init xen_vcpu_setup(int cpu) ...@@ -124,7 +124,7 @@ static void __init xen_vcpu_setup(int cpu)
static void __init xen_banner(void) static void __init xen_banner(void)
{ {
printk(KERN_INFO "Booting paravirtualized kernel on %s\n", printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
paravirt_ops.name); pv_info.name);
printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
} }
...@@ -738,7 +738,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) ...@@ -738,7 +738,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
/* special set_pte for pagetable initialization */ /* special set_pte for pagetable initialization */
paravirt_ops.set_pte = xen_set_pte_init; pv_mmu_ops.set_pte = xen_set_pte_init;
init_mm.pgd = base; init_mm.pgd = base;
/* /*
...@@ -785,8 +785,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base) ...@@ -785,8 +785,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
{ {
/* This will work as long as patching hasn't happened yet /* This will work as long as patching hasn't happened yet
(which it hasn't) */ (which it hasn't) */
paravirt_ops.alloc_pt = xen_alloc_pt; pv_mmu_ops.alloc_pt = xen_alloc_pt;
paravirt_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pte = xen_set_pte;
if (!xen_feature(XENFEAT_auto_translated_physmap)) { if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* /*
...@@ -833,12 +833,12 @@ void __init xen_setup_vcpu_info_placement(void) ...@@ -833,12 +833,12 @@ void __init xen_setup_vcpu_info_placement(void)
if (have_vcpu_info_placement) { if (have_vcpu_info_placement) {
printk(KERN_INFO "Xen: using vcpu_info placement\n"); printk(KERN_INFO "Xen: using vcpu_info placement\n");
paravirt_ops.save_fl = xen_save_fl_direct; pv_irq_ops.save_fl = xen_save_fl_direct;
paravirt_ops.restore_fl = xen_restore_fl_direct; pv_irq_ops.restore_fl = xen_restore_fl_direct;
paravirt_ops.irq_disable = xen_irq_disable_direct; pv_irq_ops.irq_disable = xen_irq_disable_direct;
paravirt_ops.irq_enable = xen_irq_enable_direct; pv_irq_ops.irq_enable = xen_irq_enable_direct;
paravirt_ops.read_cr2 = xen_read_cr2_direct; pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
paravirt_ops.iret = xen_iret_direct; pv_cpu_ops.iret = xen_iret_direct;
} }
} }
...@@ -850,8 +850,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, ...@@ -850,8 +850,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
start = end = reloc = NULL; start = end = reloc = NULL;
#define SITE(x) \ #define SITE(op, x) \
case PARAVIRT_PATCH(x): \ case PARAVIRT_PATCH(op.x): \
if (have_vcpu_info_placement) { \ if (have_vcpu_info_placement) { \
start = (char *)xen_##x##_direct; \ start = (char *)xen_##x##_direct; \
end = xen_##x##_direct_end; \ end = xen_##x##_direct_end; \
...@@ -860,10 +860,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, ...@@ -860,10 +860,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
goto patch_site goto patch_site
switch (type) { switch (type) {
SITE(irq_enable); SITE(pv_irq_ops, irq_enable);
SITE(irq_disable); SITE(pv_irq_ops, irq_disable);
SITE(save_fl); SITE(pv_irq_ops, save_fl);
SITE(restore_fl); SITE(pv_irq_ops, restore_fl);
#undef SITE #undef SITE
patch_site: patch_site:
...@@ -895,26 +895,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, ...@@ -895,26 +895,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
return ret; return ret;
} }
static const struct paravirt_ops xen_paravirt_ops __initdata = { static const struct pv_info xen_info __initdata = {
.paravirt_enabled = 1, .paravirt_enabled = 1,
.shared_kernel_pmd = 0, .shared_kernel_pmd = 0,
.name = "Xen", .name = "Xen",
.banner = xen_banner, };
static const struct pv_init_ops xen_init_ops __initdata = {
.patch = xen_patch, .patch = xen_patch,
.banner = xen_banner,
.memory_setup = xen_memory_setup, .memory_setup = xen_memory_setup,
.arch_setup = xen_arch_setup, .arch_setup = xen_arch_setup,
.init_IRQ = xen_init_IRQ,
.post_allocator_init = xen_mark_init_mm_pinned, .post_allocator_init = xen_mark_init_mm_pinned,
};
static const struct pv_time_ops xen_time_ops __initdata = {
.time_init = xen_time_init, .time_init = xen_time_init,
.set_wallclock = xen_set_wallclock, .set_wallclock = xen_set_wallclock,
.get_wallclock = xen_get_wallclock, .get_wallclock = xen_get_wallclock,
.get_cpu_khz = xen_cpu_khz, .get_cpu_khz = xen_cpu_khz,
.sched_clock = xen_sched_clock, .sched_clock = xen_sched_clock,
};
static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.cpuid = xen_cpuid, .cpuid = xen_cpuid,
.set_debugreg = xen_set_debugreg, .set_debugreg = xen_set_debugreg,
...@@ -925,22 +931,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { ...@@ -925,22 +931,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
.read_cr0 = native_read_cr0, .read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0, .write_cr0 = native_write_cr0,
.read_cr2 = xen_read_cr2,
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
.write_cr3 = xen_write_cr3,
.read_cr4 = native_read_cr4, .read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe, .read_cr4_safe = native_read_cr4_safe,
.write_cr4 = xen_write_cr4, .write_cr4 = xen_write_cr4,
.save_fl = xen_save_fl,
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
.wbinvd = native_wbinvd, .wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe, .read_msr = native_read_msr_safe,
...@@ -968,7 +962,19 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { ...@@ -968,7 +962,19 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
.set_iopl_mask = xen_set_iopl_mask, .set_iopl_mask = xen_set_iopl_mask,
.io_delay = xen_io_delay, .io_delay = xen_io_delay,
};
static const struct pv_irq_ops xen_irq_ops __initdata = {
.init_IRQ = xen_init_IRQ,
.save_fl = xen_save_fl,
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
};
static const struct pv_apic_ops xen_apic_ops __initdata = {
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
.apic_write = xen_apic_write, .apic_write = xen_apic_write,
.apic_write_atomic = xen_apic_write, .apic_write_atomic = xen_apic_write,
...@@ -977,6 +983,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { ...@@ -977,6 +983,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
.setup_secondary_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop,
.startup_ipi_hook = paravirt_nop, .startup_ipi_hook = paravirt_nop,
#endif #endif
};
static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.pagetable_setup_start = xen_pagetable_setup_start,
.pagetable_setup_done = xen_pagetable_setup_done,
.read_cr2 = xen_read_cr2,
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
.write_cr3 = xen_write_cr3,
.flush_tlb_user = xen_flush_tlb, .flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb,
...@@ -986,9 +1003,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { ...@@ -986,9 +1003,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
.pte_update = paravirt_nop, .pte_update = paravirt_nop,
.pte_update_defer = paravirt_nop, .pte_update_defer = paravirt_nop,
.pagetable_setup_start = xen_pagetable_setup_start,
.pagetable_setup_done = xen_pagetable_setup_done,
.alloc_pt = xen_alloc_pt_init, .alloc_pt = xen_alloc_pt_init,
.release_pt = xen_release_pt, .release_pt = xen_release_pt,
.alloc_pd = paravirt_nop, .alloc_pd = paravirt_nop,
...@@ -1023,7 +1037,9 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { ...@@ -1023,7 +1037,9 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
.activate_mm = xen_activate_mm, .activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap, .dup_mmap = xen_dup_mmap,
.exit_mmap = xen_exit_mmap, .exit_mmap = xen_exit_mmap,
};
static const struct pv_misc_ops xen_misc_ops __initdata = {
.set_lazy_mode = xen_set_lazy_mode, .set_lazy_mode = xen_set_lazy_mode,
}; };
...@@ -1091,7 +1107,15 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1091,7 +1107,15 @@ asmlinkage void __init xen_start_kernel(void)
BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
/* Install Xen paravirt ops */ /* Install Xen paravirt ops */
paravirt_ops = xen_paravirt_ops; pv_info = xen_info;
pv_init_ops = xen_init_ops;
pv_time_ops = xen_time_ops;
pv_cpu_ops = xen_cpu_ops;
pv_irq_ops = xen_irq_ops;
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
pv_misc_ops = xen_misc_ops;
machine_ops = xen_machine_ops; machine_ops = xen_machine_ops;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -1124,9 +1148,9 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1124,9 +1148,9 @@ asmlinkage void __init xen_start_kernel(void)
xen_setup_vcpu_info_placement(); xen_setup_vcpu_info_placement();
#endif #endif
paravirt_ops.kernel_rpl = 1; pv_info.kernel_rpl = 1;
if (xen_feature(XENFEAT_supervisor_mode_kernel)) if (xen_feature(XENFEAT_supervisor_mode_kernel))
paravirt_ops.kernel_rpl = 0; pv_info.kernel_rpl = 0;
/* set the limit of our address space */ /* set the limit of our address space */
reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
......
...@@ -115,7 +115,7 @@ static struct hv_ops lguest_cons = { ...@@ -115,7 +115,7 @@ static struct hv_ops lguest_cons = {
* (0), and the struct hv_ops containing the put_chars() function. */ * (0), and the struct hv_ops containing the put_chars() function. */
static int __init cons_init(void) static int __init cons_init(void)
{ {
if (strcmp(paravirt_ops.name, "lguest") != 0) if (strcmp(pv_info.name, "lguest") != 0)
return 0; return 0;
return hvc_instantiate(0, 0, &lguest_cons); return hvc_instantiate(0, 0, &lguest_cons);
......
...@@ -248,8 +248,8 @@ static void unmap_switcher(void) ...@@ -248,8 +248,8 @@ static void unmap_switcher(void)
} }
/*H:130 Our Guest is usually so well behaved; it never tries to do things it /*H:130 Our Guest is usually so well behaved; it never tries to do things it
* isn't allowed to. Unfortunately, "struct paravirt_ops" isn't quite * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't
* complete, because it doesn't contain replacements for the Intel I/O * quite complete, because it doesn't contain replacements for the Intel I/O
* instructions. As a result, the Guest sometimes fumbles across one during * instructions. As a result, the Guest sometimes fumbles across one during
* the boot process as it probes for various things which are usually attached * the boot process as it probes for various things which are usually attached
* to a PC. * to a PC.
...@@ -694,7 +694,7 @@ static int __init init(void) ...@@ -694,7 +694,7 @@ static int __init init(void)
/* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */
if (paravirt_enabled()) { if (paravirt_enabled()) {
printk("lguest is afraid of %s\n", paravirt_ops.name); printk("lguest is afraid of %s\n", pv_info.name);
return -EPERM; return -EPERM;
} }
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
* *
* So how does the kernel know it's a Guest? The Guest starts at a special * So how does the kernel know it's a Guest? The Guest starts at a special
* entry point marked with a magic string, which sets up a few things then * entry point marked with a magic string, which sets up a few things then
* calls here. We replace the native functions in "struct paravirt_ops" * calls here. We replace the native functions various "paravirt" structures
* with our Guest versions, then boot like normal. :*/ * with our Guest versions, then boot like normal. :*/
/* /*
...@@ -331,7 +331,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) ...@@ -331,7 +331,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
} }
/*G:038 That's enough excitement for now, back to ploughing through each of /*G:038 That's enough excitement for now, back to ploughing through each of
* the paravirt_ops (we're about 1/3 of the way through). * the different pv_ops structures (we're about 1/3 of the way through).
* *
* This is the Local Descriptor Table, another weird Intel thingy. Linux only * This is the Local Descriptor Table, another weird Intel thingy. Linux only
* uses this for some strange applications like Wine. We don't do anything * uses this for some strange applications like Wine. We don't do anything
...@@ -558,7 +558,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) ...@@ -558,7 +558,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
} }
/* Unfortunately for Lguest, the paravirt_ops for page tables were based on /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
* native page table operations. On native hardware you can set a new page * native page table operations. On native hardware you can set a new page
* table entry whenever you want, but if you want to remove one you have to do * table entry whenever you want, but if you want to remove one you have to do
* a TLB flush (a TLB is a little cache of page table entries kept by the CPU). * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
...@@ -782,7 +782,7 @@ static void lguest_time_init(void) ...@@ -782,7 +782,7 @@ static void lguest_time_init(void)
clocksource_register(&lguest_clock); clocksource_register(&lguest_clock);
/* Now we've set up our clock, we can use it as the scheduler clock */ /* Now we've set up our clock, we can use it as the scheduler clock */
paravirt_ops.sched_clock = lguest_sched_clock; pv_time_ops.sched_clock = lguest_sched_clock;
/* We can't set cpumask in the initializer: damn C limitations! Set it /* We can't set cpumask in the initializer: damn C limitations! Set it
* here and register our timer device. */ * here and register our timer device. */
...@@ -902,7 +902,7 @@ static __init char *lguest_memory_setup(void) ...@@ -902,7 +902,7 @@ static __init char *lguest_memory_setup(void)
/*G:050 /*G:050
* Patching (Powerfully Placating Performance Pedants) * Patching (Powerfully Placating Performance Pedants)
* *
* We have already seen that "struct paravirt_ops" lets us replace simple * We have already seen that pv_ops structures let us replace simple
* native instructions with calls to the appropriate back end all throughout * native instructions with calls to the appropriate back end all throughout
* the kernel. This allows the same kernel to run as a Guest and as a native * the kernel. This allows the same kernel to run as a Guest and as a native
* kernel, but it's slow because of all the indirect branches. * kernel, but it's slow because of all the indirect branches.
...@@ -927,10 +927,10 @@ static const struct lguest_insns ...@@ -927,10 +927,10 @@ static const struct lguest_insns
{ {
const char *start, *end; const char *start, *end;
} lguest_insns[] = { } lguest_insns[] = {
[PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli }, [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
[PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
[PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
[PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
}; };
/* Now our patch routine is fairly simple (based on the native one in /* Now our patch routine is fairly simple (based on the native one in
...@@ -957,9 +957,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, ...@@ -957,9 +957,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
return insn_len; return insn_len;
} }
/*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
* structure in the kernel provides a single point for (almost) every routine * structures in the kernel provide points for (almost) every routine we have
* we have to override to avoid privileged instructions. */ * to override to avoid privileged instructions. */
__init void lguest_init(void *boot) __init void lguest_init(void *boot)
{ {
/* Copy boot parameters first: the Launcher put the physical location /* Copy boot parameters first: the Launcher put the physical location
...@@ -974,54 +974,68 @@ __init void lguest_init(void *boot) ...@@ -974,54 +974,68 @@ __init void lguest_init(void *boot)
/* We're under lguest, paravirt is enabled, and we're running at /* We're under lguest, paravirt is enabled, and we're running at
* privilege level 1, not 0 as normal. */ * privilege level 1, not 0 as normal. */
paravirt_ops.name = "lguest"; pv_info.name = "lguest";
paravirt_ops.paravirt_enabled = 1; pv_info.paravirt_enabled = 1;
paravirt_ops.kernel_rpl = 1; pv_info.kernel_rpl = 1;
/* We set up all the lguest overrides for sensitive operations. These /* We set up all the lguest overrides for sensitive operations. These
* are detailed with the operations themselves. */ * are detailed with the operations themselves. */
paravirt_ops.save_fl = save_fl;
paravirt_ops.restore_fl = restore_fl; /* interrupt-related operations */
paravirt_ops.irq_disable = irq_disable; pv_irq_ops.init_IRQ = lguest_init_IRQ;
paravirt_ops.irq_enable = irq_enable; pv_irq_ops.save_fl = save_fl;
paravirt_ops.load_gdt = lguest_load_gdt; pv_irq_ops.restore_fl = restore_fl;
paravirt_ops.memory_setup = lguest_memory_setup; pv_irq_ops.irq_disable = irq_disable;
paravirt_ops.cpuid = lguest_cpuid; pv_irq_ops.irq_enable = irq_enable;
paravirt_ops.write_cr3 = lguest_write_cr3; pv_irq_ops.safe_halt = lguest_safe_halt;
paravirt_ops.flush_tlb_user = lguest_flush_tlb_user;
paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; /* init-time operations */
paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; pv_init_ops.memory_setup = lguest_memory_setup;
paravirt_ops.set_pte = lguest_set_pte; pv_init_ops.patch = lguest_patch;
paravirt_ops.set_pte_at = lguest_set_pte_at;
paravirt_ops.set_pmd = lguest_set_pmd; /* Intercepts of various cpu instructions */
pv_cpu_ops.load_gdt = lguest_load_gdt;
pv_cpu_ops.cpuid = lguest_cpuid;
pv_cpu_ops.load_idt = lguest_load_idt;
pv_cpu_ops.iret = lguest_iret;
pv_cpu_ops.load_esp0 = lguest_load_esp0;
pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
pv_cpu_ops.set_ldt = lguest_set_ldt;
pv_cpu_ops.load_tls = lguest_load_tls;
pv_cpu_ops.set_debugreg = lguest_set_debugreg;
pv_cpu_ops.clts = lguest_clts;
pv_cpu_ops.read_cr0 = lguest_read_cr0;
pv_cpu_ops.write_cr0 = lguest_write_cr0;
pv_cpu_ops.read_cr4 = lguest_read_cr4;
pv_cpu_ops.write_cr4 = lguest_write_cr4;
pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
pv_cpu_ops.wbinvd = lguest_wbinvd;
/* pagetable management */
pv_mmu_ops.write_cr3 = lguest_write_cr3;
pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
pv_mmu_ops.set_pte = lguest_set_pte;
pv_mmu_ops.set_pte_at = lguest_set_pte_at;
pv_mmu_ops.set_pmd = lguest_set_pmd;
pv_mmu_ops.read_cr2 = lguest_read_cr2;
pv_mmu_ops.read_cr3 = lguest_read_cr3;
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
paravirt_ops.apic_write = lguest_apic_write; /* apic read/write intercepts */
paravirt_ops.apic_write_atomic = lguest_apic_write; pv_apic_ops.apic_write = lguest_apic_write;
paravirt_ops.apic_read = lguest_apic_read; pv_apic_ops.apic_write_atomic = lguest_apic_write;
pv_apic_ops.apic_read = lguest_apic_read;
#endif #endif
paravirt_ops.load_idt = lguest_load_idt;
paravirt_ops.iret = lguest_iret; /* time operations */
paravirt_ops.load_esp0 = lguest_load_esp0; pv_time_ops.get_wallclock = lguest_get_wallclock;
paravirt_ops.load_tr_desc = lguest_load_tr_desc; pv_time_ops.time_init = lguest_time_init;
paravirt_ops.set_ldt = lguest_set_ldt;
paravirt_ops.load_tls = lguest_load_tls; pv_misc_ops.set_lazy_mode = lguest_lazy_mode;
paravirt_ops.set_debugreg = lguest_set_debugreg;
paravirt_ops.clts = lguest_clts;
paravirt_ops.read_cr0 = lguest_read_cr0;
paravirt_ops.write_cr0 = lguest_write_cr0;
paravirt_ops.init_IRQ = lguest_init_IRQ;
paravirt_ops.read_cr2 = lguest_read_cr2;
paravirt_ops.read_cr3 = lguest_read_cr3;
paravirt_ops.read_cr4 = lguest_read_cr4;
paravirt_ops.write_cr4 = lguest_write_cr4;
paravirt_ops.write_gdt_entry = lguest_write_gdt_entry;
paravirt_ops.write_idt_entry = lguest_write_idt_entry;
paravirt_ops.patch = lguest_patch;
paravirt_ops.safe_halt = lguest_safe_halt;
paravirt_ops.get_wallclock = lguest_get_wallclock;
paravirt_ops.time_init = lguest_time_init;
paravirt_ops.set_lazy_mode = lguest_lazy_mode;
paravirt_ops.wbinvd = lguest_wbinvd;
/* Now is a good time to look at the implementations of these functions /* Now is a good time to look at the implementations of these functions
* before returning to the rest of lguest_init(). */ * before returning to the rest of lguest_init(). */
......
...@@ -201,7 +201,7 @@ static void scan_devices(void) ...@@ -201,7 +201,7 @@ static void scan_devices(void)
* "struct lguest_device_desc" array. */ * "struct lguest_device_desc" array. */
static int __init lguest_bus_init(void) static int __init lguest_bus_init(void)
{ {
if (strcmp(paravirt_ops.name, "lguest") != 0) if (strcmp(pv_info.name, "lguest") != 0)
return 0; return 0;
/* Devices are in a single page above top of "normal" mem */ /* Devices are in a single page above top of "normal" mem */
......
...@@ -33,19 +33,23 @@ enum paravirt_lazy_mode { ...@@ -33,19 +33,23 @@ enum paravirt_lazy_mode {
PARAVIRT_LAZY_FLUSH = 3, PARAVIRT_LAZY_FLUSH = 3,
}; };
struct paravirt_ops
{ /* general info */
struct pv_info {
unsigned int kernel_rpl; unsigned int kernel_rpl;
int shared_kernel_pmd; int shared_kernel_pmd;
int paravirt_enabled; int paravirt_enabled;
const char *name; const char *name;
};
struct pv_init_ops {
/* /*
* Patch may replace one of the defined code sequences with arbitrary * Patch may replace one of the defined code sequences with
* code, subject to the same register constraints. This generally * arbitrary code, subject to the same register constraints.
* means the code is not free to clobber any registers other than EAX. * This generally means the code is not free to clobber any
* The patch function should return the number of bytes of code * registers other than EAX. The patch function should return
* generated, as we nop pad the rest in generic code. * the number of bytes of code generated, as we nop pad the
* rest in generic code.
*/ */
unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
unsigned long addr, unsigned len); unsigned long addr, unsigned len);
...@@ -55,29 +59,28 @@ struct paravirt_ops ...@@ -55,29 +59,28 @@ struct paravirt_ops
char *(*memory_setup)(void); char *(*memory_setup)(void);
void (*post_allocator_init)(void); void (*post_allocator_init)(void);
void (*init_IRQ)(void);
void (*time_init)(void);
/*
* Called before/after init_mm pagetable setup. setup_start
* may reset %cr3, and may pre-install parts of the pagetable;
* pagetable setup is expected to preserve any existing
* mapping.
*/
void (*pagetable_setup_start)(pgd_t *pgd_base);
void (*pagetable_setup_done)(pgd_t *pgd_base);
/* Print a banner to identify the environment */ /* Print a banner to identify the environment */
void (*banner)(void); void (*banner)(void);
};
struct pv_misc_ops {
/* Set deferred update mode, used for batching operations. */
void (*set_lazy_mode)(enum paravirt_lazy_mode mode);
};
struct pv_time_ops {
void (*time_init)(void);
/* Set and set time of day */ /* Set and set time of day */
unsigned long (*get_wallclock)(void); unsigned long (*get_wallclock)(void);
int (*set_wallclock)(unsigned long); int (*set_wallclock)(unsigned long);
/* cpuid emulation, mostly so that caps bits can be disabled */ unsigned long long (*sched_clock)(void);
void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned long (*get_cpu_khz)(void);
unsigned int *ecx, unsigned int *edx); };
struct pv_cpu_ops {
/* hooks for various privileged instructions */ /* hooks for various privileged instructions */
unsigned long (*get_debugreg)(int regno); unsigned long (*get_debugreg)(int regno);
void (*set_debugreg)(int regno, unsigned long value); void (*set_debugreg)(int regno, unsigned long value);
...@@ -87,41 +90,10 @@ struct paravirt_ops ...@@ -87,41 +90,10 @@ struct paravirt_ops
unsigned long (*read_cr0)(void); unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long); void (*write_cr0)(unsigned long);
unsigned long (*read_cr2)(void);
void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void);
void (*write_cr3)(unsigned long);
unsigned long (*read_cr4_safe)(void); unsigned long (*read_cr4_safe)(void);
unsigned long (*read_cr4)(void); unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long); void (*write_cr4)(unsigned long);
/*
* Get/set interrupt state. save_fl and restore_fl are only
* expected to use X86_EFLAGS_IF; all other bits
* returned from save_fl are undefined, and may be ignored by
* restore_fl.
*/
unsigned long (*save_fl)(void);
void (*restore_fl)(unsigned long);
void (*irq_disable)(void);
void (*irq_enable)(void);
void (*safe_halt)(void);
void (*halt)(void);
void (*wbinvd)(void);
/* MSR, PMC and TSR operations.
err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
u64 (*read_msr)(unsigned int msr, int *err);
int (*write_msr)(unsigned int msr, u64 val);
u64 (*read_tsc)(void);
u64 (*read_pmc)(void);
unsigned long long (*sched_clock)(void);
unsigned long (*get_cpu_khz)(void);
/* Segment descriptor handling */ /* Segment descriptor handling */
void (*load_tr_desc)(void); void (*load_tr_desc)(void);
void (*load_gdt)(const struct Xgt_desc_struct *); void (*load_gdt)(const struct Xgt_desc_struct *);
...@@ -140,18 +112,45 @@ struct paravirt_ops ...@@ -140,18 +112,45 @@ struct paravirt_ops
void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
void (*set_iopl_mask)(unsigned mask); void (*set_iopl_mask)(unsigned mask);
void (*wbinvd)(void);
void (*io_delay)(void); void (*io_delay)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
/* MSR, PMC and TSR operations.
err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
u64 (*read_msr)(unsigned int msr, int *err);
int (*write_msr)(unsigned int msr, u64 val);
u64 (*read_tsc)(void);
u64 (*read_pmc)(void);
/* These two are jmp to, not actually called. */
void (*irq_enable_sysexit)(void);
void (*iret)(void);
};
struct pv_irq_ops {
void (*init_IRQ)(void);
/* /*
* Hooks for intercepting the creation/use/destruction of an * Get/set interrupt state. save_fl and restore_fl are only
* mm_struct. * expected to use X86_EFLAGS_IF; all other bits
* returned from save_fl are undefined, and may be ignored by
* restore_fl.
*/ */
void (*activate_mm)(struct mm_struct *prev, unsigned long (*save_fl)(void);
struct mm_struct *next); void (*restore_fl)(unsigned long);
void (*dup_mmap)(struct mm_struct *oldmm, void (*irq_disable)(void);
struct mm_struct *mm); void (*irq_enable)(void);
void (*exit_mmap)(struct mm_struct *mm); void (*safe_halt)(void);
void (*halt)(void);
};
struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
/* /*
* Direct APIC operations, principally for VMI. Ideally * Direct APIC operations, principally for VMI. Ideally
...@@ -167,6 +166,34 @@ struct paravirt_ops ...@@ -167,6 +166,34 @@ struct paravirt_ops
unsigned long start_eip, unsigned long start_eip,
unsigned long start_esp); unsigned long start_esp);
#endif #endif
};
struct pv_mmu_ops {
/*
* Called before/after init_mm pagetable setup. setup_start
* may reset %cr3, and may pre-install parts of the pagetable;
* pagetable setup is expected to preserve any existing
* mapping.
*/
void (*pagetable_setup_start)(pgd_t *pgd_base);
void (*pagetable_setup_done)(pgd_t *pgd_base);
unsigned long (*read_cr2)(void);
void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void);
void (*write_cr3)(unsigned long);
/*
* Hooks for intercepting the creation/use/destruction of an
* mm_struct.
*/
void (*activate_mm)(struct mm_struct *prev,
struct mm_struct *next);
void (*dup_mmap)(struct mm_struct *oldmm,
struct mm_struct *mm);
void (*exit_mmap)(struct mm_struct *mm);
/* TLB operations */ /* TLB operations */
void (*flush_tlb_user)(void); void (*flush_tlb_user)(void);
...@@ -191,15 +218,12 @@ struct paravirt_ops ...@@ -191,15 +218,12 @@ struct paravirt_ops
void (*pte_update_defer)(struct mm_struct *mm, void (*pte_update_defer)(struct mm_struct *mm,
unsigned long addr, pte_t *ptep); unsigned long addr, pte_t *ptep);
#ifdef CONFIG_HIGHPTE
void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
#endif
#ifdef CONFIG_X86_PAE #ifdef CONFIG_X86_PAE
void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
void (*set_pud)(pud_t *pudp, pud_t pudval); void (*set_pud)(pud_t *pudp, pud_t pudval);
void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void (*pmd_clear)(pmd_t *pmdp); void (*pmd_clear)(pmd_t *pmdp);
unsigned long long (*pte_val)(pte_t); unsigned long long (*pte_val)(pte_t);
...@@ -217,21 +241,40 @@ struct paravirt_ops ...@@ -217,21 +241,40 @@ struct paravirt_ops
pgd_t (*make_pgd)(unsigned long pgd); pgd_t (*make_pgd)(unsigned long pgd);
#endif #endif
/* Set deferred update mode, used for batching operations. */ #ifdef CONFIG_HIGHPTE
void (*set_lazy_mode)(enum paravirt_lazy_mode mode); void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
#endif
};
/* These two are jmp to, not actually called. */ /* This contains all the paravirt structures: we get a convenient
void (*irq_enable_sysexit)(void); * number for each function using the offset which we use to indicate
void (*iret)(void); * what to patch. */
struct paravirt_patch_template
{
struct pv_init_ops pv_init_ops;
struct pv_misc_ops pv_misc_ops;
struct pv_time_ops pv_time_ops;
struct pv_cpu_ops pv_cpu_ops;
struct pv_irq_ops pv_irq_ops;
struct pv_apic_ops pv_apic_ops;
struct pv_mmu_ops pv_mmu_ops;
}; };
extern struct paravirt_ops paravirt_ops; extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops;
extern struct pv_misc_ops pv_misc_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;
#define PARAVIRT_PATCH(x) \ #define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_ops, x) / sizeof(void *)) (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
#define paravirt_type(type) \ #define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(type)) [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
[paravirt_opptr] "m" (op)
#define paravirt_clobber(clobber) \ #define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber) [paravirt_clobber] "i" (clobber)
...@@ -258,7 +301,7 @@ unsigned paravirt_patch_call(void *insnbuf, ...@@ -258,7 +301,7 @@ unsigned paravirt_patch_call(void *insnbuf,
const void *target, u16 tgt_clobbers, const void *target, u16 tgt_clobbers,
unsigned long addr, u16 site_clobbers, unsigned long addr, u16 site_clobbers,
unsigned len); unsigned len);
unsigned paravirt_patch_jmp(const void *target, void *insnbuf, unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
unsigned long addr, unsigned len); unsigned long addr, unsigned len);
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned long addr, unsigned len); unsigned long addr, unsigned len);
...@@ -271,14 +314,14 @@ int paravirt_disable_iospace(void); ...@@ -271,14 +314,14 @@ int paravirt_disable_iospace(void);
/* /*
* This generates an indirect call based on the operation type number. * This generates an indirect call based on the operation type number.
* The type number, computed in PARAVIRT_PATCH, is derived from the * The type number, computed in PARAVIRT_PATCH, is derived from the
* offset into the paravirt_ops structure, and can therefore be freely * offset into the paravirt_patch_template structure, and can therefore be
* converted back into a structure offset. * freely converted back into a structure offset.
*/ */
#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" #define PARAVIRT_CALL "call *%[paravirt_opptr];"
/* /*
* These macros are intended to wrap calls into a paravirt_ops * These macros are intended to wrap calls through one of the paravirt
* operation, so that they can be later identified and patched at * ops structs, so that they can be later identified and patched at
* runtime. * runtime.
* *
* Normally, a call to a pv_op function is a simple indirect call: * Normally, a call to a pv_op function is a simple indirect call:
...@@ -301,7 +344,7 @@ int paravirt_disable_iospace(void); ...@@ -301,7 +344,7 @@ int paravirt_disable_iospace(void);
* The call instruction itself is marked by placing its start address * The call instruction itself is marked by placing its start address
* and size into the .parainstructions section, so that * and size into the .parainstructions section, so that
* apply_paravirt() in arch/i386/kernel/alternative.c can do the * apply_paravirt() in arch/i386/kernel/alternative.c can do the
* appropriate patching under the control of the backend paravirt_ops * appropriate patching under the control of the backend pv_init_ops
* implementation. * implementation.
* *
* Unfortunately there's no way to get gcc to generate the args setup * Unfortunately there's no way to get gcc to generate the args setup
...@@ -409,36 +452,36 @@ int paravirt_disable_iospace(void); ...@@ -409,36 +452,36 @@ int paravirt_disable_iospace(void);
static inline int paravirt_enabled(void) static inline int paravirt_enabled(void)
{ {
return paravirt_ops.paravirt_enabled; return pv_info.paravirt_enabled;
} }
static inline void load_esp0(struct tss_struct *tss, static inline void load_esp0(struct tss_struct *tss,
struct thread_struct *thread) struct thread_struct *thread)
{ {
PVOP_VCALL2(load_esp0, tss, thread); PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
} }
#define ARCH_SETUP paravirt_ops.arch_setup(); #define ARCH_SETUP pv_init_ops.arch_setup();
static inline unsigned long get_wallclock(void) static inline unsigned long get_wallclock(void)
{ {
return PVOP_CALL0(unsigned long, get_wallclock); return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
} }
static inline int set_wallclock(unsigned long nowtime) static inline int set_wallclock(unsigned long nowtime)
{ {
return PVOP_CALL1(int, set_wallclock, nowtime); return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
} }
static inline void (*choose_time_init(void))(void) static inline void (*choose_time_init(void))(void)
{ {
return paravirt_ops.time_init; return pv_time_ops.time_init;
} }
/* The paravirtualized CPUID instruction. */ /* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx, static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx) unsigned int *ecx, unsigned int *edx)
{ {
PVOP_VCALL4(cpuid, eax, ebx, ecx, edx); PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
} }
/* /*
...@@ -446,87 +489,87 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, ...@@ -446,87 +489,87 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
*/ */
static inline unsigned long paravirt_get_debugreg(int reg) static inline unsigned long paravirt_get_debugreg(int reg)
{ {
return PVOP_CALL1(unsigned long, get_debugreg, reg); return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
} }
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg) static inline void set_debugreg(unsigned long val, int reg)
{ {
PVOP_VCALL2(set_debugreg, reg, val); PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
} }
static inline void clts(void) static inline void clts(void)
{ {
PVOP_VCALL0(clts); PVOP_VCALL0(pv_cpu_ops.clts);
} }
static inline unsigned long read_cr0(void) static inline unsigned long read_cr0(void)
{ {
return PVOP_CALL0(unsigned long, read_cr0); return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
} }
static inline void write_cr0(unsigned long x) static inline void write_cr0(unsigned long x)
{ {
PVOP_VCALL1(write_cr0, x); PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
} }
static inline unsigned long read_cr2(void) static inline unsigned long read_cr2(void)
{ {
return PVOP_CALL0(unsigned long, read_cr2); return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
} }
static inline void write_cr2(unsigned long x) static inline void write_cr2(unsigned long x)
{ {
PVOP_VCALL1(write_cr2, x); PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
} }
static inline unsigned long read_cr3(void) static inline unsigned long read_cr3(void)
{ {
return PVOP_CALL0(unsigned long, read_cr3); return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
} }
static inline void write_cr3(unsigned long x) static inline void write_cr3(unsigned long x)
{ {
PVOP_VCALL1(write_cr3, x); PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
} }
static inline unsigned long read_cr4(void) static inline unsigned long read_cr4(void)
{ {
return PVOP_CALL0(unsigned long, read_cr4); return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
} }
static inline unsigned long read_cr4_safe(void) static inline unsigned long read_cr4_safe(void)
{ {
return PVOP_CALL0(unsigned long, read_cr4_safe); return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
} }
static inline void write_cr4(unsigned long x) static inline void write_cr4(unsigned long x)
{ {
PVOP_VCALL1(write_cr4, x); PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
} }
static inline void raw_safe_halt(void) static inline void raw_safe_halt(void)
{ {
PVOP_VCALL0(safe_halt); PVOP_VCALL0(pv_irq_ops.safe_halt);
} }
static inline void halt(void) static inline void halt(void)
{ {
PVOP_VCALL0(safe_halt); PVOP_VCALL0(pv_irq_ops.safe_halt);
} }
static inline void wbinvd(void) static inline void wbinvd(void)
{ {
PVOP_VCALL0(wbinvd); PVOP_VCALL0(pv_cpu_ops.wbinvd);
} }
#define get_kernel_rpl() (paravirt_ops.kernel_rpl) #define get_kernel_rpl() (pv_info.kernel_rpl)
static inline u64 paravirt_read_msr(unsigned msr, int *err) static inline u64 paravirt_read_msr(unsigned msr, int *err)
{ {
return PVOP_CALL2(u64, read_msr, msr, err); return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
} }
static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{ {
return PVOP_CALL3(int, write_msr, msr, low, high); return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
} }
/* These should all do BUG_ON(_err), but our headers are too tangled. */ /* These should all do BUG_ON(_err), but our headers are too tangled. */
...@@ -560,7 +603,7 @@ static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) ...@@ -560,7 +603,7 @@ static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
static inline u64 paravirt_read_tsc(void) static inline u64 paravirt_read_tsc(void)
{ {
return PVOP_CALL0(u64, read_tsc); return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
} }
#define rdtscl(low) do { \ #define rdtscl(low) do { \
...@@ -572,15 +615,15 @@ static inline u64 paravirt_read_tsc(void) ...@@ -572,15 +615,15 @@ static inline u64 paravirt_read_tsc(void)
static inline unsigned long long paravirt_sched_clock(void) static inline unsigned long long paravirt_sched_clock(void)
{ {
return PVOP_CALL0(unsigned long long, sched_clock); return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
} }
#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
static inline unsigned long long paravirt_read_pmc(int counter) static inline unsigned long long paravirt_read_pmc(int counter)
{ {
return PVOP_CALL1(u64, read_pmc, counter); return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
} }
#define rdpmc(counter,low,high) do { \ #define rdpmc(counter,low,high) do { \
...@@ -591,61 +634,61 @@ static inline unsigned long long paravirt_read_pmc(int counter) ...@@ -591,61 +634,61 @@ static inline unsigned long long paravirt_read_pmc(int counter)
static inline void load_TR_desc(void) static inline void load_TR_desc(void)
{ {
PVOP_VCALL0(load_tr_desc); PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
} }
static inline void load_gdt(const struct Xgt_desc_struct *dtr) static inline void load_gdt(const struct Xgt_desc_struct *dtr)
{ {
PVOP_VCALL1(load_gdt, dtr); PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
} }
static inline void load_idt(const struct Xgt_desc_struct *dtr) static inline void load_idt(const struct Xgt_desc_struct *dtr)
{ {
PVOP_VCALL1(load_idt, dtr); PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
} }
static inline void set_ldt(const void *addr, unsigned entries) static inline void set_ldt(const void *addr, unsigned entries)
{ {
PVOP_VCALL2(set_ldt, addr, entries); PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
} }
static inline void store_gdt(struct Xgt_desc_struct *dtr) static inline void store_gdt(struct Xgt_desc_struct *dtr)
{ {
PVOP_VCALL1(store_gdt, dtr); PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
} }
static inline void store_idt(struct Xgt_desc_struct *dtr) static inline void store_idt(struct Xgt_desc_struct *dtr)
{ {
PVOP_VCALL1(store_idt, dtr); PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
} }
static inline unsigned long paravirt_store_tr(void) static inline unsigned long paravirt_store_tr(void)
{ {
return PVOP_CALL0(unsigned long, store_tr); return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
} }
#define store_tr(tr) ((tr) = paravirt_store_tr()) #define store_tr(tr) ((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu) static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{ {
PVOP_VCALL2(load_tls, t, cpu); PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
} }
static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high) static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
{ {
PVOP_VCALL4(write_ldt_entry, dt, entry, low, high); PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
} }
static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high) static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
{ {
PVOP_VCALL4(write_gdt_entry, dt, entry, low, high); PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
} }
static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high) static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
{ {
PVOP_VCALL4(write_idt_entry, dt, entry, low, high); PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high);
} }
static inline void set_iopl_mask(unsigned mask) static inline void set_iopl_mask(unsigned mask)
{ {
PVOP_VCALL1(set_iopl_mask, mask); PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
} }
/* The paravirtualized I/O functions */ /* The paravirtualized I/O functions */
static inline void slow_down_io(void) { static inline void slow_down_io(void) {
paravirt_ops.io_delay(); pv_cpu_ops.io_delay();
#ifdef REALLY_SLOW_IO #ifdef REALLY_SLOW_IO
paravirt_ops.io_delay(); pv_cpu_ops.io_delay();
paravirt_ops.io_delay(); pv_cpu_ops.io_delay();
paravirt_ops.io_delay(); pv_cpu_ops.io_delay();
#endif #endif
} }
...@@ -655,121 +698,120 @@ static inline void slow_down_io(void) { ...@@ -655,121 +698,120 @@ static inline void slow_down_io(void) {
*/ */
static inline void apic_write(unsigned long reg, unsigned long v) static inline void apic_write(unsigned long reg, unsigned long v)
{ {
PVOP_VCALL2(apic_write, reg, v); PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
} }
static inline void apic_write_atomic(unsigned long reg, unsigned long v) static inline void apic_write_atomic(unsigned long reg, unsigned long v)
{ {
PVOP_VCALL2(apic_write_atomic, reg, v); PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
} }
static inline unsigned long apic_read(unsigned long reg) static inline unsigned long apic_read(unsigned long reg)
{ {
return PVOP_CALL1(unsigned long, apic_read, reg); return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
} }
static inline void setup_boot_clock(void) static inline void setup_boot_clock(void)
{ {
PVOP_VCALL0(setup_boot_clock); PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
} }
static inline void setup_secondary_clock(void) static inline void setup_secondary_clock(void)
{ {
PVOP_VCALL0(setup_secondary_clock); PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
} }
#endif #endif
static inline void paravirt_post_allocator_init(void) static inline void paravirt_post_allocator_init(void)
{ {
if (paravirt_ops.post_allocator_init) if (pv_init_ops.post_allocator_init)
(*paravirt_ops.post_allocator_init)(); (*pv_init_ops.post_allocator_init)();
} }
static inline void paravirt_pagetable_setup_start(pgd_t *base) static inline void paravirt_pagetable_setup_start(pgd_t *base)
{ {
if (paravirt_ops.pagetable_setup_start) (*pv_mmu_ops.pagetable_setup_start)(base);
(*paravirt_ops.pagetable_setup_start)(base);
} }
static inline void paravirt_pagetable_setup_done(pgd_t *base) static inline void paravirt_pagetable_setup_done(pgd_t *base)
{ {
if (paravirt_ops.pagetable_setup_done) (*pv_mmu_ops.pagetable_setup_done)(base);
(*paravirt_ops.pagetable_setup_done)(base);
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
unsigned long start_esp) unsigned long start_esp)
{ {
PVOP_VCALL3(startup_ipi_hook, phys_apicid, start_eip, start_esp); PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
phys_apicid, start_eip, start_esp);
} }
#endif #endif
static inline void paravirt_activate_mm(struct mm_struct *prev, static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next) struct mm_struct *next)
{ {
PVOP_VCALL2(activate_mm, prev, next); PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
} }
static inline void arch_dup_mmap(struct mm_struct *oldmm, static inline void arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm) struct mm_struct *mm)
{ {
PVOP_VCALL2(dup_mmap, oldmm, mm); PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
} }
static inline void arch_exit_mmap(struct mm_struct *mm) static inline void arch_exit_mmap(struct mm_struct *mm)
{ {
PVOP_VCALL1(exit_mmap, mm); PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
} }
static inline void __flush_tlb(void) static inline void __flush_tlb(void)
{ {
PVOP_VCALL0(flush_tlb_user); PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
} }
static inline void __flush_tlb_global(void) static inline void __flush_tlb_global(void)
{ {
PVOP_VCALL0(flush_tlb_kernel); PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
} }
static inline void __flush_tlb_single(unsigned long addr) static inline void __flush_tlb_single(unsigned long addr)
{ {
PVOP_VCALL1(flush_tlb_single, addr); PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
} }
static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va) unsigned long va)
{ {
PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
} }
static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn) static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
{ {
PVOP_VCALL2(alloc_pt, mm, pfn); PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn);
} }
static inline void paravirt_release_pt(unsigned pfn) static inline void paravirt_release_pt(unsigned pfn)
{ {
PVOP_VCALL1(release_pt, pfn); PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
} }
static inline void paravirt_alloc_pd(unsigned pfn) static inline void paravirt_alloc_pd(unsigned pfn)
{ {
PVOP_VCALL1(alloc_pd, pfn); PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
} }
static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
unsigned start, unsigned count) unsigned start, unsigned count)
{ {
PVOP_VCALL4(alloc_pd_clone, pfn, clonepfn, start, count); PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count);
} }
static inline void paravirt_release_pd(unsigned pfn) static inline void paravirt_release_pd(unsigned pfn)
{ {
PVOP_VCALL1(release_pd, pfn); PVOP_VCALL1(pv_mmu_ops.release_pd, pfn);
} }
#ifdef CONFIG_HIGHPTE #ifdef CONFIG_HIGHPTE
static inline void *kmap_atomic_pte(struct page *page, enum km_type type) static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
{ {
unsigned long ret; unsigned long ret;
ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type); ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
return (void *)ret; return (void *)ret;
} }
#endif #endif
...@@ -777,162 +819,171 @@ static inline void *kmap_atomic_pte(struct page *page, enum km_type type) ...@@ -777,162 +819,171 @@ static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
static inline void pte_update(struct mm_struct *mm, unsigned long addr, static inline void pte_update(struct mm_struct *mm, unsigned long addr,
pte_t *ptep) pte_t *ptep)
{ {
PVOP_VCALL3(pte_update, mm, addr, ptep); PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
} }
static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
pte_t *ptep) pte_t *ptep)
{ {
PVOP_VCALL3(pte_update_defer, mm, addr, ptep); PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
} }
#ifdef CONFIG_X86_PAE #ifdef CONFIG_X86_PAE
static inline pte_t __pte(unsigned long long val) static inline pte_t __pte(unsigned long long val)
{ {
unsigned long long ret = PVOP_CALL2(unsigned long long, make_pte, unsigned long long ret = PVOP_CALL2(unsigned long long,
pv_mmu_ops.make_pte,
val, val >> 32); val, val >> 32);
return (pte_t) { ret, ret >> 32 }; return (pte_t) { ret, ret >> 32 };
} }
static inline pmd_t __pmd(unsigned long long val) static inline pmd_t __pmd(unsigned long long val)
{ {
return (pmd_t) { PVOP_CALL2(unsigned long long, make_pmd, val, val >> 32) }; return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
val, val >> 32) };
} }
static inline pgd_t __pgd(unsigned long long val) static inline pgd_t __pgd(unsigned long long val)
{ {
return (pgd_t) { PVOP_CALL2(unsigned long long, make_pgd, val, val >> 32) }; return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
val, val >> 32) };
} }
static inline unsigned long long pte_val(pte_t x) static inline unsigned long long pte_val(pte_t x)
{ {
return PVOP_CALL2(unsigned long long, pte_val, x.pte_low, x.pte_high); return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
x.pte_low, x.pte_high);
} }
static inline unsigned long long pmd_val(pmd_t x) static inline unsigned long long pmd_val(pmd_t x)
{ {
return PVOP_CALL2(unsigned long long, pmd_val, x.pmd, x.pmd >> 32); return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
x.pmd, x.pmd >> 32);
} }
static inline unsigned long long pgd_val(pgd_t x) static inline unsigned long long pgd_val(pgd_t x)
{ {
return PVOP_CALL2(unsigned long long, pgd_val, x.pgd, x.pgd >> 32); return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
x.pgd, x.pgd >> 32);
} }
static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void set_pte(pte_t *ptep, pte_t pteval)
{ {
PVOP_VCALL3(set_pte, ptep, pteval.pte_low, pteval.pte_high); PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
} }
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval) pte_t *ptep, pte_t pteval)
{ {
/* 5 arg words */ /* 5 arg words */
paravirt_ops.set_pte_at(mm, addr, ptep, pteval); pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
} }
static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
{ {
PVOP_VCALL3(set_pte_atomic, ptep, pteval.pte_low, pteval.pte_high); PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
pteval.pte_low, pteval.pte_high);
} }
static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte) pte_t *ptep, pte_t pte)
{ {
/* 5 arg words */ /* 5 arg words */
paravirt_ops.set_pte_present(mm, addr, ptep, pte); pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
} }
static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{ {
PVOP_VCALL3(set_pmd, pmdp, pmdval.pmd, pmdval.pmd >> 32); PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
pmdval.pmd, pmdval.pmd >> 32);
} }
static inline void set_pud(pud_t *pudp, pud_t pudval) static inline void set_pud(pud_t *pudp, pud_t pudval)
{ {
PVOP_VCALL3(set_pud, pudp, pudval.pgd.pgd, pudval.pgd.pgd >> 32); PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
pudval.pgd.pgd, pudval.pgd.pgd >> 32);
} }
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{ {
PVOP_VCALL3(pte_clear, mm, addr, ptep); PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
} }
static inline void pmd_clear(pmd_t *pmdp) static inline void pmd_clear(pmd_t *pmdp)
{ {
PVOP_VCALL1(pmd_clear, pmdp); PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
} }
#else /* !CONFIG_X86_PAE */ #else /* !CONFIG_X86_PAE */
static inline pte_t __pte(unsigned long val) static inline pte_t __pte(unsigned long val)
{ {
return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
} }
static inline pgd_t __pgd(unsigned long val) static inline pgd_t __pgd(unsigned long val)
{ {
return (pgd_t) { PVOP_CALL1(unsigned long, make_pgd, val) }; return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
} }
static inline unsigned long pte_val(pte_t x) static inline unsigned long pte_val(pte_t x)
{ {
return PVOP_CALL1(unsigned long, pte_val, x.pte_low); return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
} }
static inline unsigned long pgd_val(pgd_t x) static inline unsigned long pgd_val(pgd_t x)
{ {
return PVOP_CALL1(unsigned long, pgd_val, x.pgd); return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
} }
static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void set_pte(pte_t *ptep, pte_t pteval)
{ {
PVOP_VCALL2(set_pte, ptep, pteval.pte_low); PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
} }
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval) pte_t *ptep, pte_t pteval)
{ {
PVOP_VCALL4(set_pte_at, mm, addr, ptep, pteval.pte_low); PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
} }
static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{ {
PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
} }
#endif /* CONFIG_X86_PAE */ #endif /* CONFIG_X86_PAE */
#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
static inline void arch_enter_lazy_cpu_mode(void) static inline void arch_enter_lazy_cpu_mode(void)
{ {
PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_CPU); PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_CPU);
} }
static inline void arch_leave_lazy_cpu_mode(void) static inline void arch_leave_lazy_cpu_mode(void)
{ {
PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_NONE);
} }
static inline void arch_flush_lazy_cpu_mode(void) static inline void arch_flush_lazy_cpu_mode(void)
{ {
PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_FLUSH);
} }
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void) static inline void arch_enter_lazy_mmu_mode(void)
{ {
PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_MMU); PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_MMU);
} }
static inline void arch_leave_lazy_mmu_mode(void) static inline void arch_leave_lazy_mmu_mode(void)
{ {
PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_NONE);
} }
static inline void arch_flush_lazy_mmu_mode(void) static inline void arch_flush_lazy_mmu_mode(void)
{ {
PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); PVOP_VCALL1(pv_misc_ops.set_lazy_mode, PARAVIRT_LAZY_FLUSH);
} }
void _paravirt_nop(void); void _paravirt_nop(void);
...@@ -957,7 +1008,7 @@ static inline unsigned long __raw_local_save_flags(void) ...@@ -957,7 +1008,7 @@ static inline unsigned long __raw_local_save_flags(void)
PARAVIRT_CALL PARAVIRT_CALL
"popl %%edx; popl %%ecx") "popl %%edx; popl %%ecx")
: "=a"(f) : "=a"(f)
: paravirt_type(save_fl), : paravirt_type(pv_irq_ops.save_fl),
paravirt_clobber(CLBR_EAX) paravirt_clobber(CLBR_EAX)
: "memory", "cc"); : "memory", "cc");
return f; return f;
...@@ -970,7 +1021,7 @@ static inline void raw_local_irq_restore(unsigned long f) ...@@ -970,7 +1021,7 @@ static inline void raw_local_irq_restore(unsigned long f)
"popl %%edx; popl %%ecx") "popl %%edx; popl %%ecx")
: "=a"(f) : "=a"(f)
: "0"(f), : "0"(f),
paravirt_type(restore_fl), paravirt_type(pv_irq_ops.restore_fl),
paravirt_clobber(CLBR_EAX) paravirt_clobber(CLBR_EAX)
: "memory", "cc"); : "memory", "cc");
} }
...@@ -981,7 +1032,7 @@ static inline void raw_local_irq_disable(void) ...@@ -981,7 +1032,7 @@ static inline void raw_local_irq_disable(void)
PARAVIRT_CALL PARAVIRT_CALL
"popl %%edx; popl %%ecx") "popl %%edx; popl %%ecx")
: :
: paravirt_type(irq_disable), : paravirt_type(pv_irq_ops.irq_disable),
paravirt_clobber(CLBR_EAX) paravirt_clobber(CLBR_EAX)
: "memory", "eax", "cc"); : "memory", "eax", "cc");
} }
...@@ -992,7 +1043,7 @@ static inline void raw_local_irq_enable(void) ...@@ -992,7 +1043,7 @@ static inline void raw_local_irq_enable(void)
PARAVIRT_CALL PARAVIRT_CALL
"popl %%edx; popl %%ecx") "popl %%edx; popl %%ecx")
: :
: paravirt_type(irq_enable), : paravirt_type(pv_irq_ops.irq_enable),
paravirt_clobber(CLBR_EAX) paravirt_clobber(CLBR_EAX)
: "memory", "eax", "cc"); : "memory", "eax", "cc");
} }
...@@ -1008,21 +1059,23 @@ static inline unsigned long __raw_local_irq_save(void) ...@@ -1008,21 +1059,23 @@ static inline unsigned long __raw_local_irq_save(void)
#define CLI_STRING \ #define CLI_STRING \
_paravirt_alt("pushl %%ecx; pushl %%edx;" \ _paravirt_alt("pushl %%ecx; pushl %%edx;" \
"call *paravirt_ops+%c[paravirt_cli_type]*4;" \ "call *%[paravirt_cli_opptr];" \
"popl %%edx; popl %%ecx", \ "popl %%edx; popl %%ecx", \
"%c[paravirt_cli_type]", "%c[paravirt_clobber]") "%c[paravirt_cli_type]", "%c[paravirt_clobber]")
#define STI_STRING \ #define STI_STRING \
_paravirt_alt("pushl %%ecx; pushl %%edx;" \ _paravirt_alt("pushl %%ecx; pushl %%edx;" \
"call *paravirt_ops+%c[paravirt_sti_type]*4;" \ "call *%[paravirt_sti_opptr];" \
"popl %%edx; popl %%ecx", \ "popl %%edx; popl %%ecx", \
"%c[paravirt_sti_type]", "%c[paravirt_clobber]") "%c[paravirt_sti_type]", "%c[paravirt_clobber]")
#define CLI_STI_CLOBBERS , "%eax" #define CLI_STI_CLOBBERS , "%eax"
#define CLI_STI_INPUT_ARGS \ #define CLI_STI_INPUT_ARGS \
, \ , \
[paravirt_cli_type] "i" (PARAVIRT_PATCH(irq_disable)), \ [paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)), \
[paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ [paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable), \
[paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)), \
[paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable), \
paravirt_clobber(CLBR_EAX) paravirt_clobber(CLBR_EAX)
/* Make sure as little as possible of this mess escapes. */ /* Make sure as little as possible of this mess escapes. */
...@@ -1042,7 +1095,7 @@ static inline unsigned long __raw_local_irq_save(void) ...@@ -1042,7 +1095,7 @@ static inline unsigned long __raw_local_irq_save(void)
#else /* __ASSEMBLY__ */ #else /* __ASSEMBLY__ */
#define PARA_PATCH(off) ((off) / 4) #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) \ #define PARA_SITE(ptype, clobbers, ops) \
771:; \ 771:; \
...@@ -1055,29 +1108,29 @@ static inline unsigned long __raw_local_irq_save(void) ...@@ -1055,29 +1108,29 @@ static inline unsigned long __raw_local_irq_save(void)
.short clobbers; \ .short clobbers; \
.popsection .popsection
#define INTERRUPT_RETURN \ #define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_NONE, \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
jmp *%cs:paravirt_ops+PARAVIRT_iret) jmp *%cs:pv_cpu_ops+PV_CPU_iret)
#define DISABLE_INTERRUPTS(clobbers) \ #define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
pushl %eax; pushl %ecx; pushl %edx; \ pushl %eax; pushl %ecx; pushl %edx; \
call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \
popl %edx; popl %ecx; popl %eax) \ popl %edx; popl %ecx; popl %eax) \
#define ENABLE_INTERRUPTS(clobbers) \ #define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
pushl %eax; pushl %ecx; pushl %edx; \ pushl %eax; pushl %ecx; pushl %edx; \
call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \
popl %edx; popl %ecx; popl %eax) popl %edx; popl %ecx; popl %eax)
#define ENABLE_INTERRUPTS_SYSEXIT \ #define ENABLE_INTERRUPTS_SYSEXIT \
PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_NONE, \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\
jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)
#define GET_CR0_INTO_EAX \ #define GET_CR0_INTO_EAX \
push %ecx; push %edx; \ push %ecx; push %edx; \
call *paravirt_ops+PARAVIRT_read_cr0; \ call *pv_cpu_ops+PV_CPU_read_cr0; \
pop %edx; pop %ecx pop %edx; pop %ecx
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define _I386_PGTABLE_3LEVEL_DEFS_H #define _I386_PGTABLE_3LEVEL_DEFS_H
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd) #define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd)
#else #else
#define SHARED_KERNEL_PMD 1 #define SHARED_KERNEL_PMD 1
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment