Commit 7069ed67 authored by Marcelo Tosatti's avatar Marcelo Tosatti

x86: kvmclock: allocate pvclock shared memory area

We want to expose the pvclock shared memory areas, which
the hypervisor periodically updates, to userspace.

For a linear mapping from userspace, it is necessary that
entire page sized regions are used for array of pvclock
structures.

There is no such guarantee with per cpu areas, therefore move
to memblock_alloc based allocation.
Acked-by: default avatarGlauber Costa <glommer@parallels.com>
Signed-off-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
parent 78c0337a
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/memblock.h>
#include <asm/x86_init.h> #include <asm/x86_init.h>
#include <asm/reboot.h> #include <asm/reboot.h>
...@@ -39,7 +40,11 @@ static int parse_no_kvmclock(char *arg) ...@@ -39,7 +40,11 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock); early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */ /* The hypervisor will put information about time periodically here */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); struct pvclock_aligned_vcpu_time_info {
struct pvclock_vcpu_time_info clock;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
static struct pvclock_aligned_vcpu_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock; static struct pvclock_wall_clock wall_clock;
/* /*
...@@ -52,15 +57,20 @@ static unsigned long kvm_get_wallclock(void) ...@@ -52,15 +57,20 @@ static unsigned long kvm_get_wallclock(void)
struct pvclock_vcpu_time_info *vcpu_time; struct pvclock_vcpu_time_info *vcpu_time;
struct timespec ts; struct timespec ts;
int low, high; int low, high;
int cpu;
low = (int)__pa_symbol(&wall_clock); low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32); high = ((u64)__pa_symbol(&wall_clock) >> 32);
native_write_msr(msr_kvm_wall_clock, low, high); native_write_msr(msr_kvm_wall_clock, low, high);
vcpu_time = &get_cpu_var(hv_clock); preempt_disable();
cpu = smp_processor_id();
vcpu_time = &hv_clock[cpu].clock;
pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
put_cpu_var(hv_clock);
preempt_enable();
return ts.tv_sec; return ts.tv_sec;
} }
...@@ -74,9 +84,11 @@ static cycle_t kvm_clock_read(void) ...@@ -74,9 +84,11 @@ static cycle_t kvm_clock_read(void)
{ {
struct pvclock_vcpu_time_info *src; struct pvclock_vcpu_time_info *src;
cycle_t ret; cycle_t ret;
int cpu;
preempt_disable_notrace(); preempt_disable_notrace();
src = &__get_cpu_var(hv_clock); cpu = smp_processor_id();
src = &hv_clock[cpu].clock;
ret = pvclock_clocksource_read(src); ret = pvclock_clocksource_read(src);
preempt_enable_notrace(); preempt_enable_notrace();
return ret; return ret;
...@@ -99,8 +111,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs) ...@@ -99,8 +111,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
static unsigned long kvm_get_tsc_khz(void) static unsigned long kvm_get_tsc_khz(void)
{ {
struct pvclock_vcpu_time_info *src; struct pvclock_vcpu_time_info *src;
src = &per_cpu(hv_clock, 0); int cpu;
return pvclock_tsc_khz(src); unsigned long tsc_khz;
preempt_disable();
cpu = smp_processor_id();
src = &hv_clock[cpu].clock;
tsc_khz = pvclock_tsc_khz(src);
preempt_enable();
return tsc_khz;
} }
static void kvm_get_preset_lpj(void) static void kvm_get_preset_lpj(void)
...@@ -119,10 +138,14 @@ bool kvm_check_and_clear_guest_paused(void) ...@@ -119,10 +138,14 @@ bool kvm_check_and_clear_guest_paused(void)
{ {
bool ret = false; bool ret = false;
struct pvclock_vcpu_time_info *src; struct pvclock_vcpu_time_info *src;
int cpu = smp_processor_id();
if (!hv_clock)
return ret;
src = &__get_cpu_var(hv_clock); src = &hv_clock[cpu].clock;
if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); src->flags &= ~PVCLOCK_GUEST_STOPPED;
ret = true; ret = true;
} }
...@@ -141,9 +164,10 @@ int kvm_register_clock(char *txt) ...@@ -141,9 +164,10 @@ int kvm_register_clock(char *txt)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
int low, high, ret; int low, high, ret;
struct pvclock_vcpu_time_info *src = &hv_clock[cpu].clock;
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; low = (int)__pa(src) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); high = ((u64)__pa(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high); ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt); cpu, high, low, txt);
...@@ -197,6 +221,8 @@ static void kvm_shutdown(void) ...@@ -197,6 +221,8 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void) void __init kvmclock_init(void)
{ {
unsigned long mem;
if (!kvm_para_available()) if (!kvm_para_available())
return; return;
...@@ -209,8 +235,18 @@ void __init kvmclock_init(void) ...@@ -209,8 +235,18 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x", printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock); msr_kvm_system_time, msr_kvm_wall_clock);
if (kvm_register_clock("boot clock")) mem = memblock_alloc(sizeof(struct pvclock_aligned_vcpu_time_info) * NR_CPUS,
PAGE_SIZE);
if (!mem)
return; return;
hv_clock = __va(mem);
if (kvm_register_clock("boot clock")) {
hv_clock = NULL;
memblock_free(mem,
sizeof(struct pvclock_aligned_vcpu_time_info)*NR_CPUS);
return;
}
pv_time_ops.sched_clock = kvm_clock_read; pv_time_ops.sched_clock = kvm_clock_read;
x86_platform.calibrate_tsc = kvm_get_tsc_khz; x86_platform.calibrate_tsc = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.get_wallclock = kvm_get_wallclock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment