aboutsummaryrefslogtreecommitdiff
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig17
-rw-r--r--arch/i386/kernel/apic.c11
-rw-r--r--arch/i386/kernel/hpet.c80
-rw-r--r--arch/i386/kernel/i8253.c2
-rw-r--r--arch/i386/kernel/paravirt.c8
-rw-r--r--arch/i386/kernel/setup.c1
-rw-r--r--arch/i386/kernel/smpboot.c5
-rw-r--r--arch/i386/kernel/time.c15
-rw-r--r--arch/i386/kernel/tsc.c91
-rw-r--r--arch/i386/kernel/vmi.c210
-rw-r--r--arch/i386/kernel/vmitime.c59
11 files changed, 255 insertions, 244 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 2f7672545fe..cee4ff679d3 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -220,11 +220,11 @@ config PARAVIRT
config VMI
bool "VMI Paravirt-ops support"
- depends on PARAVIRT && !NO_HZ
- default y
+ depends on PARAVIRT
help
- VMI provides a paravirtualized interface to multiple hypervisors
- include VMware ESX server and Xen by connecting to a ROM module
+ VMI provides a paravirtualized interface to the VMware ESX server
+ (it could be used by other hypervisors in theory too, but is not
+ at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
config ACPI_SRAT
@@ -1287,12 +1287,3 @@ config X86_TRAMPOLINE
config KTIME_SCALAR
bool
default y
-
-config NO_IDLE_HZ
- bool
- depends on PARAVIRT
- default y
- help
- Switches the regular HZ timer off when the system is going idle.
- This helps a hypervisor detect that the Linux system is idle,
- reducing the overhead of idle systems.
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 7a2c9cbdb51..2383bcf18c5 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -493,8 +493,15 @@ void __init setup_boot_APIC_clock(void)
/* No broadcast on UP ! */
if (num_possible_cpus() == 1)
return;
- } else
- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ } else {
+ /*
+ * If nmi_watchdog is set to IO_APIC, we need the
+ * PIT/HPET going. Otherwise register lapic as a dummy
+ * device.
+ */
+ if (nmi_watchdog != NMI_IO_APIC)
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ }
/* Setup the lapic or request the broadcast */
setup_APIC_timer();
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index e1006b7acc9..f3ab61ee749 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -201,12 +201,30 @@ static int hpet_next_event(unsigned long delta,
}
/*
+ * Clock source related code
+ */
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)hpet_readl(HPET_COUNTER);
+}
+
+static struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = HPET_MASK,
+ .shift = HPET_SHIFT,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
* Try to setup the HPET timer
*/
int __init hpet_enable(void)
{
unsigned long id;
uint64_t hpet_freq;
+ u64 tmp;
if (!is_hpet_capable())
return 0;
@@ -253,6 +271,25 @@ int __init hpet_enable(void)
/* Start the counter */
hpet_start_counter();
+ /* Initialize and register HPET clocksource
+ *
+ * hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * approximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ clocksource_register(&clocksource_hpet);
+
+
if (id & HPET_ID_LEGSUP) {
hpet_enable_int();
hpet_reserve_platform_timers(id);
@@ -273,49 +310,6 @@ out_nohpet:
return 0;
}
-/*
- * Clock source related code
- */
-static cycle_t read_hpet(void)
-{
- return (cycle_t)hpet_readl(HPET_COUNTER);
-}
-
-static struct clocksource clocksource_hpet = {
- .name = "hpet",
- .rating = 250,
- .read = read_hpet,
- .mask = HPET_MASK,
- .shift = HPET_SHIFT,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int __init init_hpet_clocksource(void)
-{
- u64 tmp;
-
- if (!hpet_virt_address)
- return -ENODEV;
-
- /*
- * hpet period is in femto seconds per cycle
- * so we need to convert this to ns/cyc units
- * aproximated by mult/2^shift
- *
- * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
- * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
- * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
- * (fsec/cyc << shift)/1000000 = mult
- * (hpet_period << shift)/FSEC_PER_NSEC = mult
- */
- tmp = (u64)hpet_period << HPET_SHIFT;
- do_div(tmp, FSEC_PER_NSEC);
- clocksource_hpet.mult = (u32)tmp;
-
- return clocksource_register(&clocksource_hpet);
-}
-
-module_init(init_hpet_clocksource);
#ifdef CONFIG_HPET_EMULATE_RTC
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index a6bc7bb3883..5cbb776b308 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -195,4 +195,4 @@ static int __init init_pit_clocksource(void)
clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
return clocksource_register(&clocksource_pit);
}
-module_init(init_pit_clocksource);
+arch_initcall(init_pit_clocksource);
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index c156ecfa387..2ec331e03fa 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -32,6 +32,7 @@
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
+#include <asm/timer.h>
/* nop stub */
static void native_nop(void)
@@ -493,7 +494,7 @@ struct paravirt_ops paravirt_ops = {
.memory_setup = machine_specific_memory_setup,
.get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock,
- .time_init = time_init_hook,
+ .time_init = hpet_time_init,
.init_IRQ = native_init_IRQ,
.cpuid = native_cpuid,
@@ -520,6 +521,8 @@ struct paravirt_ops paravirt_ops = {
.write_msr = native_write_msr,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
+ .get_scheduled_cycles = native_read_tsc,
+ .get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
@@ -535,7 +538,6 @@ struct paravirt_ops paravirt_ops = {
.set_iopl_mask = native_set_iopl_mask,
.io_delay = native_io_delay,
- .const_udelay = __const_udelay,
#ifdef CONFIG_X86_LOCAL_APIC
.apic_write = native_apic_write,
@@ -550,6 +552,8 @@ struct paravirt_ops paravirt_ops = {
.flush_tlb_kernel = native_flush_tlb_global,
.flush_tlb_single = native_flush_tlb_single,
+ .map_pt_hook = (void *)native_nop,
+
.alloc_pt = (void *)native_nop,
.alloc_pd = (void *)native_nop,
.alloc_pd_clone = (void *)native_nop,
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 122623dcc6e..698c24fe482 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -657,5 +657,4 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
- tsc_init();
}
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 48bfcaa13ec..9b0dd2744c8 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,11 +33,6 @@
* Dave Jones : Report invalid combinations of Athlon CPUs.
* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
-
-/* SMP boot always wants to use real time delay to allow sufficient time for
- * the APs to come online */
-#define USE_REAL_TIME_DELAY
-
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a5350059557..94e5cb09110 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -262,14 +262,23 @@ void notify_arch_cmos_timer(void)
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
-static void __init hpet_time_init(void)
+void __init hpet_time_init(void)
{
if (!hpet_enable())
setup_pit_timer();
- do_time_init();
+ time_init_hook();
}
+/*
+ * This is called directly from init code; we must delay timer setup in the
+ * HPET case as we can't make the decision to turn on HPET this early in the
+ * boot process.
+ *
+ * The chosen time_init function will usually be hpet_time_init, above, but
+ * in the case of virtual hardware, an alternative function may be substituted.
+ */
void __init time_init(void)
{
- late_time_init = hpet_time_init;
+ tsc_init();
+ late_time_init = choose_time_init();
}
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 3082a418635..875d8a6ecc0 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -14,6 +14,7 @@
#include <asm/delay.h>
#include <asm/tsc.h>
#include <asm/io.h>
+#include <asm/timer.h>
#include "mach_timer.h"
@@ -102,9 +103,6 @@ unsigned long long sched_clock(void)
{
unsigned long long this_offset;
- if (unlikely(custom_sched_clock))
- return (*custom_sched_clock)();
-
/*
* Fall back to jiffies if there's no TSC available:
*/
@@ -113,13 +111,13 @@ unsigned long long sched_clock(void)
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
/* read the Time Stamp Counter: */
- rdtscll(this_offset);
+ get_scheduled_cycles(this_offset);
/* return the value in ns */
return cycles_2_ns(this_offset);
}
-static unsigned long calculate_cpu_khz(void)
+unsigned long native_calculate_cpu_khz(void)
{
unsigned long long start, end;
unsigned long count;
@@ -186,34 +184,6 @@ int recalibrate_cpu_khz(void)
EXPORT_SYMBOL(recalibrate_cpu_khz);
-void __init tsc_init(void)
-{
- if (!cpu_has_tsc || tsc_disable)
- goto out_no_tsc;
-
- cpu_khz = calculate_cpu_khz();
- tsc_khz = cpu_khz;
-
- if (!cpu_khz)
- goto out_no_tsc;
-
- printk("Detected %lu.%03lu MHz processor.\n",
- (unsigned long)cpu_khz / 1000,
- (unsigned long)cpu_khz % 1000);
-
- set_cyc2ns_scale(cpu_khz);
- use_tsc_delay();
- return;
-
-out_no_tsc:
- /*
- * Set the tsc_disable flag if there's no TSC support, this
- * makes it a fast flag for the kernel to see whether it
- * should be using the TSC.
- */
- tsc_disable = 1;
-}
-
#ifdef CONFIG_CPU_FREQ
/*
@@ -383,28 +353,47 @@ static void __init check_geode_tsc_reliable(void)
static inline void check_geode_tsc_reliable(void) { }
#endif
-static int __init init_tsc_clocksource(void)
+
+void __init tsc_init(void)
{
+ if (!cpu_has_tsc || tsc_disable)
+ goto out_no_tsc;
- if (cpu_has_tsc && tsc_khz && !tsc_disable) {
- /* check blacklist */
- dmi_check_system(bad_tsc_dmi_table);
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
- unsynchronized_tsc();
- check_geode_tsc_reliable();
- current_tsc_khz = tsc_khz;
- clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
- clocksource_tsc.shift);
- /* lower the rating if we already know its unstable: */
- if (check_tsc_unstable()) {
- clocksource_tsc.rating = 0;
- clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
- }
+ if (!cpu_khz)
+ goto out_no_tsc;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ (unsigned long)cpu_khz / 1000,
+ (unsigned long)cpu_khz % 1000);
+
+ set_cyc2ns_scale(cpu_khz);
+ use_tsc_delay();
- return clocksource_register(&clocksource_tsc);
+ /* Check and install the TSC clocksource */
+ dmi_check_system(bad_tsc_dmi_table);
+
+ unsynchronized_tsc();
+ check_geode_tsc_reliable();
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ clocksource_tsc.shift);
+ /* lower the rating if we already know it's unstable: */
+ if (check_tsc_unstable()) {
+ clocksource_tsc.rating = 0;
+ clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
}
+ clocksource_register(&clocksource_tsc);
- return 0;
-}
+ return;
-module_init(init_tsc_clocksource);
+out_no_tsc:
+ /*
+ * Set the tsc_disable flag if there's no TSC support, this
+ * makes it a fast flag for the kernel to see whether it
+ * should be using the TSC.
+ */
+ tsc_disable = 1;
+}
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index bb5a7abf949..fbf45fa0832 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -35,6 +35,7 @@
#include <asm/processor.h>
#include <asm/timer.h>
#include <asm/vmi_time.h>
+#include <asm/kmap_types.h>
/* Convenient for calling VMI functions indirectly in the ROM */
typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -48,12 +49,13 @@ typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int);
static struct vrom_header *vmi_rom;
static int license_gplok;
-static int disable_nodelay;
static int disable_pge;
static int disable_pse;
static int disable_sep;
static int disable_tsc;
static int disable_mtrr;
+static int disable_noidle;
+static int disable_vmi_timer;
/* Cached VMI operations */
struct {
@@ -255,7 +257,6 @@ static void vmi_nop(void)
}
/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
-#ifdef CONFIG_NO_IDLE_HZ
static fastcall void vmi_safe_halt(void)
{
int idle = vmi_stop_hz_timer();
@@ -266,7 +267,6 @@ static fastcall void vmi_safe_halt(void)
local_irq_enable();
}
}
-#endif
#ifdef CONFIG_DEBUG_PAGE_TYPE
@@ -371,6 +371,24 @@ static void vmi_check_page_type(u32 pfn, int type)
#define vmi_check_page_type(p,t) do { } while (0)
#endif
+static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn)
+{
+ /*
+ * Internally, the VMI ROM must map virtual addresses to physical
+ * addresses for processing MMU updates. By the time MMU updates
+ * are issued, this information is typically already lost.
+ * Fortunately, the VMI provides a cache of mapping slots for active
+ * page tables.
+ *
+ * We use slot zero for the linear mapping of physical memory, and
+ * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1.
+ *
+ * args: SLOT VA COUNT PFN
+ */
+ BUG_ON(type != KM_PTE0 && type != KM_PTE1);
+ vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn);
+}
+
static void vmi_allocate_pt(u32 pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
@@ -508,13 +526,14 @@ void vmi_pmd_clear(pmd_t *pmd)
#endif
#ifdef CONFIG_SMP
-struct vmi_ap_state ap;
extern void setup_pda(void);
-static void __init /* XXX cpu hotplug */
+static void __devinit
vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
unsigned long start_esp)
{
+ struct vmi_ap_state ap;
+
/* Default everything to zero. This is fine for most GPRs. */
memset(&ap, 0, sizeof(struct vmi_ap_state));
@@ -553,7 +572,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
/* Protected mode, paging, AM, WP, NE, MP. */
ap.cr0 = 0x80050023;
ap.cr4 = mmu_cr4_features;
- vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid);
+ vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid);
}
#endif
@@ -645,12 +664,12 @@ static inline int __init probe_vmi_rom(void)
void vmi_bringup(void)
{
/* We must establish the lowmem mapping for MMU ops to work */
- if (vmi_rom)
+ if (vmi_ops.set_linear_mapping)
vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
}
/*
- * Return a pointer to the VMI function or a NOP stub
+ * Return a pointer to a VMI function or NULL if unimplemented
*/
static void *vmi_get_function(int vmicall)
{
@@ -661,12 +680,13 @@ static void *vmi_get_function(int vmicall)
if (rel->type == VMI_RELOCATION_CALL_REL)
return (void *)rel->eip;
else
- return (void *)vmi_nop;
+ return NULL;
}
/*
* Helper macro for making the VMI paravirt-ops fill code readable.
- * For unimplemented operations, fall back to default.
+ * For unimplemented operations, fall back to default, unless nop
+ * is returned by the ROM.
*/
#define para_fill(opname, vmicall) \
do { \
@@ -675,9 +695,29 @@ do { \
if (rel->type != VMI_RELOCATION_NONE) { \
BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \
paravirt_ops.opname = (void *)rel->eip; \
+ } else if (rel->type == VMI_RELOCATION_NOP) \
+ paravirt_ops.opname = (void *)vmi_nop; \
+} while (0)
+
+/*
+ * Helper macro for making the VMI paravirt-ops fill code readable.
+ * For cached operations which do not match the VMI ROM ABI and must
+ * go through a translation stub. Ignore NOPs, since it is not clear
+ * that a NOP VMI function corresponds to a NOP paravirt-op when the
+ * functions are not in 1-1 correspondence.
+ */
+#define para_wrap(opname, wrapper, cache, vmicall) \
+do { \
+ reloc = call_vrom_long_func(vmi_rom, get_reloc, \
+ VMI_CALL_##vmicall); \
+ BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
+ if (rel->type == VMI_RELOCATION_CALL_REL) { \
+ paravirt_ops.opname = wrapper; \
+ vmi_ops.cache = (void *)rel->eip; \
} \
} while (0)
+
/*
* Activate the VMI interface and switch into paravirtualized mode
*/
@@ -714,13 +754,8 @@ static inline int __init activate_vmi(void)
* rdpmc is not yet used in Linux
*/
- /* CPUID is special, so very special */
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops.cpuid = (void *)rel->eip;
- paravirt_ops.cpuid = vmi_cpuid;
- }
+ /* CPUID is special, so very special it gets wrapped like a present */
+ para_wrap(cpuid, vmi_cpuid, cpuid, CPUID);
para_fill(clts, CLTS);
para_fill(get_debugreg, GetDR);
@@ -737,38 +772,26 @@ static inline int __init activate_vmi(void)
para_fill(restore_fl, SetInterruptMask);
para_fill(irq_disable, DisableInterrupts);
para_fill(irq_enable, EnableInterrupts);
+
/* irq_save_disable !!! sheer pain */
patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
(char *)paravirt_ops.save_fl);
patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
(char *)paravirt_ops.irq_disable);
-#ifndef CONFIG_NO_IDLE_HZ
- para_fill(safe_halt, Halt);
-#else
- vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
- paravirt_ops.safe_halt = vmi_safe_halt;
-#endif
+
para_fill(wbinvd, WBINVD);
+ para_fill(read_tsc, RDTSC);
+
+ /* The following we emulate with trap and emulate for now */
/* paravirt_ops.read_msr = vmi_rdmsr */
/* paravirt_ops.write_msr = vmi_wrmsr */
- para_fill(read_tsc, RDTSC);
/* paravirt_ops.rdpmc = vmi_rdpmc */
- /* TR interface doesn't pass TR value */
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops.set_tr = (void *)rel->eip;
- paravirt_ops.load_tr_desc = vmi_set_tr;
- }
+ /* TR interface doesn't pass TR value, wrap */
+ para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR);
/* LDT is special, too */
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops._set_ldt = (void *)rel->eip;
- paravirt_ops.set_ldt = vmi_set_ldt;
- }
+ para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
para_fill(load_gdt, SetGDT);
para_fill(load_idt, SetIDT);
@@ -779,28 +802,14 @@ static inline int __init activate_vmi(void)
para_fill(write_ldt_entry, WriteLDTEntry);
para_fill(write_gdt_entry, WriteGDTEntry);
para_fill(write_idt_entry, WriteIDTEntry);
- reloc = call_vrom_long_func(vmi_rom, get_reloc,
- VMI_CALL_UpdateKernelStack);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops.set_kernel_stack = (void *)rel->eip;
- paravirt_ops.load_esp0 = vmi_load_esp0;
- }
-
+ para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
para_fill(set_iopl_mask, SetIOPLMask);
- paravirt_ops.io_delay = (void *)vmi_nop;
- if (!disable_nodelay) {
- paravirt_ops.const_udelay = (void *)vmi_nop;
- }
-
+ para_fill(io_delay, IODelay);
para_fill(set_lazy_mode, SetLazyMode);
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
- if (rel->type != VMI_RELOCATION_NONE) {
- vmi_ops.flush_tlb = (void *)rel->eip;
- paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
- paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
- }
+ /* user and kernel flush are just handled with different flags to FlushTLB */
+ para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB);
+ para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB);
para_fill(flush_tlb_single, InvalPage);
/*
@@ -815,27 +824,40 @@ static inline int __init activate_vmi(void)
vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
#endif
- vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
- vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
- vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
- paravirt_ops.alloc_pt = vmi_allocate_pt;
- paravirt_ops.alloc_pd = vmi_allocate_pd;
- paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
- paravirt_ops.release_pt = vmi_release_pt;
- paravirt_ops.release_pd = vmi_release_pd;
- paravirt_ops.set_pte = vmi_set_pte;
- paravirt_ops.set_pte_at = vmi_set_pte_at;
- paravirt_ops.set_pmd = vmi_set_pmd;
- paravirt_ops.pte_update = vmi_update_pte;
- paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+ if (vmi_ops.set_pte) {
+ paravirt_ops.set_pte = vmi_set_pte;
+ paravirt_ops.set_pte_at = vmi_set_pte_at;
+ paravirt_ops.set_pmd = vmi_set_pmd;
#ifdef CONFIG_X86_PAE
- paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
- paravirt_ops.set_pte_present = vmi_set_pte_present;
- paravirt_ops.set_pud = vmi_set_pud;
- paravirt_ops.pte_clear = vmi_pte_clear;
- paravirt_ops.pmd_clear = vmi_pmd_clear;
+ paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
+ paravirt_ops.set_pte_present = vmi_set_pte_present;
+ paravirt_ops.set_pud = vmi_set_pud;
+ paravirt_ops.pte_clear = vmi_pte_clear;
+ paravirt_ops.pmd_clear = vmi_pmd_clear;
#endif
+ }
+
+ if (vmi_ops.update_pte) {
+ paravirt_ops.pte_update = vmi_update_pte;
+ paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+ }
+
+ vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
+ if (vmi_ops.allocate_page) {
+ paravirt_ops.alloc_pt = vmi_allocate_pt;
+ paravirt_ops.alloc_pd = vmi_allocate_pd;
+ paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+ }
+
+ vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
+ if (vmi_ops.release_page) {
+ paravirt_ops.release_pt = vmi_release_pt;
+ paravirt_ops.release_pd = vmi_release_pd;
+ }
+ para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping,
+ SetLinearMapping);
+
/*
* These MUST always be patched. Don't support indirect jumps
* through these operations, as the VMI interface may use either
@@ -847,21 +869,20 @@ static inline int __init activate_vmi(void)
paravirt_ops.iret = (void *)0xbadbab0;
#ifdef CONFIG_SMP
- paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
- vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
+ para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
- paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
- paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
+ para_fill(apic_read, APICRead);
+ para_fill(apic_write, APICWrite);
+ para_fill(apic_write_atomic, APICWrite);
#endif
/*
* Check for VMI timer functionality by probing for a cycle frequency method
*/
reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
- if (rel->type != VMI_RELOCATION_NONE) {
+ if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
vmi_timer_ops.get_cycle_counter =
vmi_get_function(VMI_CALL_GetCycleCounter);
@@ -879,9 +900,22 @@ static inline int __init activate_vmi(void)
paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
#endif
- custom_sched_clock = vmi_sched_clock;
+ paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+ paravirt_ops.get_cpu_khz = vmi_cpu_khz;
+
+ /* We have true wallclock functions; disable CMOS clock sync */
+ no_sync_cmos_clock = 1;
+ } else {
+ disable_noidle = 1;
+ disable_vmi_timer = 1;
}
+ /* No idle HZ mode only works if VMI timer and no idle are enabled */
+ if (disable_noidle || disable_vmi_timer)
+ para_fill(safe_halt, Halt);
+ else
+ para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
+
/*
* Alternative instruction rewriting doesn't happen soon enough
* to convert VMI_IRET to a call instead of a jump; so we have
@@ -914,7 +948,9 @@ void __init vmi_init(void)
local_irq_save(flags);
activate_vmi();
-#ifdef CONFIG_SMP
+
+#ifdef CONFIG_X86_IO_APIC
+ /* This is virtual hardware; timer routing is wired correctly */
no_timer_check = 1;
#endif
local_irq_restore(flags & X86_EFLAGS_IF);
@@ -925,9 +961,7 @@ static int __init parse_vmi(char *arg)
if (!arg)
return -EINVAL;
- if (!strcmp(arg, "disable_nodelay"))
- disable_nodelay = 1;
- else if (!strcmp(arg, "disable_pge")) {
+ if (!strcmp(arg, "disable_pge")) {
clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
disable_pge = 1;
} else if (!strcmp(arg, "disable_pse")) {
@@ -942,7 +976,11 @@ static int __init parse_vmi(char *arg)
} else if (!strcmp(arg, "disable_mtrr")) {
clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
disable_mtrr = 1;
- }
+ } else if (!strcmp(arg, "disable_timer")) {
+ disable_vmi_timer = 1;
+ disable_noidle = 1;
+ } else if (!strcmp(arg, "disable_noidle"))
+ disable_noidle = 1;
return 0;
}
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 76d2adcae5a..8dc72d57566 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c
@@ -153,13 +153,6 @@ static void vmi_get_wallclock_ts(struct timespec *ts)
ts->tv_sec = wallclock;
}
-static void update_xtime_from_wallclock(void)
-{
- struct timespec ts;
- vmi_get_wallclock_ts(&ts);
- do_settimeofday(&ts);
-}
-
unsigned long vmi_get_wallclock(void)
{
struct timespec ts;
@@ -172,11 +165,20 @@ int vmi_set_wallclock(unsigned long now)
return -1;
}
-unsigned long long vmi_sched_clock(void)
+unsigned long long vmi_get_sched_cycles(void)
{
return read_available_cycles();
}
+unsigned long vmi_cpu_khz(void)
+{
+ unsigned long long khz;
+
+ khz = vmi_timer_ops.get_cycle_frequency();
+ (void)do_div(khz, 1000);
+ return khz;
+}
+
void __init vmi_time_init(void)
{
unsigned long long cycles_per_sec, cycles_per_msec;
@@ -188,25 +190,16 @@ void __init vmi_time_init(void)
set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
#endif
- no_sync_cmos_clock = 1;
-
- vmi_get_wallclock_ts(&xtime);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
real_cycles_accounted_system = read_real_cycles();
- update_xtime_from_wallclock();
per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
-
cycles_per_jiffy = cycles_per_sec;
(void)do_div(cycles_per_jiffy, HZ);
cycles_per_alarm = cycles_per_sec;
(void)do_div(cycles_per_alarm, alarm_hz);
cycles_per_msec = cycles_per_sec;
(void)do_div(cycles_per_msec, 1000);
- cpu_khz = cycles_per_msec;
printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
"cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
@@ -250,7 +243,7 @@ void __init vmi_timer_setup_boot_alarm(void)
/* Initialize the time accounting variables for an AP on an SMP system.
* Also, set the local alarm for the AP. */
-void __init vmi_timer_setup_secondary_alarm(void)
+void __devinit vmi_timer_setup_secondary_alarm(void)
{
int cpu = smp_processor_id();
@@ -276,16 +269,13 @@ static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
while (cycles_not_accounted >= cycles_per_jiffy) {
- /* systems wide jiffies and wallclock. */
+ /* system-wide jiffies. */
do_timer(1);
cycles_not_accounted -= cycles_per_jiffy;
real_cycles_accounted_system += cycles_per_jiffy;
}
- if (vmi_timer_ops.wallclock_updated())
- update_xtime_from_wallclock();
-
write_sequnlock(&xtime_lock);
}
@@ -380,7 +370,6 @@ int vmi_stop_hz_timer(void)
unsigned long seq, next;
unsigned long long real_cycles_expiry;
int cpu = smp_processor_id();
- int idle;
BUG_ON(!irqs_disabled());
if (sysctl_hz_timer != 0)
@@ -388,13 +377,13 @@ int vmi_stop_hz_timer(void)
cpu_set(cpu, nohz_cpu_mask);
smp_mb();
+
if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
- (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
+ (next = next_timer_interrupt(),
+ time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
cpu_clear(cpu, nohz_cpu_mask);
- next = jiffies;
- idle = 0;
- } else
- idle = 1;
+ return 0;
+ }
/* Convert jiffies to the real cycle counter. */
do {
@@ -404,17 +393,13 @@ int vmi_stop_hz_timer(void)
} while (read_seqretry(&xtime_lock, seq));
/* This cpu is going idle. Disable the periodic alarm. */
- if (idle) {
- vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
- per_cpu(idle_start_jiffies, cpu) = jiffies;
- }
-
+ vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+ per_cpu(idle_start_jiffies, cpu) = jiffies;
/* Set the real time alarm to expire at the next event. */
vmi_timer_ops.set_alarm(
- VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
- real_cycles_expiry, 0);
-
- return idle;
+ VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
+ real_cycles_expiry, 0);
+ return 1;
}
static void vmi_reenable_hz_timer(int cpu)