Diffstat (limited to 'arch/i386')

-rw-r--r--  arch/i386/Kconfig           |  17
-rw-r--r--  arch/i386/kernel/apic.c     |  11
-rw-r--r--  arch/i386/kernel/hpet.c     |  80
-rw-r--r--  arch/i386/kernel/i8253.c    |   2
-rw-r--r--  arch/i386/kernel/paravirt.c |   8
-rw-r--r--  arch/i386/kernel/setup.c    |   1
-rw-r--r--  arch/i386/kernel/smpboot.c  |   5
-rw-r--r--  arch/i386/kernel/time.c     |  15
-rw-r--r--  arch/i386/kernel/tsc.c      |  91
-rw-r--r--  arch/i386/kernel/vmi.c      | 210
-rw-r--r--  arch/i386/kernel/vmitime.c  |  59

11 files changed, 255 insertions, 244 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 2f7672545fe..cee4ff679d3 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -220,11 +220,11 @@ config PARAVIRT config VMI bool "VMI Paravirt-ops support" - depends on PARAVIRT && !NO_HZ - default y + depends on PARAVIRT help - VMI provides a paravirtualized interface to multiple hypervisors - include VMware ESX server and Xen by connecting to a ROM module + VMI provides a paravirtualized interface to the VMware ESX server + (it could be used by other hypervisors in theory too, but is not + at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. config ACPI_SRAT @@ -1287,12 +1287,3 @@ config X86_TRAMPOLINE config KTIME_SCALAR bool default y - -config NO_IDLE_HZ - bool - depends on PARAVIRT - default y - help - Switches the regular HZ timer off when the system is going idle. - This helps a hypervisor detect that the Linux system is idle, - reducing the overhead of idle systems. diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 7a2c9cbdb51..2383bcf18c5 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -493,8 +493,15 @@ void __init setup_boot_APIC_clock(void) /* No broadcast on UP ! */ if (num_possible_cpus() == 1) return; - } else - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + } else { + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + } /* Setup the lapic or request the broadcast */ setup_APIC_timer(); diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index e1006b7acc9..f3ab61ee749 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c @@ -201,12 +201,30 @@ static int hpet_next_event(unsigned long delta, } /* + * Clock source related code + */ +static cycle_t read_hpet(void) +{ + return (cycle_t)hpet_readl(HPET_COUNTER); +} + +static struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .mask = HPET_MASK, + .shift = HPET_SHIFT, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* * Try to setup the HPET timer */ int __init hpet_enable(void) { unsigned long id; uint64_t hpet_freq; + u64 tmp; if (!is_hpet_capable()) return 0; @@ -253,6 +271,25 @@ int __init hpet_enable(void) /* Start the counter */ hpet_start_counter(); + /* Initialize and register HPET clocksource + * + * hpet period is in femto seconds per cycle + * so we need to convert this to ns/cyc units + * aproximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + clocksource_register(&clocksource_hpet); + + if (id & HPET_ID_LEGSUP) { hpet_enable_int(); hpet_reserve_platform_timers(id); @@ -273,49 +310,6 @@ out_nohpet: return 0; } -/* - * Clock source related code - */ -static cycle_t read_hpet(void) -{ - return (cycle_t)hpet_readl(HPET_COUNTER); -} - -static struct clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = HPET_MASK, - .shift = HPET_SHIFT, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static int __init init_hpet_clocksource(void) -{ - u64 tmp; - - if (!hpet_virt_address) - return 
-ENODEV; - - /* - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * aproximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult - */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; - - return clocksource_register(&clocksource_hpet); -} - -module_init(init_hpet_clocksource); #ifdef CONFIG_HPET_EMULATE_RTC diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c index a6bc7bb3883..5cbb776b308 100644 --- a/arch/i386/kernel/i8253.c +++ b/arch/i386/kernel/i8253.c @@ -195,4 +195,4 @@ static int __init init_pit_clocksource(void) clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); return clocksource_register(&clocksource_pit); } -module_init(init_pit_clocksource); +arch_initcall(init_pit_clocksource); diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index c156ecfa387..2ec331e03fa 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -32,6 +32,7 @@ #include <asm/fixmap.h> #include <asm/apic.h> #include <asm/tlbflush.h> +#include <asm/timer.h> /* nop stub */ static void native_nop(void) @@ -493,7 +494,7 @@ struct paravirt_ops paravirt_ops = { .memory_setup = machine_specific_memory_setup, .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, - .time_init = time_init_hook, + .time_init = hpet_time_init, .init_IRQ = native_init_IRQ, .cpuid = native_cpuid, @@ -520,6 +521,8 @@ struct paravirt_ops paravirt_ops = { .write_msr = native_write_msr, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, + .get_scheduled_cycles = native_read_tsc, + .get_cpu_khz = native_calculate_cpu_khz, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, @@ -535,7 +538,6 @@ struct paravirt_ops paravirt_ops = { .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, - .const_udelay = __const_udelay, #ifdef CONFIG_X86_LOCAL_APIC .apic_write = native_apic_write, @@ -550,6 +552,8 @@ struct paravirt_ops paravirt_ops = { .flush_tlb_kernel = native_flush_tlb_global, .flush_tlb_single = native_flush_tlb_single, + .map_pt_hook = (void *)native_nop, + .alloc_pt = (void *)native_nop, .alloc_pd = (void *)native_nop, .alloc_pd_clone = (void *)native_nop, diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 122623dcc6e..698c24fe482 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -657,5 +657,4 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif - tsc_init(); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 48bfcaa13ec..9b0dd2744c8 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -33,11 +33,6 @@ * Dave Jones : Report invalid combinations of Athlon CPUs. * Rusty Russell : Hacked into shape for new "hotplug" boot process. 
*/ - -/* SMP boot always wants to use real time delay to allow sufficient time for - * the APs to come online */ -#define USE_REAL_TIME_DELAY - #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a5350059557..94e5cb09110 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -262,14 +262,23 @@ void notify_arch_cmos_timer(void) extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ -static void __init hpet_time_init(void) +void __init hpet_time_init(void) { if (!hpet_enable()) setup_pit_timer(); - do_time_init(); + time_init_hook(); } +/* + * This is called directly from init code; we must delay timer setup in the + * HPET case as we can't make the decision to turn on HPET this early in the + * boot process. + * + * The chosen time_init function will usually be hpet_time_init, above, but + * in the case of virtual hardware, an alternative function may be substituted. + */ void __init time_init(void) { - late_time_init = hpet_time_init; + tsc_init(); + late_time_init = choose_time_init(); } diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 3082a418635..875d8a6ecc0 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -14,6 +14,7 @@ #include <asm/delay.h> #include <asm/tsc.h> #include <asm/io.h> +#include <asm/timer.h> #include "mach_timer.h" @@ -102,9 +103,6 @@ unsigned long long sched_clock(void) { unsigned long long this_offset; - if (unlikely(custom_sched_clock)) - return (*custom_sched_clock)(); - /* * Fall back to jiffies if there's no TSC available: */ @@ -113,13 +111,13 @@ unsigned long long sched_clock(void) return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); /* read the Time Stamp Counter: */ - rdtscll(this_offset); + get_scheduled_cycles(this_offset); /* return the value in ns */ return cycles_2_ns(this_offset); } -static unsigned long calculate_cpu_khz(void) +unsigned long native_calculate_cpu_khz(void) { unsigned long long start, end; unsigned long count; @@ -186,34 +184,6 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); -void __init tsc_init(void) -{ - if (!cpu_has_tsc || tsc_disable) - goto out_no_tsc; - - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - - if (!cpu_khz) - goto out_no_tsc; - - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); - - set_cyc2ns_scale(cpu_khz); - use_tsc_delay(); - return; - -out_no_tsc: - /* - * Set the tsc_disable flag if there's no TSC support, this - * makes it a fast flag for the kernel to see whether it - * should be using the TSC. 
- */ - tsc_disable = 1; -} - #ifdef CONFIG_CPU_FREQ /* @@ -383,28 +353,47 @@ static void __init check_geode_tsc_reliable(void) static inline void check_geode_tsc_reliable(void) { } #endif -static int __init init_tsc_clocksource(void) + +void __init tsc_init(void) { + if (!cpu_has_tsc || tsc_disable) + goto out_no_tsc; - if (cpu_has_tsc && tsc_khz && !tsc_disable) { - /* check blacklist */ - dmi_check_system(bad_tsc_dmi_table); + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; - unsynchronized_tsc(); - check_geode_tsc_reliable(); - current_tsc_khz = tsc_khz; - clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, - clocksource_tsc.shift); - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } + if (!cpu_khz) + goto out_no_tsc; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + set_cyc2ns_scale(cpu_khz); + use_tsc_delay(); - return clocksource_register(&clocksource_tsc); + /* Check and install the TSC clocksource */ + dmi_check_system(bad_tsc_dmi_table); + + unsynchronized_tsc(); + check_geode_tsc_reliable(); + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + /* lower the rating if we already know its unstable: */ + if (check_tsc_unstable()) { + clocksource_tsc.rating = 0; + clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; } + clocksource_register(&clocksource_tsc); - return 0; -} + return; -module_init(init_tsc_clocksource); +out_no_tsc: + /* + * Set the tsc_disable flag if there's no TSC support, this + * makes it a fast flag for the kernel to see whether it + * should be using the TSC. + */ + tsc_disable = 1; +} diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index bb5a7abf949..fbf45fa0832 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -35,6 +35,7 @@ #include <asm/processor.h> #include <asm/timer.h> #include <asm/vmi_time.h> +#include <asm/kmap_types.h> /* Convenient for calling VMI functions indirectly in the ROM */ typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); @@ -48,12 +49,13 @@ typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); static struct vrom_header *vmi_rom; static int license_gplok; -static int disable_nodelay; static int disable_pge; static int disable_pse; static int disable_sep; static int disable_tsc; static int disable_mtrr; +static int disable_noidle; +static int disable_vmi_timer; /* Cached VMI operations */ struct { @@ -255,7 +257,6 @@ static void vmi_nop(void) } /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ -#ifdef CONFIG_NO_IDLE_HZ static fastcall void vmi_safe_halt(void) { int idle = vmi_stop_hz_timer(); @@ -266,7 +267,6 @@ static fastcall void vmi_safe_halt(void) local_irq_enable(); } } -#endif #ifdef CONFIG_DEBUG_PAGE_TYPE @@ -371,6 +371,24 @@ static void vmi_check_page_type(u32 pfn, int type) #define vmi_check_page_type(p,t) do { } while (0) #endif +static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn) +{ + /* + * Internally, the VMI ROM must map virtual addresses to physical + * addresses for processing MMU updates. By the time MMU updates + * are issued, this information is typically already lost. + * Fortunately, the VMI provides a cache of mapping slots for active + * page tables. + * + * We use slot zero for the linear mapping of physical memory, and + * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1. 
+ * + * args: SLOT VA COUNT PFN + */ + BUG_ON(type != KM_PTE0 && type != KM_PTE1); + vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn); +} + static void vmi_allocate_pt(u32 pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); @@ -508,13 +526,14 @@ void vmi_pmd_clear(pmd_t *pmd) #endif #ifdef CONFIG_SMP -struct vmi_ap_state ap; extern void setup_pda(void); -static void __init /* XXX cpu hotplug */ +static void __devinit vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) { + struct vmi_ap_state ap; + /* Default everything to zero. This is fine for most GPRs. */ memset(&ap, 0, sizeof(struct vmi_ap_state)); @@ -553,7 +572,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, /* Protected mode, paging, AM, WP, NE, MP. */ ap.cr0 = 0x80050023; ap.cr4 = mmu_cr4_features; - vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid); + vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid); } #endif @@ -645,12 +664,12 @@ static inline int __init probe_vmi_rom(void) void vmi_bringup(void) { /* We must establish the lowmem mapping for MMU ops to work */ - if (vmi_rom) + if (vmi_ops.set_linear_mapping) vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); } /* - * Return a pointer to the VMI function or a NOP stub + * Return a pointer to a VMI function or NULL if unimplemented */ static void *vmi_get_function(int vmicall) { @@ -661,12 +680,13 @@ static void *vmi_get_function(int vmicall) if (rel->type == VMI_RELOCATION_CALL_REL) return (void *)rel->eip; else - return (void *)vmi_nop; + return NULL; } /* * Helper macro for making the VMI paravirt-ops fill code readable. - * For unimplemented operations, fall back to default. + * For unimplemented operations, fall back to default, unless nop + * is returned by the ROM. */ #define para_fill(opname, vmicall) \ do { \ @@ -675,9 +695,29 @@ do { \ if (rel->type != VMI_RELOCATION_NONE) { \ BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \ paravirt_ops.opname = (void *)rel->eip; \ + } else if (rel->type == VMI_RELOCATION_NOP) \ + paravirt_ops.opname = (void *)vmi_nop; \ +} while (0) + +/* + * Helper macro for making the VMI paravirt-ops fill code readable. + * For cached operations which do not match the VMI ROM ABI and must + * go through a tranlation stub. Ignore NOPs, since it is not clear + * a NOP * VMI function corresponds to a NOP paravirt-op when the + * functions are not in 1-1 correspondence. 
+ */ +#define para_wrap(opname, wrapper, cache, vmicall) \ +do { \ + reloc = call_vrom_long_func(vmi_rom, get_reloc, \ + VMI_CALL_##vmicall); \ + BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ + if (rel->type == VMI_RELOCATION_CALL_REL) { \ + paravirt_ops.opname = wrapper; \ + vmi_ops.cache = (void *)rel->eip; \ } \ } while (0) + /* * Activate the VMI interface and switch into paravirtualized mode */ @@ -714,13 +754,8 @@ static inline int __init activate_vmi(void) * rdpmc is not yet used in Linux */ - /* CPUID is special, so very special */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.cpuid = (void *)rel->eip; - paravirt_ops.cpuid = vmi_cpuid; - } + /* CPUID is special, so very special it gets wrapped like a present */ + para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); para_fill(clts, CLTS); para_fill(get_debugreg, GetDR); @@ -737,38 +772,26 @@ static inline int __init activate_vmi(void) para_fill(restore_fl, SetInterruptMask); para_fill(irq_disable, DisableInterrupts); para_fill(irq_enable, EnableInterrupts); + /* irq_save_disable !!! sheer pain */ patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], (char *)paravirt_ops.save_fl); patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE], (char *)paravirt_ops.irq_disable); -#ifndef CONFIG_NO_IDLE_HZ - para_fill(safe_halt, Halt); -#else - vmi_ops.halt = vmi_get_function(VMI_CALL_Halt); - paravirt_ops.safe_halt = vmi_safe_halt; -#endif + para_fill(wbinvd, WBINVD); + para_fill(read_tsc, RDTSC); + + /* The following we emulate with trap and emulate for now */ /* paravirt_ops.read_msr = vmi_rdmsr */ /* paravirt_ops.write_msr = vmi_wrmsr */ - para_fill(read_tsc, RDTSC); /* paravirt_ops.rdpmc = vmi_rdpmc */ - /* TR interface doesn't pass TR value */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.set_tr = (void *)rel->eip; - paravirt_ops.load_tr_desc = vmi_set_tr; - } + /* TR interface doesn't pass TR value, wrap */ + para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); /* LDT is special, too */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops._set_ldt = (void *)rel->eip; - paravirt_ops.set_ldt = vmi_set_ldt; - } + para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); para_fill(load_gdt, SetGDT); para_fill(load_idt, SetIDT); @@ -779,28 +802,14 @@ static inline int __init activate_vmi(void) para_fill(write_ldt_entry, WriteLDTEntry); para_fill(write_gdt_entry, WriteGDTEntry); para_fill(write_idt_entry, WriteIDTEntry); - reloc = call_vrom_long_func(vmi_rom, get_reloc, - VMI_CALL_UpdateKernelStack); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.set_kernel_stack = (void *)rel->eip; - paravirt_ops.load_esp0 = vmi_load_esp0; - } - + para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); para_fill(set_iopl_mask, SetIOPLMask); - paravirt_ops.io_delay = (void *)vmi_nop; - if (!disable_nodelay) { - paravirt_ops.const_udelay = (void *)vmi_nop; - } - + para_fill(io_delay, IODelay); para_fill(set_lazy_mode, SetLazyMode); - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB); - if (rel->type != VMI_RELOCATION_NONE) { - vmi_ops.flush_tlb = (void *)rel->eip; - paravirt_ops.flush_tlb_user = vmi_flush_tlb_user; - 
paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel; - } + /* user and kernel flush are just handled with different flags to FlushTLB */ + para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB); + para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB); para_fill(flush_tlb_single, InvalPage); /* @@ -815,27 +824,40 @@ static inline int __init activate_vmi(void) vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); #endif - vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); - vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); - vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); - paravirt_ops.alloc_pt = vmi_allocate_pt; - paravirt_ops.alloc_pd = vmi_allocate_pd; - paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; - paravirt_ops.release_pt = vmi_release_pt; - paravirt_ops.release_pd = vmi_release_pd; - paravirt_ops.set_pte = vmi_set_pte; - paravirt_ops.set_pte_at = vmi_set_pte_at; - paravirt_ops.set_pmd = vmi_set_pmd; - paravirt_ops.pte_update = vmi_update_pte; - paravirt_ops.pte_update_defer = vmi_update_pte_defer; + if (vmi_ops.set_pte) { + paravirt_ops.set_pte = vmi_set_pte; + paravirt_ops.set_pte_at = vmi_set_pte_at; + paravirt_ops.set_pmd = vmi_set_pmd; #ifdef CONFIG_X86_PAE - paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; - paravirt_ops.set_pte_present = vmi_set_pte_present; - paravirt_ops.set_pud = vmi_set_pud; - paravirt_ops.pte_clear = vmi_pte_clear; - paravirt_ops.pmd_clear = vmi_pmd_clear; + paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; + paravirt_ops.set_pte_present = vmi_set_pte_present; + paravirt_ops.set_pud = vmi_set_pud; + paravirt_ops.pte_clear = vmi_pte_clear; + paravirt_ops.pmd_clear = vmi_pmd_clear; #endif + } + + if (vmi_ops.update_pte) { + paravirt_ops.pte_update = vmi_update_pte; + paravirt_ops.pte_update_defer = vmi_update_pte_defer; + } + + vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); + if (vmi_ops.allocate_page) { + paravirt_ops.alloc_pt = vmi_allocate_pt; + paravirt_ops.alloc_pd = vmi_allocate_pd; + paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; + } + + vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); + if (vmi_ops.release_page) { + paravirt_ops.release_pt = vmi_release_pt; + paravirt_ops.release_pd = vmi_release_pd; + } + para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping, + SetLinearMapping); + /* * These MUST always be patched. 
Don't support indirect jumps * through these operations, as the VMI interface may use either @@ -847,21 +869,20 @@ static inline int __init activate_vmi(void) paravirt_ops.iret = (void *)0xbadbab0; #ifdef CONFIG_SMP - paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook; - vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState); + para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); #endif #ifdef CONFIG_X86_LOCAL_APIC - paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead); - paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite); - paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite); + para_fill(apic_read, APICRead); + para_fill(apic_write, APICWrite); + para_fill(apic_write_atomic, APICWrite); #endif /* * Check for VMI timer functionality by probing for a cycle frequency method */ reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); - if (rel->type != VMI_RELOCATION_NONE) { + if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) { vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; vmi_timer_ops.get_cycle_counter = vmi_get_function(VMI_CALL_GetCycleCounter); @@ -879,9 +900,22 @@ static inline int __init activate_vmi(void) paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; #endif - custom_sched_clock = vmi_sched_clock; + paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; + paravirt_ops.get_cpu_khz = vmi_cpu_khz; + + /* We have true wallclock functions; disable CMOS clock sync */ + no_sync_cmos_clock = 1; + } else { + disable_noidle = 1; + disable_vmi_timer = 1; } + /* No idle HZ mode only works if VMI timer and no idle is enabled */ + if (disable_noidle || disable_vmi_timer) + para_fill(safe_halt, Halt); + else + para_wrap(safe_halt, vmi_safe_halt, halt, Halt); + /* * Alternative instruction rewriting doesn't happen soon enough * to convert VMI_IRET to a call instead of a jump; so we have @@ -914,7 +948,9 @@ void __init vmi_init(void) local_irq_save(flags); activate_vmi(); -#ifdef CONFIG_SMP + +#ifdef CONFIG_X86_IO_APIC + /* This is virtual hardware; timer routing is wired correctly */ no_timer_check = 1; #endif local_irq_restore(flags & X86_EFLAGS_IF); @@ -925,9 +961,7 @@ static int __init parse_vmi(char *arg) if (!arg) return -EINVAL; - if (!strcmp(arg, "disable_nodelay")) - disable_nodelay = 1; - else if (!strcmp(arg, "disable_pge")) { + if (!strcmp(arg, "disable_pge")) { clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); disable_pge = 1; } else if (!strcmp(arg, "disable_pse")) { @@ -942,7 +976,11 @@ static int __init parse_vmi(char *arg) } else if (!strcmp(arg, "disable_mtrr")) { clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); disable_mtrr = 1; - } + } else if (!strcmp(arg, "disable_timer")) { + disable_vmi_timer = 1; + disable_noidle = 1; + } else if (!strcmp(arg, "disable_noidle")) + disable_noidle = 1; return 0; } diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c index 76d2adcae5a..8dc72d57566 100644 --- a/arch/i386/kernel/vmitime.c +++ b/arch/i386/kernel/vmitime.c @@ -153,13 +153,6 @@ static void vmi_get_wallclock_ts(struct timespec *ts) ts->tv_sec = wallclock; } -static void update_xtime_from_wallclock(void) -{ - struct timespec ts; - vmi_get_wallclock_ts(&ts); - do_settimeofday(&ts); -} - unsigned long vmi_get_wallclock(void) { struct timespec ts; @@ -172,11 +165,20 @@ int vmi_set_wallclock(unsigned long now) return -1; } -unsigned long 
long vmi_sched_clock(void) +unsigned long long vmi_get_sched_cycles(void) { return read_available_cycles(); } +unsigned long vmi_cpu_khz(void) +{ + unsigned long long khz; + + khz = vmi_timer_ops.get_cycle_frequency(); + (void)do_div(khz, 1000); + return khz; +} + void __init vmi_time_init(void) { unsigned long long cycles_per_sec, cycles_per_msec; @@ -188,25 +190,16 @@ void __init vmi_time_init(void) set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); #endif - no_sync_cmos_clock = 1; - - vmi_get_wallclock_ts(&xtime); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - real_cycles_accounted_system = read_real_cycles(); - update_xtime_from_wallclock(); per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); - cycles_per_jiffy = cycles_per_sec; (void)do_div(cycles_per_jiffy, HZ); cycles_per_alarm = cycles_per_sec; (void)do_div(cycles_per_alarm, alarm_hz); cycles_per_msec = cycles_per_sec; (void)do_div(cycles_per_msec, 1000); - cpu_khz = cycles_per_msec; printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, @@ -250,7 +243,7 @@ void __init vmi_timer_setup_boot_alarm(void) /* Initialize the time accounting variables for an AP on an SMP system. * Also, set the local alarm for the AP. */ -void __init vmi_timer_setup_secondary_alarm(void) +void __devinit vmi_timer_setup_secondary_alarm(void) { int cpu = smp_processor_id(); @@ -276,16 +269,13 @@ static void vmi_account_real_cycles(unsigned long long cur_real_cycles) cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; while (cycles_not_accounted >= cycles_per_jiffy) { - /* systems wide jiffies and wallclock. */ + /* systems wide jiffies. */ do_timer(1); cycles_not_accounted -= cycles_per_jiffy; real_cycles_accounted_system += cycles_per_jiffy; } - if (vmi_timer_ops.wallclock_updated()) - update_xtime_from_wallclock(); - write_sequnlock(&xtime_lock); } @@ -380,7 +370,6 @@ int vmi_stop_hz_timer(void) unsigned long seq, next; unsigned long long real_cycles_expiry; int cpu = smp_processor_id(); - int idle; BUG_ON(!irqs_disabled()); if (sysctl_hz_timer != 0) @@ -388,13 +377,13 @@ int vmi_stop_hz_timer(void) cpu_set(cpu, nohz_cpu_mask); smp_mb(); + if (rcu_needs_cpu(cpu) || local_softirq_pending() || - (next = next_timer_interrupt(), time_before_eq(next, jiffies))) { + (next = next_timer_interrupt(), + time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) { cpu_clear(cpu, nohz_cpu_mask); - next = jiffies; - idle = 0; - } else - idle = 1; + return 0; + } /* Convert jiffies to the real cycle counter. */ do { @@ -404,17 +393,13 @@ int vmi_stop_hz_timer(void) } while (read_seqretry(&xtime_lock, seq)); /* This cpu is going idle. Disable the periodic alarm. */ - if (idle) { - vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); - per_cpu(idle_start_jiffies, cpu) = jiffies; - } - + vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); + per_cpu(idle_start_jiffies, cpu) = jiffies; /* Set the real time alarm to expire at the next event. */ vmi_timer_ops.set_alarm( - VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, - real_cycles_expiry, 0); - - return idle; + VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, + real_cycles_expiry, 0); + return 1; } static void vmi_reenable_hz_timer(int cpu) |
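
Note on the HPET clocksource math: the hpet.c hunk folds the old init_hpet_clocksource() computation into hpet_enable(). The hardware reports its period in femtoseconds per cycle, and the clocksource core wants an ns-per-cycle factor approximated by mult/2^shift, which is exactly the algebra spelled out in the in-diff comment. Below is a minimal userspace sketch of that conversion; the HPET_SHIFT value of 22 and the 14.318 MHz example period are illustrative assumptions, not values taken from the patch itself.

```c
/* Sketch of the fsec/cycle -> mult/2^shift conversion done in hpet_enable().
 * HPET_SHIFT = 22 and the ~14.318 MHz period below are assumed example
 * values, chosen only to make the arithmetic concrete. */
#include <stdint.h>
#include <stdio.h>

#define FSEC_PER_NSEC   1000000ULL
#define HPET_SHIFT      22              /* assumed clocksource shift */

int main(void)
{
	uint64_t hpet_period = 69841279ULL; /* fsec per cycle for ~14.318 MHz */
	uint64_t mult = (hpet_period << HPET_SHIFT) / FSEC_PER_NSEC;

	/* With mult in hand, turning a cycle delta into nanoseconds is just a
	 * multiply and a shift, which is what the clocksource core does. */
	uint64_t cycles = 14318180ULL;      /* roughly one second of cycles */
	uint64_t ns = (cycles * mult) >> HPET_SHIFT;

	printf("mult = %llu, 1s of cycles -> %llu ns\n",
	       (unsigned long long)mult, (unsigned long long)ns);
	return 0;
}
```

In the kernel the division is done with do_div(), since a straight 64-bit divide is not available on i386.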
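Note on the sched_clock() change in tsc.c: the patch only changes where the cycle count comes from — the rdtscll() read and the custom_sched_clock hook are replaced by the get_scheduled_cycles paravirt op, so a hypervisor can supply its own notion of scheduled time — while the existing cycles_2_ns() scaling, seeded from cpu_khz by set_cyc2ns_scale(), stays as it is. As a hedged illustration of that style of fixed-point scaling (the scale factor of 10 and the internal formula are assumptions; only the two function names appear in the hunks above):

```c
/* Illustrative cycles->ns scaling in the style of set_cyc2ns_scale() /
 * cycles_2_ns(); the 2^10 fixed-point factor is an assumed value. */
#include <stdint.h>
#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10          /* assumed: keep ns/cycle as x/2^10 */

static uint32_t cyc2ns_scale;

static void set_cyc2ns_scale(uint32_t cpu_khz)
{
	/* ns per cycle = 1e6 / cpu_khz, stored as a 2^10 fixed-point factor */
	cyc2ns_scale = (1000000u << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static uint64_t cycles_2_ns(uint64_t cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
	set_cyc2ns_scale(2400000);          /* e.g. a 2.4 GHz TSC */
	/* 2.4e9 cycles should come out close to one second */
	printf("%llu ns\n", (unsigned long long)cycles_2_ns(2400000000ULL));
	return 0;
}
```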