From 0812a579c92fefa57506821fa08e90f47cb6dbdd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 13 Feb 2007 13:26:19 +0100 Subject: [PATCH] x86-64: Add __copy_from_user_nocache This does user copies in fs write() into the page cache with write combining. This pushes the destination out of the CPU's cache, but allows higher bandwidth in some case. The theory is that the page cache data is usually not touched by the CPU again and it's better to not pollute the cache with it. Also it is a little faster. Signed-off-by: Andi Kleen --- include/asm-x86_64/uaccess.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h index 8079e29c14f..1981f70fcad 100644 --- a/include/asm-x86_64/uaccess.h +++ b/include/asm-x86_64/uaccess.h @@ -367,4 +367,18 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) return copy_user_generic((__force void *)dst, src, size); } +#define ARCH_HAS_NOCACHE_UACCESS 1 +extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); + +static inline int __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) +{ + might_sleep(); + return __copy_user_nocache(dst, (__force void *)src, size, 1); +} + +static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) +{ + return __copy_user_nocache(dst, (__force void *)src, size, 0); +} + #endif /* __X86_64_UACCESS_H */ -- cgit v1.2.3 From 076422d2af7e3d8e72c6e70843f6ea377714b082 Mon Sep 17 00:00:00 2001 From: Amul Shah Date: Tue, 13 Feb 2007 13:26:19 +0100 Subject: [PATCH] x86-64: Allocate the NUMA hash function nodemap dynamically Remove the statically allocated memory to NUMA node hash map in favor of a dynamically allocated memory to node hash map (it is cache aligned). This patch has the nice side effect in that it allows the hash map to grow for systems with large amounts of memory (256GB - 1TB), but suffer from having small PCI space tacked onto the boot node (which is somewhere between 192MB to 512MB on the ES7000). Signed-off-by: Amul Shah Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Rohit Seth Signed-off-by: Andrew Morton --- include/asm-x86_64/e820.h | 1 + include/asm-x86_64/mmzone.h | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index fa208677410..855fb4a454b 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h @@ -56,6 +56,7 @@ extern void finish_e820_parsing(void); extern struct e820map e820; extern unsigned ebda_addr, ebda_size; +extern unsigned long nodemap_addr, nodemap_size; #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index c38ebdf6f42..39ef106986e 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -11,24 +11,25 @@ #include -/* Should really switch to dynamic allocation at some point */ -#define NODEMAPSIZE 0x4fff - /* Simple perfect hash to map physical addresses to node numbers */ struct memnode { int shift; - u8 map[NODEMAPSIZE]; -} ____cacheline_aligned; + unsigned int mapsize; + u8 *map; + u8 embedded_map[64-16]; +} ____cacheline_aligned; /* total size = 64 bytes */ extern struct memnode memnode; #define memnode_shift memnode.shift #define memnodemap memnode.map +#define memnodemapsize memnode.mapsize extern struct pglist_data *node_data[]; static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) { unsigned nid; - VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE); + VIRTUAL_BUG_ON(!memnodemap); + VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize); nid = memnodemap[addr >> memnode_shift]; VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); return nid; -- cgit v1.2.3 From 464d1a78fbf8cf6c7fd970e7b3e2db50a320ce28 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 13 Feb 2007 13:26:20 +0100 Subject: [PATCH] i386: Convert i386 PDA code to use %fs Convert the PDA code to use %fs rather than %gs as the segment for per-processor data. This is because some processors show a small but measurable performance gain for reloading a NULL segment selector (as %fs generally is in user-space) versus a non-NULL one (as %gs generally is). On modern processors the difference is very small, perhaps undetectable. Some old AMD "K6 3D+" processors are noticably slower when %fs is used rather than %gs; I have no idea why this might be, but I think they're sufficiently rare that it doesn't matter much. This patch also fixes the math emulator, which had not been adjusted to match the changed struct pt_regs. [frederik.deweerdt@gmail.com: fixit with gdb] [mingo@elte.hu: Fix KVM too] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Ian Campbell Acked-by: Ingo Molnar Acked-by: Zachary Amsden Cc: Eric Dumazet Signed-off-by: Frederik Deweerdt Signed-off-by: Andrew Morton --- include/asm-i386/elf.h | 4 ++-- include/asm-i386/mmu_context.h | 2 +- include/asm-i386/pda.h | 12 ++++++------ include/asm-i386/processor.h | 6 +++--- include/asm-i386/ptrace.h | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 369035dfe4b..8d33c9bb7c1 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -90,8 +90,8 @@ typedef struct user_fxsr_struct elf_fpxregset_t; pr_reg[6] = regs->eax; \ pr_reg[7] = regs->xds; \ pr_reg[8] = regs->xes; \ - savesegment(fs,pr_reg[9]); \ - pr_reg[10] = regs->xgs; \ + pr_reg[9] = regs->xfs; \ + savesegment(gs,pr_reg[10]); \ pr_reg[11] = regs->orig_eax; \ pr_reg[12] = regs->eip; \ pr_reg[13] = regs->xcs; \ diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 68ff102d6f5..e6aa30f8de5 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -63,7 +63,7 @@ static inline void switch_mm(struct mm_struct *prev, } #define deactivate_mm(tsk, mm) \ - asm("movl %0,%%fs": :"r" (0)); + asm("movl %0,%%gs": :"r" (0)); #define activate_mm(prev, next) \ switch_mm((prev),(next),NULL) diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h index 2ba2736aa10..b12d59a318b 100644 --- a/include/asm-i386/pda.h +++ b/include/asm-i386/pda.h @@ -39,19 +39,19 @@ extern struct i386_pda _proxy_pda; if (0) { T__ tmp__; tmp__ = (val); } \ switch (sizeof(_proxy_pda.field)) { \ case 1: \ - asm(op "b %1,%%gs:%c2" \ + asm(op "b %1,%%fs:%c2" \ : "+m" (_proxy_pda.field) \ :"ri" ((T__)val), \ "i"(pda_offset(field))); \ break; \ case 2: \ - asm(op "w %1,%%gs:%c2" \ + asm(op "w %1,%%fs:%c2" \ : "+m" (_proxy_pda.field) \ :"ri" ((T__)val), \ "i"(pda_offset(field))); \ break; \ case 4: \ - asm(op "l %1,%%gs:%c2" \ + asm(op "l %1,%%fs:%c2" \ : "+m" (_proxy_pda.field) \ :"ri" ((T__)val), \ "i"(pda_offset(field))); \ @@ -65,19 +65,19 @@ extern struct i386_pda _proxy_pda; typeof(_proxy_pda.field) ret__; \ switch (sizeof(_proxy_pda.field)) { \ case 1: \ - asm(op "b %%gs:%c1,%0" \ + asm(op "b %%fs:%c1,%0" \ : "=r" (ret__) \ : "i" (pda_offset(field)), \ "m" (_proxy_pda.field)); \ break; \ case 2: \ - asm(op "w %%gs:%c1,%0" \ + asm(op "w %%fs:%c1,%0" \ : "=r" (ret__) \ : "i" (pda_offset(field)), \ "m" (_proxy_pda.field)); \ break; \ case 4: \ - asm(op "l %%gs:%c1,%0" \ + asm(op "l %%fs:%c1,%0" \ : "=r" (ret__) \ : "i" (pda_offset(field)), \ "m" (_proxy_pda.field)); \ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 359f10b54f5..11bf899de8a 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -424,7 +424,7 @@ struct thread_struct { .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ - .gs = __KERNEL_PDA, \ + .fs = __KERNEL_PDA, \ } /* @@ -442,8 +442,8 @@ struct thread_struct { } #define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs": :"r" (0)); \ - regs->xgs = 0; \ + __asm__("movl %0,%%gs": :"r" (0)); \ + regs->xfs = 0; \ set_fs(USER_DS); \ regs->xds = __USER_DS; \ regs->xes = __USER_DS; \ diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index bdbc894339b..1646996c73d 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -16,8 +16,8 @@ struct pt_regs { long eax; int xds; int xes; - /* int xfs; */ - int xgs; + int xfs; + /* int xgs; */ long orig_eax; long eip; int xcs; -- cgit v1.2.3 From 5809f9d442e9dbb23859e2c37d8c47043f6b5cc9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2007 13:26:21 +0100 Subject: [PATCH] x86-64: get rid of ARCH_HAVE_XTIME_LOCK ARCH_HAVE_XTIME_LOCK is used by x86_64 arch . This arch needs to place a read only copy of xtime_lock into vsyscall page. This read only copy is named __xtime_lock, and xtime_lock is defined in arch/x86_64/kernel/vmlinux.lds.S as an alias. So the declaration of xtime_lock in kernel/timer.c was guarded by ARCH_HAVE_XTIME_LOCK define, defined to true on x86_64. We can get same result with _attribute__((weak)) in the declaration. linker should do the job. Signed-off-by: Eric Dumazet Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-x86_64/vsyscall.h | 5 ----- include/linux/time.h | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 05cb8dd200d..0c7847165ea 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h @@ -56,11 +56,6 @@ extern struct vxtime_data vxtime; extern int vgetcpu_mode; extern struct timezone sys_tz; extern int sysctl_vsyscall; -extern seqlock_t xtime_lock; - -extern int sysctl_vsyscall; - -#define ARCH_HAVE_XTIME_LOCK 1 #endif /* __KERNEL__ */ diff --git a/include/linux/time.h b/include/linux/time.h index 55cee172d72..eceb1a59b07 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -90,7 +90,7 @@ static inline struct timespec timespec_sub(struct timespec lhs, extern struct timespec xtime; extern struct timespec wall_to_monotonic; -extern seqlock_t xtime_lock; +extern seqlock_t xtime_lock __attribute__((weak)); void timekeeping_init(void); -- cgit v1.2.3 From c119ecce894120790903ef535dac3e105f3d6cde Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 13 Feb 2007 13:26:21 +0100 Subject: [PATCH] MM: page allocation hooks for VMI backend The VMI backend uses explicit page type notification to track shadow page tables. The allocation of page table roots is especially tricky. We need to clone the root for non-PAE mode while it is protected under the pgd lock to correctly copy the shadow. We don't need to allocate pgds in PAE mode, (PDPs in Intel terminology) as they only have 4 entries, and are cached entirely by the processor, which makes shadowing them rather simple. For base page table level allocation, pmd_populate provides the exact hook point we need. Also, we need to allocate pages when splitting a large page, and we must release pages before returning the page to any free pool. Despite being required with these slightly odd semantics for VMI, Xen also uses these hooks to determine the exact moment when page tables are created or released. AK: All nops for other architectures Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Rusty Russell Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-i386/paravirt.h | 14 ++++++++++++++ include/asm-i386/pgalloc.h | 30 ++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 9f06265065f..53da276a2ec 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -127,6 +127,12 @@ struct paravirt_ops void (fastcall *flush_tlb_kernel)(void); void (fastcall *flush_tlb_single)(u32 addr); + void (fastcall *alloc_pt)(u32 pfn); + void (fastcall *alloc_pd)(u32 pfn); + void (fastcall *alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); + void (fastcall *release_pt)(u32 pfn); + void (fastcall *release_pd)(u32 pfn); + void (fastcall *set_pte)(pte_t *ptep, pte_t pteval); void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval); @@ -320,6 +326,14 @@ static inline unsigned long apic_read(unsigned long reg) #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) +#define paravirt_alloc_pt(pfn) paravirt_ops.alloc_pt(pfn) +#define paravirt_release_pt(pfn) paravirt_ops.release_pt(pfn) + +#define paravirt_alloc_pd(pfn) paravirt_ops.alloc_pd(pfn) +#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) \ + paravirt_ops.alloc_pd_clone(pfn, clonepfn, start, count) +#define paravirt_release_pd(pfn) paravirt_ops.release_pd(pfn) + static inline void set_pte(pte_t *ptep, pte_t pteval) { paravirt_ops.set_pte(ptep, pteval); diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h index 4b1e61359f8..c8dc2d0141a 100644 --- a/include/asm-i386/pgalloc.h +++ b/include/asm-i386/pgalloc.h @@ -5,13 +5,31 @@ #include #include /* for struct page */ -#define pmd_populate_kernel(mm, pmd, pte) \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) +#ifdef CONFIG_PARAVIRT +#include +#else +#define paravirt_alloc_pt(pfn) do { } while (0) +#define paravirt_alloc_pd(pfn) do { } while (0) +#define paravirt_alloc_pd(pfn) do { } while (0) +#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) +#define paravirt_release_pt(pfn) do { } while (0) +#define paravirt_release_pd(pfn) do { } while (0) +#endif + +#define pmd_populate_kernel(mm, pmd, pte) \ +do { \ + paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ +} while (0) #define pmd_populate(mm, pmd, pte) \ +do { \ + paravirt_alloc_pt(page_to_pfn(pte)); \ set_pmd(pmd, __pmd(_PAGE_TABLE + \ ((unsigned long long)page_to_pfn(pte) << \ - (unsigned long long) PAGE_SHIFT))) + (unsigned long long) PAGE_SHIFT))); \ +} while (0) + /* * Allocate and free page tables. */ @@ -32,7 +50,11 @@ static inline void pte_free(struct page *pte) } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + paravirt_release_pt(page_to_pfn(pte)); \ + tlb_remove_page((tlb),(pte)); \ +} while (0) #ifdef CONFIG_X86_PAE /* -- cgit v1.2.3 From 9226d125d94c7e4964dd41cc5e9ca2ff84091d01 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 13 Feb 2007 13:26:21 +0100 Subject: [PATCH] i386: paravirt CPU hypercall batching mode The VMI ROM has a mode where hypercalls can be queued and batched. This turns out to be a significant win during context switch, but must be done at a specific point before side effects to CPU state are visible to subsequent instructions. This is similar to the MMU batching hooks already provided. The same hooks could be used by the Xen backend to implement a context switch multicall. To explain a bit more about lazy modes in the paravirt patches, basically, the idea is that only one of lazy CPU or MMU mode can be active at any given time. Lazy MMU mode is similar to this lazy CPU mode, and allows for batching of multiple PTE updates (say, inside a remap loop), but to avoid keeping some kind of state machine about when to flush cpu or mmu updates, we just allow one or the other to be active. Although there is no real reason a more comprehensive scheme could not be implemented, there is also no demonstrated need for this extra complexity. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Rusty Russell Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-generic/pgtable.h | 13 +++++++++++++ include/asm-i386/paravirt.h | 15 +++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 9d774d07d95..00c23433b39 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -182,6 +182,19 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define arch_leave_lazy_mmu_mode() do {} while (0) #endif +/* + * A facility to provide batching of the reload of page tables with the + * actual context switch code for paravirtualized guests. By convention, + * only one of the lazy modes (CPU, MMU) should be active at any given + * time, entry should never be nested, and entry and exits should always + * be paired. This is for sanity of maintaining and reasoning about the + * kernel code. + */ +#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE +#define arch_enter_lazy_cpu_mode() do {} while (0) +#define arch_leave_lazy_cpu_mode() do {} while (0) +#endif + /* * When walking page tables, get the address of the next boundary, * or the end address of the range if that comes earlier. Although no diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 53da276a2ec..38e5164bd0e 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -146,6 +146,8 @@ struct paravirt_ops void (fastcall *pmd_clear)(pmd_t *pmdp); #endif + void (fastcall *set_lazy_mode)(int mode); + /* These two are jmp to, not actually called. */ void (fastcall *irq_enable_sysexit)(void); void (fastcall *iret)(void); @@ -386,6 +388,19 @@ static inline void pmd_clear(pmd_t *pmdp) } #endif +/* Lazy mode for batching updates / context switch */ +#define PARAVIRT_LAZY_NONE 0 +#define PARAVIRT_LAZY_MMU 1 +#define PARAVIRT_LAZY_CPU 2 + +#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE +#define arch_enter_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU) +#define arch_leave_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE +#define arch_enter_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_MMU) +#define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch { u8 *instr; /* original instructions */ -- cgit v1.2.3 From ae5da273fe3352febd38658d8d34484cbcfb3423 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 13 Feb 2007 13:26:21 +0100 Subject: [PATCH] i386: SMP boot hook for paravirt Add VMI SMP boot hook. We emulate a regular boot sequence and use the same APIC IPI initiation, we just poke magic values to load into the CPU state when the startup IPI is received, rather than having to jump through a real mode trampoline. This is all that was needed to get SMP to work. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Rusty Russell Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-i386/paravirt.h | 9 +++++++++ include/asm-i386/smp.h | 5 +++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 38e5164bd0e..6ccf36499b2 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -151,6 +151,8 @@ struct paravirt_ops /* These two are jmp to, not actually called. */ void (fastcall *irq_enable_sysexit)(void); void (fastcall *iret)(void); + + void (fastcall *startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); }; /* Mark a paravirt probe function. */ @@ -323,6 +325,13 @@ static inline unsigned long apic_read(unsigned long reg) } #endif +#ifdef CONFIG_SMP +static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, + unsigned long start_esp) +{ + return paravirt_ops.startup_ipi_hook(phys_apicid, start_eip, start_esp); +} +#endif #define __flush_tlb() paravirt_ops.flush_tlb_user() #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 64fe624c02c..6bf0033a301 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -52,6 +52,11 @@ extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif +#ifndef CONFIG_PARAVIRT +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ +do { } while (0) +#endif + /* * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. We map APIC_BASE very early in page_setup(), -- cgit v1.2.3 From 7ce0bcfd1667736f1293cff845139bbee53186de Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 13 Feb 2007 13:26:21 +0100 Subject: [PATCH] i386: vMI backend for paravirt-ops Fairly straightforward implementation of VMI backend for paravirt-ops. [Adrian Bunk: some cleanups] Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Rusty Russell Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-i386/timer.h | 1 + include/asm-i386/vmi.h | 262 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 263 insertions(+) create mode 100644 include/asm-i386/vmi.h (limited to 'include') diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index d0ebd05f851..1ee64e34cd3 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -8,6 +8,7 @@ void setup_pit_timer(void); /* Modifiers for buggy PIT handling */ extern int pit_latch_buggy; extern int timer_ack; +extern int no_timer_check; extern int recalibrate_cpu_khz(void); #endif diff --git a/include/asm-i386/vmi.h b/include/asm-i386/vmi.h new file mode 100644 index 00000000000..43c89333037 --- /dev/null +++ b/include/asm-i386/vmi.h @@ -0,0 +1,262 @@ +/* + * VMI interface definition + * + * Copyright (C) 2005, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Maintained by: Zachary Amsden zach@vmware.com + * + */ +#include + +/* + *--------------------------------------------------------------------- + * + * VMI Option ROM API + * + *--------------------------------------------------------------------- + */ +#define VMI_SIGNATURE 0x696d5663 /* "cVmi" */ + +#define PCI_VENDOR_ID_VMWARE 0x15AD +#define PCI_DEVICE_ID_VMWARE_VMI 0x0801 + +/* + * We use two version numbers for compatibility, with the major + * number signifying interface breakages, and the minor number + * interface extensions. + */ +#define VMI_API_REV_MAJOR 3 +#define VMI_API_REV_MINOR 0 + +#define VMI_CALL_CPUID 0 +#define VMI_CALL_WRMSR 1 +#define VMI_CALL_RDMSR 2 +#define VMI_CALL_SetGDT 3 +#define VMI_CALL_SetLDT 4 +#define VMI_CALL_SetIDT 5 +#define VMI_CALL_SetTR 6 +#define VMI_CALL_GetGDT 7 +#define VMI_CALL_GetLDT 8 +#define VMI_CALL_GetIDT 9 +#define VMI_CALL_GetTR 10 +#define VMI_CALL_WriteGDTEntry 11 +#define VMI_CALL_WriteLDTEntry 12 +#define VMI_CALL_WriteIDTEntry 13 +#define VMI_CALL_UpdateKernelStack 14 +#define VMI_CALL_SetCR0 15 +#define VMI_CALL_SetCR2 16 +#define VMI_CALL_SetCR3 17 +#define VMI_CALL_SetCR4 18 +#define VMI_CALL_GetCR0 19 +#define VMI_CALL_GetCR2 20 +#define VMI_CALL_GetCR3 21 +#define VMI_CALL_GetCR4 22 +#define VMI_CALL_WBINVD 23 +#define VMI_CALL_SetDR 24 +#define VMI_CALL_GetDR 25 +#define VMI_CALL_RDPMC 26 +#define VMI_CALL_RDTSC 27 +#define VMI_CALL_CLTS 28 +#define VMI_CALL_EnableInterrupts 29 +#define VMI_CALL_DisableInterrupts 30 +#define VMI_CALL_GetInterruptMask 31 +#define VMI_CALL_SetInterruptMask 32 +#define VMI_CALL_IRET 33 +#define VMI_CALL_SYSEXIT 34 +#define VMI_CALL_Halt 35 +#define VMI_CALL_Reboot 36 +#define VMI_CALL_Shutdown 37 +#define VMI_CALL_SetPxE 38 +#define VMI_CALL_SetPxELong 39 +#define VMI_CALL_UpdatePxE 40 +#define VMI_CALL_UpdatePxELong 41 +#define VMI_CALL_MachineToPhysical 42 +#define VMI_CALL_PhysicalToMachine 43 +#define VMI_CALL_AllocatePage 44 +#define VMI_CALL_ReleasePage 45 +#define VMI_CALL_InvalPage 46 +#define VMI_CALL_FlushTLB 47 +#define VMI_CALL_SetLinearMapping 48 + +#define VMI_CALL_SetIOPLMask 61 +#define VMI_CALL_SetInitialAPState 62 +#define VMI_CALL_APICWrite 63 +#define VMI_CALL_APICRead 64 +#define VMI_CALL_SetLazyMode 73 + +/* + *--------------------------------------------------------------------- + * + * MMU operation flags + * + *--------------------------------------------------------------------- + */ + +/* Flags used by VMI_{Allocate|Release}Page call */ +#define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */ +#define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */ +#define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */ + + +/* Flags shared by Allocate|Release Page and PTE updates */ +#define VMI_PAGE_PT 0x01 +#define VMI_PAGE_PD 0x02 +#define VMI_PAGE_PDP 0x04 +#define VMI_PAGE_PML4 0x08 + +#define VMI_PAGE_NORMAL 0x00 /* for debugging */ + +/* Flags used by PTE updates */ +#define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */ +#define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */ +#define VMI_PAGE_VA_MASK 0xfffff000 + +#ifdef CONFIG_X86_PAE +#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED) +#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED) +#else +#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED) +#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED) +#endif + +/* Flags used by VMI_FlushTLB call */ +#define VMI_FLUSH_TLB 0x01 +#define VMI_FLUSH_GLOBAL 0x02 + +/* + *--------------------------------------------------------------------- + * + * VMI relocation definitions for ROM call get_reloc + * + *--------------------------------------------------------------------- + */ + +/* VMI Relocation types */ +#define VMI_RELOCATION_NONE 0 +#define VMI_RELOCATION_CALL_REL 1 +#define VMI_RELOCATION_JUMP_REL 2 +#define VMI_RELOCATION_NOP 3 + +#ifndef __ASSEMBLY__ +struct vmi_relocation_info { + unsigned char *eip; + unsigned char type; + unsigned char reserved[3]; +}; +#endif + + +/* + *--------------------------------------------------------------------- + * + * Generic ROM structures and definitions + * + *--------------------------------------------------------------------- + */ + +#ifndef __ASSEMBLY__ + +struct vrom_header { + u16 rom_signature; // option ROM signature + u8 rom_length; // ROM length in 512 byte chunks + u8 rom_entry[4]; // 16-bit code entry point + u8 rom_pad0; // 4-byte align pad + u32 vrom_signature; // VROM identification signature + u8 api_version_min;// Minor version of API + u8 api_version_maj;// Major version of API + u8 jump_slots; // Number of jump slots + u8 reserved1; // Reserved for expansion + u32 virtual_top; // Hypervisor virtual address start + u16 reserved2; // Reserved for expansion + u16 license_offs; // Offset to License string + u16 pci_header_offs;// Offset to PCI OPROM header + u16 pnp_header_offs;// Offset to PnP OPROM header + u32 rom_pad3; // PnP reserverd / VMI reserved + u8 reserved[96]; // Reserved for headers + char vmi_init[8]; // VMI_Init jump point + char get_reloc[8]; // VMI_GetRelocationInfo jump point +} __attribute__((packed)); + +struct pnp_header { + char sig[4]; + char rev; + char size; + short next; + short res; + long devID; + unsigned short manufacturer_offset; + unsigned short product_offset; +} __attribute__((packed)); + +struct pci_header { + char sig[4]; + short vendorID; + short deviceID; + short vpdData; + short size; + char rev; + char class; + char subclass; + char interface; + short chunks; + char rom_version_min; + char rom_version_maj; + char codetype; + char lastRom; + short reserved; +} __attribute__((packed)); + +/* Function prototypes for bootstrapping */ +extern void vmi_init(void); +extern void vmi_bringup(void); +extern void vmi_apply_boot_page_allocations(void); + +/* State needed to start an application processor in an SMP system. */ +struct vmi_ap_state { + u32 cr0; + u32 cr2; + u32 cr3; + u32 cr4; + + u64 efer; + + u32 eip; + u32 eflags; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u32 esp; + u32 ebp; + u32 esi; + u32 edi; + u16 cs; + u16 ss; + u16 ds; + u16 es; + u16 fs; + u16 gs; + u16 ldtr; + + u16 gdtr_limit; + u32 gdtr_base; + u32 idtr_base; + u16 idtr_limit; +}; + +#endif -- cgit v1.2.3 From bbab4f3bb7f528d2b8ccb5de9ae5f6ff3fb29684 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 13 Feb 2007 13:26:21 +0100 Subject: [PATCH] i386: vMI timer patches VMI timer code. It works by taking over the local APIC clock when APIC is configured, which requires a couple hooks into the APIC code. The backend timer code could be commonized into the timer infrastructure, but there are some pieces missing (stolen time, in particular), and the exact semantics of when to do accounting for NO_IDLE need to be shared between different hypervisors as well. So for now, VMI timer is a separate module. [Adrian Bunk: cleanups] Subject: VMI timer patches Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Rusty Russell Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-i386/apic.h | 2 + include/asm-i386/paravirt.h | 12 ++++++ include/asm-i386/time.h | 1 + include/asm-i386/timer.h | 2 + include/asm-i386/vmi_time.h | 103 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+) create mode 100644 include/asm-i386/vmi_time.h (limited to 'include') diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index 41a44319905..3a61206fd10 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -43,6 +43,8 @@ extern void generic_apic_probe(void); #define apic_write native_apic_write #define apic_write_atomic native_apic_write_atomic #define apic_read native_apic_read +#define setup_boot_clock setup_boot_APIC_clock +#define setup_secondary_clock setup_secondary_APIC_clock #endif static __inline fastcall void native_apic_write(unsigned long reg, diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 6ccf36499b2..12ef95924da 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -121,6 +121,8 @@ struct paravirt_ops void (fastcall *apic_write)(unsigned long reg, unsigned long v); void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v); unsigned long (fastcall *apic_read)(unsigned long reg); + void (*setup_boot_clock)(void); + void (*setup_secondary_clock)(void); #endif void (fastcall *flush_tlb_user)(void); @@ -323,6 +325,16 @@ static inline unsigned long apic_read(unsigned long reg) { return paravirt_ops.apic_read(reg); } + +static inline void setup_boot_clock(void) +{ + paravirt_ops.setup_boot_clock(); +} + +static inline void setup_secondary_clock(void) +{ + paravirt_ops.setup_secondary_clock(); +} #endif #ifdef CONFIG_SMP diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h index ea8065af825..571b4294dc2 100644 --- a/include/asm-i386/time.h +++ b/include/asm-i386/time.h @@ -30,6 +30,7 @@ static inline int native_set_wallclock(unsigned long nowtime) #ifdef CONFIG_PARAVIRT #include +extern unsigned long long native_sched_clock(void); #else /* !CONFIG_PARAVIRT */ #define get_wallclock() native_get_wallclock() diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 1ee64e34cd3..4752c3a6a70 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -9,6 +9,8 @@ void setup_pit_timer(void); extern int pit_latch_buggy; extern int timer_ack; extern int no_timer_check; +extern unsigned long long (*custom_sched_clock)(void); +extern int no_sync_cmos_clock; extern int recalibrate_cpu_khz(void); #endif diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h new file mode 100644 index 00000000000..c1293121100 --- /dev/null +++ b/include/asm-i386/vmi_time.h @@ -0,0 +1,103 @@ +/* + * VMI Time wrappers + * + * Copyright (C) 2006, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to dhecht@vmware.com + * + */ + +#ifndef __VMI_TIME_H +#define __VMI_TIME_H + +/* + * Raw VMI call indices for timer functions + */ +#define VMI_CALL_GetCycleFrequency 66 +#define VMI_CALL_GetCycleCounter 67 +#define VMI_CALL_SetAlarm 68 +#define VMI_CALL_CancelAlarm 69 +#define VMI_CALL_GetWallclockTime 70 +#define VMI_CALL_WallclockUpdated 71 + +/* Cached VMI timer operations */ +extern struct vmi_timer_ops { + u64 (*get_cycle_frequency)(void); + u64 (*get_cycle_counter)(int); + u64 (*get_wallclock)(void); + int (*wallclock_updated)(void); + void (*set_alarm)(u32 flags, u64 expiry, u64 period); + void (*cancel_alarm)(u32 flags); +} vmi_timer_ops; + +/* Prototypes */ +extern void __init vmi_time_init(void); +extern unsigned long vmi_get_wallclock(void); +extern int vmi_set_wallclock(unsigned long now); +extern unsigned long long vmi_sched_clock(void); + +#ifdef CONFIG_X86_LOCAL_APIC +extern void __init vmi_timer_setup_boot_alarm(void); +extern void __init vmi_timer_setup_secondary_alarm(void); +extern void apic_vmi_timer_interrupt(void); +#endif + +#ifdef CONFIG_NO_IDLE_HZ +extern int vmi_stop_hz_timer(void); +extern void vmi_account_time_restart_hz_timer(void); +#endif + +/* + * When run under a hypervisor, a vcpu is always in one of three states: + * running, halted, or ready. The vcpu is in the 'running' state if it + * is executing. When the vcpu executes the halt interface, the vcpu + * enters the 'halted' state and remains halted until there is some work + * pending for the vcpu (e.g. an alarm expires, host I/O completes on + * behalf of virtual I/O). At this point, the vcpu enters the 'ready' + * state (waiting for the hypervisor to reschedule it). Finally, at any + * time when the vcpu is not in the 'running' state nor the 'halted' + * state, it is in the 'ready' state. + * + * Real time is advances while the vcpu is 'running', 'ready', or + * 'halted'. Stolen time is the time in which the vcpu is in the + * 'ready' state. Available time is the remaining time -- the vcpu is + * either 'running' or 'halted'. + * + * All three views of time are accessible through the VMI cycle + * counters. + */ + +/* The cycle counters. */ +#define VMI_CYCLES_REAL 0 +#define VMI_CYCLES_AVAILABLE 1 +#define VMI_CYCLES_STOLEN 2 + +/* The alarm interface 'flags' bits */ +#define VMI_ALARM_COUNTERS 2 + +#define VMI_ALARM_COUNTER_MASK 0x000000ff + +#define VMI_ALARM_WIRED_IRQ0 0x00000000 +#define VMI_ALARM_WIRED_LVTT 0x00010000 + +#define VMI_ALARM_IS_ONESHOT 0x00000000 +#define VMI_ALARM_IS_PERIODIC 0x00000100 + +#define CONFIG_VMI_ALARM_HZ 100 + +#endif -- cgit v1.2.3 From 7b3552024380f306a6c50d5105d18d9d4258fa4e Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 13 Feb 2007 13:26:21 +0100 Subject: [PATCH] i386: Profile pc badness Profile_pc was broken when using paravirtualization because the assumption the kernel was running at CPL 0 was violated, causing bad logic to read a random value off the stack. The only way to be in kernel lock functions is to be in kernel code, so validate that assumption explicitly by checking the CS value. We don't want to be fooled by BIOS / APM segments and try to read those stacks, so only match KERNEL_CS. I moved some stuff in segment.h to make it prettier. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen --- include/asm-i386/ptrace.h | 4 ++++ include/asm-i386/segment.h | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 1646996c73d..6002597b9e1 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -49,6 +49,10 @@ static inline int user_mode_vm(struct pt_regs *regs) { return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL; } +static inline int v8086_mode(struct pt_regs *regs) +{ + return (regs->eflags & VM_MASK); +} #define instruction_pointer(regs) ((regs)->eip) #define regs_return_value(regs) ((regs)->eax) diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 3c796af3377..065f10bfa48 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -83,14 +83,8 @@ * The GDT has 32 entries */ #define GDT_ENTRIES 32 - #define GDT_SIZE (GDT_ENTRIES * 8) -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ -#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) - /* Simple and small GDT entries for booting only */ #define GDT_ENTRY_BOOT_CS 2 @@ -134,4 +128,17 @@ #ifndef CONFIG_PARAVIRT #define get_kernel_rpl() 0 #endif +/* + * Matching rules for certain types of segments. + */ + +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8) + +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) + +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) + #endif -- cgit v1.2.3 From 2ff2d3d74705d34ab71b21f54634fcf50d57bdd5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 13 Feb 2007 13:26:22 +0100 Subject: [PATCH] i386: add idle notifier Add a notifier mechanism to the low level idle loop. You can register a callback function which gets invoked on entry and exit from the low level idle loop. The low level idle loop is defined as the polling loop, low-power call, or the mwait instruction. Interrupts processed by the idle thread are not considered part of the low level loop. The notifier can be used to measure precisely how much is spent in useless execution (or low power mode). The perfmon subsystem uses it to turn on/off monitoring. Signed-off-by: stephane eranian Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/idle.h | 14 ++++++++++++++ include/asm-i386/processor.h | 8 ++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/asm-i386/idle.h (limited to 'include') diff --git a/include/asm-i386/idle.h b/include/asm-i386/idle.h new file mode 100644 index 00000000000..87ab9391119 --- /dev/null +++ b/include/asm-i386/idle.h @@ -0,0 +1,14 @@ +#ifndef _ASM_I386_IDLE_H +#define _ASM_I386_IDLE_H 1 + +#define IDLE_START 1 +#define IDLE_END 2 + +struct notifier_block; +void idle_notifier_register(struct notifier_block *n); +void idle_notifier_unregister(struct notifier_block *n); + +void exit_idle(void); +void enter_idle(void); + +#endif diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 11bf899de8a..edfbe46a5e1 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -257,6 +257,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + /* "mwait %eax,%ecx;" */ + asm volatile( + "sti; .byte 0x0f,0x01,0xc9;" + : :"a" (eax), "c" (ecx)); +} + extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); /* from system description table in BIOS. Mostly for MCA use, but -- cgit v1.2.3 From f9690982b8c2f9a2c65acdc113e758ec356676a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2007 13:26:22 +0100 Subject: [PATCH] i386: improve sched_clock() on i686 Clean up sched_clock() on i686: it will use the TSC if available and falls back to jiffies only if the user asked for it to be disabled via notsc or the CPU calibration code didnt figure out the right cpu_khz. This generally makes the scheduler timestamps more finegrained, on all hardware. (the current scheduler is pretty resistant against asynchronous sched_clock() values on different CPUs, it will allow at most up to a jiffy of jitter.) Also simplify sched_clock()'s check for TSC availability: propagate the desire and ability to use the TSC into the tsc_disable flag, previously this flag only indicated whether the notsc option was passed. This makes the rare low-res sched_clock() codepath a single branch off a read-mostly flag. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/bugs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index 38f1aebbbdb..c90c7c49930 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -160,7 +160,7 @@ static void __init check_config(void) * If we configured ourselves for a TSC, we'd better have one! */ #ifdef CONFIG_X86_TSC - if (!cpu_has_tsc) + if (!cpu_has_tsc && !tsc_disable) panic("Kernel compiled for Pentium+, requires TSC feature!"); #endif -- cgit v1.2.3 From 53fee04f318222a3179ca5933d8bda82c1eef17a Mon Sep 17 00:00:00 2001 From: Rohit Seth Date: Tue, 13 Feb 2007 13:26:22 +0100 Subject: [PATCH] x86-64: Fix fake numa for x86_64 machines with big IO hole This patch resolves the issue of running with numa=fake=X on kernel command line on x86_64 machines that have big IO hole. While calculating the size of each node now we look at the total hole size in that range. Previously there were nodes that only had IO holes in them causing kernel boot problems. We now use the NODE_MIN_SIZE (64MB) as the minimum size of memory that any node must have. We reduce the number of allocated nodes if the number of nodes specified on kernel command line results in any node getting memory smaller than NODE_MIN_SIZE. This change allows the extra memory to be incremented in NODE_MIN_SIZE granule and uniformly distribute among as many nodes (called big nodes) as possible. [akpm@osdl.org: build fix] Signed-off-by: David Rientjes Signed-off-by: Paul Menage Signed-off-by: Rohit Seth Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-x86_64/e820.h | 1 + include/asm-x86_64/mmzone.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index 855fb4a454b..6216fa3f280 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h @@ -46,6 +46,7 @@ extern void e820_mark_nosave_regions(void); extern void e820_print_map(char *who); extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern unsigned long e820_hole_size(unsigned long start, unsigned long end); extern void e820_setup_gap(void); extern void e820_register_active_regions(int nid, diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index 39ef106986e..fb558fb1d21 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -47,5 +47,10 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) extern int pfn_valid(unsigned long pfn); #endif +#ifdef CONFIG_NUMA_EMU +#define FAKE_NODE_MIN_SIZE (64*1024*1024) +#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1ul)) +#endif + #endif #endif -- cgit v1.2.3 From c49c5330c9592f29a69bb2ea8f6e7fd5d9c151e8 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Tue, 13 Feb 2007 13:26:22 +0100 Subject: [PATCH] x86-64: Remove fastcall references in x86_64 code Unlike x86, x86_64 already passes arguments in registers. The use of regparm attribute makes no difference in produced code, and the use of fastcall just bloats the code. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-x86_64/hw_irq.h | 2 +- include/asm-x86_64/mutex.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index 179cce755aa..552df5f10a6 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h @@ -91,7 +91,7 @@ extern void enable_8259A_irq(unsigned int irq); extern int i8259A_irq_pending(unsigned int irq); extern void make_8259A_irq(unsigned int irq); extern void init_8259A(int aeoi); -extern void FASTCALL(send_IPI_self(int vector)); +extern void send_IPI_self(int vector); extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern void disable_IO_APIC(void); diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h index 16396b1de3e..6c2949a3c67 100644 --- a/include/asm-x86_64/mutex.h +++ b/include/asm-x86_64/mutex.h @@ -21,7 +21,7 @@ do { \ unsigned long dummy; \ \ typecheck(atomic_t *, v); \ - typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ \ __asm__ __volatile__( \ LOCK_PREFIX " decl (%%rdi) \n" \ @@ -47,7 +47,7 @@ do { \ */ static inline int __mutex_fastpath_lock_retval(atomic_t *count, - int fastcall (*fail_fn)(atomic_t *)) + int (*fail_fn)(atomic_t *)) { if (unlikely(atomic_dec_return(count) < 0)) return fail_fn(count); @@ -67,7 +67,7 @@ do { \ unsigned long dummy; \ \ typecheck(atomic_t *, v); \ - typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ + typecheck_fn(void (*)(atomic_t *), fail_fn); \ \ __asm__ __volatile__( \ LOCK_PREFIX " incl (%%rdi) \n" \ -- cgit v1.2.3 From a98f0dd34d94ea0b5f3816196bea5dba467827bb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 13 Feb 2007 13:26:23 +0100 Subject: [PATCH] x86-64: Allow to run a program when a machine check event is detected When a machine check event is detected (including a AMD RevF threshold overflow event) allow to run a "trigger" program. This allows user space to react to such events sooner. The trigger is configured using a new trigger entry in the machinecheck sysfs interface. It is currently shared between all CPUs. I also fixed the AMD threshold handler to run the machine check polling code immediately to actually log any events that might have caused the threshold interrupt. Also added some documentation for the mce sysfs interface. Signed-off-by: Andi Kleen --- include/asm-x86_64/mce.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h index 5a11146d6d9..177e92b4019 100644 --- a/include/asm-x86_64/mce.h +++ b/include/asm-x86_64/mce.h @@ -103,6 +103,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status); extern atomic_t mce_entry; +extern void do_machine_check(struct pt_regs *, long); + #endif #endif -- cgit v1.2.3 From 930f8b8bcde30b501fdf00fb7624aefb9bf35f47 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 13 Feb 2007 13:26:23 +0100 Subject: [PATCH] x86-64: remove get_pmd() Function is dead. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-x86_64/pgalloc.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h index 43d4c333a8b..4e28b6060a5 100644 --- a/include/asm-x86_64/pgalloc.h +++ b/include/asm-x86_64/pgalloc.h @@ -18,11 +18,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); } -static inline pmd_t *get_pmd(void) -{ - return (pmd_t *)get_zeroed_page(GFP_KERNEL); -} - static inline void pmd_free(pmd_t *pmd) { BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); -- cgit v1.2.3 From b0957f1a3a7687bfaf5b0bfe402b50985ea2f06b Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Tue, 13 Feb 2007 13:26:23 +0100 Subject: [PATCH] x86-64: Fix preprocessor condition Old code was legal standard C, but apparently not sparse-C. Signed-off-by: Josef 'Jeff' Sipek Signed-off-by: Andi Kleen --- include/asm-x86_64/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h index f5d84bb7c94..de2cd9a2303 100644 --- a/include/asm-x86_64/io.h +++ b/include/asm-x86_64/io.h @@ -100,7 +100,7 @@ __OUTS(l) #define IO_SPACE_LIMIT 0xffff -#if defined(__KERNEL__) && __x86_64__ +#if defined(__KERNEL__) && defined(__x86_64__) #include -- cgit v1.2.3 From 2fa8a050a0026eadbb39a2f281011991e00fe29a Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 13 Feb 2007 13:26:24 +0100 Subject: [PATCH] x86-64: define dma noncoherent API functions x86-64 is missing these: Signed-off-by: Jeff Garzik Signed-off-by: Andi Kleen --- include/asm-x86_64/dma-mapping.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h index 49dbab09ef2..d2af227f06d 100644 --- a/include/asm-x86_64/dma-mapping.h +++ b/include/asm-x86_64/dma-mapping.h @@ -66,6 +66,9 @@ static inline int dma_mapping_error(dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + extern void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr, -- cgit v1.2.3 From 5d0e600d903caa09e790824cc5812f0d97113b23 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2007 13:26:24 +0100 Subject: [PATCH] x86: fix laptop bootup hang in init_acpi() During kernel bootup, a new T60 laptop (CoreDuo, 32-bit) hangs about 10%-20% of the time in acpi_init(): Calling initcall 0xc055ce1a: topology_init+0x0/0x2f() Calling initcall 0xc055d75e: mtrr_init_finialize+0x0/0x2c() Calling initcall 0xc05664f3: param_sysfs_init+0x0/0x175() Calling initcall 0xc014cb65: pm_sysrq_init+0x0/0x17() Calling initcall 0xc0569f99: init_bio+0x0/0xf4() Calling initcall 0xc056b865: genhd_device_init+0x0/0x50() Calling initcall 0xc056c4bd: fbmem_init+0x0/0x87() Calling initcall 0xc056dd74: acpi_init+0x0/0x1ee() It's a hard hang that not even an NMI could punch through! Frustratingly, adding printks or function tracing to the ACPI code made the hangs go away ... After some time an additional detail emerged: disabling the NMI watchdog made these occasional hangs go away. So i spent the better part of today trying to debug this and trying out various theories when i finally found the likely reason for the hang: if acpi_ns_initialize_devices() executes an _INI AML method and an NMI happens to hit that AML execution in the wrong moment, the machine would hang. (my theory is that this must be some sort of chipset setup method doing stores to chipset mmio registers?) Unfortunately given the characteristics of the hang it was sheer impossible to figure out which of the numerous AML methods is impacted by this problem. As a workaround i wrote an interface to disable chipset-based NMIs while executing _INI sections - and indeed this fixed the hang. I did a boot-loop of 100 separate reboots and none hung - while without the patch it would hang every 5-10 attempts. Out of caution i did not touch the nmi_watchdog=2 case (it's not related to the chipset anyway and didnt hang). I implemented this for both x86_64 and i686, tested the i686 laptop both with nmi_watchdog=1 [which triggered the hangs] and nmi_watchdog=2, and tested an Athlon64 box with the 64-bit kernel as well. Everything builds and works with the patch applied. Signed-off-by: Ingo Molnar Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Len Brown Signed-off-by: Andrew Morton --- include/linux/nmi.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index acb4ed13024..29af2d5df09 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -17,8 +17,15 @@ #ifdef ARCH_HAS_NMI_WATCHDOG #include extern void touch_nmi_watchdog(void); +extern void acpi_nmi_disable(void); +extern void acpi_nmi_enable(void); #else -# define touch_nmi_watchdog() touch_softlockup_watchdog() +static inline void touch_nmi_watchdog(void) +{ + touch_softlockup_watchdog(); +} +static inline void acpi_nmi_disable(void) { } +static inline void acpi_nmi_enable(void) { } #endif #ifndef trigger_all_cpu_backtrace -- cgit v1.2.3 From ee4eff6ff6cbfc8ce38131058a18802bf6206879 Mon Sep 17 00:00:00 2001 From: Benjamin Romer Date: Tue, 13 Feb 2007 13:26:25 +0100 Subject: [PATCH] x86-64: update IO-APIC dest field to 8-bit for xAPIC On the Unisys ES7000/ONE system, we encountered a problem where performing a kexec reboot or dump on any cell other than cell 0 causes the system timer to stop working, resulting in a hang during timer calibration in the new kernel. We traced the problem to one line of code in disable_IO_APIC(), which needs to restore the timer's IO-APIC configuration before rebooting. The code is currently using the 4-bit physical destination field, rather than using the 8-bit logical destination field, and it cuts off the upper 4 bits of the timer's APIC ID. If we change this to use the logical destination field, the timer works and we can kexec on the upper cells. This was tested on two different cells (0 and 2) in an ES7000/ONE system. For reference, the relevant Intel xAPIC spec is kept at ftp://download.intel.com/design/chipsets/e8501/datashts/30962001.pdf, specifically on page 334. Signed-off-by: Benjamin M Romer Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Eric W. Biederman" Cc: Vivek Goyal Signed-off-by: Andrew Morton --- include/asm-x86_64/io_apic.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h index 561ecbfd4cb..f4fb238c89f 100644 --- a/include/asm-x86_64/io_apic.h +++ b/include/asm-x86_64/io_apic.h @@ -85,18 +85,8 @@ struct IO_APIC_route_entry { mask : 1, /* 0: enabled, 1: disabled */ __reserved_2 : 15; - union { struct { __u32 - __reserved_1 : 24, - physical_dest : 4, - __reserved_2 : 4; - } physical; - - struct { __u32 - __reserved_1 : 24, - logical_dest : 8; - } logical; - } dest; - + __u32 __reserved_3 : 24, + dest : 8; } __attribute__ ((packed)); /* -- cgit v1.2.3 From 9f6026b8c308365d955faaf31dd0f457266d11f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 13 Feb 2007 13:26:25 +0100 Subject: [PATCH] x86-64: Fix wrong gcc check in bitops.h gcc 5.0 will likely not have the constraint problem Signed-off-by: Andi Kleen --- include/asm-x86_64/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h index 8da9609070f..d4dbbe5f7bd 100644 --- a/include/asm-x86_64/bitops.h +++ b/include/asm-x86_64/bitops.h @@ -7,7 +7,7 @@ #include -#if __GNUC__ < 4 || __GNUC_MINOR__ < 1 +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) /* Technically wrong, but this avoids compilation errors on some gcc versions. */ #define ADDR "=m" (*(volatile long *) addr) -- cgit v1.2.3 From 1a1eecd1c272f704f135a7d8060ec3da1c201b4c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 13 Feb 2007 13:26:25 +0100 Subject: [PATCH] i386: Remove fastcall in paravirt.[ch] Not needed because fastcall is always default now Signed-off-by: Andi Kleen --- include/asm-i386/paravirt.h | 134 ++++++++++++++++++++++---------------------- 1 file changed, 67 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 12ef95924da..6317e0a4d73 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -59,102 +59,102 @@ struct paravirt_ops convention. This makes it easier to implement inline assembler replacements. */ - void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx, + void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); - unsigned long (fastcall *get_debugreg)(int regno); - void (fastcall *set_debugreg)(int regno, unsigned long value); + unsigned long (*get_debugreg)(int regno); + void (*set_debugreg)(int regno, unsigned long value); - void (fastcall *clts)(void); + void (*clts)(void); - unsigned long (fastcall *read_cr0)(void); - void (fastcall *write_cr0)(unsigned long); + unsigned long (*read_cr0)(void); + void (*write_cr0)(unsigned long); - unsigned long (fastcall *read_cr2)(void); - void (fastcall *write_cr2)(unsigned long); + unsigned long (*read_cr2)(void); + void (*write_cr2)(unsigned long); - unsigned long (fastcall *read_cr3)(void); - void (fastcall *write_cr3)(unsigned long); + unsigned long (*read_cr3)(void); + void (*write_cr3)(unsigned long); - unsigned long (fastcall *read_cr4_safe)(void); - unsigned long (fastcall *read_cr4)(void); - void (fastcall *write_cr4)(unsigned long); + unsigned long (*read_cr4_safe)(void); + unsigned long (*read_cr4)(void); + void (*write_cr4)(unsigned long); - unsigned long (fastcall *save_fl)(void); - void (fastcall *restore_fl)(unsigned long); - void (fastcall *irq_disable)(void); - void (fastcall *irq_enable)(void); - void (fastcall *safe_halt)(void); - void (fastcall *halt)(void); - void (fastcall *wbinvd)(void); + unsigned long (*save_fl)(void); + void (*restore_fl)(unsigned long); + void (*irq_disable)(void); + void (*irq_enable)(void); + void (*safe_halt)(void); + void (*halt)(void); + void (*wbinvd)(void); /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ - u64 (fastcall *read_msr)(unsigned int msr, int *err); - int (fastcall *write_msr)(unsigned int msr, u64 val); - - u64 (fastcall *read_tsc)(void); - u64 (fastcall *read_pmc)(void); - - void (fastcall *load_tr_desc)(void); - void (fastcall *load_gdt)(const struct Xgt_desc_struct *); - void (fastcall *load_idt)(const struct Xgt_desc_struct *); - void (fastcall *store_gdt)(struct Xgt_desc_struct *); - void (fastcall *store_idt)(struct Xgt_desc_struct *); - void (fastcall *set_ldt)(const void *desc, unsigned entries); - unsigned long (fastcall *store_tr)(void); - void (fastcall *load_tls)(struct thread_struct *t, unsigned int cpu); - void (fastcall *write_ldt_entry)(void *dt, int entrynum, + u64 (*read_msr)(unsigned int msr, int *err); + int (*write_msr)(unsigned int msr, u64 val); + + u64 (*read_tsc)(void); + u64 (*read_pmc)(void); + + void (*load_tr_desc)(void); + void (*load_gdt)(const struct Xgt_desc_struct *); + void (*load_idt)(const struct Xgt_desc_struct *); + void (*store_gdt)(struct Xgt_desc_struct *); + void (*store_idt)(struct Xgt_desc_struct *); + void (*set_ldt)(const void *desc, unsigned entries); + unsigned long (*store_tr)(void); + void (*load_tls)(struct thread_struct *t, unsigned int cpu); + void (*write_ldt_entry)(void *dt, int entrynum, u32 low, u32 high); - void (fastcall *write_gdt_entry)(void *dt, int entrynum, + void (*write_gdt_entry)(void *dt, int entrynum, u32 low, u32 high); - void (fastcall *write_idt_entry)(void *dt, int entrynum, + void (*write_idt_entry)(void *dt, int entrynum, u32 low, u32 high); - void (fastcall *load_esp0)(struct tss_struct *tss, + void (*load_esp0)(struct tss_struct *tss, struct thread_struct *thread); - void (fastcall *set_iopl_mask)(unsigned mask); + void (*set_iopl_mask)(unsigned mask); - void (fastcall *io_delay)(void); + void (*io_delay)(void); void (*const_udelay)(unsigned long loops); #ifdef CONFIG_X86_LOCAL_APIC - void (fastcall *apic_write)(unsigned long reg, unsigned long v); - void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v); - unsigned long (fastcall *apic_read)(unsigned long reg); + void (*apic_write)(unsigned long reg, unsigned long v); + void (*apic_write_atomic)(unsigned long reg, unsigned long v); + unsigned long (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); #endif - void (fastcall *flush_tlb_user)(void); - void (fastcall *flush_tlb_kernel)(void); - void (fastcall *flush_tlb_single)(u32 addr); - - void (fastcall *alloc_pt)(u32 pfn); - void (fastcall *alloc_pd)(u32 pfn); - void (fastcall *alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); - void (fastcall *release_pt)(u32 pfn); - void (fastcall *release_pd)(u32 pfn); - - void (fastcall *set_pte)(pte_t *ptep, pte_t pteval); - void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); - void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); - void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_single)(u32 addr); + + void (*alloc_pt)(u32 pfn); + void (*alloc_pd)(u32 pfn); + void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); + void (*release_pt)(u32 pfn); + void (*release_pd)(u32 pfn); + + void (*set_pte)(pte_t *ptep, pte_t pteval); + void (*set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); + void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (*pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (*pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); #ifdef CONFIG_X86_PAE - void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - void (fastcall *set_pud)(pud_t *pudp, pud_t pudval); - void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (fastcall *pmd_clear)(pmd_t *pmdp); + void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (*set_pud)(pud_t *pudp, pud_t pudval); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pmd_clear)(pmd_t *pmdp); #endif - void (fastcall *set_lazy_mode)(int mode); + void (*set_lazy_mode)(int mode); /* These two are jmp to, not actually called. */ - void (fastcall *irq_enable_sysexit)(void); - void (fastcall *iret)(void); + void (*irq_enable_sysexit)(void); + void (*iret)(void); - void (fastcall *startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); + void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); }; /* Mark a paravirt probe function. */ -- cgit v1.2.3 From 9fbbd4dd17d0712054368e5e939e28b2456bfe1b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 13 Feb 2007 13:26:26 +0100 Subject: [PATCH] x86: Don't require the vDSO for handling a.out signals and in other strange binfmts. vDSO is not necessarily mapped there. Signed-off-by: Andi Kleen --- include/linux/binfmts.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c1e82c51444..2d956cd566a 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -59,6 +59,7 @@ struct linux_binfmt { int (*load_shlib)(struct file *); int (*core_dump)(long signr, struct pt_regs * regs, struct file * file); unsigned long min_coredump; /* minimal dump size */ + int hasvdso; }; extern int register_binfmt(struct linux_binfmt *); -- cgit v1.2.3 From 105fddb862d3da2f414329ff7719794fb2bd706b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 13 Feb 2007 13:26:26 +0100 Subject: [PATCH] i386: Move mce_disabled to asm/mce.h Allows external actors to disable mce. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen =================================================================== --- include/asm-i386/mce.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-i386/mce.h b/include/asm-i386/mce.h index 7cc1a973bf0..b0a02ee34ff 100644 --- a/include/asm-i386/mce.h +++ b/include/asm-i386/mce.h @@ -3,3 +3,5 @@ extern void mcheck_init(struct cpuinfo_x86 *c); #else #define mcheck_init(c) do {} while(0) #endif + +extern int mce_disabled; -- cgit v1.2.3 From 2a57ff1a7051f0936b57342a57c25658d7ca3cc6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 13 Feb 2007 13:26:26 +0100 Subject: [PATCH] i386: Rename cpu_gdt_descr and remove extern declaration from smpboot.c When I implemented the DECLARE_PER_CPU(var) macros, I was careful that people couldn't use "var" in a non-percpu context, by prepending percpu__. I never considered that this would allow them to overload the same name for a per-cpu and a non-percpu variable. It is only one of many horrors in the i386 boot code, but let's rename the non-perpcu cpu_gdt_descr to early_gdt_descr (not boot_gdt_descr, that's something else...) Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen =================================================================== --- include/asm-i386/desc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index f398cc45644..050831f34f7 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -22,7 +22,7 @@ struct Xgt_desc_struct { extern struct Xgt_desc_struct idt_descr; DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); - +extern struct Xgt_desc_struct early_gdt_descr; static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { -- cgit v1.2.3 From 40d22c1b5675e428b3f3f9a945d0bd62e94ca2f1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 13 Feb 2007 13:26:26 +0100 Subject: [PATCH] i386: Remove extern declaration from mm/discontig.c, put in header. Extern declarations belong in headers. Times, they are a'changin. Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen =================================================================== --- include/asm-i386/setup.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 76316275d6f..0e8077cbfda 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -77,6 +77,8 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map); void __init add_memory_region(unsigned long long start, unsigned long long size, int type); +extern unsigned long init_pg_tables_end; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 126b1922367fbe5513daa675a2abd13ed3917f4e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 13 Feb 2007 13:26:26 +0100 Subject: [PATCH] x86-64: Remove mk_pte_phys() - Convert last user to pfn_pte - Remove mk_pte_phys Suggested by Jan Beulich Signed-off-by: Andi Kleen --- include/asm-x86_64/pgtable.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 59901c690a0..730bd602841 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -359,15 +359,6 @@ static inline int pmd_large(pmd_t pte) { #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) #define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) -/* physical address -> PTE */ -static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) -{ - pte_t pte; - pte_val(pte) = physpage | pgprot_val(pgprot); - pte_val(pte) &= __supported_pte_mask; - return pte; -} - /* Change flags of a PTE */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { -- cgit v1.2.3