diff options
Diffstat (limited to 'arch')
63 files changed, 827 insertions, 506 deletions
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c index a85cb611ecd..f1268b8e6f9 100644 --- a/arch/ia64/kvm/kvm_lib.c +++ b/arch/ia64/kvm/kvm_lib.c @@ -11,5 +11,11 @@ * */ #undef CONFIG_MODULES +#include <linux/module.h> +#undef CONFIG_KALLSYMS +#undef EXPORT_SYMBOL +#undef EXPORT_SYMBOL_GPL +#define EXPORT_SYMBOL(sym) +#define EXPORT_SYMBOL_GPL(sym) #include "../../../lib/vsprintf.c" #include "../../../lib/ctype.c" diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bf6cedfa05d..d00131ca083 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -62,7 +62,6 @@ config HAVE_LATENCYTOP_SUPPORT config TRACE_IRQFLAGS_SUPPORT bool - depends on PPC64 default y config LOCKDEP_SUPPORT diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 2f50acd11a6..3d80c3e9cf6 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -36,3 +36,13 @@ zImage.pseries zconf.h zlib.h zutil.h +fdt.c +fdt.h +fdt_ro.c +fdt_rw.c +fdt_strerror.c +fdt_sw.c +fdt_wip.c +libfdt.h +libfdt_internal.h + diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts index 26549fca2ed..49ac36b16dd 100644 --- a/arch/powerpc/boot/dts/amigaone.dts +++ b/arch/powerpc/boot/dts/amigaone.dts @@ -70,8 +70,8 @@ devsel-speed = <0x00000001>; min-grant = <0>; max-latency = <0>; - /* First 64k for I/O at 0x0 on PCI mapped to 0x0 on ISA. */ - ranges = <0x00000001 0 0x01000000 0 0x00000000 0x00010000>; + /* First 4k for I/O at 0x0 on PCI mapped to 0x0 on ISA. */ + ranges = <0x00000001 0 0x01000000 0 0x00000000 0x00001000>; interrupt-parent = <&i8259>; #interrupt-cells = <2>; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/mpc8569mds.dts b/arch/powerpc/boot/dts/mpc8569mds.dts index a8dcb018c4a..a680165292f 100644 --- a/arch/powerpc/boot/dts/mpc8569mds.dts +++ b/arch/powerpc/boot/dts/mpc8569mds.dts @@ -253,6 +253,7 @@ /* Filled in by U-Boot */ clock-frequency = <0>; status = "disabled"; + sdhci,1-bit-only; }; crypto@30000 { diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index 2ff798744c1..7685ffde882 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -598,8 +598,6 @@ typedef struct risc_timer_pram { #define CICR_IEN ((uint)0x00000080) /* Int. enable */ #define CICR_SPS ((uint)0x00000001) /* SCC Spread */ -#define IMAP_ADDR (get_immrbase()) - #define CPM_PIN_INPUT 0 #define CPM_PIN_OUTPUT 1 #define CPM_PIN_PRIMARY 0 diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 3d9e887c3c0..b44aaabdd1a 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -309,7 +309,9 @@ static inline void dma_sync_single_for_cpu(struct device *dev, struct dma_mapping_ops *dma_ops = get_dma_ops(dev); BUG_ON(!dma_ops); - dma_ops->sync_single_range_for_cpu(dev, dma_handle, 0, + + if (dma_ops->sync_single_range_for_cpu) + dma_ops->sync_single_range_for_cpu(dev, dma_handle, 0, size, direction); } @@ -320,7 +322,9 @@ static inline void dma_sync_single_for_device(struct device *dev, struct dma_mapping_ops *dma_ops = get_dma_ops(dev); BUG_ON(!dma_ops); - dma_ops->sync_single_range_for_device(dev, dma_handle, + + if (dma_ops->sync_single_range_for_device) + dma_ops->sync_single_range_for_device(dev, dma_handle, 0, size, direction); } @@ -331,7 +335,9 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct dma_mapping_ops *dma_ops = get_dma_ops(dev); BUG_ON(!dma_ops); - dma_ops->sync_sg_for_cpu(dev, sgl, nents, direction); + + if (dma_ops->sync_sg_for_cpu) + dma_ops->sync_sg_for_cpu(dev, sgl, nents, direction); } static inline void dma_sync_sg_for_device(struct device *dev, @@ -341,7 +347,9 @@ static inline void dma_sync_sg_for_device(struct device *dev, struct dma_mapping_ops *dma_ops = get_dma_ops(dev); BUG_ON(!dma_ops); - dma_ops->sync_sg_for_device(dev, sgl, nents, direction); + + if (dma_ops->sync_sg_for_device) + dma_ops->sync_sg_for_device(dev, sgl, nents, direction); } static inline void dma_sync_single_range_for_cpu(struct device *dev, @@ -351,7 +359,9 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, struct dma_mapping_ops *dma_ops = get_dma_ops(dev); BUG_ON(!dma_ops); - dma_ops->sync_single_range_for_cpu(dev, dma_handle, + + if (dma_ops->sync_single_range_for_cpu) + dma_ops->sync_single_range_for_cpu(dev, dma_handle, offset, size, direction); } @@ -362,7 +372,9 @@ static inline void dma_sync_single_range_for_device(struct device *dev, struct dma_mapping_ops *dma_ops = get_dma_ops(dev); BUG_ON(!dma_ops); - dma_ops->sync_single_range_for_device(dev, dma_handle, offset, + + if (dma_ops->sync_single_range_for_device) + dma_ops->sync_single_range_for_device(dev, dma_handle, offset, size, direction); } #else /* CONFIG_PPC_NEED_DMA_SYNC_OPS */ diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 684a73f4324..a74c4ee6c02 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -22,9 +22,7 @@ #ifdef __KERNEL__ -#include <linux/init.h> #include <linux/interrupt.h> -#include <linux/highmem.h> #include <asm/kmap_types.h> #include <asm/tlbflush.h> #include <asm/page.h> @@ -62,6 +60,9 @@ extern pte_t *pkmap_page_table; extern void *kmap_high(struct page *page); extern void kunmap_high(struct page *page); +extern void *kmap_atomic_prot(struct page *page, enum km_type type, + pgprot_t prot); +extern void kunmap_atomic(void *kvaddr, enum km_type type); static inline void *kmap(struct page *page) { @@ -79,62 +80,11 @@ static inline void kunmap(struct page *page) kunmap_high(page); } -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -static inline void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) -{ - unsigned int idx; - unsigned long vaddr; - - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - - debug_kmap_atomic(type); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte-idx))); -#endif - __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1); - local_flush_tlb_page(NULL, vaddr); - - return (void*) vaddr; -} - static inline void *kmap_atomic(struct page *page, enum km_type type) { return kmap_atomic_prot(page, type, kmap_prot); } -static inline void kunmap_atomic(void *kvaddr, enum km_type type) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - - if (vaddr < __fix_to_virt(FIX_KMAP_END)) { - pagefault_enable(); - return; - } - - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_page(NULL, vaddr); -#endif - pagefault_enable(); -} - static inline struct page *kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long) ptr; @@ -148,6 +98,7 @@ static inline struct page *kmap_atomic_to_page(void *ptr) return pte_page(*pte); } + #define flush_cache_kmaps() flush_cache_all() #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 867ab8ed69b..8b505eaaa38 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -68,13 +68,13 @@ static inline int irqs_disabled_flags(unsigned long flags) #if defined(CONFIG_BOOKE) #define SET_MSR_EE(x) mtmsr(x) -#define local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") +#define raw_local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") #else #define SET_MSR_EE(x) mtmsr(x) -#define local_irq_restore(flags) mtmsr(flags) +#define raw_local_irq_restore(flags) mtmsr(flags) #endif -static inline void local_irq_disable(void) +static inline void raw_local_irq_disable(void) { #ifdef CONFIG_BOOKE __asm__ __volatile__("wrteei 0": : :"memory"); @@ -86,7 +86,7 @@ static inline void local_irq_disable(void) #endif } -static inline void local_irq_enable(void) +static inline void raw_local_irq_enable(void) { #ifdef CONFIG_BOOKE __asm__ __volatile__("wrteei 1": : :"memory"); @@ -98,7 +98,7 @@ static inline void local_irq_enable(void) #endif } -static inline void local_irq_save_ptr(unsigned long *flags) +static inline void raw_local_irq_save_ptr(unsigned long *flags) { unsigned long msr; msr = mfmsr(); @@ -110,12 +110,12 @@ static inline void local_irq_save_ptr(unsigned long *flags) #endif } -#define local_save_flags(flags) ((flags) = mfmsr()) -#define local_irq_save(flags) local_irq_save_ptr(&flags) -#define irqs_disabled() ((mfmsr() & MSR_EE) == 0) +#define raw_local_save_flags(flags) ((flags) = mfmsr()) +#define raw_local_irq_save(flags) raw_local_irq_save_ptr(&flags) +#define raw_irqs_disabled() ((mfmsr() & MSR_EE) == 0) +#define raw_irqs_disabled_flags(flags) (((flags) & MSR_EE) == 0) -#define hard_irq_enable() local_irq_enable() -#define hard_irq_disable() local_irq_disable() +#define hard_irq_disable() raw_local_irq_disable() static inline int irqs_disabled_flags(unsigned long flags) { diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index e05d26fa372..82b72207c51 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -47,7 +47,8 @@ * generic accessors and iterators here */ #define __real_pte(e,p) ((real_pte_t) { \ - (e), pte_val(*((p) + PTRS_PER_PTE)) }) + (e), ((e) & _PAGE_COMBO) ? \ + (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) #define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) #define __rpte_to_pte(r) ((r).pte) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 01c12339b30..168fce72620 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -58,7 +58,7 @@ struct rtas_t { unsigned long entry; /* physical address pointer */ unsigned long base; /* physical address pointer */ unsigned long size; - spinlock_t lock; + raw_spinlock_t lock; struct rtas_args args; struct device_node *dev; /* virtual address pointer */ }; @@ -245,5 +245,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg) (devfn << 8) | (reg & 0xff); } +extern void __cpuinit rtas_give_timebase(void); +extern void __cpuinit rtas_take_timebase(void); + #endif /* __KERNEL__ */ #endif /* _POWERPC_RTAS_H */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4dd38f12915..3cadba60a4b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -191,11 +191,49 @@ transfer_to_handler_cont: mflr r9 lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ +#ifdef CONFIG_TRACE_IRQFLAGS + lis r12,reenable_mmu@h + ori r12,r12,reenable_mmu@l + mtspr SPRN_SRR0,r12 + mtspr SPRN_SRR1,r10 + SYNC + RFI +reenable_mmu: /* re-enable mmu so we can */ + mfmsr r10 + lwz r12,_MSR(r1) + xor r10,r10,r12 + andi. r10,r10,MSR_EE /* Did EE change? */ + beq 1f + + /* Save handler and return address into the 2 unused words + * of the STACK_FRAME_OVERHEAD (sneak sneak sneak). Everything + * else can be recovered from the pt_regs except r3 which for + * normal interrupts has been set to pt_regs and for syscalls + * is an argument, so we temporarily use ORIG_GPR3 to save it + */ + stw r9,8(r1) + stw r11,12(r1) + stw r3,ORIG_GPR3(r1) + bl trace_hardirqs_off + lwz r0,GPR0(r1) + lwz r3,ORIG_GPR3(r1) + lwz r4,GPR4(r1) + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + lwz r9,8(r1) + lwz r11,12(r1) +1: mtctr r11 + mtlr r9 + bctr /* jump to handler */ +#else /* CONFIG_TRACE_IRQFLAGS */ mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 mtlr r9 SYNC RFI /* jump to handler, enable MMU */ +#endif /* CONFIG_TRACE_IRQFLAGS */ #if defined (CONFIG_6xx) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -251,6 +289,31 @@ _GLOBAL(DoSyscall) #ifdef SHOW_SYSCALLS bl do_show_syscall #endif /* SHOW_SYSCALLS */ +#ifdef CONFIG_TRACE_IRQFLAGS + /* Return from syscalls can (and generally will) hard enable + * interrupts. You aren't supposed to call a syscall with + * interrupts disabled in the first place. However, to ensure + * that we get it right vs. lockdep if it happens, we force + * that hard enable here with appropriate tracing if we see + * that we have been called with interrupts off + */ + mfmsr r11 + andi. r12,r11,MSR_EE + bne+ 1f + /* We came in with interrupts disabled, we enable them now */ + bl trace_hardirqs_on + mfmsr r11 + lwz r0,GPR0(r1) + lwz r3,GPR3(r1) + lwz r4,GPR4(r1) + ori r11,r11,MSR_EE + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + mtmsr r11 +1: +#endif /* CONFIG_TRACE_IRQFLAGS */ rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ lwz r11,TI_FLAGS(r10) andi. r11,r11,_TIF_SYSCALL_T_OR_A @@ -275,6 +338,7 @@ ret_from_syscall: rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ /* disable interrupts so current_thread_info()->flags can't change */ LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ + /* Note: We don't bother telling lockdep about it */ SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) @@ -288,6 +352,19 @@ ret_from_syscall: oris r11,r11,0x1000 /* Set SO bit in CR */ stw r11,_CCR(r1) syscall_exit_cont: + lwz r8,_MSR(r1) +#ifdef CONFIG_TRACE_IRQFLAGS + /* If we are going to return from the syscall with interrupts + * off, we trace that here. It shouldn't happen though but we + * want to catch the bugger if it does right ? + */ + andi. r10,r8,MSR_EE + bne+ 1f + stw r3,GPR3(r1) + bl trace_hardirqs_off + lwz r3,GPR3(r1) +1: +#endif /* CONFIG_TRACE_IRQFLAGS */ #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) /* If the process has its own DBCR0 value, load it up. The internal debug mode bit tells us that dbcr0 should be loaded. */ @@ -311,7 +388,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtlr r4 mtcr r5 lwz r7,_NIP(r1) - lwz r8,_MSR(r1) FIX_SRR1(r8, r0) lwz r2,GPR2(r1) lwz r1,GPR1(r1) @@ -394,7 +470,9 @@ syscall_exit_work: andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) beq ret_from_except - /* Re-enable interrupts */ + /* Re-enable interrupts. There is no need to trace that with + * lockdep as we are supposed to have IRQs on at this point + */ ori r10,r10,MSR_EE SYNC MTMSRD(r10) @@ -705,6 +783,7 @@ ret_from_except: /* Hard-disable interrupts so that current_thread_info()->flags * can't change between when we test it and when we return * from the interrupt. */ + /* Note: We don't bother telling lockdep about it */ LOAD_MSR_KERNEL(r10,MSR_KERNEL) SYNC /* Some chip revs have problems here... */ MTMSRD(r10) /* disable interrupts */ @@ -744,11 +823,24 @@ resume_kernel: beq+ restore andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ +#ifdef CONFIG_TRACE_IRQFLAGS + /* Lockdep thinks irqs are enabled, we need to call + * preempt_schedule_irq with IRQs off, so we inform lockdep + * now that we -did- turn them off already + */ + bl trace_hardirqs_off +#endif 1: bl preempt_schedule_irq rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r3,TI_FLAGS(r9) andi. r0,r3,_TIF_NEED_RESCHED bne- 1b +#ifdef CONFIG_TRACE_IRQFLAGS + /* And now, to properly rebalance the above, we tell lockdep they + * are being turned back on, which will happen when we return + */ + bl trace_hardirqs_on +#endif #else resume_kernel: #endif /* CONFIG_PREEMPT */ @@ -765,6 +857,28 @@ restore: stw r6,icache_44x_need_flush@l(r4) 1: #endif /* CONFIG_44x */ + + lwz r9,_MSR(r1) +#ifdef CONFIG_TRACE_IRQFLAGS + /* Lockdep doesn't know about the fact that IRQs are temporarily turned + * off in this assembly code while peeking at TI_FLAGS() and such. However + * we need to inform it if the exception turned interrupts off, and we + * are about to trun them back on. + * + * The problem here sadly is that we don't know whether the exceptions was + * one that turned interrupts off or not. So we always tell lockdep about + * turning them on here when we go back to wherever we came from with EE + * on, even if that may meen some redudant calls being tracked. Maybe later + * we could encode what the exception did somewhere or test the exception + * type in the pt_regs but that sounds overkill + */ + andi. r10,r9,MSR_EE + beq 1f + bl trace_hardirqs_on + lwz r9,_MSR(r1) +1: +#endif /* CONFIG_TRACE_IRQFLAGS */ + lwz r0,GPR0(r1) lwz r2,GPR2(r1) REST_4GPRS(3, r1) @@ -782,7 +896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) - lwz r9,_MSR(r1) andi. r10,r9,MSR_RI /* check if this exception occurred */ beql nonrecoverable /* at a bad place (MSR:RI = 0) */ @@ -805,7 +918,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) MTMSRD(r10) /* clear the RI bit */ .globl exc_exit_restart exc_exit_restart: - lwz r9,_MSR(r1) lwz r12,_NIP(r1) FIX_SRR1(r9,r10) mtspr SPRN_SRR0,r12 @@ -1035,11 +1147,18 @@ do_work: /* r10 contains MSR_KERNEL here */ beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ + /* Note: We don't need to inform lockdep that we are enabling + * interrupts here. As far as it knows, they are already enabled + */ ori r10,r10,MSR_EE SYNC MTMSRD(r10) /* hard-enable interrupts */ bl schedule recheck: + /* Note: And we don't tell it we are disabling them again + * neither. Those disable/enable cycles used to peek at + * TI_FLAGS aren't advertised. + */ LOAD_MSR_KERNEL(r10,MSR_KERNEL) SYNC MTMSRD(r10) /* disable interrupts */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 48469463f89..fc213294275 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -1124,9 +1124,8 @@ mmu_off: RFI /* - * Use the first pair of BAT registers to map the 1st 16MB - * of RAM to PAGE_OFFSET. From this point on we can't safely - * call OF any more. + * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET + * (we keep one for debugging) and on others, we use one 256M BAT. */ initial_bats: lis r11,PAGE_OFFSET@h @@ -1136,12 +1135,16 @@ initial_bats: bne 4f ori r11,r11,4 /* set up BAT registers for 601 */ li r8,0x7f /* valid, block length = 8MB */ - oris r9,r11,0x800000@h /* set up BAT reg for 2nd 8M */ - oris r10,r8,0x800000@h /* set up BAT reg for 2nd 8M */ mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ mtspr SPRN_IBAT0L,r8 /* lower BAT register */ - mtspr SPRN_IBAT1U,r9 - mtspr SPRN_IBAT1L,r10 + addis r11,r11,0x800000@h + addis r8,r8,0x800000@h + mtspr SPRN_IBAT1U,r11 + mtspr SPRN_IBAT1L,r8 + addis r11,r11,0x800000@h + addis r8,r8,0x800000@h + mtspr SPRN_IBAT2U,r11 + mtspr SPRN_IBAT2L,r8 isync blr diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index fa983a59c4c..a359cb08e90 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -76,7 +76,7 @@ struct of_device *of_device_alloc(struct device_node *np, dev->dev.archdata.of_node = np; if (bus_id) - dev_set_name(&dev->dev, bus_id); + dev_set_name(&dev->dev, "%s", bus_id); else of_device_make_bus_id(dev); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3e7135bbe40..892a9f2e6d7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -528,7 +528,7 @@ void show_regs(struct pt_regs * regs) for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) - printk("\n" KERN_INFO "GPR%02d: ", i); + printk("\nGPR%02d: ", i); printk(REG " ", regs->gpr[i]); if (i == LAST_VOLATILE && !FULL_REGS(regs)) break; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ee4c7609b64..c434823b8c8 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -38,9 +38,10 @@ #include <asm/syscalls.h> #include <asm/smp.h> #include <asm/atomic.h> +#include <asm/time.h> struct rtas_t rtas = { - .lock = SPIN_LOCK_UNLOCKED + .lock = __RAW_SPIN_LOCK_UNLOCKED }; EXPORT_SYMBOL(rtas); @@ -67,6 +68,28 @@ unsigned long rtas_rmo_buf; void (*rtas_flash_term_hook)(int); EXPORT_SYMBOL(rtas_flash_term_hook); +/* RTAS use home made raw locking instead of spin_lock_irqsave + * because those can be called from within really nasty contexts + * such as having the timebase stopped which would lockup with + * normal locks and spinlock debugging enabled + */ +static unsigned long lock_rtas(void) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + __raw_spin_lock_flags(&rtas.lock, flags); + return flags; +} + +static void unlock_rtas(unsigned long flags) +{ + __raw_spin_unlock(&rtas.lock); + local_irq_restore(flags); + preempt_enable(); +} + /* * call_rtas_display_status and call_rtas_display_status_delay * are designed only for very early low-level debugging, which @@ -79,7 +102,7 @@ static void call_rtas_display_status(char c) if (!rtas.base) return; - spin_lock_irqsave(&rtas.lock, s); + s = lock_rtas(); args->token = 10; args->nargs = 1; @@ -89,7 +112,7 @@ static void call_rtas_display_status(char c) enter_rtas(__pa(args)); - spin_unlock_irqrestore(&rtas.lock, s); + unlock_rtas(s); } static void call_rtas_display_status_delay(char c) @@ -411,8 +434,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) return -1; - /* Gotta do something different here, use global lock for now... */ - spin_lock_irqsave(&rtas.lock, s); + s = lock_rtas(); rtas_args = &rtas.args; rtas_args->token = token; @@ -439,8 +461,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) outputs[i] = rtas_args->rets[i+1]; ret = (nret > 0)? rtas_args->rets[0]: 0; - /* Gotta do something different here, use global lock for now... */ - spin_unlock_irqrestore(&rtas.lock, s); + unlock_rtas(s); if (buff_copy) { log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); @@ -837,7 +858,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) buff_copy = get_errorlog_buffer(); - spin_lock_irqsave(&rtas.lock, flags); + flags = lock_rtas(); rtas.args = args; enter_rtas(__pa(&rtas.args)); @@ -848,7 +869,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) if (args.rets[0] == -1) errbuf = __fetch_rtas_last_error(buff_copy); - spin_unlock_irqrestore(&rtas.lock, flags); + unlock_rtas(flags); if (buff_copy) { if (errbuf) @@ -951,3 +972,33 @@ int __init early_init_dt_scan_rtas(unsigned long node, /* break now */ return 1; } + +static raw_spinlock_t timebase_lock; +static u64 timebase = 0; + +void __cpuinit rtas_give_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + hard_irq_disable(); + __raw_spin_lock(&timebase_lock); + rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + timebase = get_tb(); + __raw_spin_unlock(&timebase_lock); + + while (timebase) + barrier(); + rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); + local_irq_restore(flags); +} + +void __cpuinit rtas_take_timebase(void) +{ + while (!timebase) + barrier(); + __raw_spin_lock(&timebase_lock); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + __raw_spin_unlock(&timebase_lock); +} diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1d154248cf4..e1e3059cf34 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -119,6 +119,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) */ notrace void __init machine_init(unsigned long dt_ptr) { + lockdep_init(); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 65484b2200b..0b47de07302 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -68,7 +68,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; -static volatile unsigned int cpu_callin_map[NR_CPUS]; +/* Can't be static due to PowerMac hackery */ +volatile unsigned int cpu_callin_map[NR_CPUS]; int smt_enabled_at_boot = 1; diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 0362a891e54..acb74a17bbb 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -219,7 +219,7 @@ void udbg_init_pas_realmode(void) #ifdef CONFIG_PPC_EARLY_DEBUG_44x #include <platforms/44x/44x.h> -static int udbg_44x_as1_flush(void) +static void udbg_44x_as1_flush(void) { if (udbg_comport) { while ((as1_readb(&udbg_comport->lsr) & LSR_THRE) == 0) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 2d2192e48de..3e68363405b 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c new file mode 100644 index 00000000000..c2186c74c85 --- /dev/null +++ b/arch/powerpc/mm/highmem.c @@ -0,0 +1,77 @@ +/* + * highmem.c: virtual kernel memory mappings for high memory + * + * PowerPC version, stolen from the i386 version. + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * + * + * Redesigned the x86 32-bit VM architecture to deal with + * up to 16 Terrabyte physical memory. With current x86 CPUs + * we now support up to 64 Gigabytes physical RAM. + * + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + * + * Reworked for PowerPC by various contributors. Moved from + * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp. + */ + +#include <linux/highmem.h> +#include <linux/module.h> + +/* + * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap + * gives a more generic (and caching) interface. But kmap_atomic can + * be used in IRQ contexts, so in some (very limited) cases we need + * it. + */ +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +{ + unsigned int idx; + unsigned long vaddr; + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + debug_kmap_atomic(type); + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(*(kmap_pte-idx))); +#endif + __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1); + local_flush_tlb_page(NULL, vaddr); + + return (void*) vaddr; +} +EXPORT_SYMBOL(kmap_atomic_prot); + +void kunmap_atomic(void *kvaddr, enum km_type type) +{ +#ifdef CONFIG_DEBUG_HIGHMEM + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + if (vaddr < __fix_to_virt(FIX_KMAP_END)) { + pagefault_enable(); + return; + } + + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + + /* + * force other mappings to Oops if they'll try to access + * this pte without first remap it + */ + pte_clear(&init_mm, vaddr, kmap_pte-idx); + local_flush_tlb_page(NULL, vaddr); +#endif + pagefault_enable(); +} +EXPORT_SYMBOL(kunmap_atomic); diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 42e09a9f77e..0362c88f47d 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -16,6 +16,7 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/of_gpio.h> +#include <linux/of_i2c.h> #include <asm/machdep.h> #include <asm/prom.h> @@ -65,7 +66,6 @@ define_machine(warp) { static u32 post_info; -/* I am not sure this is the best place for this... */ static int __init warp_post_info(void) { struct device_node *np; @@ -194,9 +194,9 @@ static int pika_setup_leds(void) return 0; } -static void pika_setup_critical_temp(struct i2c_client *client) +static void pika_setup_critical_temp(struct device_node *np, + struct i2c_client *client) { - struct device_node *np; int irq, rc; /* Do this before enabling critical temp interrupt since we @@ -208,14 +208,7 @@ static void pika_setup_critical_temp(struct i2c_client *client) i2c_smbus_write_byte_data(client, 2, 65); /* Thigh */ i2c_smbus_write_byte_data(client, 3, 0); /* Tlow */ - np = of_find_compatible_node(NULL, NULL, "adi,ad7414"); - if (np == NULL) { - printk(KERN_ERR __FILE__ ": Unable to find ad7414\n"); - return; - } - irq = irq_of_parse_and_map(np, 0); - of_node_put(np); if (irq == NO_IRQ) { printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n"); return; @@ -244,32 +237,24 @@ static inline void pika_dtm_check_fan(void __iomem *fpga) static int pika_dtm_thread(void __iomem *fpga) { - struct i2c_adapter *adap; + struct device_node *np; struct i2c_client *client; - /* We loop in case either driver was compiled as a module and - * has not been insmoded yet. - */ - while (!(adap = i2c_get_adapter(0))) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - } - - while (1) { - list_for_each_entry(client, &adap->clients, list) - if (client->addr == 0x4a) - goto found_it; + np = of_find_compatible_node(NULL, NULL, "adi,ad7414"); + if (np == NULL) + return -ENOENT; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); + client = of_find_i2c_device_by_node(np); + if (client == NULL) { + of_node_put(np); + return -ENOENT; } -found_it: - pika_setup_critical_temp(client); + pika_setup_critical_temp(np, client); - i2c_put_adapter(adap); + of_node_put(np); - printk(KERN_INFO "PIKA DTM thread running.\n"); + printk(KERN_INFO "Warp DTM thread running.\n"); while (!kthread_should_stop()) { int val; @@ -291,7 +276,6 @@ found_it: return 0; } - static int __init pika_dtm_start(void) { struct task_struct *dtm_thread; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 77f90b35635..60ed9c067b1 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -285,6 +285,7 @@ static struct of_device_id mpc85xx_ids[] = { { .type = "qe", }, { .compatible = "fsl,qe", }, { .compatible = "gianfar", }, + { .compatible = "fsl,rapidio-delta", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index cc0b0db8a6f..62c592ede64 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -52,20 +52,19 @@ smp_85xx_kick_cpu(int nr) pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr); - local_irq_save(flags); - np = of_get_cpu_node(nr, NULL); cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL); if (cpu_rel_addr == NULL) { printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr); - local_irq_restore(flags); return; } /* Map the spin table */ bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY); + local_irq_save(flags); + out_be32(bptr_vaddr + BOOT_ENTRY_PIR, nr); out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start)); @@ -73,10 +72,10 @@ smp_85xx_kick_cpu(int nr) while ((__secondary_hold_acknowledge != nr) && (++n < 1000)) mdelay(1); - iounmap(bptr_vaddr); - local_irq_restore(flags); + iounmap(bptr_vaddr); + pr_debug("waited %d msecs for CPU #%d.\n", n, nr); } diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c index d0e8443b12c..747d8fb3ab8 100644 --- a/arch/powerpc/platforms/85xx/socrates.c +++ b/arch/powerpc/platforms/85xx/socrates.c @@ -102,10 +102,11 @@ static struct of_device_id __initdata socrates_of_bus_ids[] = { {}, }; -static void __init socrates_init(void) +static int __init socrates_publish_devices(void) { - of_platform_bus_probe(NULL, socrates_of_bus_ids, NULL); + return of_platform_bus_probe(NULL, socrates_of_bus_ids, NULL); } +machine_device_initcall(socrates, socrates_publish_devices); /* * Called very early, device-tree isn't unflattened @@ -124,7 +125,6 @@ define_machine(socrates) { .name = "Socrates", .probe = socrates_probe, .setup_arch = socrates_setup_arch, - .init = socrates_init, .init_IRQ = socrates_pic_init, .get_irq = mpic_get_irq, .restart = fsl_rstcr_restart, diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index ee01532786e..1b426050a2f 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -32,7 +32,6 @@ #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> -#include <linux/of_platform.h> /* A few bit definitions needed for fixups on some boards */ #define MPC85xx_L2CTL_L2E 0x80000000 /* L2 enable */ diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index 9046803c827..bc97fada48c 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -36,7 +36,6 @@ #include <asm/prom.h> #include <asm/smp.h> #include <asm/paca.h> -#include <asm/time.h> #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/firmware.h> @@ -140,31 +139,6 @@ static void __devinit smp_cell_setup_cpu(int cpu) mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); } -static DEFINE_SPINLOCK(timebase_lock); -static unsigned long timebase = 0; - -static void __devinit cell_give_timebase(void) -{ - spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); - timebase = get_tb(); - spin_unlock(&timebase_lock); - - while (timebase) - barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); -} - -static void __devinit cell_take_timebase(void) -{ - while (!timebase) - barrier(); - spin_lock(&timebase_lock); - set_tb(timebase >> 32, timebase & 0xffffffff); - timebase = 0; - spin_unlock(&timebase_lock); -} - static void __devinit smp_cell_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); @@ -224,8 +198,8 @@ void __init smp_init_cell(void) /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { - smp_ops->give_timebase = cell_give_timebase; - smp_ops->take_timebase = cell_take_timebase; + smp_ops->give_timebase = rtas_give_timebase; + smp_ops->take_timebase = rtas_take_timebase; } DBG(" <- smp_init_cell()\n"); diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index 10a4a4d063b..02cafecc90e 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -26,7 +26,6 @@ #include <asm/io.h> #include <asm/prom.h> #include <asm/smp.h> -#include <asm/time.h> #include <asm/machdep.h> #include <asm/mpic.h> #include <asm/rtas.h> @@ -42,40 +41,12 @@ static void __devinit smp_chrp_setup_cpu(int cpu_nr) mpic_setup_this_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); -static unsigned int timebase_upper = 0, timebase_lower = 0; - -void __devinit smp_chrp_give_timebase(void) -{ - spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); - timebase_upper = get_tbu(); - timebase_lower = get_tbl(); - spin_unlock(&timebase_lock); - - while (timebase_upper || timebase_lower) - barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); -} - -void __devinit smp_chrp_take_timebase(void) -{ - while (!(timebase_upper || timebase_lower)) - barrier(); - spin_lock(&timebase_lock); - set_tb(timebase_upper, timebase_lower); - timebase_upper = 0; - timebase_lower = 0; - spin_unlock(&timebase_lock); - printk("CPU %i taken timebase\n", smp_processor_id()); -} - /* CHRP with openpic */ struct smp_ops_t chrp_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_chrp_kick_cpu, .setup_cpu = smp_chrp_setup_cpu, - .give_timebase = smp_chrp_give_timebase, - .take_timebase = smp_chrp_take_timebase, + .give_timebase = rtas_give_timebase, + .take_timebase = rtas_take_timebase, }; diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 153051eb6d9..a4619347aa7 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -71,20 +71,25 @@ static void pas_restart(char *cmd) } #ifdef CONFIG_SMP -static DEFINE_SPINLOCK(timebase_lock); +static raw_spinlock_t timebase_lock; static unsigned long timebase; static void __devinit pas_give_timebase(void) { - spin_lock(&timebase_lock); + unsigned long flags; + + local_irq_save(flags); + hard_irq_disable(); + __raw_spin_lock(&timebase_lock); mtspr(SPRN_TBCTL, TBCTL_FREEZE); isync(); timebase = get_tb(); - spin_unlock(&timebase_lock); + __raw_spin_unlock(&timebase_lock); while (timebase) barrier(); mtspr(SPRN_TBCTL, TBCTL_RESTART); + local_irq_restore(flags); } static void __devinit pas_take_timebase(void) @@ -92,10 +97,10 @@ static void __devinit pas_take_timebase(void) while (!timebase) smp_rmb(); - spin_lock(&timebase_lock); + __raw_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - spin_unlock(&timebase_lock); + __raw_spin_unlock(&timebase_lock); } struct smp_ops_t pas_smp_ops = { diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 86f69a4eb49..c2052265636 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -103,11 +103,6 @@ unsigned long smu_cmdbuf_abs; EXPORT_SYMBOL(smu_cmdbuf_abs); #endif -#ifdef CONFIG_SMP -extern struct smp_ops_t psurge_smp_ops; -extern struct smp_ops_t core99_smp_ops; -#endif /* CONFIG_SMP */ - static void pmac_show_cpuinfo(struct seq_file *m) { struct device_node *np; @@ -341,34 +336,6 @@ static void __init pmac_setup_arch(void) ROOT_DEV = DEFAULT_ROOT_DEVICE; #endif -#ifdef CONFIG_SMP - /* Check for Core99 */ - ic = of_find_node_by_name(NULL, "uni-n"); - if (!ic) - ic = of_find_node_by_name(NULL, "u3"); - if (!ic) - ic = of_find_node_by_name(NULL, "u4"); - if (ic) { - of_node_put(ic); - smp_ops = &core99_smp_ops; - } -#ifdef CONFIG_PPC32 - else { - /* - * We have to set bits in cpu_possible_map here since the - * secondary CPU(s) aren't in the device tree, and - * setup_per_cpu_areas only allocates per-cpu data for - * CPUs in the cpu_possible_map. - */ - int cpu; - - for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu) - cpu_set(cpu, cpu_possible_map); - smp_ops = &psurge_smp_ops; - } -#endif -#endif /* CONFIG_SMP */ - #ifdef CONFIG_ADB if (strstr(cmd_line, "adb_sync")) { extern int __adb_probe_sync; @@ -512,6 +479,14 @@ static void __init pmac_init_early(void) #ifdef CONFIG_PPC64 iommu_init_early_dart(); #endif + + /* SMP Init has to be done early as we need to patch up + * cpu_possible_map before interrupt stacks are allocated + * or kaboom... + */ +#ifdef CONFIG_SMP + pmac_setup_smp(); +#endif } static int __init pmac_declare_of_platform_devices(void) diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index cf1dbe75889..6d4da7b46b4 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -64,10 +64,11 @@ extern void __secondary_start_pmac_0(void); extern int pmac_pfunc_base_install(void); -#ifdef CONFIG_PPC32 +static void (*pmac_tb_freeze)(int freeze); +static u64 timebase; +static int tb_req; -/* Sync flag for HW tb sync */ -static volatile int sec_tb_reset = 0; +#ifdef CONFIG_PPC32 /* * Powersurge (old powermac SMP) support. @@ -294,6 +295,9 @@ static int __init smp_psurge_probe(void) psurge_quad_init(); /* All released cards using this HW design have 4 CPUs */ ncpus = 4; + /* No sure how timebase sync works on those, let's use SW */ + smp_ops->give_timebase = smp_generic_give_timebase; + smp_ops->take_timebase = smp_generic_take_timebase; } else { iounmap(quad_base); if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) { @@ -308,18 +312,15 @@ static int __init smp_psurge_probe(void) psurge_start = ioremap(PSURGE_START, 4); psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); - /* - * This is necessary because OF doesn't know about the + /* This is necessary because OF doesn't know about the * secondary cpu(s), and thus there aren't nodes in the * device tree for them, and smp_setup_cpu_maps hasn't - * set their bits in cpu_possible_map and cpu_present_map. + * set their bits in cpu_present_map. */ if (ncpus > NR_CPUS) ncpus = NR_CPUS; - for (i = 1; i < ncpus ; ++i) { + for (i = 1; i < ncpus ; ++i) cpu_set(i, cpu_present_map); - set_hard_smp_processor_id(i, i); - } if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352); @@ -329,8 +330,14 @@ static int __init smp_psurge_probe(void) static void __init smp_psurge_kick_cpu(int nr) { unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8; - unsigned long a; - int i; + unsigned long a, flags; + int i, j; + + /* Defining this here is evil ... but I prefer hiding that + * crap to avoid giving people ideas that they can do the + * same. + */ + extern volatile unsigned int cpu_callin_map[NR_CPUS]; /* may need to flush here if secondary bats aren't setup */ for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32) @@ -339,47 +346,52 @@ static void __init smp_psurge_kick_cpu(int nr) if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353); + /* This is going to freeze the timeebase, we disable interrupts */ + local_irq_save(flags); + out_be32(psurge_start, start); mb(); psurge_set_ipi(nr); + /* * We can't use udelay here because the timebase is now frozen. */ for (i = 0; i < 2000; ++i) - barrier(); + asm volatile("nop" : : : "memory"); psurge_clr_ipi(nr); - if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); -} - -/* - * With the dual-cpu powersurge board, the decrementers and timebases - * of both cpus are frozen after the secondary cpu is started up, - * until we give the secondary cpu another interrupt. This routine - * uses this to get the timebases synchronized. - * -- paulus. - */ -static void __init psurge_dual_sync_tb(int cpu_nr) -{ - int t; - - set_dec(tb_ticks_per_jiffy); - /* XXX fixme */ - set_tb(0, 0); - - if (cpu_nr > 0) { + /* + * Also, because the timebase is frozen, we must not return to the + * caller which will try to do udelay's etc... Instead, we wait -here- + * for the CPU to callin. + */ + for (i = 0; i < 100000 && !cpu_callin_map[nr]; ++i) { + for (j = 1; j < 10000; j++) + asm volatile("nop" : : : "memory"); + asm volatile("sync" : : : "memory"); + } + if (!cpu_callin_map[nr]) + goto stuck; + + /* And we do the TB sync here too for standard dual CPU cards */ + if (psurge_type == PSURGE_DUAL) { + while(!tb_req) + barrier(); + tb_req = 0; + mb(); + timebase = get_tb(); + mb(); + while (timebase) + barrier(); mb(); - sec_tb_reset = 1; - return; } + stuck: + /* now interrupt the secondary, restarting both TBs */ + if (psurge_type == PSURGE_DUAL) + psurge_set_ipi(1); - /* wait for the secondary to have reset its TB before proceeding */ - for (t = 10000000; t > 0 && !sec_tb_reset; --t) - ; - - /* now interrupt the secondary, starting both TBs */ - psurge_set_ipi(1); + if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); } static struct irqaction psurge_irqaction = { @@ -390,36 +402,35 @@ static struct irqaction psurge_irqaction = { static void __init smp_psurge_setup_cpu(int cpu_nr) { + if (cpu_nr != 0) + return; - if (cpu_nr == 0) { - /* If we failed to start the second CPU, we should still - * send it an IPI to start the timebase & DEC or we might - * have them stuck. - */ - if (num_online_cpus() < 2) { - if (psurge_type == PSURGE_DUAL) - psurge_set_ipi(1); - return; - } - /* reset the entry point so if we get another intr we won't - * try to startup again */ - out_be32(psurge_start, 0x100); - if (setup_irq(30, &psurge_irqaction)) - printk(KERN_ERR "Couldn't get primary IPI interrupt"); - } - - if (psurge_type == PSURGE_DUAL) - psurge_dual_sync_tb(cpu_nr); + /* reset the entry point so if we get another intr we won't + * try to startup again */ + out_be32(psurge_start, 0x100); + if (setup_irq(30, &psurge_irqaction)) + printk(KERN_ERR "Couldn't get primary IPI interrupt"); } void __init smp_psurge_take_timebase(void) { - /* Dummy implementation */ + if (psurge_type != PSURGE_DUAL) + return; + + tb_req = 1; + mb(); + while (!timebase) + barrier(); + mb(); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + mb(); + set_dec(tb_ticks_per_jiffy/2); } void __init smp_psurge_give_timebase(void) { - /* Dummy implementation */ + /* Nothing to do here */ } /* PowerSurge-style Macs */ @@ -437,9 +448,6 @@ struct smp_ops_t psurge_smp_ops = { * Core 99 and later support */ -static void (*pmac_tb_freeze)(int freeze); -static u64 timebase; -static int tb_req; static void smp_core99_give_timebase(void) { @@ -478,7 +486,6 @@ static void __devinit smp_core99_take_timebase(void) set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; mb(); - set_dec(tb_ticks_per_jiffy/2); local_irq_restore(flags); } @@ -920,3 +927,34 @@ struct smp_ops_t core99_smp_ops = { # endif #endif }; + +void __init pmac_setup_smp(void) +{ + struct device_node *np; + + /* Check for Core99 */ + np = of_find_node_by_name(NULL, "uni-n"); + if (!np) + np = of_find_node_by_name(NULL, "u3"); + if (!np) + np = of_find_node_by_name(NULL, "u4"); + if (np) { + of_node_put(np); + smp_ops = &core99_smp_ops; + } +#ifdef CONFIG_PPC32 + else { + /* We have to set bits in cpu_possible_map here since the + * secondary CPU(s) aren't in the device tree. Various + * things won't be initialized for CPUs not in the possible + * map, so we really need to fix it up here. + */ + int cpu; + + for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu) + cpu_set(cpu, cpu_possible_map); + smp_ops = &psurge_smp_ops; + } +#endif /* CONFIG_PPC32 */ +} + diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 1a231c389ba..1f8f6cfb94f 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -35,7 +35,6 @@ #include <asm/prom.h> #include <asm/smp.h> #include <asm/paca.h> -#include <asm/time.h> #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/firmware.h> @@ -118,31 +117,6 @@ static void __devinit smp_xics_setup_cpu(int cpu) } #endif /* CONFIG_XICS */ -static DEFINE_SPINLOCK(timebase_lock); -static unsigned long timebase = 0; - -static void __devinit pSeries_give_timebase(void) -{ - spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); - timebase = get_tb(); - spin_unlock(&timebase_lock); - - while (timebase) - barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); -} - -static void __devinit pSeries_take_timebase(void) -{ - while (!timebase) - barrier(); - spin_lock(&timebase_lock); - set_tb(timebase >> 32, timebase & 0xffffffff); - timebase = 0; - spin_unlock(&timebase_lock); -} - static void __devinit smp_pSeries_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); @@ -209,8 +183,8 @@ static void __init smp_init_pseries(void) /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { - smp_ops->give_timebase = pSeries_give_timebase; - smp_ops->take_timebase = pSeries_take_timebase; + smp_ops->give_timebase = rtas_give_timebase; + smp_ops->take_timebase = rtas_take_timebase; } pr_debug(" <- smp_init_pSeries()\n"); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 9c3af504549..d46de1f0f3e 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -279,28 +279,29 @@ static void _mpic_map_mmio(struct mpic *mpic, phys_addr_t phys_addr, } #ifdef CONFIG_PPC_DCR -static void _mpic_map_dcr(struct mpic *mpic, struct mpic_reg_bank *rb, +static void _mpic_map_dcr(struct mpic *mpic, struct device_node *node, + struct mpic_reg_bank *rb, unsigned int offset, unsigned int size) { const u32 *dbasep; - dbasep = of_get_property(mpic->irqhost->of_node, "dcr-reg", NULL); + dbasep = of_get_property(node, "dcr-reg", NULL); - rb->dhost = dcr_map(mpic->irqhost->of_node, *dbasep + offset, size); + rb->dhost = dcr_map(node, *dbasep + offset, size); BUG_ON(!DCR_MAP_OK(rb->dhost)); } -static inline void mpic_map(struct mpic *mpic, phys_addr_t phys_addr, - struct mpic_reg_bank *rb, unsigned int offset, - unsigned int size) +static inline void mpic_map(struct mpic *mpic, struct device_node *node, + phys_addr_t phys_addr, struct mpic_reg_bank *rb, + unsigned int offset, unsigned int size) { if (mpic->flags & MPIC_USES_DCR) - _mpic_map_dcr(mpic, rb, offset, size); + _mpic_map_dcr(mpic, node, rb, offset, size); else _mpic_map_mmio(mpic, phys_addr, rb, offset, size); } #else /* CONFIG_PPC_DCR */ -#define mpic_map(m,p,b,o,s) _mpic_map_mmio(m,p,b,o,s) +#define mpic_map(m,n,p,b,o,s) _mpic_map_mmio(m,p,b,o,s) #endif /* !CONFIG_PPC_DCR */ @@ -1052,11 +1053,10 @@ struct mpic * __init mpic_alloc(struct device_node *node, int intvec_top; u64 paddr = phys_addr; - mpic = alloc_bootmem(sizeof(struct mpic)); + mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL); if (mpic == NULL) return NULL; - - memset(mpic, 0, sizeof(struct mpic)); + mpic->name = name; mpic->hc_irq = mpic_irq_chip; @@ -1152,8 +1152,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, } /* Map the global registers */ - mpic_map(mpic, paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000); - mpic_map(mpic, paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); + mpic_map(mpic, node, paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000); + mpic_map(mpic, node, paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); /* Reset */ if (flags & MPIC_WANTS_RESET) { @@ -1194,7 +1194,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, /* Map the per-CPU registers */ for (i = 0; i < mpic->num_cpus; i++) { - mpic_map(mpic, paddr, &mpic->cpuregs[i], + mpic_map(mpic, node, paddr, &mpic->cpuregs[i], MPIC_INFO(CPU_BASE) + i * MPIC_INFO(CPU_STRIDE), 0x1000); } @@ -1202,7 +1202,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, /* Initialize main ISU if none provided */ if (mpic->isu_size == 0) { mpic->isu_size = mpic->num_sources; - mpic_map(mpic, paddr, &mpic->isus[0], + mpic_map(mpic, node, paddr, &mpic->isus[0], MPIC_INFO(IRQ_BASE), MPIC_INFO(IRQ_STRIDE) * mpic->isu_size); } mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1); @@ -1256,8 +1256,10 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, BUG_ON(isu_num >= MPIC_MAX_ISU); - mpic_map(mpic, paddr, &mpic->isus[isu_num], 0, + mpic_map(mpic, mpic->irqhost->of_node, + paddr, &mpic->isus[isu_num], 0, MPIC_INFO(IRQ_STRIDE) * mpic->isu_size); + if ((isu_first + mpic->isu_size) > mpic->num_sources) mpic->num_sources = isu_first + mpic->isu_size; } diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index b28b0e512d6..237e3654f48 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -112,6 +112,7 @@ int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input) { unsigned long flags; u8 mcn_shift = 0, dev_shift = 0; + u32 ret; spin_lock_irqsave(&qe_lock, flags); if (cmd == QE_RESET) { @@ -139,11 +140,13 @@ int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input) } /* wait for the QE_CR_FLG to clear */ - while(in_be32(&qe_immr->cp.cecr) & QE_CR_FLG) - cpu_relax(); + ret = spin_event_timeout((in_be32(&qe_immr->cp.cecr) & QE_CR_FLG) == 0, + 100, 0); + /* On timeout (e.g. failure), the expression will be false (ret == 0), + otherwise it will be true (ret == 1). */ spin_unlock_irqrestore(&qe_lock, flags); - return 0; + return ret == 1; } EXPORT_SYMBOL(qe_issue_cmd); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a27d0d5a6f8..1cd02f6073a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -99,7 +99,9 @@ struct kvm_s390_sie_block { __u8 reservedd0[48]; /* 0x00d0 */ __u64 gcr[16]; /* 0x0100 */ __u64 gbea; /* 0x0180 */ - __u8 reserved188[120]; /* 0x0188 */ + __u8 reserved188[24]; /* 0x0188 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[92]; /* 0x01a4 */ } __attribute__((packed)); struct kvm_vcpu_stat { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c18b21d6991..90d9d1ba258 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -25,6 +25,7 @@ #include <asm/lowcore.h> #include <asm/pgtable.h> #include <asm/nmi.h> +#include <asm/system.h> #include "kvm-s390.h" #include "gaccess.h" @@ -69,6 +70,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +static unsigned long long *facilities; /* Section: not file related */ void kvm_arch_hardware_enable(void *garbage) @@ -288,6 +290,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; vcpu->arch.sie_block->ecb = 2; vcpu->arch.sie_block->eca = 0xC1002001U; + vcpu->arch.sie_block->fac = (int) (long) facilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, (unsigned long) vcpu); @@ -739,11 +742,29 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) static int __init kvm_s390_init(void) { - return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); + int ret; + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); + if (ret) + return ret; + + /* + * guests can ask for up to 255+1 double words, we need a full page + * to hold the maximum amount of facilites. On the other hand, we + * only set facilities that are known to work in KVM. + */ + facilities = (unsigned long long *) get_zeroed_page(GFP_DMA); + if (!facilities) { + kvm_exit(); + return -ENOMEM; + } + stfle(facilities, 1); + facilities[0] &= 0xff00fff3f0700000ULL; + return 0; } static void __exit kvm_s390_exit(void) { + free_page((unsigned long) facilities); kvm_exit(); } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 93ecd06e1a7..d426aac8095 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -158,7 +158,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stfl++; /* only pass the facility bits, which we can handle */ - facility_list &= 0xfe00fff3; + facility_list &= 0xff00fff3; rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index 96041a8d39e..1ff0fd92475 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -15,7 +15,7 @@ quiet_cmd_elftoaout = ELFTOAOUT $@ ifeq ($(CONFIG_SPARC32),y) quiet_cmd_piggy = PIGGY $@ - cmd_piggy = $(obj)/piggyback_32 $@ $(obj)/System.map $(ROOT_IMG) + cmd_piggy = $(obj)/piggyback_32 $@ System.map $(ROOT_IMG) quiet_cmd_btfix = BTFIX $@ cmd_btfix = $(OBJDUMP) -x vmlinux | $(obj)/btfixupprep > $@ quiet_cmd_sysmap = SYSMAP $(obj)/System.map @@ -58,7 +58,7 @@ $(obj)/image: $(obj)/btfix.o FORCE $(obj)/zImage: $(obj)/image $(call if_changed,strip) -$(obj)/tftpboot.img: $(obj)/piggyback $(obj)/System.map $(obj)/image FORCE +$(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback_32 System.map $(ROOT_IMG) FORCE $(call if_changed,elftoaout) $(call if_changed,piggy) @@ -79,7 +79,7 @@ $(obj)/image: vmlinux FORCE $(call if_changed,strip) @echo ' kernel: $@ is ready' -$(obj)/tftpboot.img: vmlinux $(obj)/piggyback_64 System.map $(ROOT_IMG) FORCE +$(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback_64 System.map $(ROOT_IMG) FORCE $(call if_changed,elftoaout) $(call if_changed,piggy) @echo ' kernel: $@ is ready' diff --git a/arch/sparc/boot/piggyback_32.c b/arch/sparc/boot/piggyback_32.c index c9f500c1a8b..e8dc9adfcd6 100644 --- a/arch/sparc/boot/piggyback_32.c +++ b/arch/sparc/boot/piggyback_32.c @@ -70,7 +70,7 @@ void die(char *str) int main(int argc,char **argv) { static char aout_magic[] = { 0x01, 0x03, 0x01, 0x07 }; - unsigned char buffer[1024], *q, *r; + char buffer[1024], *q, *r; unsigned int i, j, k, start, end, offset; FILE *map; struct stat s; @@ -84,7 +84,7 @@ int main(int argc,char **argv) while (fgets (buffer, 1024, map)) { if (!strcmp (buffer + 8, " T start\n") || !strcmp (buffer + 16, " T start\n")) start = strtoul (buffer, NULL, 16); - else if (!strcmp (buffer + 8, " A end\n") || !strcmp (buffer + 16, " A end\n")) + else if (!strcmp (buffer + 8, " A _end\n") || !strcmp (buffer + 16, " A _end\n")) end = strtoul (buffer, NULL, 16); } fclose (map); diff --git a/arch/sparc/boot/piggyback_64.c b/arch/sparc/boot/piggyback_64.c index de364bfed0b..c63fd1b6bdd 100644 --- a/arch/sparc/boot/piggyback_64.c +++ b/arch/sparc/boot/piggyback_64.c @@ -46,6 +46,7 @@ int main(int argc,char **argv) struct stat s; int image, tail; + start = end = 0; if (stat (argv[3], &s) < 0) die (argv[3]); map = fopen (argv[2], "r"); if (!map) die(argv[2]); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index bd075054942..f0ee7905540 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -20,7 +20,6 @@ #include <linux/delay.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/bootmem.h> #include <linux/irq.h> #include <asm/ptrace.h> @@ -914,25 +913,19 @@ void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu) tb->nonresum_qmask); } -static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask) -{ - unsigned long size = PAGE_ALIGN(qmask + 1); - void *p = __alloc_bootmem(size, size, 0); - if (!p) { - prom_printf("SUN4V: Error, cannot allocate mondo queue.\n"); - prom_halt(); - } - - *pa_ptr = __pa(p); -} - -static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask) +/* Each queue region must be a power of 2 multiple of 64 bytes in + * size. The base real address must be aligned to the size of the + * region. Thus, an 8KB queue must be 8KB aligned, for example. + */ +static void __init alloc_one_queue(unsigned long *pa_ptr, unsigned long qmask) { unsigned long size = PAGE_ALIGN(qmask + 1); - void *p = __alloc_bootmem(size, size, 0); + unsigned long order = get_order(size); + unsigned long p; + p = __get_free_pages(GFP_KERNEL, order); if (!p) { - prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); + prom_printf("SUN4V: Error, cannot allocate queue.\n"); prom_halt(); } @@ -942,11 +935,11 @@ static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask) static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) { #ifdef CONFIG_SMP - void *page; + unsigned long page; BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); - page = alloc_bootmem_pages(PAGE_SIZE); + page = get_zeroed_page(GFP_KERNEL); if (!page) { prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); prom_halt(); @@ -965,13 +958,13 @@ static void __init sun4v_init_mondo_queues(void) for_each_possible_cpu(cpu) { struct trap_per_cpu *tb = &trap_block[cpu]; - alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask); - alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask); - alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask); - alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask); - alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask); - alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, - tb->nonresum_qmask); + alloc_one_queue(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask); + alloc_one_queue(&tb->dev_mondo_pa, tb->dev_mondo_qmask); + alloc_one_queue(&tb->resum_mondo_pa, tb->resum_qmask); + alloc_one_queue(&tb->resum_kernel_buf_pa, tb->resum_qmask); + alloc_one_queue(&tb->nonresum_mondo_pa, tb->nonresum_qmask); + alloc_one_queue(&tb->nonresum_kernel_buf_pa, + tb->nonresum_qmask); } } @@ -999,7 +992,7 @@ void __init init_IRQ(void) kill_prom_timer(); size = sizeof(struct ino_bucket) * NUM_IVECS; - ivector_table = alloc_bootmem(size); + ivector_table = kzalloc(size, GFP_KERNEL); if (!ivector_table) { prom_printf("Fatal error, cannot allocate ivector_table\n"); prom_halt(); diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c index 5ec17563142..dd2aadc14af 100644 --- a/arch/um/drivers/slip_kern.c +++ b/arch/um/drivers/slip_kern.c @@ -30,7 +30,6 @@ static void slip_init(struct net_device *dev, void *data) slip_proto_init(&spri->slip); - dev->init = NULL; dev->hard_header_len = 0; dev->header_ops = NULL; dev->addr_len = 0; diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c index f15a6e7654f..e376284f0fb 100644 --- a/arch/um/drivers/slirp_kern.c +++ b/arch/um/drivers/slirp_kern.c @@ -32,7 +32,6 @@ void slirp_init(struct net_device *dev, void *data) slip_proto_init(&spri->slip); - dev->init = NULL; dev->hard_header_len = 0; dev->header_ops = NULL; dev->addr_len = 0; diff --git a/arch/um/include/asm/dma-mapping.h b/arch/um/include/asm/dma-mapping.h index 90fc708b320..378de4bbf49 100644 --- a/arch/um/include/asm/dma-mapping.h +++ b/arch/um/include/asm/dma-mapping.h @@ -79,14 +79,14 @@ dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, } static inline void -dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size, +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { BUG(); } static inline void -dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems, +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { BUG(); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 418e632d4a8..7a1065958ba 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -8,7 +8,7 @@ #ifdef __KERNEL__ -#include <asm/page_types.h> +#include <asm/pgtable_types.h> /* Physical address where kernel should be loaded. */ #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ @@ -16,10 +16,10 @@ & ~(CONFIG_PHYSICAL_ALIGN - 1)) /* Minimum kernel alignment, as a power of two */ -#ifdef CONFIG_x86_64 +#ifdef CONFIG_X86_64 #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT #else -#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER) #endif #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 02ecb30982a..103f1ddb0d8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -42,6 +42,7 @@ #else /* ...!ASSEMBLY */ +#include <linux/kernel.h> #include <linux/stringify.h> #ifdef CONFIG_SMP @@ -155,6 +156,15 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); +#ifdef CONFIG_NEED_MULTIPLE_NODES +void *pcpu_lpage_remapped(void *kaddr); +#else +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} +#endif + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e5b27d8f1b4..28e5f595604 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT unsigned bits; + int cpu = smp_processor_id(); bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; + /* use socket ID also for last level cache */ + per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; #endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6b26d4deada..f1961c07af9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -848,9 +848,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif - - /* Cap the iomem address space to what is addressable on all CPUs */ - iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1; } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 284d1de968b..af425b83202 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1117,7 +1117,7 @@ static void mcheck_timer(unsigned long data) *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); t->expires = jiffies + *n; - add_timer(t); + add_timer_on(t, smp_processor_id()); } static void mce_do_trigger(struct work_struct *work) @@ -1321,7 +1321,7 @@ static void mce_init_timer(void) return; setup_timer(t, mcheck_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); - add_timer(t); + add_timer_on(t, smp_processor_id()); } /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 95ea5fa7d44..c8405718a4c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -22,6 +22,7 @@ #include "dumpstack.h" int panic_on_unrecovered_nmi; +int panic_on_io_nmi; unsigned int code_bytes = 64; int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; static int die_counter; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index be5ae80f897..de2cab13284 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align) return ret; } +#ifdef CONFIG_X86_64 +static void __init init_gbpages(void) +{ + if (direct_gbpages && cpu_has_gbpages) + printk(KERN_INFO "Using GB pages for direct mapping\n"); + else + direct_gbpages = 0; +} +#else +static inline void init_gbpages(void) +{ +} +#endif + static void __init reserve_brk(void) { if (_brk_end > _brk_start) @@ -871,6 +885,8 @@ void __init setup_arch(char **cmdline_p) reserve_brk(); + init_gbpages(); + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); max_pfn_mapped = max_low_pfn_mapped; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9c3f0823e6a..29a3eef7cf4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, } /* - * Remap allocator + * Large page remap allocator * * This allocator uses PMD page as unit. A PMD page is allocated for * each cpu and each is remapped into vmalloc area using PMD mapping. @@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, * better than only using 4k mappings while still being NUMA friendly. */ #ifdef CONFIG_NEED_MULTIPLE_NODES -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; +struct pcpul_ent { + unsigned int cpu; + void *ptr; +}; + +static size_t pcpul_size; +static struct pcpul_ent *pcpul_map; +static struct vm_struct pcpul_vm; -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) +static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) { size_t off = (size_t)pageno << PAGE_SHIFT; - if (off >= pcpur_size) + if (off >= pcpul_size) return NULL; - return virt_to_page(pcpur_ptrs[cpu] + off); + return virt_to_page(pcpul_map[cpu].ptr + off); } -static ssize_t __init setup_pcpu_remap(size_t static_size) +static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { - static struct vm_struct vm; - size_t ptrs_size, dyn_size; + size_t map_size, dyn_size; unsigned int cpu; + int i, j; ssize_t ret; - /* - * If large page isn't supported, there's no benefit in doing - * this. Also, on non-NUMA, embedding is better. - * - * NOTE: disabled for now. - */ - if (true || !cpu_has_pse || !pcpu_need_numa()) + if (!chosen) { + size_t vm_size = VMALLOC_END - VMALLOC_START; + size_t tot_size = num_possible_cpus() * PMD_SIZE; + + /* on non-NUMA, embedding is better */ + if (!pcpu_need_numa()) + return -EINVAL; + + /* don't consume more than 20% of vmalloc area */ + if (tot_size > vm_size / 5) { + pr_info("PERCPU: too large chunk size %zuMB for " + "large page remap\n", tot_size >> 20); + return -EINVAL; + } + } + + /* need PSE */ + if (!cpu_has_pse) { + pr_warning("PERCPU: lpage allocator requires PSE\n"); return -EINVAL; + } /* * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. */ - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE); - if (pcpur_size > PMD_SIZE) { + if (pcpul_size > PMD_SIZE) { pr_warning("PERCPU: static data is larger than large page, " "can't use large page\n"); return -EINVAL; } - dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; + dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); + map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + pcpul_map = alloc_bootmem(map_size); for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); - if (!pcpur_ptrs[cpu]) + pcpul_map[cpu].cpu = cpu; + pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, + PMD_SIZE); + if (!pcpul_map[cpu].ptr) { + pr_warning("PERCPU: failed to allocate large page " + "for cpu%u\n", cpu); goto enomem; + } /* - * Only use pcpur_size bytes and give back the rest. + * Only use pcpul_size bytes and give back the rest. * * Ingo: The 2MB up-rounding bootmem is needed to make * sure the partial 2MB page is still fully RAM - it's * not well-specified to have a PAT-incompatible area * (unmapped RAM, device memory, etc.) in that hole. */ - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PMD_SIZE - pcpur_size); + free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), + PMD_SIZE - pcpul_size); - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); + memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); } /* allocate address and map */ - vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&vm, PMD_SIZE); + pcpul_vm.flags = VM_ALLOC; + pcpul_vm.size = num_possible_cpus() * PMD_SIZE; + vm_area_register_early(&pcpul_vm, PMD_SIZE); for_each_possible_cpu(cpu) { - pmd_t *pmd; + pmd_t *pmd, pmd_v; - pmd = populate_extra_pmd((unsigned long)vm.addr - + cpu * PMD_SIZE); - set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), - PAGE_KERNEL_LARGE)); + pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + + cpu * PMD_SIZE); + pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), + PAGE_KERNEL_LARGE); + set_pmd(pmd, pmd_v); } /* we're ready, commit */ pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", vm.addr, static_size); + "%zu bytes\n", pcpul_vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, + ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, vm.addr, NULL); - goto out_free_ar; + PMD_SIZE, pcpul_vm.addr, NULL); + + /* sort pcpul_map array for pcpu_lpage_remapped() */ + for (i = 0; i < num_possible_cpus() - 1; i++) + for (j = i + 1; j < num_possible_cpus(); j++) + if (pcpul_map[i].ptr > pcpul_map[j].ptr) { + struct pcpul_ent tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + return ret; enomem: for_each_possible_cpu(cpu) - if (pcpur_ptrs[cpu]) - free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpur_ptrs), ptrs_size); - return ret; + if (pcpul_map[cpu].ptr) + free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); + free_bootmem(__pa(pcpul_map), map_size); + return -ENOMEM; +} + +/** + * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area + * @kaddr: the kernel address in question + * + * Determine whether @kaddr falls in the pcpul recycled area. This is + * used by pageattr to detect VM aliases and break up the pcpu PMD + * mapping such that the same physical page is not mapped under + * different attributes. + * + * The recycled area is always at the tail of a partially used PMD + * page. + * + * RETURNS: + * Address of corresponding remapped pcpu address if match is found; + * otherwise, NULL. + */ +void *pcpu_lpage_remapped(void *kaddr) +{ + void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); + unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; + int left = 0, right = num_possible_cpus() - 1; + int pos; + + /* pcpul in use at all? */ + if (!pcpul_map) + return NULL; + + /* okay, perform binary search */ + while (left <= right) { + pos = (left + right) / 2; + + if (pcpul_map[pos].ptr < pmd_addr) + left = pos + 1; + else if (pcpul_map[pos].ptr > pmd_addr) + right = pos - 1; + else { + /* it shouldn't be in the area for the first chunk */ + WARN_ON(offset < pcpul_size); + + return pcpul_vm.addr + + pcpul_map[pos].cpu * PMD_SIZE + offset; + } + } + + return NULL; } #else -static ssize_t __init setup_pcpu_remap(size_t static_size) +static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { return -EINVAL; } @@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * mapping so that it can use PMD mapping without additional TLB * pressure. */ -static ssize_t __init setup_pcpu_embed(size_t static_size) +static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) * this. Also, embedding allocation doesn't play well with * NUMA. */ - if (!cpu_has_pse || pcpu_need_numa()) + if (!chosen && (!cpu_has_pse || pcpu_need_numa())) return -EINVAL; return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, @@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) void *ptr; ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) + if (!ptr) { + pr_warning("PERCPU: failed to allocate " + "4k page for cpu%u\n", cpu); goto enomem; + } memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); pcpu4k_pages[j++] = virt_to_page(ptr); @@ -333,6 +416,16 @@ out_free_ar: return ret; } +/* for explicit first chunk allocator selection */ +static char pcpu_chosen_alloc[16] __initdata; + +static int __init percpu_alloc_setup(char *str) +{ + strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); + return 0; +} +early_param("percpu_alloc", percpu_alloc_setup); + static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu) #endif } -/* - * Great future plan: - * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. - * Always point %gs to its beginning - */ void __init setup_per_cpu_areas(void) { size_t static_size = __per_cpu_end - __per_cpu_start; @@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void) * of large page mappings. Please read comments on top of * each allocator for details. */ - ret = setup_pcpu_remap(static_size); - if (ret < 0) - ret = setup_pcpu_embed(static_size); + ret = -EINVAL; + if (strlen(pcpu_chosen_alloc)) { + if (strcmp(pcpu_chosen_alloc, "4k")) { + if (!strcmp(pcpu_chosen_alloc, "lpage")) + ret = setup_pcpu_lpage(static_size, true); + else if (!strcmp(pcpu_chosen_alloc, "embed")) + ret = setup_pcpu_embed(static_size, true); + else + pr_warning("PERCPU: unknown allocator %s " + "specified\n", pcpu_chosen_alloc); + if (ret < 0) + pr_warning("PERCPU: %s allocator failed (%zd), " + "falling back to 4k\n", + pcpu_chosen_alloc, ret); + } + } else { + ret = setup_pcpu_lpage(static_size, false); + if (ret < 0) + ret = setup_pcpu_embed(static_size, false); + } if (ret < 0) ret = setup_pcpu_4k(static_size); if (ret < 0) diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 124d40c575d..8ccabb8a2f6 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode) unsigned long pa; unsigned long m; unsigned long n; - unsigned long mmr_image; struct bau_desc *adp; struct bau_desc *ad2; @@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode) n = pa >> uv_nshift; m = pa & uv_mmask; - mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); - if (mmr_image) { - uv_write_global_mmr64(pnode, (unsigned long) - UVH_LB_BAU_SB_DESCRIPTOR_BASE, - (n << UV_DESC_BASE_PNODE_SHIFT | m)); - } + uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, + (n << UV_DESC_BASE_PNODE_SHIFT | m)); /* * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a0f48f5671c..5204332f475 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); + if (panic_on_io_nmi) + panic("NMI IOCK error: Not continuing"); + /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & 0xf) | 8; outb(reason, 0x61); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5c3d6e81a7d..7030b5f911b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2157,7 +2157,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) else /* 32 bits PSE 4MB page */ context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); - context->rsvd_bits_mask[1][0] = ~0ull; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; case PT32E_ROOT_LEVEL: context->rsvd_bits_mask[0][2] = @@ -2170,7 +2170,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = ~0ull; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; case PT64_ROOT_LEVEL: context->rsvd_bits_mask[0][3] = exb_bit_rsvd | @@ -2186,7 +2186,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = ~0ull; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; } } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 258e4591e1c..67785f63539 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -281,7 +281,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, { unsigned access = gw->pt_access; struct kvm_mmu_page *shadow_page; - u64 spte, *sptep; + u64 spte, *sptep = NULL; int direct; gfn_t table_gfn; int r; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e770bf349ec..356a0ce85c6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3012,6 +3012,12 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -3198,6 +3204,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_HLT] = handle_halt, [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_VMCALL] = handle_vmcall, + [EXIT_REASON_VMCLEAR] = handle_vmx_insn, + [EXIT_REASON_VMLAUNCH] = handle_vmx_insn, + [EXIT_REASON_VMPTRLD] = handle_vmx_insn, + [EXIT_REASON_VMPTRST] = handle_vmx_insn, + [EXIT_REASON_VMREAD] = handle_vmx_insn, + [EXIT_REASON_VMRESUME] = handle_vmx_insn, + [EXIT_REASON_VMWRITE] = handle_vmx_insn, + [EXIT_REASON_VMOFF] = handle_vmx_insn, + [EXIT_REASON_VMON] = handle_vmx_insn, [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 249540f9851..fe5474aec41 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -898,6 +898,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_VM_HSAVE_PA: case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: + case MSR_K7_EVNTSEL0: data = 0; break; case MSR_MTRRcap: diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index c1b6c232e02..616de4628d6 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1361,7 +1361,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return 0; } -void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) +static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) { u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); /* diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index f4568605d7d..ff485d36118 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -55,8 +55,10 @@ static void delay_tsc(unsigned long loops) preempt_disable(); cpu = smp_processor_id(); + rdtsc_barrier(); rdtscl(bclock); for (;;) { + rdtsc_barrier(); rdtscl(now); if ((now - bclock) >= loops) break; @@ -78,6 +80,7 @@ static void delay_tsc(unsigned long loops) if (unlikely(cpu != smp_processor_id())) { loops -= (now - bclock); cpu = smp_processor_id(); + rdtsc_barrier(); rdtscl(bclock); } } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f53b57e4086..47ce9a2ce5e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -177,20 +177,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, return nr_range; } -#ifdef CONFIG_X86_64 -static void __init init_gbpages(void) -{ - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -} -#else -static inline void init_gbpages(void) -{ -} -#endif - /* * Setup the direct mapping of the physical memory at PAGE_OFFSET. * This runs before bootmem is initialized and gets pages directly from @@ -210,9 +196,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); - if (!after_bootmem) - init_gbpages(); - #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3cfe9ced8a4..1b734d7a896 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -11,6 +11,7 @@ #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <linux/pfn.h> #include <asm/e820.h> #include <asm/processor.h> @@ -681,8 +682,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; - int ret = 0; - unsigned long temp_cpa_vaddr, vaddr; + unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); + unsigned long vaddr, remapped; + int ret; if (cpa->pfn >= max_pfn_mapped) return 0; @@ -706,42 +708,55 @@ static int cpa_process_alias(struct cpa_data *cpa) PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { alias_cpa = *cpa; - temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); - alias_cpa.vaddr = &temp_cpa_vaddr; + alias_cpa.vaddr = &laddr; alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); + if (ret) + return ret; } #ifdef CONFIG_X86_64 - if (ret) - return ret; /* - * No need to redo, when the primary call touched the high - * mapping already: - */ - if (within(vaddr, (unsigned long) _text, _brk_end)) - return 0; - - /* - * If the physical address is inside the kernel map, we need + * If the primary call didn't touch the high mapping already + * and the physical address is inside the kernel map, we need * to touch the high mapped kernel as well: */ - if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) - return 0; + if (!within(vaddr, (unsigned long)_text, _brk_end) && + within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { + unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + + __START_KERNEL_map - phys_base; + alias_cpa = *cpa; + alias_cpa.vaddr = &temp_cpa_vaddr; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - alias_cpa = *cpa; - temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; - alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + /* + * The high mapping range is imprecise, so ignore the + * return value. + */ + __change_page_attr_set_clr(&alias_cpa, 0); + } +#endif /* - * The high mapping range is imprecise, so ignore the return value. + * If the PMD page was partially used for per-cpu remapping, + * the recycled area needs to be split and modified. Because + * the area is always proper subset of a PMD page + * cpa->numpages is guaranteed to be 1 for these areas, so + * there's no need to loop over and check for further remaps. */ - __change_page_attr_set_clr(&alias_cpa, 0); -#endif - return ret; + remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); + if (remapped) { + WARN_ON(cpa->numpages > 1); + alias_cpa = *cpa; + alias_cpa.vaddr = &remapped; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + ret = __change_page_attr_set_clr(&alias_cpa, 0); + if (ret) + return ret; + } + + return 0; } static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d277ef1eea5..b3d20b9cac6 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -244,7 +244,7 @@ static void __restore_processor_state(struct saved_context *ctxt) do_fpu_end(); mtrr_ap_init(); -#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_OLD_MCE mcheck_init(&boot_cpu_data); #endif } |