aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/kvm/kvm_lib.c6
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/boot/.gitignore10
-rw-r--r--arch/powerpc/boot/dts/amigaone.dts4
-rw-r--r--arch/powerpc/boot/dts/mpc8569mds.dts1
-rw-r--r--arch/powerpc/include/asm/cpm1.h2
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h24
-rw-r--r--arch/powerpc/include/asm/highmem.h57
-rw-r--r--arch/powerpc/include/asm/hw_irq.h20
-rw-r--r--arch/powerpc/include/asm/pte-hash64-64k.h3
-rw-r--r--arch/powerpc/include/asm/rtas.h5
-rw-r--r--arch/powerpc/kernel/entry_32.S127
-rw-r--r--arch/powerpc/kernel/head_32.S17
-rw-r--r--arch/powerpc/kernel/of_device.c2
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/rtas.c69
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/smp.c3
-rw-r--r--arch/powerpc/kernel/udbg_16550.c2
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/highmem.c77
-rw-r--r--arch/powerpc/platforms/44x/warp.c44
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c1
-rw-r--r--arch/powerpc/platforms/85xx/smp.c9
-rw-r--r--arch/powerpc/platforms/85xx/socrates.c6
-rw-r--r--arch/powerpc/platforms/85xx/xes_mpc85xx.c1
-rw-r--r--arch/powerpc/platforms/cell/smp.c30
-rw-r--r--arch/powerpc/platforms/chrp/smp.c33
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c15
-rw-r--r--arch/powerpc/platforms/powermac/setup.c41
-rw-r--r--arch/powerpc/platforms/powermac/smp.c166
-rw-r--r--arch/powerpc/platforms/pseries/smp.c30
-rw-r--r--arch/powerpc/sysdev/mpic.c34
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe.c9
-rw-r--r--arch/s390/include/asm/kvm_host.h4
-rw-r--r--arch/s390/kvm/kvm-s390.c23
-rw-r--r--arch/s390/kvm/priv.c2
-rw-r--r--arch/sparc/boot/Makefile6
-rw-r--r--arch/sparc/boot/piggyback_32.c4
-rw-r--r--arch/sparc/boot/piggyback_64.c1
-rw-r--r--arch/sparc/kernel/irq_64.c45
-rw-r--r--arch/um/drivers/slip_kern.c1
-rw-r--r--arch/um/drivers/slirp_kern.c1
-rw-r--r--arch/um/include/asm/dma-mapping.h4
-rw-r--r--arch/x86/include/asm/boot.h6
-rw-r--r--arch/x86/include/asm/percpu.h10
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/common.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/setup.c16
-rw-r--r--arch/x86/kernel/setup_percpu.c219
-rw-r--r--arch/x86/kernel/tlb_uv.c9
-rw-r--r--arch/x86/kernel/traps.c3
-rw-r--r--arch/x86/kvm/mmu.c6
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/vmx.c15
-rw-r--r--arch/x86/kvm/x86.c1
-rw-r--r--arch/x86/kvm/x86_emulate.c2
-rw-r--r--arch/x86/lib/delay.c3
-rw-r--r--arch/x86/mm/init.c17
-rw-r--r--arch/x86/mm/pageattr.c65
-rw-r--r--arch/x86/power/cpu.c2
63 files changed, 827 insertions, 506 deletions
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
index a85cb611ecd..f1268b8e6f9 100644
--- a/arch/ia64/kvm/kvm_lib.c
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -11,5 +11,11 @@
*
*/
#undef CONFIG_MODULES
+#include <linux/module.h>
+#undef CONFIG_KALLSYMS
+#undef EXPORT_SYMBOL
+#undef EXPORT_SYMBOL_GPL
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
#include "../../../lib/vsprintf.c"
#include "../../../lib/ctype.c"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bf6cedfa05d..d00131ca083 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -62,7 +62,6 @@ config HAVE_LATENCYTOP_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
bool
- depends on PPC64
default y
config LOCKDEP_SUPPORT
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index 2f50acd11a6..3d80c3e9cf6 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -36,3 +36,13 @@ zImage.pseries
zconf.h
zlib.h
zutil.h
+fdt.c
+fdt.h
+fdt_ro.c
+fdt_rw.c
+fdt_strerror.c
+fdt_sw.c
+fdt_wip.c
+libfdt.h
+libfdt_internal.h
+
diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts
index 26549fca2ed..49ac36b16dd 100644
--- a/arch/powerpc/boot/dts/amigaone.dts
+++ b/arch/powerpc/boot/dts/amigaone.dts
@@ -70,8 +70,8 @@
devsel-speed = <0x00000001>;
min-grant = <0>;
max-latency = <0>;
- /* First 64k for I/O at 0x0 on PCI mapped to 0x0 on ISA. */
- ranges = <0x00000001 0 0x01000000 0 0x00000000 0x00010000>;
+ /* First 4k for I/O at 0x0 on PCI mapped to 0x0 on ISA. */
+ ranges = <0x00000001 0 0x01000000 0 0x00000000 0x00001000>;
interrupt-parent = <&i8259>;
#interrupt-cells = <2>;
#address-cells = <2>;
diff --git a/arch/powerpc/boot/dts/mpc8569mds.dts b/arch/powerpc/boot/dts/mpc8569mds.dts
index a8dcb018c4a..a680165292f 100644
--- a/arch/powerpc/boot/dts/mpc8569mds.dts
+++ b/arch/powerpc/boot/dts/mpc8569mds.dts
@@ -253,6 +253,7 @@
/* Filled in by U-Boot */
clock-frequency = <0>;
status = "disabled";
+ sdhci,1-bit-only;
};
crypto@30000 {
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
index 2ff798744c1..7685ffde882 100644
--- a/arch/powerpc/include/asm/cpm1.h
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -598,8 +598,6 @@ typedef struct risc_timer_pram {
#define CICR_IEN ((uint)0x00000080) /* Int. enable */
#define CICR_SPS ((uint)0x00000001) /* SCC Spread */
-#define IMAP_ADDR (get_immrbase())
-
#define CPM_PIN_INPUT 0
#define CPM_PIN_OUTPUT 1
#define CPM_PIN_PRIMARY 0
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 3d9e887c3c0..b44aaabdd1a 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -309,7 +309,9 @@ static inline void dma_sync_single_for_cpu(struct device *dev,
struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
BUG_ON(!dma_ops);
- dma_ops->sync_single_range_for_cpu(dev, dma_handle, 0,
+
+ if (dma_ops->sync_single_range_for_cpu)
+ dma_ops->sync_single_range_for_cpu(dev, dma_handle, 0,
size, direction);
}
@@ -320,7 +322,9 @@ static inline void dma_sync_single_for_device(struct device *dev,
struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
BUG_ON(!dma_ops);
- dma_ops->sync_single_range_for_device(dev, dma_handle,
+
+ if (dma_ops->sync_single_range_for_device)
+ dma_ops->sync_single_range_for_device(dev, dma_handle,
0, size, direction);
}
@@ -331,7 +335,9 @@ static inline void dma_sync_sg_for_cpu(struct device *dev,
struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
BUG_ON(!dma_ops);
- dma_ops->sync_sg_for_cpu(dev, sgl, nents, direction);
+
+ if (dma_ops->sync_sg_for_cpu)
+ dma_ops->sync_sg_for_cpu(dev, sgl, nents, direction);
}
static inline void dma_sync_sg_for_device(struct device *dev,
@@ -341,7 +347,9 @@ static inline void dma_sync_sg_for_device(struct device *dev,
struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
BUG_ON(!dma_ops);
- dma_ops->sync_sg_for_device(dev, sgl, nents, direction);
+
+ if (dma_ops->sync_sg_for_device)
+ dma_ops->sync_sg_for_device(dev, sgl, nents, direction);
}
static inline void dma_sync_single_range_for_cpu(struct device *dev,
@@ -351,7 +359,9 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev,
struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
BUG_ON(!dma_ops);
- dma_ops->sync_single_range_for_cpu(dev, dma_handle,
+
+ if (dma_ops->sync_single_range_for_cpu)
+ dma_ops->sync_single_range_for_cpu(dev, dma_handle,
offset, size, direction);
}
@@ -362,7 +372,9 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
BUG_ON(!dma_ops);
- dma_ops->sync_single_range_for_device(dev, dma_handle, offset,
+
+ if (dma_ops->sync_single_range_for_device)
+ dma_ops->sync_single_range_for_device(dev, dma_handle, offset,
size, direction);
}
#else /* CONFIG_PPC_NEED_DMA_SYNC_OPS */
diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
index 684a73f4324..a74c4ee6c02 100644
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -22,9 +22,7 @@
#ifdef __KERNEL__
-#include <linux/init.h>
#include <linux/interrupt.h>
-#include <linux/highmem.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
@@ -62,6 +60,9 @@ extern pte_t *pkmap_page_table;
extern void *kmap_high(struct page *page);
extern void kunmap_high(struct page *page);
+extern void *kmap_atomic_prot(struct page *page, enum km_type type,
+ pgprot_t prot);
+extern void kunmap_atomic(void *kvaddr, enum km_type type);
static inline void *kmap(struct page *page)
{
@@ -79,62 +80,11 @@ static inline void kunmap(struct page *page)
kunmap_high(page);
}
-/*
- * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
- * gives a more generic (and caching) interface. But kmap_atomic can
- * be used in IRQ contexts, so in some (very limited) cases we need
- * it.
- */
-static inline void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
-{
- unsigned int idx;
- unsigned long vaddr;
-
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
- pagefault_disable();
- if (!PageHighMem(page))
- return page_address(page);
-
- debug_kmap_atomic(type);
- idx = type + KM_TYPE_NR*smp_processor_id();
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(!pte_none(*(kmap_pte-idx)));
-#endif
- __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
- local_flush_tlb_page(NULL, vaddr);
-
- return (void*) vaddr;
-}
-
static inline void *kmap_atomic(struct page *page, enum km_type type)
{
return kmap_atomic_prot(page, type, kmap_prot);
}
-static inline void kunmap_atomic(void *kvaddr, enum km_type type)
-{
-#ifdef CONFIG_DEBUG_HIGHMEM
- unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
-
- if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
- pagefault_enable();
- return;
- }
-
- BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
-
- /*
- * force other mappings to Oops if they'll try to access
- * this pte without first remap it
- */
- pte_clear(&init_mm, vaddr, kmap_pte-idx);
- local_flush_tlb_page(NULL, vaddr);
-#endif
- pagefault_enable();
-}
-
static inline struct page *kmap_atomic_to_page(void *ptr)
{
unsigned long idx, vaddr = (unsigned long) ptr;
@@ -148,6 +98,7 @@ static inline struct page *kmap_atomic_to_page(void *ptr)
return pte_page(*pte);
}
+
#define flush_cache_kmaps() flush_cache_all()
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 867ab8ed69b..8b505eaaa38 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -68,13 +68,13 @@ static inline int irqs_disabled_flags(unsigned long flags)
#if defined(CONFIG_BOOKE)
#define SET_MSR_EE(x) mtmsr(x)
-#define local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory")
+#define raw_local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory")
#else
#define SET_MSR_EE(x) mtmsr(x)
-#define local_irq_restore(flags) mtmsr(flags)
+#define raw_local_irq_restore(flags) mtmsr(flags)
#endif
-static inline void local_irq_disable(void)
+static inline void raw_local_irq_disable(void)
{
#ifdef CONFIG_BOOKE
__asm__ __volatile__("wrteei 0": : :"memory");
@@ -86,7 +86,7 @@ static inline void local_irq_disable(void)
#endif
}
-static inline void local_irq_enable(void)
+static inline void raw_local_irq_enable(void)
{
#ifdef CONFIG_BOOKE
__asm__ __volatile__("wrteei 1": : :"memory");
@@ -98,7 +98,7 @@ static inline void local_irq_enable(void)
#endif
}
-static inline void local_irq_save_ptr(unsigned long *flags)
+static inline void raw_local_irq_save_ptr(unsigned long *flags)
{
unsigned long msr;
msr = mfmsr();
@@ -110,12 +110,12 @@ static inline void local_irq_save_ptr(unsigned long *flags)
#endif
}
-#define local_save_flags(flags) ((flags) = mfmsr())
-#define local_irq_save(flags) local_irq_save_ptr(&flags)
-#define irqs_disabled() ((mfmsr() & MSR_EE) == 0)
+#define raw_local_save_flags(flags) ((flags) = mfmsr())
+#define raw_local_irq_save(flags) raw_local_irq_save_ptr(&flags)
+#define raw_irqs_disabled() ((mfmsr() & MSR_EE) == 0)
+#define raw_irqs_disabled_flags(flags) (((flags) & MSR_EE) == 0)
-#define hard_irq_enable() local_irq_enable()
-#define hard_irq_disable() local_irq_disable()
+#define hard_irq_disable() raw_local_irq_disable()
static inline int irqs_disabled_flags(unsigned long flags)
{
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index e05d26fa372..82b72207c51 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -47,7 +47,8 @@
* generic accessors and iterators here
*/
#define __real_pte(e,p) ((real_pte_t) { \
- (e), pte_val(*((p) + PTRS_PER_PTE)) })
+ (e), ((e) & _PAGE_COMBO) ? \
+ (pte_val(*((p) + PTRS_PER_PTE))) : 0 })
#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
(((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
#define __rpte_to_pte(r) ((r).pte)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 01c12339b30..168fce72620 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -58,7 +58,7 @@ struct rtas_t {
unsigned long entry; /* physical address pointer */
unsigned long base; /* physical address pointer */
unsigned long size;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct rtas_args args;
struct device_node *dev; /* virtual address pointer */
};
@@ -245,5 +245,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg)
(devfn << 8) | (reg & 0xff);
}
+extern void __cpuinit rtas_give_timebase(void);
+extern void __cpuinit rtas_take_timebase(void);
+
#endif /* __KERNEL__ */
#endif /* _POWERPC_RTAS_H */
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4dd38f12915..3cadba60a4b 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -191,11 +191,49 @@ transfer_to_handler_cont:
mflr r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ lis r12,reenable_mmu@h
+ ori r12,r12,reenable_mmu@l
+ mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r10
+ SYNC
+ RFI
+reenable_mmu: /* re-enable mmu so we can */
+ mfmsr r10
+ lwz r12,_MSR(r1)
+ xor r10,r10,r12
+ andi. r10,r10,MSR_EE /* Did EE change? */
+ beq 1f
+
+ /* Save handler and return address into the 2 unused words
+ * of the STACK_FRAME_OVERHEAD (sneak sneak sneak). Everything
+ * else can be recovered from the pt_regs except r3 which for
+ * normal interrupts has been set to pt_regs and for syscalls
+ * is an argument, so we temporarily use ORIG_GPR3 to save it
+ */
+ stw r9,8(r1)
+ stw r11,12(r1)
+ stw r3,ORIG_GPR3(r1)
+ bl trace_hardirqs_off
+ lwz r0,GPR0(r1)
+ lwz r3,ORIG_GPR3(r1)
+ lwz r4,GPR4(r1)
+ lwz r5,GPR5(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+ lwz r9,8(r1)
+ lwz r11,12(r1)
+1: mtctr r11
+ mtlr r9
+ bctr /* jump to handler */
+#else /* CONFIG_TRACE_IRQFLAGS */
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r10
mtlr r9
SYNC
RFI /* jump to handler, enable MMU */
+#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined (CONFIG_6xx) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
@@ -251,6 +289,31 @@ _GLOBAL(DoSyscall)
#ifdef SHOW_SYSCALLS
bl do_show_syscall
#endif /* SHOW_SYSCALLS */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Return from syscalls can (and generally will) hard enable
+ * interrupts. You aren't supposed to call a syscall with
+ * interrupts disabled in the first place. However, to ensure
+ * that we get it right vs. lockdep if it happens, we force
+ * that hard enable here with appropriate tracing if we see
+ * that we have been called with interrupts off
+ */
+ mfmsr r11
+ andi. r12,r11,MSR_EE
+ bne+ 1f
+ /* We came in with interrupts disabled, we enable them now */
+ bl trace_hardirqs_on
+ mfmsr r11
+ lwz r0,GPR0(r1)
+ lwz r3,GPR3(r1)
+ lwz r4,GPR4(r1)
+ ori r11,r11,MSR_EE
+ lwz r5,GPR5(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+ mtmsr r11
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
lwz r11,TI_FLAGS(r10)
andi. r11,r11,_TIF_SYSCALL_T_OR_A
@@ -275,6 +338,7 @@ ret_from_syscall:
rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ /* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
lwz r9,TI_FLAGS(r12)
@@ -288,6 +352,19 @@ ret_from_syscall:
oris r11,r11,0x1000 /* Set SO bit in CR */
stw r11,_CCR(r1)
syscall_exit_cont:
+ lwz r8,_MSR(r1)
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* If we are going to return from the syscall with interrupts
+ * off, we trace that here. It shouldn't happen though but we
+ * want to catch the bugger if it does right ?
+ */
+ andi. r10,r8,MSR_EE
+ bne+ 1f
+ stw r3,GPR3(r1)
+ bl trace_hardirqs_off
+ lwz r3,GPR3(r1)
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
/* If the process has its own DBCR0 value, load it up. The internal
debug mode bit tells us that dbcr0 should be loaded. */
@@ -311,7 +388,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
mtlr r4
mtcr r5
lwz r7,_NIP(r1)
- lwz r8,_MSR(r1)
FIX_SRR1(r8, r0)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
@@ -394,7 +470,9 @@ syscall_exit_work:
andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
beq ret_from_except
- /* Re-enable interrupts */
+ /* Re-enable interrupts. There is no need to trace that with
+ * lockdep as we are supposed to have IRQs on at this point
+ */
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10)
@@ -705,6 +783,7 @@ ret_from_except:
/* Hard-disable interrupts so that current_thread_info()->flags
* can't change between when we test it and when we return
* from the interrupt. */
+ /* Note: We don't bother telling lockdep about it */
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@@ -744,11 +823,24 @@ resume_kernel:
beq+ restore
andi. r0,r3,MSR_EE /* interrupts off? */
beq restore /* don't schedule if so */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Lockdep thinks irqs are enabled, we need to call
+ * preempt_schedule_irq with IRQs off, so we inform lockdep
+ * now that we -did- turn them off already
+ */
+ bl trace_hardirqs_off
+#endif
1: bl preempt_schedule_irq
rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
lwz r3,TI_FLAGS(r9)
andi. r0,r3,_TIF_NEED_RESCHED
bne- 1b
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* And now, to properly rebalance the above, we tell lockdep they
+ * are being turned back on, which will happen when we return
+ */
+ bl trace_hardirqs_on
+#endif
#else
resume_kernel:
#endif /* CONFIG_PREEMPT */
@@ -765,6 +857,28 @@ restore:
stw r6,icache_44x_need_flush@l(r4)
1:
#endif /* CONFIG_44x */
+
+ lwz r9,_MSR(r1)
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Lockdep doesn't know about the fact that IRQs are temporarily turned
+ * off in this assembly code while peeking at TI_FLAGS() and such. However
+ * we need to inform it if the exception turned interrupts off, and we
+ * are about to trun them back on.
+ *
+ * The problem here sadly is that we don't know whether the exceptions was
+ * one that turned interrupts off or not. So we always tell lockdep about
+ * turning them on here when we go back to wherever we came from with EE
+ * on, even if that may meen some redudant calls being tracked. Maybe later
+ * we could encode what the exception did somewhere or test the exception
+ * type in the pt_regs but that sounds overkill
+ */
+ andi. r10,r9,MSR_EE
+ beq 1f
+ bl trace_hardirqs_on
+ lwz r9,_MSR(r1)
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
lwz r0,GPR0(r1)
lwz r2,GPR2(r1)
REST_4GPRS(3, r1)
@@ -782,7 +896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
stwcx. r0,0,r1 /* to clear the reservation */
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- lwz r9,_MSR(r1)
andi. r10,r9,MSR_RI /* check if this exception occurred */
beql nonrecoverable /* at a bad place (MSR:RI = 0) */
@@ -805,7 +918,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
exc_exit_restart:
- lwz r9,_MSR(r1)
lwz r12,_NIP(r1)
FIX_SRR1(r9,r10)
mtspr SPRN_SRR0,r12
@@ -1035,11 +1147,18 @@ do_work: /* r10 contains MSR_KERNEL here */
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
+ /* Note: We don't need to inform lockdep that we are enabling
+ * interrupts here. As far as it knows, they are already enabled
+ */
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10) /* hard-enable interrupts */
bl schedule
recheck:
+ /* Note: And we don't tell it we are disabling them again
+ * neither. Those disable/enable cycles used to peek at
+ * TI_FLAGS aren't advertised.
+ */
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 48469463f89..fc213294275 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -1124,9 +1124,8 @@ mmu_off:
RFI
/*
- * Use the first pair of BAT registers to map the 1st 16MB
- * of RAM to PAGE_OFFSET. From this point on we can't safely
- * call OF any more.
+ * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET
+ * (we keep one for debugging) and on others, we use one 256M BAT.
*/
initial_bats:
lis r11,PAGE_OFFSET@h
@@ -1136,12 +1135,16 @@ initial_bats:
bne 4f
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
- oris r9,r11,0x800000@h /* set up BAT reg for 2nd 8M */
- oris r10,r8,0x800000@h /* set up BAT reg for 2nd 8M */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
mtspr SPRN_IBAT0L,r8 /* lower BAT register */
- mtspr SPRN_IBAT1U,r9
- mtspr SPRN_IBAT1L,r10
+ addis r11,r11,0x800000@h
+ addis r8,r8,0x800000@h
+ mtspr SPRN_IBAT1U,r11
+ mtspr SPRN_IBAT1L,r8
+ addis r11,r11,0x800000@h
+ addis r8,r8,0x800000@h
+ mtspr SPRN_IBAT2U,r11
+ mtspr SPRN_IBAT2L,r8
isync
blr
diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
index fa983a59c4c..a359cb08e90 100644
--- a/arch/powerpc/kernel/of_device.c
+++ b/arch/powerpc/kernel/of_device.c
@@ -76,7 +76,7 @@ struct of_device *of_device_alloc(struct device_node *np,
dev->dev.archdata.of_node = np;
if (bus_id)
- dev_set_name(&dev->dev, bus_id);
+ dev_set_name(&dev->dev, "%s", bus_id);
else
of_device_make_bus_id(dev);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3e7135bbe40..892a9f2e6d7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -528,7 +528,7 @@ void show_regs(struct pt_regs * regs)
for (i = 0; i < 32; i++) {
if ((i % REGS_PER_LINE) == 0)
- printk("\n" KERN_INFO "GPR%02d: ", i);
+ printk("\nGPR%02d: ", i);
printk(REG " ", regs->gpr[i]);
if (i == LAST_VOLATILE && !FULL_REGS(regs))
break;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ee4c7609b64..c434823b8c8 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -38,9 +38,10 @@
#include <asm/syscalls.h>
#include <asm/smp.h>
#include <asm/atomic.h>
+#include <asm/time.h>
struct rtas_t rtas = {
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __RAW_SPIN_LOCK_UNLOCKED
};
EXPORT_SYMBOL(rtas);
@@ -67,6 +68,28 @@ unsigned long rtas_rmo_buf;
void (*rtas_flash_term_hook)(int);
EXPORT_SYMBOL(rtas_flash_term_hook);
+/* RTAS use home made raw locking instead of spin_lock_irqsave
+ * because those can be called from within really nasty contexts
+ * such as having the timebase stopped which would lockup with
+ * normal locks and spinlock debugging enabled
+ */
+static unsigned long lock_rtas(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ preempt_disable();
+ __raw_spin_lock_flags(&rtas.lock, flags);
+ return flags;
+}
+
+static void unlock_rtas(unsigned long flags)
+{
+ __raw_spin_unlock(&rtas.lock);
+ local_irq_restore(flags);
+ preempt_enable();
+}
+
/*
* call_rtas_display_status and call_rtas_display_status_delay
* are designed only for very early low-level debugging, which
@@ -79,7 +102,7 @@ static void call_rtas_display_status(char c)
if (!rtas.base)
return;
- spin_lock_irqsave(&rtas.lock, s);
+ s = lock_rtas();
args->token = 10;
args->nargs = 1;
@@ -89,7 +112,7 @@ static void call_rtas_display_status(char c)
enter_rtas(__pa(args));
- spin_unlock_irqrestore(&rtas.lock, s);
+ unlock_rtas(s);
}
static void call_rtas_display_status_delay(char c)
@@ -411,8 +434,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
return -1;
- /* Gotta do something different here, use global lock for now... */
- spin_lock_irqsave(&rtas.lock, s);
+ s = lock_rtas();
rtas_args = &rtas.args;
rtas_args->token = token;
@@ -439,8 +461,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
outputs[i] = rtas_args->rets[i+1];
ret = (nret > 0)? rtas_args->rets[0]: 0;
- /* Gotta do something different here, use global lock for now... */
- spin_unlock_irqrestore(&rtas.lock, s);
+ unlock_rtas(s);
if (buff_copy) {
log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
@@ -837,7 +858,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
buff_copy = get_errorlog_buffer();
- spin_lock_irqsave(&rtas.lock, flags);
+ flags = lock_rtas();
rtas.args = args;
enter_rtas(__pa(&rtas.args));
@@ -848,7 +869,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
if (args.rets[0] == -1)
errbuf = __fetch_rtas_last_error(buff_copy);
- spin_unlock_irqrestore(&rtas.lock, flags);
+ unlock_rtas(flags);
if (buff_copy) {
if (errbuf)
@@ -951,3 +972,33 @@ int __init early_init_dt_scan_rtas(unsigned long node,
/* break now */
return 1;
}
+
+static raw_spinlock_t timebase_lock;
+static u64 timebase = 0;
+
+void __cpuinit rtas_give_timebase(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ hard_irq_disable();
+ __raw_spin_lock(&timebase_lock);
+ rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
+ timebase = get_tb();
+ __raw_spin_unlock(&timebase_lock);
+
+ while (timebase)
+ barrier();
+ rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
+ local_irq_restore(flags);
+}
+
+void __cpuinit rtas_take_timebase(void)
+{
+ while (!timebase)
+ barrier();
+ __raw_spin_lock(&timebase_lock);
+ set_tb(timebase >> 32, timebase & 0xffffffff);
+ timebase = 0;
+ __raw_spin_unlock(&timebase_lock);
+}
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 1d154248cf4..e1e3059cf34 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -119,6 +119,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
*/
notrace void __init machine_init(unsigned long dt_ptr)
{
+ lockdep_init();
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 65484b2200b..0b47de07302 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -68,7 +68,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
-static volatile unsigned int cpu_callin_map[NR_CPUS];
+/* Can't be static due to PowerMac hackery */
+volatile unsigned int cpu_callin_map[NR_CPUS];
int smt_enabled_at_boot = 1;
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 0362a891e54..acb74a17bbb 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -219,7 +219,7 @@ void udbg_init_pas_realmode(void)
#ifdef CONFIG_PPC_EARLY_DEBUG_44x
#include <platforms/44x/44x.h>
-static int udbg_44x_as1_flush(void)
+static void udbg_44x_as1_flush(void)
{
if (udbg_comport) {
while ((as1_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 2d2192e48de..3e68363405b 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
new file mode 100644
index 00000000000..c2186c74c85
--- /dev/null
+++ b/arch/powerpc/mm/highmem.c
@@ -0,0 +1,77 @@
+/*
+ * highmem.c: virtual kernel memory mappings for high memory
+ *
+ * PowerPC version, stolen from the i386 version.
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ * Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * up to 16 Terrabyte physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ *
+ * Reworked for PowerPC by various contributors. Moved from
+ * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+
+/*
+ * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
+ * gives a more generic (and caching) interface. But kmap_atomic can
+ * be used in IRQ contexts, so in some (very limited) cases we need
+ * it.
+ */
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+{
+ unsigned int idx;
+ unsigned long vaddr;
+
+ /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ pagefault_disable();
+ if (!PageHighMem(page))
+ return page_address(page);
+
+ debug_kmap_atomic(type);
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(!pte_none(*(kmap_pte-idx)));
+#endif
+ __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
+ local_flush_tlb_page(NULL, vaddr);
+
+ return (void*) vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic_prot);
+
+void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#ifdef CONFIG_DEBUG_HIGHMEM
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+ if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
+ pagefault_enable();
+ return;
+ }
+
+ BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+
+ /*
+ * force other mappings to Oops if they'll try to access
+ * this pte without first remap it
+ */
+ pte_clear(&init_mm, vaddr, kmap_pte-idx);
+ local_flush_tlb_page(NULL, vaddr);
+#endif
+ pagefault_enable();
+}
+EXPORT_SYMBOL(kunmap_atomic);
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 42e09a9f77e..0362c88f47d 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/of_gpio.h>
+#include <linux/of_i2c.h>
#include <asm/machdep.h>
#include <asm/prom.h>
@@ -65,7 +66,6 @@ define_machine(warp) {
static u32 post_info;
-/* I am not sure this is the best place for this... */
static int __init warp_post_info(void)
{
struct device_node *np;
@@ -194,9 +194,9 @@ static int pika_setup_leds(void)
return 0;
}
-static void pika_setup_critical_temp(struct i2c_client *client)
+static void pika_setup_critical_temp(struct device_node *np,
+ struct i2c_client *client)
{
- struct device_node *np;
int irq, rc;
/* Do this before enabling critical temp interrupt since we
@@ -208,14 +208,7 @@ static void pika_setup_critical_temp(struct i2c_client *client)
i2c_smbus_write_byte_data(client, 2, 65); /* Thigh */
i2c_smbus_write_byte_data(client, 3, 0); /* Tlow */
- np = of_find_compatible_node(NULL, NULL, "adi,ad7414");
- if (np == NULL) {
- printk(KERN_ERR __FILE__ ": Unable to find ad7414\n");
- return;
- }
-
irq = irq_of_parse_and_map(np, 0);
- of_node_put(np);
if (irq == NO_IRQ) {
printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n");
return;
@@ -244,32 +237,24 @@ static inline void pika_dtm_check_fan(void __iomem *fpga)
static int pika_dtm_thread(void __iomem *fpga)
{
- struct i2c_adapter *adap;
+ struct device_node *np;
struct i2c_client *client;
- /* We loop in case either driver was compiled as a module and
- * has not been insmoded yet.
- */
- while (!(adap = i2c_get_adapter(0))) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ);
- }
-
- while (1) {
- list_for_each_entry(client, &adap->clients, list)
- if (client->addr == 0x4a)
- goto found_it;
+ np = of_find_compatible_node(NULL, NULL, "adi,ad7414");
+ if (np == NULL)
+ return -ENOENT;
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ);
+ client = of_find_i2c_device_by_node(np);
+ if (client == NULL) {
+ of_node_put(np);
+ return -ENOENT;
}
-found_it:
- pika_setup_critical_temp(client);
+ pika_setup_critical_temp(np, client);
- i2c_put_adapter(adap);
+ of_node_put(np);
- printk(KERN_INFO "PIKA DTM thread running.\n");
+ printk(KERN_INFO "Warp DTM thread running.\n");
while (!kthread_should_stop()) {
int val;
@@ -291,7 +276,6 @@ found_it:
return 0;
}
-
static int __init pika_dtm_start(void)
{
struct task_struct *dtm_thread;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 77f90b35635..60ed9c067b1 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -285,6 +285,7 @@ static struct of_device_id mpc85xx_ids[] = {
{ .type = "qe", },
{ .compatible = "fsl,qe", },
{ .compatible = "gianfar", },
+ { .compatible = "fsl,rapidio-delta", },
{},
};
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index cc0b0db8a6f..62c592ede64 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -52,20 +52,19 @@ smp_85xx_kick_cpu(int nr)
pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr);
- local_irq_save(flags);
-
np = of_get_cpu_node(nr, NULL);
cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL);
if (cpu_rel_addr == NULL) {
printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
- local_irq_restore(flags);
return;
}
/* Map the spin table */
bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY);
+ local_irq_save(flags);
+
out_be32(bptr_vaddr + BOOT_ENTRY_PIR, nr);
out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start));
@@ -73,10 +72,10 @@ smp_85xx_kick_cpu(int nr)
while ((__secondary_hold_acknowledge != nr) && (++n < 1000))
mdelay(1);
- iounmap(bptr_vaddr);
-
local_irq_restore(flags);
+ iounmap(bptr_vaddr);
+
pr_debug("waited %d msecs for CPU #%d.\n", n, nr);
}
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index d0e8443b12c..747d8fb3ab8 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -102,10 +102,11 @@ static struct of_device_id __initdata socrates_of_bus_ids[] = {
{},
};
-static void __init socrates_init(void)
+static int __init socrates_publish_devices(void)
{
- of_platform_bus_probe(NULL, socrates_of_bus_ids, NULL);
+ return of_platform_bus_probe(NULL, socrates_of_bus_ids, NULL);
}
+machine_device_initcall(socrates, socrates_publish_devices);
/*
* Called very early, device-tree isn't unflattened
@@ -124,7 +125,6 @@ define_machine(socrates) {
.name = "Socrates",
.probe = socrates_probe,
.setup_arch = socrates_setup_arch,
- .init = socrates_init,
.init_IRQ = socrates_pic_init,
.get_irq = mpic_get_irq,
.restart = fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index ee01532786e..1b426050a2f 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -32,7 +32,6 @@
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
-#include <linux/of_platform.h>
/* A few bit definitions needed for fixups on some boards */
#define MPC85xx_L2CTL_L2E 0x80000000 /* L2 enable */
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 9046803c827..bc97fada48c 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -36,7 +36,6 @@
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/paca.h>
-#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
@@ -140,31 +139,6 @@ static void __devinit smp_cell_setup_cpu(int cpu)
mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
}
-static DEFINE_SPINLOCK(timebase_lock);
-static unsigned long timebase = 0;
-
-static void __devinit cell_give_timebase(void)
-{
- spin_lock(&timebase_lock);
- rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
- timebase = get_tb();
- spin_unlock(&timebase_lock);
-
- while (timebase)
- barrier();
- rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
-}
-
-static void __devinit cell_take_timebase(void)
-{
- while (!timebase)
- barrier();
- spin_lock(&timebase_lock);
- set_tb(timebase >> 32, timebase & 0xffffffff);
- timebase = 0;
- spin_unlock(&timebase_lock);
-}
-
static void __devinit smp_cell_kick_cpu(int nr)
{
BUG_ON(nr < 0 || nr >= NR_CPUS);
@@ -224,8 +198,8 @@ void __init smp_init_cell(void)
/* Non-lpar has additional take/give timebase */
if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
- smp_ops->give_timebase = cell_give_timebase;
- smp_ops->take_timebase = cell_take_timebase;
+ smp_ops->give_timebase = rtas_give_timebase;
+ smp_ops->take_timebase = rtas_take_timebase;
}
DBG(" <- smp_init_cell()\n");
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index 10a4a4d063b..02cafecc90e 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -26,7 +26,6 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/smp.h>
-#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/mpic.h>
#include <asm/rtas.h>
@@ -42,40 +41,12 @@ static void __devinit smp_chrp_setup_cpu(int cpu_nr)
mpic_setup_this_cpu();
}
-static DEFINE_SPINLOCK(timebase_lock);
-static unsigned int timebase_upper = 0, timebase_lower = 0;
-
-void __devinit smp_chrp_give_timebase(void)
-{
- spin_lock(&timebase_lock);
- rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
- timebase_upper = get_tbu();
- timebase_lower = get_tbl();
- spin_unlock(&timebase_lock);
-
- while (timebase_upper || timebase_lower)
- barrier();
- rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
-}
-
-void __devinit smp_chrp_take_timebase(void)
-{
- while (!(timebase_upper || timebase_lower))
- barrier();
- spin_lock(&timebase_lock);
- set_tb(timebase_upper, timebase_lower);
- timebase_upper = 0;
- timebase_lower = 0;
- spin_unlock(&timebase_lock);
- printk("CPU %i taken timebase\n", smp_processor_id());
-}
-
/* CHRP with openpic */
struct smp_ops_t chrp_smp_ops = {
.message_pass = smp_mpic_message_pass,
.probe = smp_mpic_probe,
.kick_cpu = smp_chrp_kick_cpu,
.setup_cpu = smp_chrp_setup_cpu,
- .give_timebase = smp_chrp_give_timebase,
- .take_timebase = smp_chrp_take_timebase,
+ .give_timebase = rtas_give_timebase,
+ .take_timebase = rtas_take_timebase,
};
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index 153051eb6d9..a4619347aa7 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -71,20 +71,25 @@ static void pas_restart(char *cmd)
}
#ifdef CONFIG_SMP
-static DEFINE_SPINLOCK(timebase_lock);
+static raw_spinlock_t timebase_lock;
static unsigned long timebase;
static void __devinit pas_give_timebase(void)
{
- spin_lock(&timebase_lock);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ hard_irq_disable();
+ __raw_spin_lock(&timebase_lock);
mtspr(SPRN_TBCTL, TBCTL_FREEZE);
isync();
timebase = get_tb();
- spin_unlock(&timebase_lock);
+ __raw_spin_unlock(&timebase_lock);
while (timebase)
barrier();
mtspr(SPRN_TBCTL, TBCTL_RESTART);
+ local_irq_restore(flags);
}
static void __devinit pas_take_timebase(void)
@@ -92,10 +97,10 @@ static void __devinit pas_take_timebase(void)
while (!timebase)
smp_rmb();
- spin_lock(&timebase_lock);
+ __raw_spin_lock(&timebase_lock);
set_tb(timebase >> 32, timebase & 0xffffffff);
timebase = 0;
- spin_unlock(&timebase_lock);
+ __raw_spin_unlock(&timebase_lock);
}
struct smp_ops_t pas_smp_ops = {
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 86f69a4eb49..c2052265636 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -103,11 +103,6 @@ unsigned long smu_cmdbuf_abs;
EXPORT_SYMBOL(smu_cmdbuf_abs);
#endif
-#ifdef CONFIG_SMP
-extern struct smp_ops_t psurge_smp_ops;
-extern struct smp_ops_t core99_smp_ops;
-#endif /* CONFIG_SMP */
-
static void pmac_show_cpuinfo(struct seq_file *m)
{
struct device_node *np;
@@ -341,34 +336,6 @@ static void __init pmac_setup_arch(void)
ROOT_DEV = DEFAULT_ROOT_DEVICE;
#endif
-#ifdef CONFIG_SMP
- /* Check for Core99 */
- ic = of_find_node_by_name(NULL, "uni-n");
- if (!ic)
- ic = of_find_node_by_name(NULL, "u3");
- if (!ic)
- ic = of_find_node_by_name(NULL, "u4");
- if (ic) {
- of_node_put(ic);
- smp_ops = &core99_smp_ops;
- }
-#ifdef CONFIG_PPC32
- else {
- /*
- * We have to set bits in cpu_possible_map here since the
- * secondary CPU(s) aren't in the device tree, and
- * setup_per_cpu_areas only allocates per-cpu data for
- * CPUs in the cpu_possible_map.
- */
- int cpu;
-
- for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu)
- cpu_set(cpu, cpu_possible_map);
- smp_ops = &psurge_smp_ops;
- }
-#endif
-#endif /* CONFIG_SMP */
-
#ifdef CONFIG_ADB
if (strstr(cmd_line, "adb_sync")) {
extern int __adb_probe_sync;
@@ -512,6 +479,14 @@ static void __init pmac_init_early(void)
#ifdef CONFIG_PPC64
iommu_init_early_dart();
#endif
+
+ /* SMP Init has to be done early as we need to patch up
+ * cpu_possible_map before interrupt stacks are allocated
+ * or kaboom...
+ */
+#ifdef CONFIG_SMP
+ pmac_setup_smp();
+#endif
}
static int __init pmac_declare_of_platform_devices(void)
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index cf1dbe75889..6d4da7b46b4 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -64,10 +64,11 @@
extern void __secondary_start_pmac_0(void);
extern int pmac_pfunc_base_install(void);
-#ifdef CONFIG_PPC32
+static void (*pmac_tb_freeze)(int freeze);
+static u64 timebase;
+static int tb_req;
-/* Sync flag for HW tb sync */
-static volatile int sec_tb_reset = 0;
+#ifdef CONFIG_PPC32
/*
* Powersurge (old powermac SMP) support.
@@ -294,6 +295,9 @@ static int __init smp_psurge_probe(void)
psurge_quad_init();
/* All released cards using this HW design have 4 CPUs */
ncpus = 4;
+ /* No sure how timebase sync works on those, let's use SW */
+ smp_ops->give_timebase = smp_generic_give_timebase;
+ smp_ops->take_timebase = smp_generic_take_timebase;
} else {
iounmap(quad_base);
if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) {
@@ -308,18 +312,15 @@ static int __init smp_psurge_probe(void)
psurge_start = ioremap(PSURGE_START, 4);
psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
- /*
- * This is necessary because OF doesn't know about the
+ /* This is necessary because OF doesn't know about the
* secondary cpu(s), and thus there aren't nodes in the
* device tree for them, and smp_setup_cpu_maps hasn't
- * set their bits in cpu_possible_map and cpu_present_map.
+ * set their bits in cpu_present_map.
*/
if (ncpus > NR_CPUS)
ncpus = NR_CPUS;
- for (i = 1; i < ncpus ; ++i) {
+ for (i = 1; i < ncpus ; ++i)
cpu_set(i, cpu_present_map);
- set_hard_smp_processor_id(i, i);
- }
if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
@@ -329,8 +330,14 @@ static int __init smp_psurge_probe(void)
static void __init smp_psurge_kick_cpu(int nr)
{
unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
- unsigned long a;
- int i;
+ unsigned long a, flags;
+ int i, j;
+
+ /* Defining this here is evil ... but I prefer hiding that
+ * crap to avoid giving people ideas that they can do the
+ * same.
+ */
+ extern volatile unsigned int cpu_callin_map[NR_CPUS];
/* may need to flush here if secondary bats aren't setup */
for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32)
@@ -339,47 +346,52 @@ static void __init smp_psurge_kick_cpu(int nr)
if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353);
+ /* This is going to freeze the timeebase, we disable interrupts */
+ local_irq_save(flags);
+
out_be32(psurge_start, start);
mb();
psurge_set_ipi(nr);
+
/*
* We can't use udelay here because the timebase is now frozen.
*/
for (i = 0; i < 2000; ++i)
- barrier();
+ asm volatile("nop" : : : "memory");
psurge_clr_ipi(nr);
- if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
-}
-
-/*
- * With the dual-cpu powersurge board, the decrementers and timebases
- * of both cpus are frozen after the secondary cpu is started up,
- * until we give the secondary cpu another interrupt. This routine
- * uses this to get the timebases synchronized.
- * -- paulus.
- */
-static void __init psurge_dual_sync_tb(int cpu_nr)
-{
- int t;
-
- set_dec(tb_ticks_per_jiffy);
- /* XXX fixme */
- set_tb(0, 0);
-
- if (cpu_nr > 0) {
+ /*
+ * Also, because the timebase is frozen, we must not return to the
+ * caller which will try to do udelay's etc... Instead, we wait -here-
+ * for the CPU to callin.
+ */
+ for (i = 0; i < 100000 && !cpu_callin_map[nr]; ++i) {
+ for (j = 1; j < 10000; j++)
+ asm volatile("nop" : : : "memory");
+ asm volatile("sync" : : : "memory");
+ }
+ if (!cpu_callin_map[nr])
+ goto stuck;
+
+ /* And we do the TB sync here too for standard dual CPU cards */
+ if (psurge_type == PSURGE_DUAL) {
+ while(!tb_req)
+ barrier();
+ tb_req = 0;
+ mb();
+ timebase = get_tb();
+ mb();
+ while (timebase)
+ barrier();
mb();
- sec_tb_reset = 1;
- return;
}
+ stuck:
+ /* now interrupt the secondary, restarting both TBs */
+ if (psurge_type == PSURGE_DUAL)
+ psurge_set_ipi(1);
- /* wait for the secondary to have reset its TB before proceeding */
- for (t = 10000000; t > 0 && !sec_tb_reset; --t)
- ;
-
- /* now interrupt the secondary, starting both TBs */
- psurge_set_ipi(1);
+ if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
}
static struct irqaction psurge_irqaction = {
@@ -390,36 +402,35 @@ static struct irqaction psurge_irqaction = {
static void __init smp_psurge_setup_cpu(int cpu_nr)
{
+ if (cpu_nr != 0)
+ return;
- if (cpu_nr == 0) {
- /* If we failed to start the second CPU, we should still
- * send it an IPI to start the timebase & DEC or we might
- * have them stuck.
- */
- if (num_online_cpus() < 2) {
- if (psurge_type == PSURGE_DUAL)
- psurge_set_ipi(1);
- return;
- }
- /* reset the entry point so if we get another intr we won't
- * try to startup again */
- out_be32(psurge_start, 0x100);
- if (setup_irq(30, &psurge_irqaction))
- printk(KERN_ERR "Couldn't get primary IPI interrupt");
- }
-
- if (psurge_type == PSURGE_DUAL)
- psurge_dual_sync_tb(cpu_nr);
+ /* reset the entry point so if we get another intr we won't
+ * try to startup again */
+ out_be32(psurge_start, 0x100);
+ if (setup_irq(30, &psurge_irqaction))
+ printk(KERN_ERR "Couldn't get primary IPI interrupt");
}
void __init smp_psurge_take_timebase(void)
{
- /* Dummy implementation */
+ if (psurge_type != PSURGE_DUAL)
+ return;
+
+ tb_req = 1;
+ mb();
+ while (!timebase)
+ barrier();
+ mb();
+ set_tb(timebase >> 32, timebase & 0xffffffff);
+ timebase = 0;
+ mb();
+ set_dec(tb_ticks_per_jiffy/2);
}
void __init smp_psurge_give_timebase(void)
{
- /* Dummy implementation */
+ /* Nothing to do here */
}
/* PowerSurge-style Macs */
@@ -437,9 +448,6 @@ struct smp_ops_t psurge_smp_ops = {
* Core 99 and later support
*/
-static void (*pmac_tb_freeze)(int freeze);
-static u64 timebase;
-static int tb_req;
static void smp_core99_give_timebase(void)
{
@@ -478,7 +486,6 @@ static void __devinit smp_core99_take_timebase(void)
set_tb(timebase >> 32, timebase & 0xffffffff);
timebase = 0;
mb();
- set_dec(tb_ticks_per_jiffy/2);
local_irq_restore(flags);
}
@@ -920,3 +927,34 @@ struct smp_ops_t core99_smp_ops = {
# endif
#endif
};
+
+void __init pmac_setup_smp(void)
+{
+ struct device_node *np;
+
+ /* Check for Core99 */
+ np = of_find_node_by_name(NULL, "uni-n");
+ if (!np)
+ np = of_find_node_by_name(NULL, "u3");
+ if (!np)
+ np = of_find_node_by_name(NULL, "u4");
+ if (np) {
+ of_node_put(np);
+ smp_ops = &core99_smp_ops;
+ }
+#ifdef CONFIG_PPC32
+ else {
+ /* We have to set bits in cpu_possible_map here since the
+ * secondary CPU(s) aren't in the device tree. Various
+ * things won't be initialized for CPUs not in the possible
+ * map, so we really need to fix it up here.
+ */
+ int cpu;
+
+ for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu)
+ cpu_set(cpu, cpu_possible_map);
+ smp_ops = &psurge_smp_ops;
+ }
+#endif /* CONFIG_PPC32 */
+}
+
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 1a231c389ba..1f8f6cfb94f 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -35,7 +35,6 @@
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/paca.h>
-#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
@@ -118,31 +117,6 @@ static void __devinit smp_xics_setup_cpu(int cpu)
}
#endif /* CONFIG_XICS */
-static DEFINE_SPINLOCK(timebase_lock);
-static unsigned long timebase = 0;
-
-static void __devinit pSeries_give_timebase(void)
-{
- spin_lock(&timebase_lock);
- rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
- timebase = get_tb();
- spin_unlock(&timebase_lock);
-
- while (timebase)
- barrier();
- rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
-}
-
-static void __devinit pSeries_take_timebase(void)
-{
- while (!timebase)
- barrier();
- spin_lock(&timebase_lock);
- set_tb(timebase >> 32, timebase & 0xffffffff);
- timebase = 0;
- spin_unlock(&timebase_lock);
-}
-
static void __devinit smp_pSeries_kick_cpu(int nr)
{
BUG_ON(nr < 0 || nr >= NR_CPUS);
@@ -209,8 +183,8 @@ static void __init smp_init_pseries(void)
/* Non-lpar has additional take/give timebase */
if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
- smp_ops->give_timebase = pSeries_give_timebase;
- smp_ops->take_timebase = pSeries_take_timebase;
+ smp_ops->give_timebase = rtas_give_timebase;
+ smp_ops->take_timebase = rtas_take_timebase;
}
pr_debug(" <- smp_init_pSeries()\n");
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 9c3af504549..d46de1f0f3e 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -279,28 +279,29 @@ static void _mpic_map_mmio(struct mpic *mpic, phys_addr_t phys_addr,
}
#ifdef CONFIG_PPC_DCR
-static void _mpic_map_dcr(struct mpic *mpic, struct mpic_reg_bank *rb,
+static void _mpic_map_dcr(struct mpic *mpic, struct device_node *node,
+ struct mpic_reg_bank *rb,
unsigned int offset, unsigned int size)
{
const u32 *dbasep;
- dbasep = of_get_property(mpic->irqhost->of_node, "dcr-reg", NULL);
+ dbasep = of_get_property(node, "dcr-reg", NULL);
- rb->dhost = dcr_map(mpic->irqhost->of_node, *dbasep + offset, size);
+ rb->dhost = dcr_map(node, *dbasep + offset, size);
BUG_ON(!DCR_MAP_OK(rb->dhost));
}
-static inline void mpic_map(struct mpic *mpic, phys_addr_t phys_addr,
- struct mpic_reg_bank *rb, unsigned int offset,
- unsigned int size)
+static inline void mpic_map(struct mpic *mpic, struct device_node *node,
+ phys_addr_t phys_addr, struct mpic_reg_bank *rb,
+ unsigned int offset, unsigned int size)
{
if (mpic->flags & MPIC_USES_DCR)
- _mpic_map_dcr(mpic, rb, offset, size);
+ _mpic_map_dcr(mpic, node, rb, offset, size);
else
_mpic_map_mmio(mpic, phys_addr, rb, offset, size);
}
#else /* CONFIG_PPC_DCR */
-#define mpic_map(m,p,b,o,s) _mpic_map_mmio(m,p,b,o,s)
+#define mpic_map(m,n,p,b,o,s) _mpic_map_mmio(m,p,b,o,s)
#endif /* !CONFIG_PPC_DCR */
@@ -1052,11 +1053,10 @@ struct mpic * __init mpic_alloc(struct device_node *node,
int intvec_top;
u64 paddr = phys_addr;
- mpic = alloc_bootmem(sizeof(struct mpic));
+ mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL);
if (mpic == NULL)
return NULL;
-
- memset(mpic, 0, sizeof(struct mpic));
+
mpic->name = name;
mpic->hc_irq = mpic_irq_chip;
@@ -1152,8 +1152,8 @@ struct mpic * __init mpic_alloc(struct device_node *node,
}
/* Map the global registers */
- mpic_map(mpic, paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
- mpic_map(mpic, paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
+ mpic_map(mpic, node, paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
+ mpic_map(mpic, node, paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
/* Reset */
if (flags & MPIC_WANTS_RESET) {
@@ -1194,7 +1194,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
/* Map the per-CPU registers */
for (i = 0; i < mpic->num_cpus; i++) {
- mpic_map(mpic, paddr, &mpic->cpuregs[i],
+ mpic_map(mpic, node, paddr, &mpic->cpuregs[i],
MPIC_INFO(CPU_BASE) + i * MPIC_INFO(CPU_STRIDE),
0x1000);
}
@@ -1202,7 +1202,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
/* Initialize main ISU if none provided */
if (mpic->isu_size == 0) {
mpic->isu_size = mpic->num_sources;
- mpic_map(mpic, paddr, &mpic->isus[0],
+ mpic_map(mpic, node, paddr, &mpic->isus[0],
MPIC_INFO(IRQ_BASE), MPIC_INFO(IRQ_STRIDE) * mpic->isu_size);
}
mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
@@ -1256,8 +1256,10 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
BUG_ON(isu_num >= MPIC_MAX_ISU);
- mpic_map(mpic, paddr, &mpic->isus[isu_num], 0,
+ mpic_map(mpic, mpic->irqhost->of_node,
+ paddr, &mpic->isus[isu_num], 0,
MPIC_INFO(IRQ_STRIDE) * mpic->isu_size);
+
if ((isu_first + mpic->isu_size) > mpic->num_sources)
mpic->num_sources = isu_first + mpic->isu_size;
}
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index b28b0e512d6..237e3654f48 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -112,6 +112,7 @@ int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input)
{
unsigned long flags;
u8 mcn_shift = 0, dev_shift = 0;
+ u32 ret;
spin_lock_irqsave(&qe_lock, flags);
if (cmd == QE_RESET) {
@@ -139,11 +140,13 @@ int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input)
}
/* wait for the QE_CR_FLG to clear */
- while(in_be32(&qe_immr->cp.cecr) & QE_CR_FLG)
- cpu_relax();
+ ret = spin_event_timeout((in_be32(&qe_immr->cp.cecr) & QE_CR_FLG) == 0,
+ 100, 0);
+ /* On timeout (e.g. failure), the expression will be false (ret == 0),
+ otherwise it will be true (ret == 1). */
spin_unlock_irqrestore(&qe_lock, flags);
- return 0;
+ return ret == 1;
}
EXPORT_SYMBOL(qe_issue_cmd);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a27d0d5a6f8..1cd02f6073a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -99,7 +99,9 @@ struct kvm_s390_sie_block {
__u8 reservedd0[48]; /* 0x00d0 */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
- __u8 reserved188[120]; /* 0x0188 */
+ __u8 reserved188[24]; /* 0x0188 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[92]; /* 0x01a4 */
} __attribute__((packed));
struct kvm_vcpu_stat {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c18b21d6991..90d9d1ba258 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -25,6 +25,7 @@
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
+#include <asm/system.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -69,6 +70,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+static unsigned long long *facilities;
/* Section: not file related */
void kvm_arch_hardware_enable(void *garbage)
@@ -288,6 +290,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
vcpu->arch.sie_block->ecb = 2;
vcpu->arch.sie_block->eca = 0xC1002001U;
+ vcpu->arch.sie_block->fac = (int) (long) facilities;
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
@@ -739,11 +742,29 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
static int __init kvm_s390_init(void)
{
- return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+ int ret;
+ ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+ if (ret)
+ return ret;
+
+ /*
+ * guests can ask for up to 255+1 double words, we need a full page
+ * to hold the maximum amount of facilites. On the other hand, we
+ * only set facilities that are known to work in KVM.
+ */
+ facilities = (unsigned long long *) get_zeroed_page(GFP_DMA);
+ if (!facilities) {
+ kvm_exit();
+ return -ENOMEM;
+ }
+ stfle(facilities, 1);
+ facilities[0] &= 0xff00fff3f0700000ULL;
+ return 0;
}
static void __exit kvm_s390_exit(void)
{
+ free_page((unsigned long) facilities);
kvm_exit();
}
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 93ecd06e1a7..d426aac8095 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -158,7 +158,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_stfl++;
/* only pass the facility bits, which we can handle */
- facility_list &= 0xfe00fff3;
+ facility_list &= 0xff00fff3;
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
&facility_list, sizeof(facility_list));
diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile
index 96041a8d39e..1ff0fd92475 100644
--- a/arch/sparc/boot/Makefile
+++ b/arch/sparc/boot/Makefile
@@ -15,7 +15,7 @@ quiet_cmd_elftoaout = ELFTOAOUT $@
ifeq ($(CONFIG_SPARC32),y)
quiet_cmd_piggy = PIGGY $@
- cmd_piggy = $(obj)/piggyback_32 $@ $(obj)/System.map $(ROOT_IMG)
+ cmd_piggy = $(obj)/piggyback_32 $@ System.map $(ROOT_IMG)
quiet_cmd_btfix = BTFIX $@
cmd_btfix = $(OBJDUMP) -x vmlinux | $(obj)/btfixupprep > $@
quiet_cmd_sysmap = SYSMAP $(obj)/System.map
@@ -58,7 +58,7 @@ $(obj)/image: $(obj)/btfix.o FORCE
$(obj)/zImage: $(obj)/image
$(call if_changed,strip)
-$(obj)/tftpboot.img: $(obj)/piggyback $(obj)/System.map $(obj)/image FORCE
+$(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback_32 System.map $(ROOT_IMG) FORCE
$(call if_changed,elftoaout)
$(call if_changed,piggy)
@@ -79,7 +79,7 @@ $(obj)/image: vmlinux FORCE
$(call if_changed,strip)
@echo ' kernel: $@ is ready'
-$(obj)/tftpboot.img: vmlinux $(obj)/piggyback_64 System.map $(ROOT_IMG) FORCE
+$(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback_64 System.map $(ROOT_IMG) FORCE
$(call if_changed,elftoaout)
$(call if_changed,piggy)
@echo ' kernel: $@ is ready'
diff --git a/arch/sparc/boot/piggyback_32.c b/arch/sparc/boot/piggyback_32.c
index c9f500c1a8b..e8dc9adfcd6 100644
--- a/arch/sparc/boot/piggyback_32.c
+++ b/arch/sparc/boot/piggyback_32.c
@@ -70,7 +70,7 @@ void die(char *str)
int main(int argc,char **argv)
{
static char aout_magic[] = { 0x01, 0x03, 0x01, 0x07 };
- unsigned char buffer[1024], *q, *r;
+ char buffer[1024], *q, *r;
unsigned int i, j, k, start, end, offset;
FILE *map;
struct stat s;
@@ -84,7 +84,7 @@ int main(int argc,char **argv)
while (fgets (buffer, 1024, map)) {
if (!strcmp (buffer + 8, " T start\n") || !strcmp (buffer + 16, " T start\n"))
start = strtoul (buffer, NULL, 16);
- else if (!strcmp (buffer + 8, " A end\n") || !strcmp (buffer + 16, " A end\n"))
+ else if (!strcmp (buffer + 8, " A _end\n") || !strcmp (buffer + 16, " A _end\n"))
end = strtoul (buffer, NULL, 16);
}
fclose (map);
diff --git a/arch/sparc/boot/piggyback_64.c b/arch/sparc/boot/piggyback_64.c
index de364bfed0b..c63fd1b6bdd 100644
--- a/arch/sparc/boot/piggyback_64.c
+++ b/arch/sparc/boot/piggyback_64.c
@@ -46,6 +46,7 @@ int main(int argc,char **argv)
struct stat s;
int image, tail;
+ start = end = 0;
if (stat (argv[3], &s) < 0) die (argv[3]);
map = fopen (argv[2], "r");
if (!map) die(argv[2]);
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index bd075054942..f0ee7905540 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -20,7 +20,6 @@
#include <linux/delay.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/bootmem.h>
#include <linux/irq.h>
#include <asm/ptrace.h>
@@ -914,25 +913,19 @@ void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu)
tb->nonresum_qmask);
}
-static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
-{
- unsigned long size = PAGE_ALIGN(qmask + 1);
- void *p = __alloc_bootmem(size, size, 0);
- if (!p) {
- prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
- prom_halt();
- }
-
- *pa_ptr = __pa(p);
-}
-
-static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
+/* Each queue region must be a power of 2 multiple of 64 bytes in
+ * size. The base real address must be aligned to the size of the
+ * region. Thus, an 8KB queue must be 8KB aligned, for example.
+ */
+static void __init alloc_one_queue(unsigned long *pa_ptr, unsigned long qmask)
{
unsigned long size = PAGE_ALIGN(qmask + 1);
- void *p = __alloc_bootmem(size, size, 0);
+ unsigned long order = get_order(size);
+ unsigned long p;
+ p = __get_free_pages(GFP_KERNEL, order);
if (!p) {
- prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
+ prom_printf("SUN4V: Error, cannot allocate queue.\n");
prom_halt();
}
@@ -942,11 +935,11 @@ static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
{
#ifdef CONFIG_SMP
- void *page;
+ unsigned long page;
BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
- page = alloc_bootmem_pages(PAGE_SIZE);
+ page = get_zeroed_page(GFP_KERNEL);
if (!page) {
prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
prom_halt();
@@ -965,13 +958,13 @@ static void __init sun4v_init_mondo_queues(void)
for_each_possible_cpu(cpu) {
struct trap_per_cpu *tb = &trap_block[cpu];
- alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
- alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
- alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
- alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
- alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
- alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
- tb->nonresum_qmask);
+ alloc_one_queue(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
+ alloc_one_queue(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
+ alloc_one_queue(&tb->resum_mondo_pa, tb->resum_qmask);
+ alloc_one_queue(&tb->resum_kernel_buf_pa, tb->resum_qmask);
+ alloc_one_queue(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
+ alloc_one_queue(&tb->nonresum_kernel_buf_pa,
+ tb->nonresum_qmask);
}
}
@@ -999,7 +992,7 @@ void __init init_IRQ(void)
kill_prom_timer();
size = sizeof(struct ino_bucket) * NUM_IVECS;
- ivector_table = alloc_bootmem(size);
+ ivector_table = kzalloc(size, GFP_KERNEL);
if (!ivector_table) {
prom_printf("Fatal error, cannot allocate ivector_table\n");
prom_halt();
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 5ec17563142..dd2aadc14af 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -30,7 +30,6 @@ static void slip_init(struct net_device *dev, void *data)
slip_proto_init(&spri->slip);
- dev->init = NULL;
dev->hard_header_len = 0;
dev->header_ops = NULL;
dev->addr_len = 0;
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index f15a6e7654f..e376284f0fb 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -32,7 +32,6 @@ void slirp_init(struct net_device *dev, void *data)
slip_proto_init(&spri->slip);
- dev->init = NULL;
dev->hard_header_len = 0;
dev->header_ops = NULL;
dev->addr_len = 0;
diff --git a/arch/um/include/asm/dma-mapping.h b/arch/um/include/asm/dma-mapping.h
index 90fc708b320..378de4bbf49 100644
--- a/arch/um/include/asm/dma-mapping.h
+++ b/arch/um/include/asm/dma-mapping.h
@@ -79,14 +79,14 @@ dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
}
static inline void
-dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
enum dma_data_direction direction)
{
BUG();
}
static inline void
-dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
enum dma_data_direction direction)
{
BUG();
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 418e632d4a8..7a1065958ba 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -8,7 +8,7 @@
#ifdef __KERNEL__
-#include <asm/page_types.h>
+#include <asm/pgtable_types.h>
/* Physical address where kernel should be loaded. */
#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
@@ -16,10 +16,10 @@
& ~(CONFIG_PHYSICAL_ALIGN - 1))
/* Minimum kernel alignment, as a power of two */
-#ifdef CONFIG_x86_64
+#ifdef CONFIG_X86_64
#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
#else
-#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1)
+#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER)
#endif
#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 02ecb30982a..103f1ddb0d8 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -42,6 +42,7 @@
#else /* ...!ASSEMBLY */
+#include <linux/kernel.h>
#include <linux/stringify.h>
#ifdef CONFIG_SMP
@@ -155,6 +156,15 @@ do { \
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+void *pcpu_lpage_remapped(void *kaddr);
+#else
+static inline void *pcpu_lpage_remapped(void *kaddr)
+{
+ return NULL;
+}
+#endif
+
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e5b27d8f1b4..28e5f595604 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
unsigned bits;
+ int cpu = smp_processor_id();
bits = c->x86_coreid_bits;
-
/* Low order bits define the core id (index of core in socket) */
c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
/* Convert the initial APIC ID into the socket ID */
c->phys_proc_id = c->initial_apicid >> bits;
+ /* use socket ID also for last level cache */
+ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6b26d4deada..f1961c07af9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -848,9 +848,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
numa_add_cpu(smp_processor_id());
#endif
-
- /* Cap the iomem address space to what is addressable on all CPUs */
- iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1;
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 284d1de968b..af425b83202 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1117,7 +1117,7 @@ static void mcheck_timer(unsigned long data)
*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
t->expires = jiffies + *n;
- add_timer(t);
+ add_timer_on(t, smp_processor_id());
}
static void mce_do_trigger(struct work_struct *work)
@@ -1321,7 +1321,7 @@ static void mce_init_timer(void)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
- add_timer(t);
+ add_timer_on(t, smp_processor_id());
}
/*
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 95ea5fa7d44..c8405718a4c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,6 +22,7 @@
#include "dumpstack.h"
int panic_on_unrecovered_nmi;
+int panic_on_io_nmi;
unsigned int code_bytes = 64;
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static int die_counter;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index be5ae80f897..de2cab13284 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align)
return ret;
}
+#ifdef CONFIG_X86_64
+static void __init init_gbpages(void)
+{
+ if (direct_gbpages && cpu_has_gbpages)
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ else
+ direct_gbpages = 0;
+}
+#else
+static inline void init_gbpages(void)
+{
+}
+#endif
+
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
@@ -871,6 +885,8 @@ void __init setup_arch(char **cmdline_p)
reserve_brk();
+ init_gbpages();
+
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9c3f0823e6a..29a3eef7cf4 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
}
/*
- * Remap allocator
+ * Large page remap allocator
*
* This allocator uses PMD page as unit. A PMD page is allocated for
* each cpu and each is remapped into vmalloc area using PMD mapping.
@@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
* better than only using 4k mappings while still being NUMA friendly.
*/
#ifdef CONFIG_NEED_MULTIPLE_NODES
-static size_t pcpur_size __initdata;
-static void **pcpur_ptrs __initdata;
+struct pcpul_ent {
+ unsigned int cpu;
+ void *ptr;
+};
+
+static size_t pcpul_size;
+static struct pcpul_ent *pcpul_map;
+static struct vm_struct pcpul_vm;
-static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
+static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
{
size_t off = (size_t)pageno << PAGE_SHIFT;
- if (off >= pcpur_size)
+ if (off >= pcpul_size)
return NULL;
- return virt_to_page(pcpur_ptrs[cpu] + off);
+ return virt_to_page(pcpul_map[cpu].ptr + off);
}
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
- static struct vm_struct vm;
- size_t ptrs_size, dyn_size;
+ size_t map_size, dyn_size;
unsigned int cpu;
+ int i, j;
ssize_t ret;
- /*
- * If large page isn't supported, there's no benefit in doing
- * this. Also, on non-NUMA, embedding is better.
- *
- * NOTE: disabled for now.
- */
- if (true || !cpu_has_pse || !pcpu_need_numa())
+ if (!chosen) {
+ size_t vm_size = VMALLOC_END - VMALLOC_START;
+ size_t tot_size = num_possible_cpus() * PMD_SIZE;
+
+ /* on non-NUMA, embedding is better */
+ if (!pcpu_need_numa())
+ return -EINVAL;
+
+ /* don't consume more than 20% of vmalloc area */
+ if (tot_size > vm_size / 5) {
+ pr_info("PERCPU: too large chunk size %zuMB for "
+ "large page remap\n", tot_size >> 20);
+ return -EINVAL;
+ }
+ }
+
+ /* need PSE */
+ if (!cpu_has_pse) {
+ pr_warning("PERCPU: lpage allocator requires PSE\n");
return -EINVAL;
+ }
/*
* Currently supports only single page. Supporting multiple
* pages won't be too difficult if it ever becomes necessary.
*/
- pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+ pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE);
- if (pcpur_size > PMD_SIZE) {
+ if (pcpul_size > PMD_SIZE) {
pr_warning("PERCPU: static data is larger than large page, "
"can't use large page\n");
return -EINVAL;
}
- dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+ dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
/* allocate pointer array and alloc large pages */
- ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
- pcpur_ptrs = alloc_bootmem(ptrs_size);
+ map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
+ pcpul_map = alloc_bootmem(map_size);
for_each_possible_cpu(cpu) {
- pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
- if (!pcpur_ptrs[cpu])
+ pcpul_map[cpu].cpu = cpu;
+ pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
+ PMD_SIZE);
+ if (!pcpul_map[cpu].ptr) {
+ pr_warning("PERCPU: failed to allocate large page "
+ "for cpu%u\n", cpu);
goto enomem;
+ }
/*
- * Only use pcpur_size bytes and give back the rest.
+ * Only use pcpul_size bytes and give back the rest.
*
* Ingo: The 2MB up-rounding bootmem is needed to make
* sure the partial 2MB page is still fully RAM - it's
* not well-specified to have a PAT-incompatible area
* (unmapped RAM, device memory, etc.) in that hole.
*/
- free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
- PMD_SIZE - pcpur_size);
+ free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
+ PMD_SIZE - pcpul_size);
- memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
+ memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
}
/* allocate address and map */
- vm.flags = VM_ALLOC;
- vm.size = num_possible_cpus() * PMD_SIZE;
- vm_area_register_early(&vm, PMD_SIZE);
+ pcpul_vm.flags = VM_ALLOC;
+ pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
+ vm_area_register_early(&pcpul_vm, PMD_SIZE);
for_each_possible_cpu(cpu) {
- pmd_t *pmd;
+ pmd_t *pmd, pmd_v;
- pmd = populate_extra_pmd((unsigned long)vm.addr
- + cpu * PMD_SIZE);
- set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
- PAGE_KERNEL_LARGE));
+ pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
+ cpu * PMD_SIZE);
+ pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
+ PAGE_KERNEL_LARGE);
+ set_pmd(pmd, pmd_v);
}
/* we're ready, commit */
pr_info("PERCPU: Remapped at %p with large pages, static data "
- "%zu bytes\n", vm.addr, static_size);
+ "%zu bytes\n", pcpul_vm.addr, static_size);
- ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+ ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
- PMD_SIZE, vm.addr, NULL);
- goto out_free_ar;
+ PMD_SIZE, pcpul_vm.addr, NULL);
+
+ /* sort pcpul_map array for pcpu_lpage_remapped() */
+ for (i = 0; i < num_possible_cpus() - 1; i++)
+ for (j = i + 1; j < num_possible_cpus(); j++)
+ if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
+ struct pcpul_ent tmp = pcpul_map[i];
+ pcpul_map[i] = pcpul_map[j];
+ pcpul_map[j] = tmp;
+ }
+
+ return ret;
enomem:
for_each_possible_cpu(cpu)
- if (pcpur_ptrs[cpu])
- free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE);
- ret = -ENOMEM;
-out_free_ar:
- free_bootmem(__pa(pcpur_ptrs), ptrs_size);
- return ret;
+ if (pcpul_map[cpu].ptr)
+ free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
+ free_bootmem(__pa(pcpul_map), map_size);
+ return -ENOMEM;
+}
+
+/**
+ * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
+ * @kaddr: the kernel address in question
+ *
+ * Determine whether @kaddr falls in the pcpul recycled area. This is
+ * used by pageattr to detect VM aliases and break up the pcpu PMD
+ * mapping such that the same physical page is not mapped under
+ * different attributes.
+ *
+ * The recycled area is always at the tail of a partially used PMD
+ * page.
+ *
+ * RETURNS:
+ * Address of corresponding remapped pcpu address if match is found;
+ * otherwise, NULL.
+ */
+void *pcpu_lpage_remapped(void *kaddr)
+{
+ void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
+ unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
+ int left = 0, right = num_possible_cpus() - 1;
+ int pos;
+
+ /* pcpul in use at all? */
+ if (!pcpul_map)
+ return NULL;
+
+ /* okay, perform binary search */
+ while (left <= right) {
+ pos = (left + right) / 2;
+
+ if (pcpul_map[pos].ptr < pmd_addr)
+ left = pos + 1;
+ else if (pcpul_map[pos].ptr > pmd_addr)
+ right = pos - 1;
+ else {
+ /* it shouldn't be in the area for the first chunk */
+ WARN_ON(offset < pcpul_size);
+
+ return pcpul_vm.addr +
+ pcpul_map[pos].cpu * PMD_SIZE + offset;
+ }
+ }
+
+ return NULL;
}
#else
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
return -EINVAL;
}
@@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
* mapping so that it can use PMD mapping without additional TLB
* pressure.
*/
-static ssize_t __init setup_pcpu_embed(size_t static_size)
+static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
{
size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
@@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
* this. Also, embedding allocation doesn't play well with
* NUMA.
*/
- if (!cpu_has_pse || pcpu_need_numa())
+ if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
return -EINVAL;
return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
@@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
void *ptr;
ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
- if (!ptr)
+ if (!ptr) {
+ pr_warning("PERCPU: failed to allocate "
+ "4k page for cpu%u\n", cpu);
goto enomem;
+ }
memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
pcpu4k_pages[j++] = virt_to_page(ptr);
@@ -333,6 +416,16 @@ out_free_ar:
return ret;
}
+/* for explicit first chunk allocator selection */
+static char pcpu_chosen_alloc[16] __initdata;
+
+static int __init percpu_alloc_setup(char *str)
+{
+ strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
+ return 0;
+}
+early_param("percpu_alloc", percpu_alloc_setup);
+
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
@@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu)
#endif
}
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
void __init setup_per_cpu_areas(void)
{
size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void)
* of large page mappings. Please read comments on top of
* each allocator for details.
*/
- ret = setup_pcpu_remap(static_size);
- if (ret < 0)
- ret = setup_pcpu_embed(static_size);
+ ret = -EINVAL;
+ if (strlen(pcpu_chosen_alloc)) {
+ if (strcmp(pcpu_chosen_alloc, "4k")) {
+ if (!strcmp(pcpu_chosen_alloc, "lpage"))
+ ret = setup_pcpu_lpage(static_size, true);
+ else if (!strcmp(pcpu_chosen_alloc, "embed"))
+ ret = setup_pcpu_embed(static_size, true);
+ else
+ pr_warning("PERCPU: unknown allocator %s "
+ "specified\n", pcpu_chosen_alloc);
+ if (ret < 0)
+ pr_warning("PERCPU: %s allocator failed (%zd), "
+ "falling back to 4k\n",
+ pcpu_chosen_alloc, ret);
+ }
+ } else {
+ ret = setup_pcpu_lpage(static_size, false);
+ if (ret < 0)
+ ret = setup_pcpu_embed(static_size, false);
+ }
if (ret < 0)
ret = setup_pcpu_4k(static_size);
if (ret < 0)
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 124d40c575d..8ccabb8a2f6 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode)
unsigned long pa;
unsigned long m;
unsigned long n;
- unsigned long mmr_image;
struct bau_desc *adp;
struct bau_desc *ad2;
@@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode)
n = pa >> uv_nshift;
m = pa & uv_mmask;
- mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE);
- if (mmr_image) {
- uv_write_global_mmr64(pnode, (unsigned long)
- UVH_LB_BAU_SB_DESCRIPTOR_BASE,
- (n << UV_DESC_BASE_PNODE_SHIFT | m));
- }
+ uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+ (n << UV_DESC_BASE_PNODE_SHIFT | m));
/*
* initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a0f48f5671c..5204332f475 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
show_registers(regs);
+ if (panic_on_io_nmi)
+ panic("NMI IOCK error: Not continuing");
+
/* Re-enable the IOCK line, wait for a few seconds */
reason = (reason & 0xf) | 8;
outb(reason, 0x61);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5c3d6e81a7d..7030b5f911b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2157,7 +2157,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
else
/* 32 bits PSE 4MB page */
context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
- context->rsvd_bits_mask[1][0] = ~0ull;
+ context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
break;
case PT32E_ROOT_LEVEL:
context->rsvd_bits_mask[0][2] =
@@ -2170,7 +2170,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62) |
rsvd_bits(13, 20); /* large page */
- context->rsvd_bits_mask[1][0] = ~0ull;
+ context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
break;
case PT64_ROOT_LEVEL:
context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
@@ -2186,7 +2186,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51) |
rsvd_bits(13, 20); /* large page */
- context->rsvd_bits_mask[1][0] = ~0ull;
+ context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
break;
}
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 258e4591e1c..67785f63539 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -281,7 +281,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
{
unsigned access = gw->pt_access;
struct kvm_mmu_page *shadow_page;
- u64 spte, *sptep;
+ u64 spte, *sptep = NULL;
int direct;
gfn_t table_gfn;
int r;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e770bf349ec..356a0ce85c6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3012,6 +3012,12 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
+static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3198,6 +3204,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_HLT] = handle_halt,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_VMCALL] = handle_vmcall,
+ [EXIT_REASON_VMCLEAR] = handle_vmx_insn,
+ [EXIT_REASON_VMLAUNCH] = handle_vmx_insn,
+ [EXIT_REASON_VMPTRLD] = handle_vmx_insn,
+ [EXIT_REASON_VMPTRST] = handle_vmx_insn,
+ [EXIT_REASON_VMREAD] = handle_vmx_insn,
+ [EXIT_REASON_VMRESUME] = handle_vmx_insn,
+ [EXIT_REASON_VMWRITE] = handle_vmx_insn,
+ [EXIT_REASON_VMOFF] = handle_vmx_insn,
+ [EXIT_REASON_VMON] = handle_vmx_insn,
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_WBINVD] = handle_wbinvd,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 249540f9851..fe5474aec41 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -898,6 +898,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_VM_HSAVE_PA:
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
+ case MSR_K7_EVNTSEL0:
data = 0;
break;
case MSR_MTRRcap:
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index c1b6c232e02..616de4628d6 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1361,7 +1361,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
return 0;
}
-void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
+static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
{
u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
/*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index f4568605d7d..ff485d36118 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -55,8 +55,10 @@ static void delay_tsc(unsigned long loops)
preempt_disable();
cpu = smp_processor_id();
+ rdtsc_barrier();
rdtscl(bclock);
for (;;) {
+ rdtsc_barrier();
rdtscl(now);
if ((now - bclock) >= loops)
break;
@@ -78,6 +80,7 @@ static void delay_tsc(unsigned long loops)
if (unlikely(cpu != smp_processor_id())) {
loops -= (now - bclock);
cpu = smp_processor_id();
+ rdtsc_barrier();
rdtscl(bclock);
}
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f53b57e4086..47ce9a2ce5e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -177,20 +177,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
return nr_range;
}
-#ifdef CONFIG_X86_64
-static void __init init_gbpages(void)
-{
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
-#endif
-
/*
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
* This runs before bootmem is initialized and gets pages directly from
@@ -210,9 +196,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
- if (!after_bootmem)
- init_gbpages();
-
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
/*
* For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3cfe9ced8a4..1b734d7a896 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/pfn.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -681,8 +682,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
static int cpa_process_alias(struct cpa_data *cpa)
{
struct cpa_data alias_cpa;
- int ret = 0;
- unsigned long temp_cpa_vaddr, vaddr;
+ unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+ unsigned long vaddr, remapped;
+ int ret;
if (cpa->pfn >= max_pfn_mapped)
return 0;
@@ -706,42 +708,55 @@ static int cpa_process_alias(struct cpa_data *cpa)
PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
alias_cpa = *cpa;
- temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
- alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.vaddr = &laddr;
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-
ret = __change_page_attr_set_clr(&alias_cpa, 0);
+ if (ret)
+ return ret;
}
#ifdef CONFIG_X86_64
- if (ret)
- return ret;
/*
- * No need to redo, when the primary call touched the high
- * mapping already:
- */
- if (within(vaddr, (unsigned long) _text, _brk_end))
- return 0;
-
- /*
- * If the physical address is inside the kernel map, we need
+ * If the primary call didn't touch the high mapping already
+ * and the physical address is inside the kernel map, we need
* to touch the high mapped kernel as well:
*/
- if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
- return 0;
+ if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+ within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+ unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+ __START_KERNEL_map - phys_base;
+ alias_cpa = *cpa;
+ alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
- alias_cpa = *cpa;
- temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
- alias_cpa.vaddr = &temp_cpa_vaddr;
- alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+ /*
+ * The high mapping range is imprecise, so ignore the
+ * return value.
+ */
+ __change_page_attr_set_clr(&alias_cpa, 0);
+ }
+#endif
/*
- * The high mapping range is imprecise, so ignore the return value.
+ * If the PMD page was partially used for per-cpu remapping,
+ * the recycled area needs to be split and modified. Because
+ * the area is always proper subset of a PMD page
+ * cpa->numpages is guaranteed to be 1 for these areas, so
+ * there's no need to loop over and check for further remaps.
*/
- __change_page_attr_set_clr(&alias_cpa, 0);
-#endif
- return ret;
+ remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
+ if (remapped) {
+ WARN_ON(cpa->numpages > 1);
+ alias_cpa = *cpa;
+ alias_cpa.vaddr = &remapped;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+ ret = __change_page_attr_set_clr(&alias_cpa, 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index d277ef1eea5..b3d20b9cac6 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -244,7 +244,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
do_fpu_end();
mtrr_ap_init();
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_OLD_MCE
mcheck_init(&boot_cpu_data);
#endif
}