diff options
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/Kconfig | 3 | ||||
-rw-r--r-- | arch/powerpc/Kconfig.debug | 50 | ||||
-rw-r--r-- | arch/powerpc/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/idle.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/kgdb.c | 410 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup_32.c | 16 | ||||
-rw-r--r-- | arch/powerpc/kernel/suspend.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/sysfs.c | 15 | ||||
-rw-r--r-- | arch/powerpc/lib/code-patching.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 51 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 341 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 8 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 22 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 16 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/52xx/Kconfig | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/cbe_thermal.c | 45 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spu_base.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/file.c | 23 | ||||
-rw-r--r-- | arch/powerpc/platforms/chrp/pci.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/iseries/setup.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/powermac/setup.c | 6 |
23 files changed, 822 insertions, 205 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4c22242b396..a487671c282 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -111,7 +111,9 @@ config PPC select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_IDE + select HAVE_IOREMAP_PROT select HAVE_KPROBES + select HAVE_ARCH_KGDB select HAVE_KRETPROBES select HAVE_LMB select HAVE_DMA_ATTRS if PPC64 @@ -842,6 +844,7 @@ source "crypto/Kconfig" config PPC_CLOCK bool default n + select HAVE_CLK config PPC_LIB_RHEAP bool diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 2840ab69ef4..8c8aadbe956 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -41,22 +41,6 @@ config HCALL_STATS This option will add a small amount of overhead to all hypervisor calls. -config DEBUGGER - bool "Enable debugger hooks" - depends on DEBUG_KERNEL - help - Include in-kernel hooks for kernel debuggers. Unless you are - intending to debug the kernel, say N here. - -config KGDB - bool "Include kgdb kernel debugger" - depends on DEBUGGER && (BROKEN || PPC_GEN550 || 4xx) - select DEBUG_INFO - help - Include in-kernel hooks for kgdb, the Linux kernel source level - debugger. See <http://kgdb.sourceforge.net/> for more information. - Unless you are intending to debug the kernel, say N here. - config CODE_PATCHING_SELFTEST bool "Run self-tests of the code-patching code." depends on DEBUG_KERNEL @@ -67,36 +51,9 @@ config FTR_FIXUP_SELFTEST depends on DEBUG_KERNEL default n -choice - prompt "Serial Port" - depends on KGDB - default KGDB_TTYS1 - -config KGDB_TTYS0 - bool "ttyS0" - -config KGDB_TTYS1 - bool "ttyS1" - -config KGDB_TTYS2 - bool "ttyS2" - -config KGDB_TTYS3 - bool "ttyS3" - -endchoice - -config KGDB_CONSOLE - bool "Enable serial console thru kgdb port" - depends on KGDB && 8xx || CPM2 - help - If you enable this, all serial console messages will be sent - over the gdb stub. - If unsure, say N. 
- config XMON bool "Include xmon kernel debugger" - depends on DEBUGGER + depends on DEBUG_KERNEL help Include in-kernel hooks for the xmon kernel monitor/debugger. Unless you are intending to debug the kernel, say N here. @@ -126,6 +83,11 @@ config XMON_DISASSEMBLY to say Y here, unless you're building for a memory-constrained system. +config DEBUGGER + bool + depends on KGDB || XMON + default y + config IRQSTACKS bool "Use separate kernel stacks when processing interrupts" help diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bf0b1fd0ec3..1a4094704b1 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -74,6 +74,7 @@ obj-y += time.o prom.o traps.o setup-common.o \ misc_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma_64.o iommu.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index c3cf0e8f3ac..d308a9f70f1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -60,7 +60,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(); + tick_nohz_stop_sched_tick(1); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c new file mode 100644 index 00000000000..b4fdf2f2743 --- /dev/null +++ b/arch/powerpc/kernel/kgdb.c @@ -0,0 +1,410 @@ +/* + * PowerPC backend to the KGDB stub. + * + * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu) + * Copyright (C) 2003 Timesys Corporation. + * Copyright (C) 2004-2006 MontaVista Software, Inc. + * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com) + * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and + * Sergei Shtylyov <sshtylyov@ru.mvista.com> + * Copyright (C) 2007-2008 Wind River Systems, Inc. 
+ * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/kgdb.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/ptrace.h> +#include <asm/current.h> +#include <asm/processor.h> +#include <asm/machdep.h> + +/* + * This table contains the mapping between PowerPC hardware trap types, and + * signals, which are primarily what GDB understands. GDB and the kernel + * don't always agree on values, so we use constants taken from gdb-6.2. + */ +static struct hard_trap_info +{ + unsigned int tt; /* Trap type code for powerpc */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 0x0100, 0x02 /* SIGINT */ }, /* system reset */ + { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */ + { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ + { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */ + { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */ + { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */ + { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */ + { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ + { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ +#if defined(CONFIG_FSL_BOOKE) + { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ + { 0x2060, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ + { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ +#else 
/* ! CONFIG_FSL_BOOKE */ + { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */ + { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ + { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ +#endif +#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ + { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ +#if defined(CONFIG_8xx) + { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ +#else /* ! CONFIG_8xx */ + { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ + { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ +#if defined(CONFIG_PPC64) + { 0x1200, 0x04 /* SIGILL */ }, /* system error */ + { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */ + { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */ + { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1800, 0x04 /* SIGILL */ }, /* thermal */ +#else /* ! CONFIG_PPC64 */ + { 0x1400, 0x02 /* SIGINT */ }, /* SMI */ + { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1700, 0x04 /* SIGILL */ }, /* TAU */ + { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */ +#endif +#endif +#endif + { 0x0000, 0x00 } /* Must be last */ +}; + +static int computeSignal(unsigned int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGHUP; /* default for things we don't know about */ +} + +static int kgdb_call_nmi_hook(struct pt_regs *regs) +{ + kgdb_nmicallback(raw_smp_processor_id(), regs); + return 0; +} + +#ifdef CONFIG_SMP +void kgdb_roundup_cpus(unsigned long flags) +{ + smp_send_debugger_break(MSG_ALL_BUT_SELF); +} +#endif + +/* KGDB functions to use existing PowerPC64 hooks. 
*/ +static int kgdb_debugger(struct pt_regs *regs) +{ + return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); +} + +static int kgdb_handle_breakpoint(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, SIGTRAP, 0, regs) != 0) + return 0; + + if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) + regs->nip += 4; + + return 1; +} + +static int kgdb_singlestep(struct pt_regs *regs) +{ + struct thread_info *thread_info, *exception_thread_info; + + if (user_mode(regs)) + return 0; + + /* + * On Book E and perhaps other processors, singlestep is handled on + * the critical exception stack. This causes current_thread_info() + * to fail, since it locates the thread_info by masking off + * the low bits of the current stack pointer. We work around + * this issue by copying the thread_info from the kernel stack + * before calling kgdb_handle_exception, and copying it back + * afterwards. On most processors the copy is avoided since + * exception_thread_info == thread_info. 
+ */ + thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); + exception_thread_info = current_thread_info(); + + if (thread_info != exception_thread_info) + memcpy(exception_thread_info, thread_info, sizeof *thread_info); + + kgdb_handle_exception(0, SIGTRAP, 0, regs); + + if (thread_info != exception_thread_info) + memcpy(thread_info, exception_thread_info, sizeof *thread_info); + + return 1; +} + +static int kgdb_iabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0) + return 0; + return 1; +} + +static int kgdb_dabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0) + return 0; + return 1; +} + +#define PACK64(ptr, src) do { *(ptr++) = (src); } while (0) + +#define PACK32(ptr, src) do { \ + u32 *ptr32; \ + ptr32 = (u32 *)ptr; \ + *(ptr32++) = (src); \ + ptr = (unsigned long *)ptr32; \ + } while (0) + + +void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, NUMREGBYTES); + + for (reg = 0; reg < 32; reg++) + PACK64(ptr, regs->gpr[reg]); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + PACK64(ptr, current->thread.evr[reg]); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(long); +#endif + + PACK64(ptr, regs->nip); + PACK64(ptr, regs->msr); + PACK32(ptr, regs->ccr); + PACK64(ptr, regs->link); + PACK64(ptr, regs->ctr); + PACK32(ptr, regs->xer); + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + + STACK_FRAME_OVERHEAD); + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, NUMREGBYTES); + + /* 
Regs GPR0-2 */ + for (reg = 0; reg < 3; reg++) + PACK64(ptr, regs->gpr[reg]); + + /* Regs GPR3-13 are caller saved, not in regs->gpr[] */ + ptr += 11; + + /* Regs GPR14-31 */ + for (reg = 14; reg < 32; reg++) + PACK64(ptr, regs->gpr[reg]); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + PACK64(ptr, p->thread.evr[reg]); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(long); +#endif + + PACK64(ptr, regs->nip); + PACK64(ptr, regs->msr); + PACK32(ptr, regs->ccr); + PACK64(ptr, regs->link); + PACK64(ptr, regs->ctr); + PACK32(ptr, regs->xer); + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +#define UNPACK64(dest, ptr) do { dest = *(ptr++); } while (0) + +#define UNPACK32(dest, ptr) do { \ + u32 *ptr32; \ + ptr32 = (u32 *)ptr; \ + dest = *(ptr32++); \ + ptr = (unsigned long *)ptr32; \ + } while (0) + +void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; +#ifdef CONFIG_SPE + union { + u32 v32[2]; + u64 v64; + } acc; +#endif + + for (reg = 0; reg < 32; reg++) + UNPACK64(regs->gpr[reg], ptr); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + UNPACK64(current->thread.evr[reg], ptr); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, skip their 32 8-byte slots */ + ptr += 32 * 8 / sizeof(long); +#endif + + UNPACK64(regs->nip, ptr); + UNPACK64(regs->msr, ptr); + UNPACK32(regs->ccr, ptr); + UNPACK64(regs->link, ptr); + UNPACK64(regs->ctr, ptr); + UNPACK32(regs->xer, ptr); + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +/* + * This function does PowerPC specific processing for interfacing to gdb. 
+ */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + char *ptr = &remcom_in_buffer[1]; + unsigned long addr; + + switch (remcom_in_buffer[0]) { + /* + * sAA..AA Step one instruction from AA..AA + * This will return an error to gdb .. + */ + case 's': + case 'c': + /* handle the optional parameter */ + if (kgdb_hex2long(&ptr, &addr)) + linux_regs->nip = addr; + + atomic_set(&kgdb_cpu_doing_single_step, -1); + /* set the trace bit if we're stepping */ + if (remcom_in_buffer[0] == 's') { +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, + mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); + linux_regs->msr |= MSR_DE; +#else + linux_regs->msr |= MSR_SE; +#endif + kgdb_single_step = 1; + if (kgdb_contthread) + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); + } + return 0; + } + + return -1; +} + +/* + * Global data + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, +}; + +static int kgdb_not_implemented(struct pt_regs *regs) +{ + return 0; +} + +static void *old__debugger_ipi; +static void *old__debugger; +static void *old__debugger_bpt; +static void *old__debugger_sstep; +static void *old__debugger_iabr_match; +static void *old__debugger_dabr_match; +static void *old__debugger_fault_handler; + +int kgdb_arch_init(void) +{ + old__debugger_ipi = __debugger_ipi; + old__debugger = __debugger; + old__debugger_bpt = __debugger_bpt; + old__debugger_sstep = __debugger_sstep; + old__debugger_iabr_match = __debugger_iabr_match; + old__debugger_dabr_match = __debugger_dabr_match; + old__debugger_fault_handler = __debugger_fault_handler; + + __debugger_ipi = kgdb_call_nmi_hook; + __debugger = kgdb_debugger; + __debugger_bpt = kgdb_handle_breakpoint; + __debugger_sstep = kgdb_singlestep; + __debugger_iabr_match = kgdb_iabr_match; + __debugger_dabr_match = kgdb_dabr_match; + __debugger_fault_handler = 
kgdb_not_implemented; + + return 0; +} + +void kgdb_arch_exit(void) +{ + __debugger_ipi = old__debugger_ipi; + __debugger = old__debugger; + __debugger_bpt = old__debugger_bpt; + __debugger_sstep = old__debugger_sstep; + __debugger_iabr_match = old__debugger_iabr_match; + __debugger_dabr_match = old__debugger_dabr_match; + __debugger_fault_handler = old__debugger_fault_handler; +} diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 4efebe88e64..066e65c59b5 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -43,10 +43,6 @@ #define DBG(fmt...) -#if defined CONFIG_KGDB -#include <asm/kgdb.h> -#endif - extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid; @@ -302,18 +298,6 @@ void __init setup_arch(char **cmdline_p) xmon_setup(); -#if defined(CONFIG_KGDB) - if (ppc_md.kgdb_map_scc) - ppc_md.kgdb_map_scc(); - set_debug_traps(); - if (strstr(cmd_line, "gdb")) { - if (ppc_md.progress) - ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); - printk("kgdb breakpoint activated\n"); - breakpoint(); - } -#endif - /* * Set cache line size based on type of cpu as a default. 
* Systems with OF can look in the properties on the cpu node(s) diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index 8cee5710754..6fc6328dc62 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -7,6 +7,7 @@ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> */ +#include <linux/mm.h> #include <asm/page.h> /* References to section boundaries */ diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c8127f832df..aba0ba95f06 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -28,7 +28,9 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); /* Time in microseconds we delay before sleeping in the idle loop */ DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 }; -static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, +static ssize_t store_smt_snooze_delay(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); @@ -44,7 +46,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, return count; } -static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf) +static ssize_t show_smt_snooze_delay(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); @@ -152,14 +156,17 @@ static unsigned long write_##NAME(unsigned long val) \ mtspr(ADDRESS, val); \ return 0; \ } \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ - store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ + store_##NAME(struct sys_device *dev, struct 
sysdev_attribute *attr, \ + const char *buf, size_t count) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val; \ diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 0559fe086eb..7c975d43e3f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/vmalloc.h> #include <linux/init.h> +#include <linux/mm.h> #include <asm/page.h> #include <asm/code-patching.h> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8d3b58ebd38..5ce5a4dcd00 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -68,6 +68,7 @@ #define KB (1024) #define MB (1024*KB) +#define GB (1024L*MB) /* * Note: pte --> Linux PTE @@ -102,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; #ifdef CONFIG_HUGETLB_PAGE -int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; #endif #ifdef CONFIG_PPC_64K_PAGES @@ -329,6 +329,44 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; } +/* Scan for 16G memory blocks that have been set aside for huge pages + * and reserve those blocks for 16G huge pages. + */ +static int __init htab_dt_scan_hugepage_blocks(unsigned long node, + const char *uname, int depth, + void *data) { + char *type = of_get_flat_dt_prop(node, "device_type", NULL); + unsigned long *addr_prop; + u32 *page_count_prop; + unsigned int expected_pages; + long unsigned int phys_addr; + long unsigned int block_size; + + /* We are scanning "memory" nodes only */ + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + /* This property is the log base 2 of the number of virtual pages that + * will represent this memory block. 
*/ + page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL); + if (page_count_prop == NULL) + return 0; + expected_pages = (1 << page_count_prop[0]); + addr_prop = of_get_flat_dt_prop(node, "reg", NULL); + if (addr_prop == NULL) + return 0; + phys_addr = addr_prop[0]; + block_size = addr_prop[1]; + if (block_size != (16 * GB)) + return 0; + printk(KERN_INFO "Huge page(16GB) memory: " + "addr = 0x%lX size = 0x%lX pages = %d\n", + phys_addr, block_size, expected_pages); + lmb_reserve(phys_addr, block_size * expected_pages); + add_gpage(phys_addr, block_size, expected_pages); + return 0; +} + static void __init htab_init_page_sizes(void) { int rc; @@ -418,15 +456,18 @@ static void __init htab_init_page_sizes(void) ); #ifdef CONFIG_HUGETLB_PAGE - /* Init large page size. Currently, we pick 16M or 1M depending + /* Reserve 16G huge page memory sections for huge pages */ + of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); + +/* Set default large page size. Currently, we pick 16M or 1M depending * on what is available */ if (mmu_psize_defs[MMU_PAGE_16M].shift) - set_huge_psize(MMU_PAGE_16M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; /* With 4k/4level pagetables, we can't (for now) cope with a * huge page size < PMD_SIZE */ else if (mmu_psize_defs[MMU_PAGE_1M].shift) - set_huge_psize(MMU_PAGE_1M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; #endif /* CONFIG_HUGETLB_PAGE */ } @@ -847,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (HPAGE_SHIFT && psize == mmu_huge_psize) { + if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0d12fba31bc..fb42c4dd321 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,21 +24,43 @@ #include <asm/cputable.h> 
#include <asm/spu.h> -#define HPAGE_SHIFT_64K 16 -#define HPAGE_SHIFT_16M 24 +#define PAGE_SHIFT_64K 16 +#define PAGE_SHIFT_16M 24 +#define PAGE_SHIFT_16G 34 #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) +#define MAX_NUMBER_GPAGES 1024 -unsigned int hugepte_shift; -#define PTRS_PER_HUGEPTE (1 << hugepte_shift) -#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift) +/* Tracks the 16G pages after the device tree is scanned and before the + * huge_boot_pages list is ready. */ +static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; +static unsigned nr_gpages; -#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift) -#define HUGEPD_SIZE (1UL << HUGEPD_SHIFT) -#define HUGEPD_MASK (~(HUGEPD_SIZE-1)) +/* Array of valid huge page sizes - non-zero value(hugepte_shift) is + * stored for the huge page sizes that are valid. + */ +unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ + +#define hugepte_shift mmu_huge_psizes +#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) +#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) + +#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ + + hugepte_shift[psize]) +#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) +#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) + +/* Subtract one from array size because we don't need a cache for 4K since + * is not a huge page size */ +#define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \ + + psize-1]) +#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) -#define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM]) +static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { + "unused_4K", "hugepte_cache_64K", "unused_64K_AP", + "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G" +}; /* Flag to mark huge PD pointers. 
This means pmd_bad() and pud_bad() * will choke on pointers to hugepte tables, which is handy for @@ -49,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t; #define hugepd_none(hpd) ((hpd).pd == 0) +static inline int shift_to_mmu_psize(unsigned int shift) +{ + switch (shift) { +#ifndef CONFIG_PPC_64K_PAGES + case PAGE_SHIFT_64K: + return MMU_PAGE_64K; +#endif + case PAGE_SHIFT_16M: + return MMU_PAGE_16M; + case PAGE_SHIFT_16G: + return MMU_PAGE_16G; + } + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!(hpd.pd & HUGEPD_OK)); return (pte_t *)(hpd.pd & ~HUGEPD_OK); } -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr) +static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, + struct hstate *hstate) { - unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1)); + unsigned int shift = huge_page_shift(hstate); + int psize = shift_to_mmu_psize(shift); + unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); pte_t *dir = hugepd_page(*hpdp); return dir + idx; } static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, - unsigned long address) + unsigned long address, unsigned int psize) { - pte_t *new = kmem_cache_alloc(huge_pgtable_cache, + pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize), GFP_KERNEL|__GFP_REPEAT); if (! 
new) @@ -74,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, spin_lock(&mm->page_table_lock); if (!hugepd_none(*hpdp)) - kmem_cache_free(huge_pgtable_cache, new); + kmem_cache_free(huge_pgtable_cache(psize), new); else hpdp->pd = (unsigned long)new | HUGEPD_OK; spin_unlock(&mm->page_table_lock); @@ -83,27 +130,60 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, /* Base page size affects how we walk hugetlb page tables */ #ifdef CONFIG_PPC_64K_PAGES -#define hpmd_offset(pud, addr) pmd_offset(pud, addr) -#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr) +#define hpmd_offset(pud, addr, h) pmd_offset(pud, addr) +#define hpmd_alloc(mm, pud, addr, h) pmd_alloc(mm, pud, addr) #else static inline -pmd_t *hpmd_offset(pud_t *pud, unsigned long addr) +pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) { - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_offset(pud, addr); else return (pmd_t *) pud; } static inline -pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) +pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, + struct hstate *hstate) { - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_alloc(mm, pud, addr); else return (pmd_t *) pud; } #endif +/* Build list of addresses of gigantic pages. This function is used in early + * boot before the buddy or bootmem allocator is setup. + */ +void add_gpage(unsigned long addr, unsigned long page_size, + unsigned long number_of_pages) +{ + if (!addr) + return; + while (number_of_pages > 0) { + gpage_freearray[nr_gpages] = addr; + nr_gpages++; + number_of_pages--; + addr += page_size; + } +} + +/* Moves the gigantic page addresses from the temporary list to the + * huge_boot_pages list. 
+ */ +int alloc_bootmem_huge_page(struct hstate *hstate) +{ + struct huge_bootmem_page *m; + if (nr_gpages == 0) + return 0; + m = phys_to_virt(gpage_freearray[--nr_gpages]); + gpage_freearray[nr_gpages] = 0; + list_add(&m->list, &huge_boot_pages); + m->hstate = hstate; + return 1; +} + + /* Modelled after find_linux_pte() */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { @@ -111,39 +191,52 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pud_t *pu; pmd_t *pm; - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + unsigned int psize; + unsigned int shift; + unsigned long sz; + struct hstate *hstate; + psize = get_slice_psize(mm, addr); + shift = mmu_psize_to_shift(psize); + sz = ((1UL) << shift); + hstate = size_to_hstate(sz); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); if (!pgd_none(*pg)) { pu = pud_offset(pg, addr); if (!pud_none(*pu)) { - pm = hpmd_offset(pu, addr); + pm = hpmd_offset(pu, addr, hstate); if (!pmd_none(*pm)) - return hugepte_offset((hugepd_t *)pm, addr); + return hugepte_offset((hugepd_t *)pm, addr, + hstate); } } return NULL; } -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pg; pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; + struct hstate *hstate; + unsigned int psize; + hstate = size_to_hstate(sz); - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + psize = get_slice_psize(mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); pu = pud_alloc(mm, pg, addr); if (pu) { - pm = hpmd_alloc(mm, pu, addr); + pm = hpmd_alloc(mm, pu, addr, hstate); if (pm) hpdp = (hugepd_t *)pm; } @@ -151,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) if (! 
hpdp) return NULL; - if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr)) + if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) return NULL; - return hugepte_offset(hpdp, addr); + return hugepte_offset(hpdp, addr, hstate); } int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) @@ -162,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) +static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, + unsigned int psize) { pte_t *hugepte = hugepd_page(*hpdp); hpdp->pd = 0; tlb->need_flush = 1; - pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM, + pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, + HUGEPTE_CACHE_NUM+psize-1, PGF_CACHENUM_MASK)); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) + unsigned long floor, unsigned long ceiling, + unsigned int psize) { pmd_t *pmd; unsigned long next; @@ -186,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) continue; - free_hugepte_range(tlb, (hugepd_t *)pmd); + free_hugepte_range(tlb, (hugepd_t *)pmd, psize); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; @@ -212,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, pud_t *pud; unsigned long next; unsigned long start; + unsigned int shift; + unsigned int psize = get_slice_psize(tlb->mm, addr); + shift = mmu_psize_to_shift(psize); start = addr; pud = pud_offset(pgd, addr); @@ -220,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, #ifdef CONFIG_PPC_64K_PAGES if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, 
ceiling, + psize); #else - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) { + if (shift == PAGE_SHIFT_64K) { if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, + ceiling, psize); } else { if (pud_none(*pud)) continue; - free_hugepte_range(tlb, (hugepd_t *)pud); + free_hugepte_range(tlb, (hugepd_t *)pud, psize); } #endif } while (pud++, addr = next, addr != end); @@ -255,7 +356,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, * * Must be called with pagetable lock held. */ -void hugetlb_free_pgd_range(struct mmu_gather **tlb, +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -297,31 +398,33 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, * now has no other vmas using it, so can be freed, we don't * bother to round floor or end up - the tests don't need that. */ + unsigned int psize = get_slice_psize(tlb->mm, addr); - addr &= HUGEPD_MASK; + addr &= HUGEPD_MASK(psize); if (addr < floor) { - addr += HUGEPD_SIZE; + addr += HUGEPD_SIZE(psize); if (!addr) return; } if (ceiling) { - ceiling &= HUGEPD_MASK; + ceiling &= HUGEPD_MASK(psize); if (!ceiling) return; } if (end - 1 > ceiling - 1) - end -= HUGEPD_SIZE; + end -= HUGEPD_SIZE(psize); if (addr > end - 1) return; start = addr; - pgd = pgd_offset((*tlb)->mm, addr); + pgd = pgd_offset(tlb->mm, addr); do { - BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); + psize = get_slice_psize(tlb->mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling); + hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } @@ -334,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, * necessary anymore if we make 
hpte_need_flush() get the * page size from the slices */ - pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1); + unsigned int psize = get_slice_psize(mm, addr); + unsigned int shift = mmu_psize_to_shift(psize); + unsigned long sz = ((1UL) << shift); + struct hstate *hstate = size_to_hstate(sz); + pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); } *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); } @@ -351,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; + unsigned int mmu_psize = get_slice_psize(mm, address); - if (get_slice_psize(mm, address) != mmu_huge_psize) + /* Verify it is a huge page else bail. */ + if (!mmu_huge_psizes[mmu_psize]) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); page = pte_page(*ptep); - if (page) - page += (address % HPAGE_SIZE) / PAGE_SIZE; + if (page) { + unsigned int shift = mmu_psize_to_shift(mmu_psize); + unsigned long sz = ((1UL) << shift); + page += (address % sz) / PAGE_SIZE; + } return page; } @@ -368,6 +480,11 @@ int pmd_huge(pmd_t pmd) return 0; } +int pud_huge(pud_t pud) +{ + return 0; +} + struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) @@ -381,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - return slice_get_unmapped_area(addr, len, flags, - mmu_huge_psize, 1, 0); + struct hstate *hstate = hstate_file(file); + int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); } /* * Called by asm hashtable.S for doing lazy icache flush */ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, - pte_t pte, int trap) + pte_t pte, int trap, unsigned long sz) { struct page *page; int i; @@ -402,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, /* page is dirty */ if 
(!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { - for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + for (i = 0; i < (sz / PAGE_SIZE); i++) __flush_dcache_icache(page_address(page+i)); set_bit(PG_arch_1, &page->flags); } else { @@ -418,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, { pte_t *ptep; unsigned long old_pte, new_pte; - unsigned long va, rflags, pa; + unsigned long va, rflags, pa, sz; long slot; int err = 1; int ssize = user_segment_size(ea); + unsigned int mmu_psize; + int shift; + mmu_psize = get_slice_psize(mm, ea); + if (!mmu_huge_psizes[mmu_psize]) + goto out; ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ @@ -465,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + shift = mmu_psize_to_shift(mmu_psize); + sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) /* No CPU has hugepages but lacks no execute, so we * don't need to worry about that case */ rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), - trap); + trap, sz); /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & _PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(va, HPAGE_SHIFT, ssize); + hash = hpt_hash(va, shift, ssize); if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, ssize, local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } if (likely(!(old_pte & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize); + unsigned long hash = hpt_hash(va, shift, ssize); unsigned long hpte_group; pa = pte_pfn(__pte(old_pte)) << 
PAGE_SHIFT; @@ -509,7 +634,7 @@ repeat: /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, - mmu_huge_psize, ssize); + mmu_psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { @@ -517,7 +642,7 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, HPTE_V_SECONDARY, - mmu_huge_psize, ssize); + mmu_psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * @@ -549,45 +674,54 @@ void set_huge_psize(int psize) { /* Check that it is a page size supported by the hardware and * that it fits within pagetable limits. */ - if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT && + if (mmu_psize_defs[psize].shift && + mmu_psize_defs[psize].shift < SID_SHIFT_1T && (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || - mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) { - HPAGE_SHIFT = mmu_psize_defs[psize].shift; - mmu_huge_psize = psize; -#ifdef CONFIG_PPC_64K_PAGES - hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); -#else - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) - hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); - else - hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT); -#endif - + mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || + mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { + /* Return if huge page size has already been setup or is the + * same as the base page size. 
*/ + if (mmu_huge_psizes[psize] || + mmu_psize_defs[psize].shift == PAGE_SHIFT) + return; + hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); + + switch (mmu_psize_defs[psize].shift) { + case PAGE_SHIFT_64K: + /* We only allow 64k hpages with 4k base page, + * which was checked above, and always put them + * at the PMD */ + hugepte_shift[psize] = PMD_SHIFT; + break; + case PAGE_SHIFT_16M: + /* 16M pages can be at two different levels + * of pagestables based on base page size */ + if (PAGE_SHIFT == PAGE_SHIFT_64K) + hugepte_shift[psize] = PMD_SHIFT; + else /* 4k base page */ + hugepte_shift[psize] = PUD_SHIFT; + break; + case PAGE_SHIFT_16G: + /* 16G pages are always at PGD level */ + hugepte_shift[psize] = PGDIR_SHIFT; + break; + } + hugepte_shift[psize] -= mmu_psize_defs[psize].shift; } else - HPAGE_SHIFT = 0; + hugepte_shift[psize] = 0; } static int __init hugepage_setup_sz(char *str) { unsigned long long size; - int mmu_psize = -1; + int mmu_psize; int shift; size = memparse(str, &str); shift = __ffs(size); - switch (shift) { -#ifndef CONFIG_PPC_64K_PAGES - case HPAGE_SHIFT_64K: - mmu_psize = MMU_PAGE_64K; - break; -#endif - case HPAGE_SHIFT_16M: - mmu_psize = MMU_PAGE_16M; - break; - } - - if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift) + mmu_psize = shift_to_mmu_psize(shift); + if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) set_huge_psize(mmu_psize); else printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); @@ -603,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr) static int __init hugetlbpage_init(void) { + unsigned int psize; + if (!cpu_has_feature(CPU_FTR_16M_PAGE)) return -ENODEV; - - huge_pgtable_cache = kmem_cache_create("hugepte_cache", - HUGEPTE_TABLE_SIZE, - HUGEPTE_TABLE_SIZE, - 0, - zero_ctor); - if (! huge_pgtable_cache) - panic("hugetlbpage_init(): could not create hugepte cache\n"); + /* Add supported huge page sizes. 
Need to change HUGE_MAX_HSTATE + * and adjust PTE_NONCACHE_NUM if the number of supported huge page + * sizes changes. + */ + set_huge_psize(MMU_PAGE_16M); + set_huge_psize(MMU_PAGE_64K); + set_huge_psize(MMU_PAGE_16G); + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + if (mmu_huge_psizes[psize]) { + huge_pgtable_cache(psize) = kmem_cache_create( + HUGEPTE_CACHE_NAME(psize), + HUGEPTE_TABLE_SIZE(psize), + HUGEPTE_TABLE_SIZE(psize), + 0, + zero_ctor); + if (!huge_pgtable_cache(psize)) + panic("hugetlbpage_init(): could not create %s"\ + "\n", HUGEPTE_CACHE_NAME(psize)); + } + } return 0; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6ef63caca68..a41bc5aa204 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -153,10 +153,10 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { }; #ifdef CONFIG_HUGETLB_PAGE -/* Hugepages need one extra cache, initialized in hugetlbpage.c. We - * can't put into the tables above, because HPAGE_SHIFT is not compile - * time constant. */ -struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; +/* Hugepages need an extra cache per hugepagesize, initialized in + * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT + * is not compile time constant. 
*/ +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; #else struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; #endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index cf4bffba6f7..d9a18135133 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table); EXPORT_SYMBOL(numa_cpumask_lookup_table); EXPORT_SYMBOL(node_data); -static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; static int min_common_depth; static int n_mem_addr_cells, n_mem_size_cells; @@ -816,7 +815,7 @@ void __init do_init_bootmem(void) dbg("node %d\n", nid); dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index c7584072dfc..2001abdb191 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -145,13 +145,20 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage) void __iomem * ioremap(phys_addr_t addr, unsigned long size) { - return __ioremap(addr, size, _PAGE_NO_CACHE); + return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); } EXPORT_SYMBOL(ioremap); void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) { + /* writeable implies dirty for kernel addresses */ + if (flags & _PAGE_RW) + flags |= _PAGE_DIRTY | _PAGE_HWWRITE; + + /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ + flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC); + return __ioremap(addr, size, flags); } EXPORT_SYMBOL(ioremap_flags); @@ -163,6 +170,14 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) phys_addr_t p; int err; + /* Make sure we have the base flags */ + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + + /* 
Non-cacheable page cannot be coherent */ + if (flags & _PAGE_NO_CACHE) + flags &= ~_PAGE_COHERENT; + /* * Choose an address to map it to. * Once the vmalloc system is running, we use it. @@ -219,11 +234,6 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) v = (ioremap_bot -= size); } - if ((flags & _PAGE_PRESENT) == 0) - flags |= _PAGE_KERNEL; - if (flags & _PAGE_NO_CACHE) - flags |= _PAGE_GUARDED; - /* * Should check if it is a candidate for a BAT mapping */ diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 3ef0ad2f9ca..365e61ae5db 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -107,9 +107,18 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, { unsigned long i; + /* Make sure we have the base flags */ if ((flags & _PAGE_PRESENT) == 0) flags |= pgprot_val(PAGE_KERNEL); + /* Non-cacheable page cannot be coherent */ + if (flags & _PAGE_NO_CACHE) + flags &= ~_PAGE_COHERENT; + + /* We don't support the 4K PFN hack with ioremap */ + if (flags & _PAGE_4K_PFN) + return NULL; + WARN_ON(pa & ~PAGE_MASK); WARN_ON(((unsigned long)ea) & ~PAGE_MASK); WARN_ON(size & ~PAGE_MASK); @@ -190,6 +199,13 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size) void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) { + /* writeable implies dirty for kernel addresses */ + if (flags & _PAGE_RW) + flags |= _PAGE_DIRTY; + + /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ + flags &= ~(_PAGE_USER | _PAGE_EXEC); + if (ppc_md.ioremap) return ppc_md.ioremap(addr, size, flags); return __ioremap(addr, size, flags); diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index a01b5c608ff..409fcc7b63c 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -147,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ if (huge) { #ifdef CONFIG_HUGETLB_PAGE - psize = mmu_huge_psize; + psize = 
get_slice_psize(mm, addr);; #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index d664b1bce38..9a51675beb8 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -1,7 +1,6 @@ config PPC_MPC52xx bool "52xx-based boards" depends on PPC_MULTIPLATFORM && PPC32 - select FSL_SOC select PPC_CLOCK select PPC_PCI_CHOICE @@ -48,6 +47,7 @@ config PPC_MPC5200_BUGFIX config PPC_MPC5200_GPIO bool "MPC5200 GPIO support" depends on PPC_MPC52xx + select GENERIC_GPIO select HAVE_GPIO_LIB help Enable gpiolib support for mpc5200 based boards diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 4852bf312d8..4d4c8c16912 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -97,7 +97,8 @@ static u8 spu_read_register_value(struct sys_device *sysdev, union spe_reg __iom return value.spe[spu->spe_id]; } -static ssize_t spu_show_temp(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_temp(struct sys_device *sysdev, struct sysdev_attribute *attr, + char *buf) { u8 value; struct cbe_pmd_regs __iomem *pmd_regs; @@ -146,32 +147,38 @@ static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char return size; } -static ssize_t spu_show_throttle_end(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(get_pmd_regs(sysdev), buf, 0); } -static ssize_t spu_show_throttle_begin(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_throttle_begin(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(get_pmd_regs(sysdev), buf, 8); } -static ssize_t spu_show_throttle_full_stop(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_throttle_full_stop(struct 
sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(get_pmd_regs(sysdev), buf, 16); } -static ssize_t spu_store_throttle_end(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t spu_store_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(get_pmd_regs(sysdev), buf, size, 0); } -static ssize_t spu_store_throttle_begin(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t spu_store_throttle_begin(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(get_pmd_regs(sysdev), buf, size, 8); } -static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(get_pmd_regs(sysdev), buf, size, 16); } @@ -192,43 +199,51 @@ static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos) /* shows the temperature of the DTS on the PPE, * located near the linear thermal sensor */ -static ssize_t ppe_show_temp0(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_temp0(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return ppe_show_temp(sysdev, buf, 32); } /* shows the temperature of the second DTS on the PPE */ -static ssize_t ppe_show_temp1(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_temp1(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return ppe_show_temp(sysdev, buf, 0); } -static ssize_t ppe_show_throttle_end(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 32); } -static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev, 
char *buf) +static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 40); } -static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 48); } -static ssize_t ppe_store_throttle_end(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t ppe_store_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 32); } -static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 40); } -static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 48); } diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 78f905bc6a4..a5bdb89a17c 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -703,7 +703,8 @@ static unsigned long long spu_acct_time(struct spu *spu, } -static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) +static ssize_t spu_stat_show(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { struct spu *spu = container_of(sysdev, struct spu, sysdev); diff --git a/arch/powerpc/platforms/cell/spufs/file.c 
b/arch/powerpc/platforms/cell/spufs/file.c index 99c73066b82..010a51f5979 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -288,9 +288,32 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } +static int spufs_mem_mmap_access(struct vm_area_struct *vma, + unsigned long address, + void *buf, int len, int write) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long offset = address - vma->vm_start; + char *local_store; + + if (write && !(vma->vm_flags & VM_WRITE)) + return -EACCES; + if (spu_acquire(ctx)) + return -EINTR; + if ((offset + len) > vma->vm_end) + len = vma->vm_end - offset; + local_store = ctx->ops->get_ls(ctx); + if (write) + memcpy_toio(local_store + offset, buf, len); + else + memcpy_fromio(buf, local_store + offset, len); + spu_release(ctx); + return len; +} static struct vm_operations_struct spufs_mem_mmap_vmops = { .fault = spufs_mem_mmap_fault, + .access = spufs_mem_mmap_access, }; static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 609c46db4a1..768c262b936 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -367,7 +367,7 @@ static void chrp_pci_fixup_vt8231_ata(struct pci_dev *viaide) viaisa = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL); if (!viaisa) return; - printk("Fixing VIA IDE, force legacy mode on '%s'\n", viaide->dev.bus_id); + dev_info(&viaide->dev, "Fixing VIA IDE, force legacy mode on\n"); pci_read_config_byte(viaide, PCI_CLASS_PROG, &progif); pci_write_config_byte(viaide, PCI_CLASS_PROG, progif & ~0x5); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index b72120751bb..70b688c1aef 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -561,7 +561,7 @@ static 
void yield_shared_processor(void) static void iseries_shared_idle(void) { while (1) { - tick_nohz_stop_sched_tick(); + tick_nohz_stop_sched_tick(1); while (!need_resched() && !hvlpevent_is_pending()) { local_irq_disable(); ppc64_runlatch_off(); @@ -591,7 +591,7 @@ static void iseries_dedicated_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(); + tick_nohz_stop_sched_tick(1); if (!need_resched()) { while (!need_resched()) { ppc64_runlatch_off(); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 00bd0166d07..31635446901 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -97,8 +97,6 @@ extern struct machdep_calls pmac_md; int sccdbg; #endif -extern void zs_kgdb_hook(int tty_num); - sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; EXPORT_SYMBOL(sys_ctrler); @@ -329,10 +327,6 @@ static void __init pmac_setup_arch(void) l2cr_init(); #endif /* CONFIG_PPC32 */ -#ifdef CONFIG_KGDB - zs_kgdb_hook(0); -#endif - find_via_cuda(); find_via_pmu(); smu_init(); |