diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2006-09-28 08:29:59 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-09-28 08:29:59 -0400 |
commit | 185a257f2f73bcd89050ad02da5bedbc28fc43fa (patch) | |
tree | 5e32586114534ed3f2165614cba3d578f5d87307 /arch/ia64 | |
parent | 3f1a9aaeffd8d1cbc5ab9776c45cbd66af1c9699 (diff) | |
parent | a77c64c1a641950626181b4857abb701d8f38ccc (diff) |
Merge branch 'master' into gfs2
Diffstat (limited to 'arch/ia64')
27 files changed, 833 insertions, 286 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index db274da7dba..0b7f701d5cf 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR -config DMA_IS_DMA32 - bool - default y - -config DMA_IS_NORMAL - bool - depends on IA64_SGI_SN2 - default y - config AUDIT_ARCH bool default y @@ -365,6 +356,9 @@ config NODES_SHIFT MAX_NUMNODES will be 2^(This value). If in doubt, use the default. +config ARCH_POPULATES_NODE_MAP + def_bool y + # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. # VIRTUAL_MEM_MAP has been retained for historical reasons. config VIRTUAL_MEM_MAP @@ -429,6 +423,14 @@ config IA64_PALINFO config SGI_SN def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) +config IA64_ESI + bool "ESI (Extensible SAL Interface) support" + help + If you say Y here, support is built into the kernel to + make ESI calls. ESI calls are used to support vendor-specific + firmware extensions, such as the ability to inject memory-errors + for test-purposes. If you're unsure, say N. + source "drivers/sn/Kconfig" source "drivers/firmware/Kconfig" diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 6aa3c51619c..bddbd22706e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1942,7 +1942,7 @@ struct sysctl32 { unsigned int __unused[4]; }; -#ifdef CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL_SYSCALL asmlinkage long sys32_sysctl (struct sysctl32 __user *args) { diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index ad8215a3c58..31497496eb4 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -32,6 +32,11 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_ESI) += esi.o +ifneq ($(CONFIG_IA64_ESI),) +obj-y += esi_stub.o # must be in kernel proper +endif + # The gate DSO image is built using a special linker script. targets += gate.so gate-syms.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 0176556aeec..32c3abededc 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -771,16 +771,19 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) { #ifdef CONFIG_ACPI_NUMA int pxm_id; + int nid; pxm_id = acpi_get_pxm(handle); - /* - * Assuming that the container driver would have set the proximity - * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag + * We don't have cpu-only-node hotadd. But if the system equips + * SRAT table, pxm is already found and node is ready. + * So, just pxm_to_nid(pxm) is OK. + * This code here is for the system which doesn't have full SRAT + * table for possible cpus. */ - node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); - + nid = acpi_map_pxm_to_node(pxm_id); node_cpuid[cpu].phys_id = physid; + node_cpuid[cpu].nid = nid; #endif return (0); } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index fef06571be9..12701cf32d9 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1605,8 +1605,8 @@ sys_call_table: data8 sys_ni_syscall // 1295 reserved for ppoll data8 sys_unshare data8 sys_splice - data8 sys_ni_syscall // reserved for set_robust_list - data8 sys_ni_syscall // reserved for get_robust_list + data8 sys_set_robust_list + data8 sys_get_robust_list data8 sys_sync_file_range // 1300 data8 sys_tee data8 sys_vmsplice diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c new file mode 100644 index 00000000000..ebf4e988e78 --- /dev/null +++ b/arch/ia64/kernel/esi.c @@ -0,0 +1,205 @@ +/* + * Extensible SAL Interface (ESI) support routines. + * + * Copyright (C) 2006 Hewlett-Packard Co + * Alex Williamson <alex.williamson@hp.com> + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <asm/esi.h> +#include <asm/sal.h> + +MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>"); +MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support"); +MODULE_LICENSE("GPL"); + +#define MODULE_NAME "esi" + +#define ESI_TABLE_GUID \ + EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3, \ + 0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4) + +enum esi_systab_entry_type { + ESI_DESC_ENTRY_POINT = 0 +}; + +/* + * Entry type: Size: + * 0 48 + */ +#define ESI_DESC_SIZE(type) "\060"[(unsigned) (type)] + +typedef struct ia64_esi_desc_entry_point { + u8 type; + u8 reserved1[15]; + u64 esi_proc; + u64 gp; + efi_guid_t guid; +} ia64_esi_desc_entry_point_t; + +struct pdesc { + void *addr; + void *gp; +}; + +static struct ia64_sal_systab *esi_systab; + +static int __init esi_init (void) +{ + efi_config_table_t *config_tables; + struct ia64_sal_systab *systab; + unsigned long esi = 0; + char *p; + int i; + + config_tables = __va(efi.systab->tables); + + for (i = 0; i < (int) efi.systab->nr_tables; ++i) { + if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) { + esi = config_tables[i].table; + break; + } + } + + if (!esi) + return -ENODEV;; + + systab = __va(esi); + + if (strncmp(systab->signature, "ESIT", 4) != 0) { + printk(KERN_ERR "bad signature in ESI system table!"); + return -ENODEV; + } + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type + * descriptor. + */ + switch (*p) { + case ESI_DESC_ENTRY_POINT: + break; + default: + printk(KERN_WARNING "Unkown table type %d found in " + "ESI table, ignoring rest of table\n", *p); + return -ENODEV; + } + + p += ESI_DESC_SIZE(*p); + } + + esi_systab = systab; + return 0; +} + + +int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp, + enum esi_proc_type proc_type, u64 func, + u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, + u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags = 0; + int i; + char *p; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = __va(esi->esi_proc); + pdesc.gp = __va(esi->gp); + + esi_proc = (ia64_sal_handler) &pdesc; + + ia64_save_scratch_fpregs(fr); + if (proc_type == ESI_PROC_SERIALIZED) + spin_lock_irqsave(&sal_lock, flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_save(flags); + else + preempt_disable(); + *isrvp = (*esi_proc)(func, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + if (proc_type == ESI_PROC_SERIALIZED) + spin_unlock_irqrestore(&sal_lock, + flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_restore(flags); + else + preempt_enable(); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call); + +int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp, + u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4, + u64 arg5, u64 arg6, u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags; + u64 esi_params[8]; + char *p; + int i; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = (void *)esi->esi_proc; + pdesc.gp = (void *)esi->gp; + + esi_proc = (ia64_sal_handler) &pdesc; + + esi_params[0] = func; + esi_params[1] = arg1; + esi_params[2] = arg2; + esi_params[3] = arg3; + esi_params[4] = arg4; + esi_params[5] = arg5; + esi_params[6] = arg6; + esi_params[7] = arg7; + ia64_save_scratch_fpregs(fr); + spin_lock_irqsave(&sal_lock, flags); + *isrvp = esi_call_phys(esi_proc, esi_params); + spin_unlock_irqrestore(&sal_lock, flags); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call_phys); + +static void __exit esi_exit (void) +{ +} + +module_init(esi_init); +module_exit(esi_exit); /* makes module removable... */ diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S new file mode 100644 index 00000000000..6b3d6c1f99b --- /dev/null +++ b/arch/ia64/kernel/esi_stub.S @@ -0,0 +1,96 @@ +/* + * ESI call stub. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Alex Williamson <alex.williamson@hp.com> + * + * Based on EFI call stub by David Mosberger. The stub is virtually + * identical to the one for EFI phys-mode calls, except that ESI + * calls may have up to 8 arguments, so they get passed to this routine + * through memory. + * + * This stub allows us to make ESI calls in physical mode with interrupts + * turned off. ESI calls may not support calling from virtual mode. + * + * Google for "Extensible SAL specification" for a document describing the + * ESI standard. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include <asm/processor.h> +#include <asm/asmmacro.h> + +/* + * Inputs: + * in0 = address of function descriptor of ESI routine to call + * in1 = address of array of ESI parameters + * + * Outputs: + * r8 = result returned by called function + */ +GLOBAL_ENTRY(esi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,7,8,0 + ld8 r2=[in0],8 // load ESI function's entry point + mov loc0=rp + .body + ;; + ld8 out0=[in1],8 // ESI params loaded from array + ;; // passing all as inputs doesn't work + ld8 out1=[in1],8 + ;; + ld8 out2=[in1],8 + ;; + ld8 out3=[in1],8 + ;; + ld8 out4=[in1],8 + ;; + ld8 out5=[in1],8 + ;; + ld8 out6=[in1],8 + ;; + ld8 out7=[in1] + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load ESI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov loc5=r19 // old ar.bsp + mov loc6=r20 // old sp + br.call.sptk.many rp=b6 // call the ESI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // save virtual mode psr + mov r19=loc5 // save virtual mode bspstore + mov r20=loc6 // save virtual mode sp + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(esi_call_phys) diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 3ead20fb6f4..879c1817bd1 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -105,5 +105,9 @@ EXPORT_SYMBOL(ia64_spinlock_contention); # endif #endif +#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE) +extern void esi_call_phys (void); +EXPORT_SYMBOL_GPL(esi_call_phys); +#endif extern char ia64_ivt[]; EXPORT_SYMBOL(ia64_ivt); diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 781960f80b6..169ec3a7156 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -136,10 +136,8 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, static int __kprobes unsupported_inst(uint template, uint slot, uint major_opcode, unsigned long kprobe_inst, - struct kprobe *p) + unsigned long addr) { - unsigned long addr = (unsigned long)p->addr; - if (bundle_encoding[template][slot] == I) { switch (major_opcode) { case 0x0: //I_UNIT_MISC_OPCODE: @@ -217,7 +215,7 @@ static void __kprobes prepare_break_inst(uint template, uint slot, struct kprobe *p) { unsigned long break_inst = BREAK_INST; - bundle_t *bundle = &p->ainsn.insn.bundle; + bundle_t *bundle = &p->opcode.bundle; /* * Copy the original kprobe_inst qualifying predicate(qp) @@ -423,11 +421,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); unsigned long kprobe_inst=0; unsigned int slot = addr & 0xf, template, major_opcode = 0; - bundle_t *bundle = &p->ainsn.insn.bundle; - - memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); - memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); + bundle_t *bundle; + bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; template = bundle->quad0.template; if(valid_kprobe_addr(template, slot, addr)) @@ -440,20 +436,19 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) /* Get kprobe_inst and major_opcode from the bundle */ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); - if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p)) + if (unsupported_inst(template, slot, major_opcode, kprobe_inst, addr)) return -EINVAL; - prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); - return 0; -} + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); + memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); -void __kprobes flush_insn_slot(struct kprobe *p) -{ - unsigned long arm_addr; + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); - arm_addr = ((unsigned long)&p->opcode.bundle) & ~0xFULL; - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) @@ -461,9 +456,10 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; - flush_insn_slot(p); - memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + flush_icache_range((unsigned long)p->ainsn.insn, + (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } void __kprobes arch_disarm_kprobe(struct kprobe *p) @@ -471,11 +467,18 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; - /* p->opcode contains the original unaltered bundle */ - memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t)); - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */ + memcpy((char *) arm_addr, (char *) p->ainsn.insn, + sizeof(kprobe_opcode_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + mutex_lock(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + mutex_unlock(&kprobe_mutex); +} /* * We are resuming execution after a single step fault, so the pt_regs * structure reflects the register state after we executed the instruction @@ -486,12 +489,12 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) */ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { - unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; + unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle); unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; unsigned long template; int slot = ((unsigned long)p->addr & 0xf); - template = p->opcode.bundle.quad0.template; + template = p->ainsn.insn->bundle.quad0.template; if (slot == 1 && bundle_encoding[template][1] == L) slot = 2; @@ -553,7 +556,7 @@ turn_ss_off: static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) { - unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; + unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle; unsigned long slot = (unsigned long)p->addr & 0xf; /* single step inline if break instruction */ @@ -768,6 +771,12 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) */ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (ia64_done_with_exception(regs)) + return 1; /* * Let ia64_do_page_fault() fix it. diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 2fbe4536fe1..bfbd8986153 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -54,6 +54,9 @@ * * 2005-10-07 Keith Owens <kaos@sgi.com> * Add notify_die() hooks. + * + * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> + * Add printing support for MCA/INIT. */ #include <linux/types.h> #include <linux/init.h> @@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); static int mca_init __initdata; +/* + * limited & delayed printing support for MCA/INIT handler + */ + +#define mprintk(fmt...) ia64_mca_printk(fmt) + +#define MLOGBUF_SIZE (512+256*NR_CPUS) +#define MLOGBUF_MSGMAX 256 +static char mlogbuf[MLOGBUF_SIZE]; +static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ +static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ +static unsigned long mlogbuf_start; +static unsigned long mlogbuf_end; +static unsigned int mlogbuf_finished = 0; +static unsigned long mlogbuf_timestamp = 0; + +static int loglevel_save = -1; +#define BREAK_LOGLEVEL(__console_loglevel) \ + oops_in_progress = 1; \ + if (loglevel_save < 0) \ + loglevel_save = __console_loglevel; \ + __console_loglevel = 15; + +#define RESTORE_LOGLEVEL(__console_loglevel) \ + if (loglevel_save >= 0) { \ + __console_loglevel = loglevel_save; \ + loglevel_save = -1; \ + } \ + mlogbuf_finished = 0; \ + oops_in_progress = 0; + +/* + * Push messages into buffer, print them later if not urgent. + */ +void ia64_mca_printk(const char *fmt, ...) +{ + va_list args; + int printed_len; + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + + va_start(args, fmt); + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); + va_end(args); + + /* Copy the output into mlogbuf */ + if (oops_in_progress) { + /* mlogbuf was abandoned, use printk directly instead. */ + printk(temp_buf); + } else { + spin_lock(&mlogbuf_wlock); + for (p = temp_buf; *p; p++) { + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; + if (next != mlogbuf_start) { + mlogbuf[mlogbuf_end] = *p; + mlogbuf_end = next; + } else { + /* buffer full */ + break; + } + } + mlogbuf[mlogbuf_end] = '\0'; + spin_unlock(&mlogbuf_wlock); + } +} +EXPORT_SYMBOL(ia64_mca_printk); + +/* + * Print buffered messages. + * NOTE: call this after returning normal context. (ex. from salinfod) + */ +void ia64_mlogbuf_dump(void) +{ + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + unsigned long index; + unsigned long flags; + unsigned int printed_len; + + /* Get output from mlogbuf */ + while (mlogbuf_start != mlogbuf_end) { + temp_buf[0] = '\0'; + p = temp_buf; + printed_len = 0; + + spin_lock_irqsave(&mlogbuf_rlock, flags); + + index = mlogbuf_start; + while (index != mlogbuf_end) { + *p = mlogbuf[index]; + index = (index + 1) % MLOGBUF_SIZE; + if (!*p) + break; + p++; + if (++printed_len >= MLOGBUF_MSGMAX - 1) + break; + } + *p = '\0'; + if (temp_buf[0]) + printk(temp_buf); + mlogbuf_start = index; + + mlogbuf_timestamp = 0; + spin_unlock_irqrestore(&mlogbuf_rlock, flags); + } +} +EXPORT_SYMBOL(ia64_mlogbuf_dump); + +/* + * Call this if system is going to down or if immediate flushing messages to + * console is required. (ex. recovery was failed, crash dump is going to be + * invoked, long-wait rendezvous etc.) + * NOTE: this should be called from monarch. + */ +static void ia64_mlogbuf_finish(int wait) +{ + BREAK_LOGLEVEL(console_loglevel); + + spin_lock_init(&mlogbuf_rlock); + ia64_mlogbuf_dump(); + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " + "MCA/INIT might be dodgy or fail.\n"); + + if (!wait) + return; + + /* wait for console */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + + mlogbuf_finished = 1; +} +EXPORT_SYMBOL(ia64_mlogbuf_finish); + +/* + * Print buffered messages from INIT context. + */ +static void ia64_mlogbuf_dump_from_init(void) +{ + if (mlogbuf_finished) + return; + + if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " + " and the system seems to be messed up.\n"); + ia64_mlogbuf_finish(0); + return; + } + + if (!spin_trylock(&mlogbuf_rlock)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " + "Generated messages other than stack dump will be " + "buffered to mlogbuf and will be printed later.\n"); + printk(KERN_ERR "INIT: If messages would not printed after " + "this INIT, wait 30sec and assert INIT again.\n"); + if (!mlogbuf_timestamp) + mlogbuf_timestamp = jiffies; + return; + } + spin_unlock(&mlogbuf_rlock); + ia64_mlogbuf_dump(); +} static void inline ia64_mca_spin(const char *func) { - printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + if (monarch_cpu == smp_processor_id()) + ia64_mlogbuf_finish(0); + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); while (1) cpu_relax(); } @@ -344,9 +511,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CPE error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); - spin_lock(&cpe_history_lock); if (!cpe_poll_enabled && cpe_vector >= 0) { @@ -375,7 +539,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cpe_history[index++] = now; if (index == CPE_HISTORY_LENGTH) @@ -383,6 +547,10 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cpe_history_lock); +out: + /* Get the CPE error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); + return IRQ_HANDLED; } @@ -988,18 +1156,22 @@ ia64_wait_for_slaves(int monarch, const char *type) } if (!missing) goto all_in; - printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); + /* + * Maybe slave(s) dead. Print buffered messages immediately. + */ + ia64_mlogbuf_finish(0); + mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) - printk(" %d", c); + mprintk(" %d", c); } - printk("\n"); + mprintk("\n"); return; all_in: - printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); + mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); return; } @@ -1027,10 +1199,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", - sos->proc_state_param, cpu, sos->monarch); + mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " + "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; @@ -1066,6 +1236,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, rh->severity = sal_log_severity_corrected; ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); sos->os_status = IA64_MCA_CORRECTED; + } else { + /* Dump buffered message to console */ + ia64_mlogbuf_finish(1); } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) @@ -1106,9 +1279,6 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CMC error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); - spin_lock(&cmc_history_lock); if (!cmc_polling_enabled) { int i, count = 1; /* we know 1 happened now */ @@ -1141,7 +1311,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cmc_history[index++] = now; if (index == CMC_HISTORY_LENGTH) @@ -1149,6 +1319,10 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cmc_history_lock); +out: + /* Get the CMC error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); + return IRQ_HANDLED; } @@ -1305,6 +1479,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi struct task_struct *g, *t; if (val != DIE_INIT_MONARCH_PROCESS) return NOTIFY_DONE; + + /* + * FIXME: mlogbuf will brim over with INIT stack dumps. + * To enable show_stack from INIT, we use oops_in_progress which should + * be used in real oops. This would cause something wrong after INIT. + */ + BREAK_LOGLEVEL(console_loglevel); + ia64_mlogbuf_dump_from_init(); + printk(KERN_ERR "Processes interrupted by INIT -"); for_each_online_cpu(c) { struct ia64_sal_os_state *s; @@ -1326,6 +1509,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi } while_each_thread (g, t); read_unlock(&tasklist_lock); } + /* FIXME: This will not restore zapped printk locks. */ + RESTORE_LOGLEVEL(console_loglevel); return NOTIFY_DONE; } @@ -1357,12 +1542,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); - printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", + mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); @@ -1375,7 +1557,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { - printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", + mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", __FUNCTION__, cpu); atomic_dec(&slaves); sos->monarch = 1; @@ -1387,7 +1569,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { - printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", + mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", __FUNCTION__, cpu); atomic_dec(&monarchs); sos->monarch = 0; @@ -1408,7 +1590,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("Slave on cpu %d returning to normal service.\n", cpu); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; atomic_dec(&slaves); @@ -1426,7 +1608,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * same serial line, the user will need some time to switch out of the BMC before * the dump begins. */ - printk("Delaying for 5 seconds...\n"); + mprintk("Delaying for 5 seconds...\n"); udelay(5*1000000); ia64_wait_for_slaves(cpu, "INIT"); /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through @@ -1439,7 +1621,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); monarch_cpu = -1; diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 96047491d1b..c6b607c00de 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -1025,18 +1025,13 @@ ia64_old_stack: ia64_set_kernel_registers: add temp3=MCA_SP_OFFSET, r3 - add temp4=MCA_SOS_OFFSET+SOS(OS_GP), r3 mov b0=r2 // save return address GET_IA64_MCA_DATA(temp1) ;; - add temp4=temp4, temp1 // &struct ia64_sal_os_state.os_gp add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack add r13=temp1, r3 // set current to start of MCA/INIT stack add r20=temp1, r3 // physical start of MCA/INIT stack ;; - ld8 r1=[temp4] // OS GP from SAL OS state - ;; - DATA_PA_TO_VA(r1,temp1) DATA_PA_TO_VA(r12,temp2) DATA_PA_TO_VA(r13,temp3) ;; @@ -1067,6 +1062,10 @@ ia64_set_kernel_registers: mov cr.itir=r18 mov cr.ifa=r13 mov r20=IA64_TR_CURRENT_STACK + + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value ;; itr.d dtr[r20]=r21 ;; diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 8db6e0cedad..a45009d2bc9 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -79,14 +79,30 @@ static int fatal_mca(const char *fmt, ...) { va_list args; + char buf[256]; va_start(args, fmt); - vprintk(fmt, args); + vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); + ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); return MCA_NOT_RECOVERED; } +static int +mca_recovered(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); + + return MCA_RECOVERED; +} + /** * mca_page_isolate - isolate a poisoned page in order not to use it later * @paddr: poisoned memory location @@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr) void mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) { + ia64_mlogbuf_dump(); printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", raw_smp_processor_id(), current->pid, current->uid, @@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx, /* Is target address valid? */ if (!pbci->tv) - return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); + return fatal_mca("target address not valid"); /* * cpu read or memory-mapped io read @@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx, /* Is minstate valid? */ if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) - return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); + return fatal_mca("minstate not valid"); psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); @@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx, psr2->bn = 1; psr2->i = 0; - return MCA_RECOVERED; + return mca_recovered("user memory corruption. " + "kill affected process - recovered."); } } - return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," - " iip 0x%lx\n", pmsa->pmsa_iip); + return fatal_mca("kernel context not recovered, iip 0x%lx\n", + pmsa->pmsa_iip); } /** @@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, * The machine check is corrected. */ if (psp->cm == 1) - return MCA_RECOVERED; + return mca_recovered("machine check is already corrected."); /* * The error was not contained. Software must be reset. */ if (psp->us || psp->ci == 0) - return fatal_mca(KERN_ALERT "MCA: error not contained\n"); + return fatal_mca("error not contained"); /* * The cache check and bus check bits have four possible states @@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, * 1 1 Memory error, attempt recovery */ if (psp->bc == 0 || pbci == NULL) - return fatal_mca(KERN_ALERT "MCA: No bus check\n"); + return fatal_mca("No bus check"); /* * Sorry, we cannot handle so many. */ if (peidx_bus_check_num(peidx) > 1) - return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); + return fatal_mca("Too many bus checks"); /* * Well, here is only one bus error. */ if (pbci->ib) - return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); + return fatal_mca("Internal Bus error"); if (pbci->cc) - return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); + return fatal_mca("Cache-cache error"); if (pbci->eb && pbci->bsi > 0) - return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); + return fatal_mca("External bus check fatal status"); /* * This is a local MCA and estimated as recoverble external bus error. @@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, /* * On account of strange SAL error record, we cannot recover. */ - return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); + return fatal_mca("Strange SAL record"); } /** @@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) /* Now, OS can recover when there is one processor error section */ if (n_proc_err > 1) - return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); + return fatal_mca("Too Many Errors"); else if (n_proc_err == 0) - /* Weird SAL record ... We need not to recover */ - return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); + /* Weird SAL record ... We can't do anything */ + return fatal_mca("Weird SAL record"); /* Make index of processor error section */ mca_make_peidx((sal_log_processor_info_t*) @@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) /* Check whether MCA is global or not */ if (is_mca_global(&peidx, &pbci, sos)) - return fatal_mca(KERN_ALERT "MCA: global MCA\n"); + return fatal_mca("global MCA"); /* Try to recover a processor error */ return recover_from_processor_error(platform_err, &slidx, &peidx, diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h index 31a2e52bb16..c85e943ba5f 100644 --- a/arch/ia64/kernel/mca_drv.h +++ b/arch/ia64/kernel/mca_drv.h @@ -118,3 +118,7 @@ struct mca_table_entry { extern const struct mca_table_entry *search_mca_tables (unsigned long addr); extern int mca_recover_range(unsigned long); +extern void ia64_mca_printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); +extern void ia64_mlogbuf_dump(void); + diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index 1cc360c83e7..20340631179 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -29,6 +29,36 @@ EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +void __cpuinit map_cpu_to_node(int cpu, int nid) +{ + int oldnid; + if (nid < 0) { /* just initialize by zero */ + cpu_to_node_map[cpu] = 0; + return; + } + /* sanity check first */ + oldnid = cpu_to_node_map[cpu]; + if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) { + return; /* nothing to do */ + } + /* we don't have cpu-driven node hot add yet... + In usual case, node is created from SRAT at boot time. */ + if (!node_online(nid)) + nid = first_online_node; + cpu_to_node_map[cpu] = nid; + cpu_set(cpu, node_to_cpu_mask[nid]); + return; +} + +void __cpuinit unmap_cpu_from_node(int cpu, int nid) +{ + WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid])); + WARN_ON(cpu_to_node_map[cpu] != nid); + cpu_to_node_map[cpu] = 0; + cpu_clear(cpu, node_to_cpu_mask[nid]); +} + + /** * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays * @@ -49,8 +79,6 @@ void __init build_cpu_to_node_map(void) node = node_cpuid[i].nid; break; } - cpu_to_node_map[cpu] = (node >= 0) ? node : 0; - if (node >= 0) - cpu_set(cpu, node_to_cpu_mask[node]); + map_cpu_to_node(cpu, node); } } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 84a7e52f56f..281004ff7b0 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -34,6 +34,7 @@ #include <linux/file.h> #include <linux/poll.h> #include <linux/vfs.h> +#include <linux/smp.h> #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/bitops.h> @@ -62,6 +63,9 @@ #define PFM_INVALID_ACTIVATION (~0UL) +#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ +#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ + /* * depth of message queue */ @@ -296,14 +300,17 @@ typedef struct pfm_context { unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ - unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ + unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ - pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ + pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ + + unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ + unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ u64 ctx_saved_psr_up; /* only contains psr.up value */ @@ -867,7 +874,6 @@ static void pfm_mask_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, val, ovfl_mask; int i; @@ -888,7 +894,7 @@ pfm_mask_monitoring(struct task_struct *task) * So in both cases, the live register contains the owner's * state. We can ONLY touch the PMU registers and NOT the PSR. * - * As a consequence to this call, the thread->pmds[] array + * As a consequence to this call, the ctx->th_pmds[] array * contains stale information which must be ignored * when context is reloaded AND monitoring is active (see * pfm_restart). @@ -923,9 +929,9 @@ pfm_mask_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); - th->pmcs[i] &= ~0xfUL; - DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); + ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); + ctx->th_pmcs[i] &= ~0xfUL; + DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } /* * make all of this visible @@ -942,7 +948,6 @@ static void pfm_restore_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, ovfl_mask; unsigned long psr, val; int i, is_system; @@ -1008,9 +1013,9 @@ pfm_restore_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - th->pmcs[i] = ctx->ctx_pmcs[i]; - ia64_set_pmc(i, th->pmcs[i]); - DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + ia64_set_pmc(i, ctx->th_pmcs[i]); + DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i])); } ia64_srlz_d(); @@ -1069,7 +1074,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask) static inline void pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long ovfl_val = pmu_conf->ovfl_val; unsigned long mask = ctx->ctx_all_pmds[0]; unsigned long val; @@ -1091,11 +1095,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) ctx->ctx_pmds[i].val = val & ~ovfl_val; val &= ovfl_val; } - thread->pmds[i] = val; + ctx->th_pmds[i] = val; DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", i, - thread->pmds[i], + ctx->th_pmds[i], ctx->ctx_pmds[i].val)); } } @@ -1106,7 +1110,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) static inline void pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long mask = ctx->ctx_all_pmcs[0]; int i; @@ -1114,8 +1117,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) for (i=0; mask; i++, mask>>=1) { /* masking 0 with ovfl_val yields 0 */ - thread->pmcs[i] = ctx->ctx_pmcs[i]; - DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } } @@ -2859,7 +2862,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) static int pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, pmc_pm; @@ -2880,7 +2882,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (is_loaded) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3035,7 +3036,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). * - * The value in thread->pmcs[] may be modified on overflow, i.e., when + * The value in th_pmcs[] may be modified on overflow, i.e., when * monitoring needs to be stopped. */ if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); @@ -3049,7 +3050,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmcs[cnum] = value; + if (is_system == 0) ctx->th_pmcs[cnum] = value; /* * write hardware register if we can @@ -3101,7 +3102,6 @@ error: static int pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, hw_value, ovfl_mask; @@ -3125,7 +3125,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * the owner of the local PMU. */ if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3233,7 +3232,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmds[cnum] = hw_value; + if (is_system == 0) ctx->th_pmds[cnum] = hw_value; /* * write hardware register if we can @@ -3299,7 +3298,6 @@ abort_mission: static int pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; unsigned long val = 0UL, lval, ovfl_mask, sval; pfarg_reg_t *req = (pfarg_reg_t *)arg; @@ -3323,7 +3321,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3385,7 +3382,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * if context is zombie, then task does not exist anymore. * In this case, we use the full value saved in the context (pfm_flush_regs()). */ - val = is_loaded ? thread->pmds[cnum] : 0UL; + val = is_loaded ? ctx->th_pmds[cnum] : 0UL; } rd_func = pmu_conf->pmd_desc[cnum].read_check; @@ -4354,8 +4351,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_copy_pmds(task, ctx); pfm_copy_pmcs(task, ctx); - pmcs_source = thread->pmcs; - pmds_source = thread->pmds; + pmcs_source = ctx->th_pmcs; + pmds_source = ctx->th_pmds; /* * always the case for system-wide @@ -5864,14 +5861,12 @@ void pfm_save_regs(struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; u64 psr; ctx = PFM_GET_CTX(task); if (ctx == NULL) return; - t = &task->thread; /* * we always come here with interrupts ALREADY disabled by @@ -5929,19 +5924,19 @@ pfm_save_regs(struct task_struct *task) * guarantee we will be schedule at that same * CPU again. */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * we will need it on the restore path to check * for pending overflow. */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * finally, allow context access. @@ -5986,7 +5981,6 @@ static void pfm_lazy_save_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; { u64 psr = pfm_get_psr(); @@ -5994,7 +5988,6 @@ pfm_lazy_save_regs (struct task_struct *task) } ctx = PFM_GET_CTX(task); - t = &task->thread; /* * we need to mask PMU overflow here to @@ -6019,19 +6012,19 @@ pfm_lazy_save_regs (struct task_struct *task) /* * save all the pmds we use */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * it is needed to check for pended overflow * on the restore path */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * now get can unmask PMU interrupts, they will @@ -6050,7 +6043,6 @@ void pfm_load_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long pmc_mask = 0UL, pmd_mask = 0UL; unsigned long flags; u64 psr, psr_up; @@ -6061,11 +6053,10 @@ pfm_load_regs (struct task_struct *task) BUG_ON(GET_PMU_OWNER()); - t = &task->thread; /* * possible on unload */ - if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return; + if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; /* * we always come here with interrupts ALREADY disabled by @@ -6147,21 +6138,21 @@ pfm_load_regs (struct task_struct *task) * * XXX: optimize here */ - if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask); - if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask); + if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); + if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt @@ -6214,7 +6205,6 @@ pfm_load_regs (struct task_struct *task) void pfm_load_regs (struct task_struct *task) { - struct thread_struct *t; pfm_context_t *ctx; struct task_struct *owner; unsigned long pmd_mask, pmc_mask; @@ -6223,7 +6213,6 @@ pfm_load_regs (struct task_struct *task) owner = GET_PMU_OWNER(); ctx = PFM_GET_CTX(task); - t = &task->thread; psr = pfm_get_psr(); BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); @@ -6286,22 +6275,22 @@ pfm_load_regs (struct task_struct *task) */ pmc_mask = ctx->ctx_all_pmcs[0]; - pfm_restore_pmds(t->pmds, pmd_mask); - pfm_restore_pmcs(t->pmcs, pmc_mask); + pfm_restore_pmds(ctx->th_pmds, pmd_mask); + pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt @@ -6376,11 +6365,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) */ pfm_unfreeze_pmu(); } else { - pmc0 = task->thread.pmcs[0]; + pmc0 = ctx->th_pmcs[0]; /* * clear whatever overflow status bits there were */ - task->thread.pmcs[0] = 0; + ctx->th_pmcs[0] = 0; } ovfl_val = pmu_conf->ovfl_val; /* @@ -6401,7 +6390,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) /* * can access PMU always true in system wide mode */ - val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i]; + val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; if (PMD_IS_COUNTING(i)) { DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", @@ -6433,7 +6422,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); - if (is_self) task->thread.pmds[i] = pmd_val; + if (is_self) ctx->th_pmds[i] = pmd_val; ctx->ctx_pmds[i].val = val; } @@ -6677,7 +6666,7 @@ pfm_init(void) ffz(pmu_conf->ovfl_val)); /* sanity check */ - if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { + if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) { printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); pmu_conf = NULL; return -1; @@ -6752,7 +6741,6 @@ void dump_pmu_state(const char *from) { struct task_struct *task; - struct thread_struct *t; struct pt_regs *regs; pfm_context_t *ctx; unsigned long psr, dcr, info, flags; @@ -6797,16 +6785,14 @@ dump_pmu_state(const char *from) ia64_psr(regs)->up = 0; ia64_psr(regs)->pp = 0; - t = ¤t->thread; - for (i=1; PMC_IS_LAST(i) == 0; i++) { if (PMC_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); + printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]); } for (i=1; PMD_IS_LAST(i) == 0; i++) { if (PMD_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); + printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]); } if (ctx) { diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 9065f0f01ba..e63b8ca5344 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) /* Check for outstanding MCA/INIT records every minute (arbitrary) */ #define SALINFO_TIMER_DELAY (60*HZ) static struct timer_list salinfo_timer; +extern void ia64_mlogbuf_dump(void); static void salinfo_timeout_check(struct salinfo_data *data) @@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data) static void salinfo_timeout (unsigned long arg) { + ia64_mlogbuf_dump(); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; @@ -332,6 +334,8 @@ retry: if (cpu == -1) goto retry; + ia64_mlogbuf_dump(); + /* for next read, start checking at next CPU */ data->cpu_check = cpu; if (++data->cpu_check == NR_CPUS) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 7ad0d9cc6db..84f93c0f2c6 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -509,7 +509,7 @@ show_cpuinfo (struct seq_file *m, void *v) { 1UL << 1, "spontaneous deferral"}, { 1UL << 2, "16-byte atomic ops" } }; - char family[32], features[128], *cp, sep; + char features[128], *cp, sep; struct cpuinfo_ia64 *c = v; unsigned long mask; unsigned long proc_freq; @@ -517,12 +517,6 @@ show_cpuinfo (struct seq_file *m, void *v) mask = c->features; - switch (c->family) { - case 0x07: memcpy(family, "Itanium", 8); break; - case 0x1f: memcpy(family, "Itanium 2", 10); break; - default: sprintf(family, "%u", c->family); break; - } - /* build the feature string: */ memcpy(features, " standard", 10); cp = features; @@ -553,8 +547,9 @@ show_cpuinfo (struct seq_file *m, void *v) "processor : %d\n" "vendor : %s\n" "arch : IA-64\n" - "family : %s\n" + "family : %u\n" "model : %u\n" + "model name : %s\n" "revision : %u\n" "archrev : %u\n" "features :%s\n" /* don't change this---it _is_ right! */ @@ -563,7 +558,8 @@ show_cpuinfo (struct seq_file *m, void *v) "cpu MHz : %lu.%06lu\n" "itc MHz : %lu.%06lu\n" "BogoMIPS : %lu.%02lu\n", - cpunum, c->vendor, family, c->model, c->revision, c->archrev, + cpunum, c->vendor, c->family, c->model, + c->model_name, c->revision, c->archrev, features, c->ppn, c->number, proc_freq / 1000, proc_freq % 1000, c->itc_freq / 1000000, c->itc_freq % 1000000, @@ -611,6 +607,31 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; +static char brandname[128]; + +static char * __cpuinit +get_model_name(__u8 family, __u8 model) +{ + char brand[128]; + + if (ia64_pal_get_brand_info(brand)) { + if (family == 0x7) + memcpy(brand, "Merced", 7); + else if (family == 0x1f) switch (model) { + case 0: memcpy(brand, "McKinley", 9); break; + case 1: memcpy(brand, "Madison", 8); break; + case 2: memcpy(brand, "Madison up to 9M cache", 23); break; + } else + memcpy(brand, "Unknown", 8); + } + if (brandname[0] == '\0') + return strcpy(brandname, brand); + else if (strcmp(brandname, brand) == 0) + return brandname; + else + return kstrdup(brand, GFP_KERNEL); +} + static void __cpuinit identify_cpu (struct cpuinfo_ia64 *c) { @@ -640,7 +661,6 @@ identify_cpu (struct cpuinfo_ia64 *c) pal_status_t status; unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ int i; - for (i = 0; i < 5; ++i) cpuid.bits[i] = ia64_get_cpuid(i); @@ -663,6 +683,7 @@ identify_cpu (struct cpuinfo_ia64 *c) c->family = cpuid.field.family; c->archrev = cpuid.field.archrev; c->features = cpuid.field.features; + c->model_name = get_model_name(c->family, c->model); status = ia64_pal_vm_summary(&vm1, &vm2); if (status == PAL_STATUS_SUCCESS) { diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 6203ed4ec8c..f7d7f566814 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -879,3 +879,27 @@ identify_siblings(struct cpuinfo_ia64 *c) c->core_id = info.log1_cid; c->thread_id = info.log1_tid; } + +/* + * returns non zero, if multi-threading is enabled + * on at least one physical package. Due to hotplug cpu + * and (maxcpus=), all threads may not necessarily be enabled + * even though the processor supports multi-threading. + */ +int is_multithreading_enabled(void) +{ + int i, j; + + for_each_present_cpu(i) { + for_each_present_cpu(j) { + if (j == i) + continue; + if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) { + if (cpu_data(j)->core_id == cpu_data(i)->core_id) + return 1; + } + } + } + return 0; +} +EXPORT_SYMBOL_GPL(is_multithreading_enabled); diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index f648c610b10..5629b45e89c 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,6 +36,7 @@ int arch_register_cpu(int num) */ if (!can_cpei_retarget() && is_cpu_cpei_target(num)) sysfs_cpus[num].cpu.no_control = 1; + map_cpu_to_node(num, node_cpuid[num].nid); #endif return register_cpu(&sysfs_cpus[num].cpu, num); @@ -45,7 +46,8 @@ int arch_register_cpu(int num) void arch_unregister_cpu(int num) { - return unregister_cpu(&sysfs_cpus[num].cpu); + unregister_cpu(&sysfs_cpus[num].cpu); + unmap_cpu_from_node(num, cpu_to_node(num)); } EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 4c73a676366..c58e933694d 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ - page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 5b0d5f64a9b..b3b2e389d6b 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -184,7 +184,9 @@ SECTIONS *(.data.gate) __stop_gate_section = .; } - . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ + . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose + * kernel data + */ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } @@ -202,7 +204,9 @@ SECTIONS *(.data.percpu) __per_cpu_end = .; } - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */ + . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits + * into percpu page size + */ data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index e004143ba86..daf977ff292 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -26,7 +26,6 @@ #include <asm/mca.h> #ifdef CONFIG_VIRTUAL_MEM_MAP -static unsigned long num_dma_physpages; static unsigned long max_gap; #endif @@ -41,10 +40,11 @@ show_mem (void) int i, total = 0, reserved = 0; int shared = 0, cached = 0; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", + nr_swap_pages<<(PAGE_SHIFT-10)); i = max_mapnr; for (i = 0; i < max_mapnr; i++) { if (!pfn_valid(i)) { @@ -63,12 +63,12 @@ show_mem (void) else if (page_count(mem_map + i)) shared += page_count(mem_map + i) - 1; } - printk("%d pages of RAM\n", total); - printk("%d reserved pages\n", reserved); - printk("%d pages shared\n", shared); - printk("%d pages swap cached\n", cached); - printk("%ld pages in page table cache\n", - pgtable_quicklist_total_size()); + printk(KERN_INFO "%d pages of RAM\n", total); + printk(KERN_INFO "%d reserved pages\n", reserved); + printk(KERN_INFO "%d pages shared\n", shared); + printk(KERN_INFO "%d pages swap cached\n", cached); + printk(KERN_INFO "%ld pages in page table cache\n", + pgtable_quicklist_total_size()); } /* physical address where the bootmem map is located */ @@ -218,18 +218,6 @@ count_pages (u64 start, u64 end, void *arg) return 0; } -#ifdef CONFIG_VIRTUAL_MEM_MAP -static int -count_dma_pages (u64 start, u64 end, void *arg) -{ - unsigned long *count = arg; - - if (start < MAX_DMA_ADDRESS) - *count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT; - return 0; -} -#endif - /* * Set up the page tables. */ @@ -238,45 +226,22 @@ void __init paging_init (void) { unsigned long max_dma; - unsigned long zones_size[MAX_NR_ZONES]; -#ifdef CONFIG_VIRTUAL_MEM_MAP - unsigned long zholes_size[MAX_NR_ZONES]; -#endif - - /* initialize mem_map[] */ - - memset(zones_size, 0, sizeof(zones_size)); + unsigned long nid = 0; + unsigned long max_zone_pfns[MAX_NR_ZONES]; num_physpages = 0; efi_memmap_walk(count_pages, &num_physpages); max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = max_dma; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP - memset(zholes_size, 0, sizeof(zholes_size)); - - num_dma_physpages = 0; - efi_memmap_walk(count_dma_pages, &num_dma_physpages); - - if (max_low_pfn < max_dma) { - zones_size[ZONE_DMA] = max_low_pfn; - zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages; - } else { - zones_size[ZONE_DMA] = max_dma; - zholes_size[ZONE_DMA] = max_dma - num_dma_physpages; - if (num_physpages > num_dma_physpages) { - zones_size[ZONE_NORMAL] = max_low_pfn - max_dma; - zholes_size[ZONE_NORMAL] = - ((max_low_pfn - max_dma) - - (num_physpages - num_dma_physpages)); - } - } - + efi_memmap_walk(register_active_ranges, &nid); efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); if (max_gap < LARGE_GAP) { vmem_map = (struct page *) 0; - free_area_init_node(0, NODE_DATA(0), zones_size, 0, - zholes_size); + free_area_init_nodes(max_zone_pfns); } else { unsigned long map_size; @@ -288,20 +253,19 @@ paging_init (void) vmem_map = (struct page *) vmalloc_end; efi_memmap_walk(create_mem_map_page_table, NULL); - NODE_DATA(0)->node_mem_map = vmem_map; - free_area_init_node(0, NODE_DATA(0), zones_size, - 0, zholes_size); + /* + * alloc_node_mem_map makes an adjustment for mem_map + * which isn't compatible with vmem_map. + */ + NODE_DATA(0)->node_mem_map = vmem_map + + find_min_pfn_with_active_regions(); + free_area_init_nodes(max_zone_pfns); printk("Virtual mem_map starts at 0x%p\n", mem_map); } #else /* !CONFIG_VIRTUAL_MEM_MAP */ - if (max_low_pfn < max_dma) - zones_size[ZONE_DMA] = max_low_pfn; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = max_low_pfn - max_dma; - } - free_area_init(zones_size); + add_active_range(0, 0, max_low_pfn); + free_area_init_nodes(max_zone_pfns); #endif /* !CONFIG_VIRTUAL_MEM_MAP */ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index d260bffa01a..d497b6b0f5b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -547,15 +547,16 @@ void show_mem(void) unsigned long total_present = 0; pg_data_t *pgdat; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", + nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; - printk("Node ID: %d\n", pgdat->node_id); pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -579,18 +580,17 @@ void show_mem(void) total_reserved += reserved; total_cached += cached; total_shared += shared; - printk("\t%ld pages of RAM\n", present); - printk("\t%d reserved pages\n", reserved); - printk("\t%d pages shared\n", shared); - printk("\t%d pages swap cached\n", cached); + printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " + "shrd: %10d, swpd: %10d\n", pgdat->node_id, + present, reserved, shared, cached); } - printk("%ld pages of RAM\n", total_present); - printk("%d reserved pages\n", total_reserved); - printk("%d pages shared\n", total_shared); - printk("%d pages swap cached\n", total_cached); - printk("Total of %ld pages in page table cache\n", - pgtable_quicklist_total_size()); - printk("%d free buffer pages\n", nr_free_buffer_pages()); + printk(KERN_INFO "%ld pages of RAM\n", total_present); + printk(KERN_INFO "%d reserved pages\n", total_reserved); + printk(KERN_INFO "%d pages shared\n", total_shared); + printk(KERN_INFO "%d pages swap cached\n", total_cached); + printk(KERN_INFO "Total of %ld pages in page table cache\n", + pgtable_quicklist_total_size()); + printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages()); } /** @@ -654,6 +654,7 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n { unsigned long end = start + len; + add_active_range(node, start >> PAGE_SHIFT, end >> PAGE_SHIFT); mem_data[node].num_physpages += len >> PAGE_SHIFT; if (start <= __pa(MAX_DMA_ADDRESS)) mem_data[node].num_dma_physpages += @@ -678,10 +679,10 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n void __init paging_init(void) { unsigned long max_dma; - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long zholes_size[MAX_NR_ZONES]; unsigned long pfn_offset = 0; + unsigned long max_pfn = 0; int node; + unsigned long max_zone_pfns[MAX_NR_ZONES]; max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; @@ -698,47 +699,20 @@ void __init paging_init(void) #endif for_each_online_node(node) { - memset(zones_size, 0, sizeof(zones_size)); - memset(zholes_size, 0, sizeof(zholes_size)); - num_physpages += mem_data[node].num_physpages; - - if (mem_data[node].min_pfn >= max_dma) { - /* All of this node's memory is above ZONE_DMA */ - zones_size[ZONE_NORMAL] = mem_data[node].max_pfn - - mem_data[node].min_pfn; - zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn - - mem_data[node].min_pfn - - mem_data[node].num_physpages; - } else if (mem_data[node].max_pfn < max_dma) { - /* All of this node's memory is in ZONE_DMA */ - zones_size[ZONE_DMA] = mem_data[node].max_pfn - - mem_data[node].min_pfn; - zholes_size[ZONE_DMA] = mem_data[node].max_pfn - - mem_data[node].min_pfn - - mem_data[node].num_dma_physpages; - } else { - /* This node has memory in both zones */ - zones_size[ZONE_DMA] = max_dma - - mem_data[node].min_pfn; - zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - - mem_data[node].num_dma_physpages; - zones_size[ZONE_NORMAL] = mem_data[node].max_pfn - - max_dma; - zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] - - (mem_data[node].num_physpages - - mem_data[node].num_dma_physpages); - } - pfn_offset = mem_data[node].min_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset; #endif - free_area_init_node(node, NODE_DATA(node), zones_size, - pfn_offset, zholes_size); + if (mem_data[node].max_pfn > max_pfn) + max_pfn = mem_data[node].max_pfn; } + max_zone_pfns[ZONE_DMA] = max_dma; + max_zone_pfns[ZONE_NORMAL] = max_pfn; + free_area_init_nodes(max_zone_pfns); + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 30617ccb4f7..ff87a5cba39 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -593,6 +593,18 @@ find_largest_hole (u64 start, u64 end, void *arg) last_end = end; return 0; } + +int __init +register_active_ranges(u64 start, u64 end, void *nid) +{ + BUG_ON(nid == NULL); + BUG_ON(*(unsigned long *)nid >= MAX_NUMNODES); + + add_active_range(*(unsigned long *)nid, + __pa(start) >> PAGE_SHIFT, + __pa(end) >> PAGE_SHIFT); + return 0; +} #endif /* CONFIG_VIRTUAL_MEM_MAP */ static int __init diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 60b45e79f08..15c7c670da3 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -562,7 +562,8 @@ pcibios_enable_device (struct pci_dev *dev, int mask) void pcibios_disable_device (struct pci_dev *dev) { - acpi_pci_irq_disable(dev); + if (dev->is_enabled) + acpi_pci_irq_disable(dev); } void diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 27dee458406..7f73ad4408a 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -277,8 +277,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) } /* temporary buffer used during unaligned transfers */ - bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, - GFP_KERNEL | GFP_DMA); + bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL); if (bteBlock_unaligned == NULL) { return BTEFAIL_NOTAVAIL; } diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 9a8a29339d2..b632b9c1e3b 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -32,9 +32,10 @@ #include <linux/cpumask.h> #include <linux/smp_lock.h> #include <linux/nodemask.h> +#include <linux/smp.h> + #include <asm/processor.h> #include <asm/topology.h> -#include <asm/smp.h> #include <asm/semaphore.h> #include <asm/uaccess.h> #include <asm/sal.h> |