diff options
Diffstat (limited to 'arch/ia64')
62 files changed, 1779 insertions, 914 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a85ea9d37f0..edffe25a477 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -34,6 +34,10 @@ config RWSEM_XCHGADD_ALGORITHM bool default y +config GENERIC_FIND_NEXT_BIT + bool + default y + config GENERIC_CALIBRATE_DELAY bool default y @@ -42,6 +46,10 @@ config TIME_INTERPOLATION bool default y +config DMI + bool + default y + config EFI bool default y @@ -252,6 +260,15 @@ config NR_CPUS than 64 will cause the use of a CPU mask array, causing a small performance hit. +config IA64_NR_NODES + int "Maximum number of NODEs (256-1024)" if (IA64_SGI_SN2 || IA64_GENERIC) + range 256 1024 + depends on IA64_SGI_SN2 || IA64_GENERIC + default "256" + help + This option specifies the maximum number of nodes in your SSI system. + If in doubt, use the default. + config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && EXPERIMENTAL @@ -271,6 +288,25 @@ config SCHED_SMT Intel IA64 chips with MultiThreading at a cost of slightly increased overhead in some places. If unsure say N here. +config PERMIT_BSP_REMOVE + bool "Support removal of Bootstrap Processor" + depends on HOTPLUG_CPU + default n + ---help--- + Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU + support. + +config FORCE_CPEI_RETARGET + bool "Force assumption that CPEI can be re-targetted" + depends on PERMIT_BSP_REMOVE + default n + ---help--- + Say Y if you need to force the assumption that CPEI can be re-targetted to + any cpu in the system. This hint is available via ACPI 3.0 specifications. + Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. + This option it useful to enable this feature on older BIOS's as well. + You can also enable this by using boot command line option force_cpei=1. + config PREEMPT bool "Preemptible Kernel" help diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index f722e1a2594..80ea7506fa1 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -1,6 +1,9 @@ # # ia64/Makefile # +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive # for more details. @@ -62,7 +65,7 @@ drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/ boot := arch/ia64/hp/sim/boot -.PHONY: boot compressed check +PHONY += boot compressed check all: compressed unwcheck diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig index 744fd2f79f6..0d29aa2066b 100644 --- a/arch/ia64/configs/gensparse_defconfig +++ b/arch/ia64/configs/gensparse_defconfig @@ -116,6 +116,7 @@ CONFIG_IOSAPIC=y CONFIG_FORCE_MAX_ZONEORDER=17 CONFIG_SMP=y CONFIG_NR_CPUS=512 +CONFIG_IA64_NR_NODES=256 CONFIG_HOTPLUG_CPU=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index 8206752161b..a718034d68d 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -116,6 +116,7 @@ CONFIG_IA64_SGI_SN_XP=m CONFIG_FORCE_MAX_ZONEORDER=17 CONFIG_SMP=y CONFIG_NR_CPUS=1024 +CONFIG_IA64_NR_NODES=256 # CONFIG_HOTPLUG_CPU is not set CONFIG_SCHED_SMT=y CONFIG_PREEMPT=y diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 125568118b8..766bf495543 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17 CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y +CONFIG_PERMIT_BSP_REMOVE=y +CONFIG_FORCE_CPEI_RETARGET=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_SELECT_MEMORY_MODEL=y diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 3e767288a74..6cba55da572 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -116,6 +116,7 @@ CONFIG_IOSAPIC=y CONFIG_FORCE_MAX_ZONEORDER=17 CONFIG_SMP=y CONFIG_NR_CPUS=512 +CONFIG_IA64_NR_NODES=256 CONFIG_HOTPLUG_CPU=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c index c9104bfff66..38aa9c10885 100644 --- a/arch/ia64/dig/setup.c +++ b/arch/ia64/dig/setup.c @@ -69,8 +69,3 @@ dig_setup (char **cmdline_p) screen_info.orig_video_isVGA = 1; /* XXX fake */ screen_info.orig_video_ega_bx = 3; /* XXX fake */ } - -void __init -dig_irq_init (void) -{ -} diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 626cdc83668..0e5c6ae5022 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -46,11 +46,6 @@ #define KEYBOARD_INTR 3 /* must match with simulator! */ #define NR_PORTS 1 /* only one port for now */ -#define SERIAL_INLINE 1 - -#ifdef SERIAL_INLINE -#define _INLINE_ inline -#endif #define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) @@ -237,7 +232,7 @@ static void rs_put_char(struct tty_struct *tty, unsigned char ch) local_irq_restore(flags); } -static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done) +static void transmit_chars(struct async_struct *info, int *intr_done) { int count; unsigned long flags; diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index 68ceb4e690c..ccb98ed48e5 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -29,9 +29,9 @@ struct partial_page { struct partial_page *next; /* linked list, sorted by address */ struct rb_node pp_rb; - /* 64K is the largest "normal" page supported by ia64 ABI. So 4K*32 + /* 64K is the largest "normal" page supported by ia64 ABI. So 4K*64 * should suffice.*/ - unsigned int bitmap; + unsigned long bitmap; unsigned int base; }; diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 70dba1f0e2e..5366b3b23d0 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -25,7 +25,6 @@ #include <linux/resource.h> #include <linux/times.h> #include <linux/utsname.h> -#include <linux/timex.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/sem.h> @@ -1166,19 +1165,7 @@ put_tv32 (struct compat_timeval __user *o, struct timeval *i) asmlinkage unsigned long sys32_alarm (unsigned int seconds) { - struct itimerval it_new, it_old; - unsigned int oldalarm; - - it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; - it_new.it_value.tv_sec = seconds; - it_new.it_value.tv_usec = 0; - do_setitimer(ITIMER_REAL, &it_new, &it_old); - oldalarm = it_old.it_value.tv_sec; - /* ehhh.. We can't return 0 if we have an alarm pending.. */ - /* And we'd better return too much than too little anyway */ - if (it_old.it_value.tv_usec) - oldalarm++; - return oldalarm; + return alarm_setitimer(seconds); } /* Translations due to time_t size differences. Which affects all @@ -2603,78 +2590,4 @@ sys32_setresgid(compat_gid_t rgid, compat_gid_t egid, ssgid = (sgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)sgid); return sys_setresgid(srgid, segid, ssgid); } - -/* Handle adjtimex compatibility. */ - -struct timex32 { - u32 modes; - s32 offset, freq, maxerror, esterror; - s32 status, constant, precision, tolerance; - struct compat_timeval time; - s32 tick; - s32 ppsfreq, jitter, shift, stabil; - s32 jitcnt, calcnt, errcnt, stbcnt; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; -}; - -extern int do_adjtimex(struct timex *); - -asmlinkage long -sys32_adjtimex(struct timex32 *utp) -{ - struct timex txc; - int ret; - - memset(&txc, 0, sizeof(struct timex)); - - if(get_user(txc.modes, &utp->modes) || - __get_user(txc.offset, &utp->offset) || - __get_user(txc.freq, &utp->freq) || - __get_user(txc.maxerror, &utp->maxerror) || - __get_user(txc.esterror, &utp->esterror) || - __get_user(txc.status, &utp->status) || - __get_user(txc.constant, &utp->constant) || - __get_user(txc.precision, &utp->precision) || - __get_user(txc.tolerance, &utp->tolerance) || - __get_user(txc.time.tv_sec, &utp->time.tv_sec) || - __get_user(txc.time.tv_usec, &utp->time.tv_usec) || - __get_user(txc.tick, &utp->tick) || - __get_user(txc.ppsfreq, &utp->ppsfreq) || - __get_user(txc.jitter, &utp->jitter) || - __get_user(txc.shift, &utp->shift) || - __get_user(txc.stabil, &utp->stabil) || - __get_user(txc.jitcnt, &utp->jitcnt) || - __get_user(txc.calcnt, &utp->calcnt) || - __get_user(txc.errcnt, &utp->errcnt) || - __get_user(txc.stbcnt, &utp->stbcnt)) - return -EFAULT; - - ret = do_adjtimex(&txc); - - if(put_user(txc.modes, &utp->modes) || - __put_user(txc.offset, &utp->offset) || - __put_user(txc.freq, &utp->freq) || - __put_user(txc.maxerror, &utp->maxerror) || - __put_user(txc.esterror, &utp->esterror) || - __put_user(txc.status, &utp->status) || - __put_user(txc.constant, &utp->constant) || - __put_user(txc.precision, &utp->precision) || - __put_user(txc.tolerance, &utp->tolerance) || - __put_user(txc.time.tv_sec, &utp->time.tv_sec) || - __put_user(txc.time.tv_usec, &utp->time.tv_usec) || - __put_user(txc.tick, &utp->tick) || - __put_user(txc.ppsfreq, &utp->ppsfreq) || - __put_user(txc.jitter, &utp->jitter) || - __put_user(txc.shift, &utp->shift) || - __put_user(txc.stabil, &utp->stabil) || - __put_user(txc.jitcnt, &utp->jitcnt) || - __put_user(txc.calcnt, &utp->calcnt) || - __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt)) - ret = -EFAULT; - - return ret; -} #endif /* NOTYET */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 09a0dbc17fb..59e871dae74 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ - unwind.o mca.o mca_asm.o topology.o + unwind.o mca.o mca_asm.o topology.o dmi_scan.o obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o @@ -30,6 +30,7 @@ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o +dmi_scan-y += ../../i386/kernel/dmi_scan.o # The gate DSO image is built using a special linker script. targets += gate.so gate-syms.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index ecd44bdc839..58c93a30348 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header, return 0; } +#ifdef CONFIG_HOTPLUG_CPU unsigned int can_cpei_retarget(void) { extern int cpe_vector; + extern unsigned int force_cpei_retarget; /* * Only if CPEI is supported and the override flag * is present, otherwise return that its re-targettable * if we are in polling mode. */ - if (cpe_vector > 0 && !acpi_cpei_override) - return 0; - else - return 1; + if (cpe_vector > 0) { + if (acpi_cpei_override || force_cpei_retarget) + return 1; + else + return 0; + } + return 1; } unsigned int is_cpu_cpei_target(unsigned int cpu) @@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu) { acpi_cpei_phys_cpuid = cpu_physical_id(cpu); } +#endif unsigned int get_cpei_target_cpu(void) { @@ -414,6 +420,26 @@ int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; int __initdata nid_to_pxm_map[MAX_NUMNODES]; static struct acpi_table_slit __initdata *slit_table; +static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa) +{ + int pxm; + + pxm = pa->proximity_domain; + if (ia64_platform_is("sn2")) + pxm += pa->reserved[0] << 8; + return pxm; +} + +static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma) +{ + int pxm; + + pxm = ma->proximity_domain; + if (ia64_platform_is("sn2")) + pxm += ma->reserved1[0] << 8; + return pxm; +} + /* * ACPI 2.0 SLIT (System Locality Information Table) * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf @@ -437,13 +463,20 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) void __init acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) { + int pxm; + + if (!pa->flags.enabled) + return; + + pxm = get_processor_proximity_domain(pa); + /* record this node in proximity bitmap */ - pxm_bit_set(pa->proximity_domain); + pxm_bit_set(pxm); node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); /* nid should be overridden as logical node id later */ - node_cpuid[srat_num_cpus].nid = pa->proximity_domain; + node_cpuid[srat_num_cpus].nid = pxm; srat_num_cpus++; } @@ -451,10 +484,10 @@ void __init acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) { unsigned long paddr, size; - u8 pxm; + int pxm; struct node_memblk_s *p, *q, *pend; - pxm = ma->proximity_domain; + pxm = get_memory_proximity_domain(ma); /* fill node memory chunk structure */ paddr = ma->base_addr_hi; @@ -618,9 +651,9 @@ unsigned long __init acpi_find_rsdp(void) { unsigned long rsdp_phys = 0; - if (efi.acpi20) - rsdp_phys = __pa(efi.acpi20); - else if (efi.acpi) + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + rsdp_phys = efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n"); return rsdp_phys; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 9990320b6f9..12cfedce73b 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -458,24 +458,33 @@ efi_init (void) printk(KERN_INFO "EFI v%u.%.02u by %s:", efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + efi.mps = EFI_INVALID_TABLE_ADDR; + efi.acpi = EFI_INVALID_TABLE_ADDR; + efi.acpi20 = EFI_INVALID_TABLE_ADDR; + efi.smbios = EFI_INVALID_TABLE_ADDR; + efi.sal_systab = EFI_INVALID_TABLE_ADDR; + efi.boot_info = EFI_INVALID_TABLE_ADDR; + efi.hcdp = EFI_INVALID_TABLE_ADDR; + efi.uga = EFI_INVALID_TABLE_ADDR; + for (i = 0; i < (int) efi.systab->nr_tables; i++) { if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { - efi.mps = __va(config_tables[i].table); + efi.mps = config_tables[i].table; printk(" MPS=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { - efi.acpi20 = __va(config_tables[i].table); + efi.acpi20 = config_tables[i].table; printk(" ACPI 2.0=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { - efi.acpi = __va(config_tables[i].table); + efi.acpi = config_tables[i].table; printk(" ACPI=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { - efi.smbios = __va(config_tables[i].table); + efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) { - efi.sal_systab = __va(config_tables[i].table); + efi.sal_systab = config_tables[i].table; printk(" SALsystab=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { - efi.hcdp = __va(config_tables[i].table); + efi.hcdp = config_tables[i].table; printk(" HCDP=0x%lx", config_tables[i].table); } } @@ -677,27 +686,34 @@ EXPORT_SYMBOL(efi_mem_attributes); /* * Determines whether the memory at phys_addr supports the desired * attribute (WB, UC, etc). If this returns 1, the caller can safely - * access *size bytes at phys_addr with the specified attribute. + * access size bytes at phys_addr with the specified attribute. */ -static int -efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr) +int +efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr) { + unsigned long end = phys_addr + size; efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); - unsigned long md_end; - if (!md || (md->attribute & attr) != attr) + /* + * Some firmware doesn't report MMIO regions in the EFI memory + * map. The Intel BigSur (a.k.a. HP i2000) has this problem. + * On those platforms, we have to assume UC is valid everywhere. + */ + if (!md || (md->attribute & attr) != attr) { + if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio()) + return 1; return 0; + } do { - md_end = efi_md_end(md); - if (phys_addr + *size <= md_end) + unsigned long md_end = efi_md_end(md); + + if (end <= md_end) return 1; md = efi_memory_descriptor(md_end); - if (!md || (md->attribute & attr) != attr) { - *size = md_end - phys_addr; - return 1; - } + if (!md || (md->attribute & attr) != attr) + return 0; } while (md); return 0; } @@ -708,7 +724,7 @@ efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr) * control access size. */ int -valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) +valid_phys_addr_range (unsigned long phys_addr, unsigned long size) { return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB); } @@ -723,7 +739,7 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) * because that doesn't appear in the boot-time EFI memory map. */ int -valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size) +valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size) { if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB)) return 1; @@ -731,14 +747,6 @@ valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size) if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC)) return 1; - /* - * Some firmware doesn't report MMIO regions in the EFI memory map. - * The Intel BigSur (a.k.a. HP i2000) has this problem. In this - * case, we can't use the EFI memory map to validate mmap requests. - */ - if (!efi_memmap_has_mmio()) - return 1; - return 0; } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 930fdfca6dd..750e8e7fbdc 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1102,9 +1102,6 @@ skip_rbs_switch: st8 [r2]=r8 st8 [r3]=r10 .work_pending: - tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? -(p6) br.cond.sptk.few .sigdelayed - ;; tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? (p6) br.cond.sptk.few .notify #ifdef CONFIG_PREEMPT @@ -1131,17 +1128,6 @@ skip_rbs_switch: (pLvSys)br.cond.sptk.few .work_pending_syscall_end br.cond.sptk.many .work_processed_kernel // don't re-check -// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where -// it could not be delivered. Deliver it now. The signal might be for us and -// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed -// signal. - -.sigdelayed: - br.call.sptk.many rp=do_sigdelayed - cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // re-check - .work_pending_syscall_end: adds r2=PT(R8)+16,r12 adds r3=PT(R10)+16,r12 @@ -1619,5 +1605,6 @@ sys_call_table: data8 sys_ni_syscall // reserved for pselect data8 sys_ni_syscall // 1295 reserved for ppoll data8 sys_unshare + data8 sys_splice .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S index e1e4aba9ecd..7c99e6ec3da 100644 --- a/arch/ia64/kernel/gate.lds.S +++ b/arch/ia64/kernel/gate.lds.S @@ -59,6 +59,7 @@ SECTIONS *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) *(__ex_table) + *(__mca_table) } } diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 574084f343f..7956eb9058f 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -9,54 +9,65 @@ * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> * - * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. - * In particular, we now have separate handlers for edge - * and level triggered interrupts. - * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation - * PCI to vector mapping, shared PCI interrupts. - * 00/10/27 D. Mosberger Document things a bit more to make them more understandable. - * Clean up much of the old IOSAPIC cruft. - * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for - * ACPI S5(SoftOff) support. + * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O + * APIC code. In particular, we now have separate + * handlers for edge and level triggered + * interrupts. + * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector + * allocation PCI to vector mapping, shared PCI + * interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more + * understandable. Clean up much of the old + * IOSAPIC cruft. + * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts + * and fixes for ACPI S5(SoftOff) support. * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT - * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in - * iosapic_set_affinity(), initializations for - * /proc/irq/#/smp_affinity + * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt + * vectors in iosapic_set_affinity(), + * initializations for /proc/irq/#/smp_affinity * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq - * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping - * error + * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to + * IOSAPIC mapping error * 02/07/29 T. Kochi Allocate interrupt vectors dynamically - * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.) - * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code. + * 02/08/04 T. Kochi Cleaned up terminology (irq, global system + * interrupt, vector, etc.) + * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's + * pci_irq code. * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. - * Remove iosapic_address & gsi_base from external interfaces. - * Rationalize __init/__devinit attributes. + * Remove iosapic_address & gsi_base from + * external interfaces. Rationalize + * __init/__devinit attributes. * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004 - * Updated to work with irq migration necessary for CPU Hotplug + * Updated to work with irq migration necessary + * for CPU Hotplug */ /* - * Here is what the interrupt logic between a PCI device and the kernel looks like: + * Here is what the interrupt logic between a PCI device and the kernel looks + * like: * - * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The - * device is uniquely identified by its bus--, and slot-number (the function - * number does not matter here because all functions share the same interrupt - * lines). + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, + * INTD). The device is uniquely identified by its bus-, and slot-number + * (the function number does not matter here because all functions share + * the same interrupt lines). * - * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller. - * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level - * triggered and use the same polarity). Each interrupt line has a unique Global - * System Interrupt (GSI) number which can be calculated as the sum of the controller's - * base GSI number and the IOSAPIC pin number to which the line connects. + * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC + * controller. Multiple interrupt lines may have to share the same + * IOSAPIC pin (if they're level triggered and use the same polarity). + * Each interrupt line has a unique Global System Interrupt (GSI) number + * which can be calculated as the sum of the controller's base GSI number + * and the IOSAPIC pin number to which the line connects. * - * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin - * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU. + * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the + * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then + * sent to the CPU. * - * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as - * architecture-independent interrupt handling mechanism in Linux. As an - * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number - * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and - * IRQ. A platform can implement platform_irq_to_vector(irq) and + * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is + * used as architecture-independent interrupt handling mechanism in Linux. + * As an IRQ is a number, we have to have + * IA-64 interrupt vector number <-> IRQ number mapping. On smaller + * systems, we use one-to-one mapping between IA-64 vector and IRQ. A + * platform can implement platform_irq_to_vector(irq) and * platform_local_vector_to_irq(vector) APIs to differentiate the mapping. * Please see also include/asm-ia64/hw_irq.h for those APIs. * @@ -64,9 +75,9 @@ * * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ * - * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts. - * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this - * source code. + * Note: The term "IRQ" is loosely used everywhere in Linux kernel to + * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ + * (isa_irq) is the only exception in this source code. */ #include <linux/config.h> @@ -90,7 +101,6 @@ #include <asm/ptrace.h> #include <asm/system.h> - #undef DEBUG_INTERRUPT_ROUTING #ifdef DEBUG_INTERRUPT_ROUTING @@ -99,36 +109,46 @@ #define DBG(fmt...) #endif -#define NR_PREALLOCATE_RTE_ENTRIES (PAGE_SIZE / sizeof(struct iosapic_rte_info)) +#define NR_PREALLOCATE_RTE_ENTRIES \ + (PAGE_SIZE / sizeof(struct iosapic_rte_info)) #define RTE_PREALLOCATED (1) static DEFINE_SPINLOCK(iosapic_lock); -/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */ +/* + * These tables map IA-64 vectors to the IOSAPIC pin that generates this + * vector. + */ struct iosapic_rte_info { - struct list_head rte_list; /* node in list of RTEs sharing the same vector */ + struct list_head rte_list; /* node in list of RTEs sharing the + * same vector */ char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ + unsigned int gsi_base; /* first GSI assigned to this + * IOSAPIC */ char rte_index; /* IOSAPIC RTE index */ int refcnt; /* reference counter */ unsigned int flags; /* flags */ } ____cacheline_aligned; static struct iosapic_intr_info { - struct list_head rtes; /* RTEs using this vector (empty => not an IOSAPIC interrupt) */ + struct list_head rtes; /* RTEs using this vector (empty => + * not an IOSAPIC interrupt) */ int count; /* # of RTEs that shares this vector */ - u32 low32; /* current value of low word of Redirection table entry */ + u32 low32; /* current value of low word of + * Redirection table entry */ unsigned int dest; /* destination CPU physical ID */ unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ - unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */ + unsigned char polarity: 1; /* interrupt polarity + * (see iosapic.h) */ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ } iosapic_intr_info[IA64_NUM_VECTORS]; static struct iosapic { char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ - unsigned short num_rte; /* number of RTE in this IOSAPIC */ + unsigned int gsi_base; /* first GSI assigned to this + * IOSAPIC */ + unsigned short num_rte; /* # of RTEs on this IOSAPIC */ int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ #ifdef CONFIG_NUMA unsigned short node; /* numa node association via pxm */ @@ -149,7 +169,8 @@ find_iosapic (unsigned int gsi) int i; for (i = 0; i < NR_IOSAPICS; i++) { - if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte) + if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < + iosapic_lists[i].num_rte) return i; } @@ -162,7 +183,8 @@ _gsi_to_vector (unsigned int gsi) struct iosapic_intr_info *info; struct iosapic_rte_info *rte; - for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info) + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info) list_for_each_entry(rte, &info->rtes, rte_list) if (rte->gsi_base + rte->rte_index == gsi) return info - iosapic_intr_info; @@ -185,8 +207,8 @@ gsi_to_irq (unsigned int gsi) unsigned long flags; int irq; /* - * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq - * numbers... + * XXX fix me: this assumes an identity mapping between IA-64 vector + * and Linux irq numbers... */ spin_lock_irqsave(&iosapic_lock, flags); { @@ -197,7 +219,8 @@ gsi_to_irq (unsigned int gsi) return irq; } -static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec) +static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, + unsigned int vec) { struct iosapic_rte_info *rte; @@ -237,7 +260,9 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) for (irq = 0; irq < NR_IRQS; ++irq) if (irq_to_vector(irq) == vector) { - set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); + set_irq_affinity_info(irq, + (int)(dest & 0xffff), + redir); break; } } @@ -259,7 +284,7 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) } static void -nop (unsigned int vector) +nop (unsigned int irq) { /* do nothing... */ } @@ -281,7 +306,8 @@ mask_irq (unsigned int irq) { /* set only the mask bit */ low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { addr = rte->addr; rte_index = rte->rte_index; iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); @@ -306,7 +332,8 @@ unmask_irq (unsigned int irq) spin_lock_irqsave(&iosapic_lock, flags); { low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { addr = rte->addr; rte_index = rte->rte_index; iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); @@ -346,21 +373,25 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) spin_lock_irqsave(&iosapic_lock, flags); { - low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + low32 = iosapic_intr_info[vec].low32 & + ~(7 << IOSAPIC_DELIVERY_SHIFT); if (redir) /* change delivery mode to lowest priority */ - low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + low32 |= (IOSAPIC_LOWEST_PRIORITY << + IOSAPIC_DELIVERY_SHIFT); else /* change delivery mode to fixed */ low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); iosapic_intr_info[vec].low32 = low32; iosapic_intr_info[vec].dest = dest; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { addr = rte->addr; rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), + high32); iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); } } @@ -433,7 +464,8 @@ iosapic_ack_edge_irq (unsigned int irq) * interrupt for real. This prevents IRQ storms from unhandled * devices. */ - if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED)) + if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == + (IRQ_PENDING|IRQ_DISABLED)) mask_irq(irq); } @@ -467,7 +499,8 @@ iosapic_version (char __iomem *addr) return iosapic_read(addr, IOSAPIC_VERSION); } -static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol) +static int iosapic_find_sharable_vector (unsigned long trigger, + unsigned long pol) { int i, vector = -1, min_count = -1; struct iosapic_intr_info *info; @@ -482,7 +515,8 @@ static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long po for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) { info = &iosapic_intr_info[i]; if (info->trigger == trigger && info->polarity == pol && - (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) { + (info->dmode == IOSAPIC_FIXED || info->dmode == + IOSAPIC_LOWEST_PRIORITY)) { if (min_count == -1 || info->count < min_count) { vector = i; min_count = info->count; @@ -506,12 +540,15 @@ iosapic_reassign_vector (int vector) new_vector = assign_irq_vector(AUTO_ASSIGN); if (new_vector < 0) panic("%s: out of interrupt vectors!\n", __FUNCTION__); - printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector); + printk(KERN_INFO "Reassigning vector %d to %d\n", + vector, new_vector); memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], sizeof(struct iosapic_intr_info)); INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes); - list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes); - memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); + list_move(iosapic_intr_info[vector].rtes.next, + &iosapic_intr_info[new_vector].rtes); + memset(&iosapic_intr_info[vector], 0, + sizeof(struct iosapic_intr_info)); iosapic_intr_info[vector].low32 = IOSAPIC_MASK; INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); } @@ -524,7 +561,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void) int preallocated = 0; if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { - rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES); + rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * + NR_PREALLOCATE_RTE_ENTRIES); if (!rte) return NULL; for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) @@ -532,7 +570,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void) } if (!list_empty(&free_rte_list)) { - rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list); + rte = list_entry(free_rte_list.next, struct iosapic_rte_info, + rte_list); list_del(&rte->rte_list); preallocated++; } else { @@ -575,7 +614,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, index = find_iosapic(gsi); if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi); + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __FUNCTION__, gsi); return -ENODEV; } @@ -586,7 +626,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, if (!rte) { rte = iosapic_alloc_rte(); if (!rte) { - printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__); + printk(KERN_WARNING "%s: cannot allocate memory\n", + __FUNCTION__); return -ENOMEM; } @@ -602,7 +643,9 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, else if (vector_is_shared(vector)) { struct iosapic_intr_info *info = &iosapic_intr_info[vector]; if (info->trigger != trigger || info->polarity != polarity) { - printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__); + printk (KERN_WARNING + "%s: cannot override the interrupt\n", + __FUNCTION__); return -EINVAL; } } @@ -619,8 +662,10 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, idesc = irq_descp(vector); if (idesc->handler != irq_type) { if (idesc->handler != &no_irq_type) - printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", - __FUNCTION__, vector, idesc->handler->typename, irq_type->typename); + printk(KERN_WARNING + "%s: changing vector %d from %s to %s\n", + __FUNCTION__, vector, + idesc->handler->typename, irq_type->typename); idesc->handler = irq_type; } return 0; @@ -631,6 +676,7 @@ get_target_cpu (unsigned int gsi, int vector) { #ifdef CONFIG_SMP static int cpu = -1; + extern int cpe_vector; /* * In case of vector shared by multiple RTEs, all RTEs that @@ -653,6 +699,11 @@ get_target_cpu (unsigned int gsi, int vector) if (!cpu_online(smp_processor_id())) return cpu_physical_id(smp_processor_id()); +#ifdef CONFIG_ACPI + if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) + return get_cpei_target_cpu(); +#endif + #ifdef CONFIG_NUMA { int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; @@ -675,7 +726,7 @@ get_target_cpu (unsigned int gsi, int vector) if (!num_cpus) goto skip_numa_setup; - /* Use vector assigment to distribute across cpus in node */ + /* Use vector assignment to distribute across cpus in node */ cpu_index = vector % num_cpus; for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) @@ -697,7 +748,7 @@ skip_numa_setup: } while (!cpu_online(cpu)); return cpu_physical_id(cpu); -#else +#else /* CONFIG_SMP */ return cpu_physical_id(smp_processor_id()); #endif } @@ -749,7 +800,8 @@ again: if (list_empty(&iosapic_intr_info[vector].rtes)) free_irq_vector(vector); spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_descp(vector)->lock, flags); + spin_unlock_irqrestore(&irq_descp(vector)->lock, + flags); goto again; } @@ -758,7 +810,8 @@ again: polarity, trigger); if (err < 0) { spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_descp(vector)->lock, flags); + spin_unlock_irqrestore(&irq_descp(vector)->lock, + flags); return err; } @@ -800,7 +853,8 @@ iosapic_unregister_intr (unsigned int gsi) */ irq = gsi_to_irq(gsi); if (irq < 0) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); WARN_ON(1); return; } @@ -811,7 +865,9 @@ iosapic_unregister_intr (unsigned int gsi) spin_lock(&iosapic_lock); { if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + printk(KERN_ERR + "iosapic_unregister_intr(%u) unbalanced\n", + gsi); WARN_ON(1); goto out; } @@ -821,7 +877,8 @@ iosapic_unregister_intr (unsigned int gsi) /* Mask the interrupt */ low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; - iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32); + iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), + low32); /* Remove the rte entry from the list */ list_del(&rte->rte_list); @@ -834,7 +891,9 @@ iosapic_unregister_intr (unsigned int gsi) trigger = iosapic_intr_info[vector].trigger; polarity = iosapic_intr_info[vector].polarity; dest = iosapic_intr_info[vector].dest; - printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", + printk(KERN_INFO + "GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d unregistered\n", gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, vector); @@ -847,12 +906,15 @@ iosapic_unregister_intr (unsigned int gsi) idesc->handler = &no_irq_type; /* Clear the interrupt information */ - memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); + memset(&iosapic_intr_info[vector], 0, + sizeof(struct iosapic_intr_info)); iosapic_intr_info[vector].low32 |= IOSAPIC_MASK; INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); if (idesc->action) { - printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq); + printk(KERN_ERR + "interrupt handlers still exist on" + "IRQ %u\n", irq); WARN_ON(1); } @@ -867,7 +929,6 @@ iosapic_unregister_intr (unsigned int gsi) /* * ACPI calls this when it finds an entry for a platform interrupt. - * Note that the irq_base and IOSAPIC address must be set in iosapic_init(). */ int __init iosapic_register_platform_intr (u32 int_type, unsigned int gsi, @@ -901,13 +962,16 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, mask = 1; break; default: - printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type); + printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__, + int_type); return -1; } register_intr(gsi, vector, delivery, polarity, trigger); - printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", + printk(KERN_INFO + "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d\n", int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown", int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), @@ -917,10 +981,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, return vector; } - /* * ACPI calls this when it finds an entry for a legacy ISA IRQ override. - * Note that the gsi_base and IOSAPIC address must be set in iosapic_init(). */ void __init iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, @@ -949,16 +1011,19 @@ iosapic_system_init (int system_pcat_compat) for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) { iosapic_intr_info[vector].low32 = IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); /* mark as unused */ + /* mark as unused */ + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); } pcat_compat = system_pcat_compat; if (pcat_compat) { /* - * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support - * enabled. + * Disable the compatibility mode interrupts (8259 style), + * needs IN/OUT support enabled. */ - printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__); + printk(KERN_INFO + "%s: Disabling PC-AT compatible 8259 interrupts\n", + __FUNCTION__); outb(0xff, 0xA1); outb(0xff, 0x21); } @@ -998,10 +1063,7 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) base = iosapic_lists[index].gsi_base; end = base + iosapic_lists[index].num_rte - 1; - if (gsi_base < base && gsi_end < base) - continue;/* OK */ - - if (gsi_base > end && gsi_end > end) + if (gsi_end < base || end < gsi_base) continue; /* OK */ return -EBUSY; @@ -1047,12 +1109,14 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) if ((gsi_base == 0) && pcat_compat) { /* - * Map the legacy ISA devices into the IOSAPIC data. Some of these may - * get reprogrammed later on with data from the ACPI Interrupt Source - * Override table. + * Map the legacy ISA devices into the IOSAPIC data. Some of + * these may get reprogrammed later on with data from the ACPI + * Interrupt Source Override table. */ for (isa_irq = 0; isa_irq < 16; ++isa_irq) - iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE); + iosapic_override_isa_irq(isa_irq, isa_irq, + IOSAPIC_POL_HIGH, + IOSAPIC_EDGE); } return 0; } @@ -1075,7 +1139,8 @@ iosapic_remove (unsigned int gsi_base) if (iosapic_lists[index].rtes_inuse) { err = -EBUSY; - printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n", + printk(KERN_WARNING + "%s: IOSAPIC for GSI base %u is busy\n", __FUNCTION__, gsi_base); goto out; } diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index d33244c3275..5ce908ef9c9 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -163,8 +163,19 @@ void fixup_irqs(void) { unsigned int irq; extern void ia64_process_pending_intr(void); + extern void ia64_disable_timer(void); + extern volatile int time_keeper_id; + + ia64_disable_timer(); + + /* + * Find a new timesync master + */ + if (smp_processor_id() == time_keeper_id) { + time_keeper_id = first_cpu(cpu_online_map); + printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); + } - ia64_set_itv(1<<16); /* * Phase 1: Locate irq's bound to this cpu and * relocate them for cpu removal. diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index dcd906fe574..829a43cab79 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -865,6 +865,7 @@ ENTRY(interrupt) ;; SAVE_REST ;; + MCA_RECOVER_RANGE(interrupt) alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group mov out0=cr.ivr // pass cr.ivr as first arg add out1=16,sp // pass pointer to pt_regs as second arg diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 50ae8c7d453..789881ca83d 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -34,6 +34,7 @@ #include <asm/pgtable.h> #include <asm/kdebug.h> #include <asm/sections.h> +#include <asm/uaccess.h> extern void jprobe_inst_return(void); @@ -722,13 +723,50 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs); - reset_current_kprobe(); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the instruction pointer points back to + * the probe address and allow the page fault handler + * to continue as a normal page fault. + */ + regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL; + ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf; + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * Let ia64_do_page_fault() fix it. + */ + break; + default: + break; } return 0; @@ -740,6 +778,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch(val) { case DIE_BREAK: /* err is break number from ia64_bad_break() */ diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index c3a04ee7f4f..4b0b71d5aef 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -14,7 +14,15 @@ struct ia64_machine_vector ia64_mv; EXPORT_SYMBOL(ia64_mv); -static struct ia64_machine_vector * +static __initdata const char *mvec_name; +static __init int setup_mvec(char *s) +{ + mvec_name = s; + return 0; +} +early_param("machvec", setup_mvec); + +static struct ia64_machine_vector * __init lookup_machvec (const char *name) { extern struct ia64_machine_vector machvec_start[]; @@ -33,10 +41,13 @@ machvec_init (const char *name) { struct ia64_machine_vector *mv; + if (!name) + name = mvec_name ? mvec_name : acpi_get_sysname(); mv = lookup_machvec(name); - if (!mv) { - panic("generic kernel failed to find machine vector for platform %s!", name); - } + if (!mv) + panic("generic kernel failed to find machine vector for" + " platform %s!", name); + ia64_mv = *mv; printk(KERN_INFO "booting generic kernel on platform %s\n", name); } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index ee7eec9ee57..8963171788d 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -69,6 +69,7 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/workqueue.h> +#include <linux/cpumask.h> #include <asm/delay.h> #include <asm/kdebug.h> @@ -83,6 +84,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> +#include "mca_drv.h" #include "entry.h" #if defined(IA64_MCA_DEBUG_INFO) @@ -133,7 +135,7 @@ static int cpe_poll_enabled = 1; extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); -static int mca_init; +static int mca_init __initdata; static void inline @@ -184,7 +186,7 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) * Outputs : None */ -static void +static void __init ia64_log_init(int sal_info_type) { u64 max_size = 0; @@ -282,13 +284,53 @@ ia64_mca_log_sal_error_record(int sal_info_type) } /* - * platform dependent error handling + * search_mca_table + * See if the MCA surfaced in an instruction range + * that has been tagged as recoverable. + * + * Inputs + * first First address range to check + * last Last address range to check + * ip Instruction pointer, address we are looking for + * + * Return value: + * 1 on Success (in the table)/ 0 on Failure (not in the table) */ -#ifndef PLATFORM_MCA_HANDLERS +int +search_mca_table (const struct mca_table_entry *first, + const struct mca_table_entry *last, + unsigned long ip) +{ + const struct mca_table_entry *curr; + u64 curr_start, curr_end; + + curr = first; + while (curr <= last) { + curr_start = (u64) &curr->start_addr + curr->start_addr; + curr_end = (u64) &curr->end_addr + curr->end_addr; + + if ((ip >= curr_start) && (ip <= curr_end)) { + return 1; + } + curr++; + } + return 0; +} + +/* Given an address, look for it in the mca tables. */ +int mca_recover_range(unsigned long addr) +{ + extern struct mca_table_entry __start___mca_table[]; + extern struct mca_table_entry __stop___mca_table[]; + + return search_mca_table(__start___mca_table, __stop___mca_table-1, addr); +} +EXPORT_SYMBOL_GPL(mca_recover_range); #ifdef CONFIG_ACPI int cpe_vector = -1; +int ia64_cpe_irq = -1; static irqreturn_t ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) @@ -359,7 +401,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) * Outputs * None */ -static void +static void __init ia64_mca_register_cpev (int cpev) { /* Register the CPE interrupt vector with SAL */ @@ -377,8 +419,6 @@ ia64_mca_register_cpev (int cpev) } #endif /* CONFIG_ACPI */ -#endif /* PLATFORM_MCA_HANDLERS */ - /* * ia64_mca_cmc_vector_setup * @@ -392,7 +432,7 @@ ia64_mca_register_cpev (int cpev) * Outputs * None */ -void +void __cpuinit ia64_mca_cmc_vector_setup (void) { cmcv_reg_t cmcv; @@ -630,6 +670,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) *tnat |= (nat << tslot); } +/* Change the comm field on the MCA/INT task to include the pid that + * was interrupted, it makes for easier debugging. If that pid was 0 + * (swapper or nested MCA/INIT) then use the start of the previous comm + * field suffixed with its cpu. + */ + +static void +ia64_mca_modify_comm(const task_t *previous_current) +{ + char *p, comm[sizeof(current->comm)]; + if (previous_current->pid) + snprintf(comm, sizeof(comm), "%s %d", + current->comm, previous_current->pid); + else { + int l; + if ((p = strchr(previous_current->comm, ' '))) + l = p - previous_current->comm; + else + l = strlen(previous_current->comm); + snprintf(comm, sizeof(comm), "%s %*s %d", + current->comm, l, previous_current->comm, + task_thread_info(previous_current)->cpu); + } + memcpy(current->comm, comm, sizeof(current->comm)); +} + /* On entry to this routine, we are running on the per cpu stack, see * mca_asm.h. The original stack has not been touched by this event. Some of * the original stack's registers will be in the RBS on this stack. This stack @@ -648,7 +714,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, struct ia64_sal_os_state *sos, const char *type) { - char *p, comm[sizeof(current->comm)]; + char *p; ia64_va va; extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ const pal_min_state_area_t *ms = sos->pal_min_state; @@ -721,54 +787,43 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, /* Verify the previous stack state before we change it */ if (user_mode(regs)) { msg = "occurred in user space"; - goto no_mod; - } - if (r13 != sos->prev_IA64_KR_CURRENT) { - msg = "inconsistent previous current and r13"; - goto no_mod; - } - if ((r12 - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent r12 and r13"; - goto no_mod; - } - if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent ar.bspstore and r13"; - goto no_mod; - } - va.p = old_bspstore; - if (va.f.reg < 5) { - msg = "old_bspstore is in the wrong region"; - goto no_mod; - } - if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent ar.bsp and r13"; - goto no_mod; - } - size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; - if (ar_bspstore + size > r12) { - msg = "no room for blocked state"; + /* previous_current is guaranteed to be valid when the task was + * in user space, so ... + */ + ia64_mca_modify_comm(previous_current); goto no_mod; } - /* Change the comm field on the MCA/INT task to include the pid that - * was interrupted, it makes for easier debugging. If that pid was 0 - * (swapper or nested MCA/INIT) then use the start of the previous comm - * field suffixed with its cpu. - */ - if (previous_current->pid) - snprintf(comm, sizeof(comm), "%s %d", - current->comm, previous_current->pid); - else { - int l; - if ((p = strchr(previous_current->comm, ' '))) - l = p - previous_current->comm; - else - l = strlen(previous_current->comm); - snprintf(comm, sizeof(comm), "%s %*s %d", - current->comm, l, previous_current->comm, - task_thread_info(previous_current)->cpu); + if (!mca_recover_range(ms->pmsa_iip)) { + if (r13 != sos->prev_IA64_KR_CURRENT) { + msg = "inconsistent previous current and r13"; + goto no_mod; + } + if ((r12 - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent r12 and r13"; + goto no_mod; + } + if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent ar.bspstore and r13"; + goto no_mod; + } + va.p = old_bspstore; + if (va.f.reg < 5) { + msg = "old_bspstore is in the wrong region"; + goto no_mod; + } + if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent ar.bsp and r13"; + goto no_mod; + } + size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; + if (ar_bspstore + size > r12) { + msg = "no room for blocked state"; + goto no_mod; + } } - memcpy(current->comm, comm, sizeof(current->comm)); + + ia64_mca_modify_comm(previous_current); /* Make the original task look blocked. First stack a struct pt_regs, * describing the state at the time of interrupt. mca_asm.S built a @@ -908,7 +963,7 @@ no_mod: static void ia64_wait_for_slaves(int monarch) { - int c, wait = 0; + int c, wait = 0, missing = 0; for_each_online_cpu(c) { if (c == monarch) continue; @@ -919,15 +974,32 @@ ia64_wait_for_slaves(int monarch) } } if (!wait) - return; + goto all_in; for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + missing = 1; break; } } + if (!missing) + goto all_in; + printk(KERN_INFO "OS MCA slave did not rendezvous on cpu"); + for_each_online_cpu(c) { + if (c == monarch) + continue; + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + printk(" %d", c); + } + printk("\n"); + return; + +all_in: + printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n"); + return; } /* @@ -953,6 +1025,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, task_t *previous_current; oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ + console_loglevel = 15; /* make sure printks make it to console */ + printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", + sos->proc_state_param, cpu, sos->monarch); + previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) @@ -1416,7 +1492,7 @@ static struct irqaction mca_cpep_irqaction = { * format most of the fields. */ -static void +static void __cpuinit format_mca_init_stack(void *mca_data, unsigned long offset, const char *type, int cpu) { @@ -1430,7 +1506,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->cpu = cpu; p->thread_info = ti; p->state = TASK_UNINTERRUPTIBLE; - __set_bit(cpu, &p->cpus_allowed); + cpu_set(cpu, p->cpus_allowed); INIT_LIST_HEAD(&p->tasks); p->parent = p->real_parent = p->group_leader = p; INIT_LIST_HEAD(&p->children); @@ -1440,15 +1516,17 @@ format_mca_init_stack(void *mca_data, unsigned long offset, /* Do per-CPU MCA-related initialization. */ -void __devinit +void __cpuinit ia64_mca_cpu_init(void *cpu_data) { void *pal_vaddr; + static int first_time = 1; - if (smp_processor_id() == 0) { + if (first_time) { void *mca_data; int cpu; + first_time = 0; mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS + KERNEL_STACK_SIZE); mca_data = (void *)(((unsigned long)mca_data + @@ -1704,6 +1782,7 @@ ia64_mca_late_init(void) desc = irq_descp(irq); desc->status |= IRQ_PER_CPU; setup_irq(irq, &mca_cpe_irqaction); + ia64_cpe_irq = irq; } ia64_mca_register_cpev(cpe_vector); IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index e883d85906d..37c88eb5587 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -6,6 +6,7 @@ * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) * Copyright (C) 2005 Silicon Graphics, Inc * Copyright (C) 2005 Keith Owens <kaos@sgi.com> + * Copyright (C) 2006 Russ Anderson <rja@sgi.com> */ #include <linux/config.h> #include <linux/types.h> @@ -121,11 +122,12 @@ mca_page_isolate(unsigned long paddr) */ void -mca_handler_bh(unsigned long paddr) +mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) { - printk(KERN_ERR - "OS_MCA: process [pid: %d](%s) encounters MCA (paddr=%lx)\n", - current->pid, current->comm, paddr); + printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " + "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", + raw_smp_processor_id(), current->pid, current->uid, + iip, ipsr, paddr, current->comm); spin_lock(&mca_bh_lock); switch (mca_page_isolate(paddr)) { @@ -442,21 +444,26 @@ recover_from_read_error(slidx_table_t *slidx, if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) return 0; psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); + psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); /* * Check the privilege level of interrupted context. * If it is user-mode, then terminate affected process. */ - if (psr1->cpl != 0) { + + pmsa = sos->pal_min_state; + if (psr1->cpl != 0 || + ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) { smei = peidx_bus_check(peidx, 0); if (smei->valid.target_identifier) { /* * setup for resume to bottom half of MCA, * "mca_handler_bhhook" */ - pmsa = sos->pal_min_state; - /* pass to bhhook as 1st argument (gr8) */ + /* pass to bhhook as argument (gr8, ...) */ pmsa->pmsa_gr[8-1] = smei->target_identifier; + pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; + pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; /* set interrupted return address (but no use) */ pmsa->pmsa_br0 = pmsa->pmsa_iip; /* change resume address to bottom half */ @@ -466,6 +473,7 @@ recover_from_read_error(slidx_table_t *slidx, psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; psr2->cpl = 0; psr2->ri = 0; + psr2->bn = 1; psr2->i = 0; return 1; diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h index e2f6fa1e0ef..31a2e52bb16 100644 --- a/arch/ia64/kernel/mca_drv.h +++ b/arch/ia64/kernel/mca_drv.h @@ -111,3 +111,10 @@ typedef struct slidx_table { slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ __count; }) +struct mca_table_entry { + int start_addr; /* location-relative starting address of MCA recoverable range */ + int end_addr; /* location-relative ending address of MCA recoverable range */ +}; + +extern const struct mca_table_entry *search_mca_tables (unsigned long addr); +extern int mca_recover_range(unsigned long); diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S index 3f298ee4d00..e6a580d354b 100644 --- a/arch/ia64/kernel/mca_drv_asm.S +++ b/arch/ia64/kernel/mca_drv_asm.S @@ -14,15 +14,12 @@ GLOBAL_ENTRY(mca_handler_bhhook) invala // clear RSE ? - ;; cover ;; clrrrb ;; - alloc r16=ar.pfs,0,2,1,0 // make a new frame - ;; + alloc r16=ar.pfs,0,2,3,0 // make a new frame mov ar.rsc=0 - ;; mov r13=IA64_KR(CURRENT) // current task pointer ;; mov r2=r13 @@ -30,7 +27,6 @@ GLOBAL_ENTRY(mca_handler_bhhook) addl r22=IA64_RBS_OFFSET,r2 ;; mov ar.bspstore=r22 - ;; addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 ;; adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 @@ -40,12 +36,12 @@ GLOBAL_ENTRY(mca_handler_bhhook) movl loc1=mca_handler_bh // recovery C function ;; mov out0=r8 // poisoned address + mov out1=r9 // iip + mov out2=r10 // psr mov b6=loc1 ;; mov loc1=rp - ;; - ssm psr.i - ;; + ssm psr.i | psr.ic br.call.sptk.many rp=b6 // does not return ... ;; mov ar.pfs=loc0 @@ -53,5 +49,4 @@ GLOBAL_ENTRY(mca_handler_bhhook) ;; mov r8=r0 br.ret.sptk.many rp - ;; END(mca_handler_bhhook) diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index a68ce667809..0766493d4d0 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -25,7 +25,7 @@ #include <asm/processor.h> #include <asm/smp.h> -u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 89faa603c6b..6386f63c413 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -240,7 +240,7 @@ cache_info(char *page) } p += sprintf(p, "%s Cache level %lu:\n" - "\tSize : %lu bytes\n" + "\tSize : %u bytes\n" "\tAttributes : ", cache_types[j+cci.pcci_unified], i+1, cci.pcci_cache_size); @@ -648,9 +648,9 @@ frequency_info(char *page) if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0; p += sprintf(p, - "Processor/Clock ratio : %ld/%ld\n" - "Bus/Clock ratio : %ld/%ld\n" - "ITC/Clock ratio : %ld/%ld\n", + "Processor/Clock ratio : %d/%d\n" + "Bus/Clock ratio : %d/%d\n" + "ITC/Clock ratio : %d/%d\n", proc.num, proc.den, bus.num, bus.den, itc.num, itc.den); return p - page; diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index 6a4ac7d70b3..bc11bb096f5 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -115,7 +115,7 @@ ia64_patch_vtop (unsigned long start, unsigned long end) ia64_srlz_i(); } -void +void __init ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) { static int first_time = 1; @@ -149,7 +149,7 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) ia64_srlz_i(); } -static void +static void __init patch_fsyscall_table (unsigned long start, unsigned long end) { extern unsigned long fsyscall_table[NR_syscalls]; @@ -166,7 +166,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end) ia64_srlz_i(); } -static void +static void __init patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) { extern char fsys_bubble_down[]; @@ -184,7 +184,7 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) ia64_srlz_i(); } -void +void __init ia64_patch_gate (void) { # define START(name) ((unsigned long) __start_gate_##name##_patchlist) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9c5194b385d..077f21216b6 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6722,6 +6722,7 @@ __initcall(pfm_init); void pfm_init_percpu (void) { + static int first_time=1; /* * make sure no measurement is active * (may inherit programmed PMCs from EFI). @@ -6734,8 +6735,10 @@ pfm_init_percpu (void) */ pfm_unfreeze_pmu(); - if (smp_processor_id() == 0) + if (first_time) { register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + first_time=0; + } ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); ia64_srlz_d(); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 309d59658e5..355d57970ba 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -30,7 +30,6 @@ #include <linux/efi.h> #include <linux/interrupt.h> #include <linux/delay.h> -#include <linux/kprobes.h> #include <asm/cpu.h> #include <asm/delay.h> @@ -738,13 +737,6 @@ void exit_thread (void) { - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(current); - ia64_drop_fpu(current); #ifdef CONFIG_PERFMON /* if needed, stop monitoring and flush state to perfmon context */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index eaed14aac6a..9887c8787e7 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1656,8 +1656,14 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { - if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8); + if (unlikely(current->audit_context)) { + int success = AUDITSC_RESULT(regs.r10); + long result = regs.r8; + + if (success != AUDITSC_SUCCESS) + result = -result; + audit_syscall_exit(current, success, result); + } if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 3258e09278d..e4dfda1eb7d 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -37,11 +37,11 @@ #include <linux/string.h> #include <linux/threads.h> #include <linux/tty.h> +#include <linux/dmi.h> #include <linux/serial.h> #include <linux/serial_core.h> #include <linux/efi.h> #include <linux/initrd.h> -#include <linux/platform.h> #include <linux/pm.h> #include <linux/cpufreq.h> @@ -131,8 +131,8 @@ EXPORT_SYMBOL(ia64_max_iommu_merge_mask); /* * We use a special marker for the end of memory and it uses the extra (+1) slot */ -struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; -int num_rsvd_regions; +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; +int num_rsvd_regions __initdata; /* @@ -141,7 +141,7 @@ int num_rsvd_regions; * caller-specified function is called with the memory ranges that remain after filtering. * This routine does not assume the incoming segments are sorted. */ -int +int __init filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) { unsigned long range_start, range_end, prev_start; @@ -177,7 +177,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) return 0; } -static void +static void __init sort_regions (struct rsvd_region *rsvd_region, int max) { int j; @@ -218,7 +218,7 @@ __initcall(register_memory); * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, * see include/asm-ia64/meminit.h if you need to define more. */ -void +void __init reserve_memory (void) { int n = 0; @@ -270,7 +270,7 @@ reserve_memory (void) * Grab the initrd start and end from the boot parameter struct given us by * the boot loader. */ -void +void __init find_initrd (void) { #ifdef CONFIG_BLK_DEV_INITRD @@ -362,7 +362,7 @@ mark_bsp_online (void) } #ifdef CONFIG_SMP -static void +static void __init check_for_logical_procs (void) { pal_logical_to_physical_t info; @@ -389,6 +389,14 @@ check_for_logical_procs (void) } #endif +static __initdata int nomca; +static __init int setup_nomca(char *s) +{ + nomca = 1; + return 0; +} +early_param("nomca", setup_nomca); + void __init setup_arch (char **cmdline_p) { @@ -402,35 +410,15 @@ setup_arch (char **cmdline_p) efi_init(); io_port_init(); + parse_early_param(); + #ifdef CONFIG_IA64_GENERIC - { - const char *mvec_name = strstr (*cmdline_p, "machvec="); - char str[64]; - - if (mvec_name) { - const char *end; - size_t len; - - mvec_name += 8; - end = strchr (mvec_name, ' '); - if (end) - len = end - mvec_name; - else - len = strlen (mvec_name); - len = min(len, sizeof (str) - 1); - strncpy (str, mvec_name, len); - str[len] = '\0'; - mvec_name = str; - } else - mvec_name = acpi_get_sysname(); - machvec_init(mvec_name); - } + machvec_init(NULL); #endif if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); - parse_early_param(); #ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); @@ -446,7 +434,7 @@ setup_arch (char **cmdline_p) find_memory(); /* process SAL system table: */ - ia64_sal_init(efi.sal_systab); + ia64_sal_init(__va(efi.sal_systab)); ia64_setup_printk_clock(); @@ -493,7 +481,7 @@ setup_arch (char **cmdline_p) #endif /* enable IA-64 Machine Check Abort Handling unless disabled */ - if (!strstr(saved_command_line, "nomca")) + if (!nomca) ia64_mca_init(); platform_setup(cmdline_p); @@ -623,7 +611,7 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; -void +static void __cpuinit identify_cpu (struct cpuinfo_ia64 *c) { union { @@ -700,7 +688,7 @@ setup_per_cpu_areas (void) * In addition, the minimum of the i-cache stride sizes is calculated for * "flush_icache_range()". */ -static void +static void __cpuinit get_max_cacheline_size (void) { unsigned long line_size, max = 1; @@ -763,10 +751,10 @@ get_max_cacheline_size (void) * cpu_init() initializes state that is per-CPU. This function acts * as a 'CPU state barrier', nothing should get across. */ -void +void __cpuinit cpu_init (void) { - extern void __devinit ia64_mmu_init (void *); + extern void __cpuinit ia64_mmu_init (void *); unsigned long num_phys_stacked; pal_vm_info_2_u_t vmi; unsigned int max_ctx; @@ -894,9 +882,16 @@ void sched_cacheflush(void) ia64_sal_cache_flush(3); } -void +void __init check_bugs (void) { ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, (unsigned long) __end___mckinley_e9_bundles); } + +static int __init run_dmi_scan(void) +{ + dmi_scan_machine(); + return 0; +} +core_initcall(run_dmi_scan); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 463f6bb44d0..1d7903ee212 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) } return 0; } - -/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it - * could not be delivered. It is important that the target process is not - * allowed to do any more work in user space. Possible cases for the target - * process: - * - * - It is sleeping and will wake up soon. Store the data in the current task, - * the signal will be sent when the current task returns from the next - * interrupt. - * - * - It is running in user context. Store the data in the current task, the - * signal will be sent when the current task returns from the next interrupt. - * - * - It is running in kernel context on this or another cpu and will return to - * user context. Store the data in the target task, the signal will be sent - * to itself when the target task returns to user space. - * - * - It is running in kernel context on this cpu and will sleep before - * returning to user context. Because this is also the current task, the - * signal will not get delivered and the task could sleep indefinitely. - * Store the data in the idle task for this cpu, the signal will be sent - * after the idle task processes its next interrupt. - * - * To cover all cases, store the data in the target task, the current task and - * the idle task on this cpu. Whatever happens, the signal will be delivered - * to the target task before it can do any useful user space work. Multiple - * deliveries have no unwanted side effects. - * - * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts - * disabled. It must not take any locks nor use kernel structures or services - * that require locks. - */ - -/* To ensure that we get the right pid, check its start time. To avoid extra - * include files in thread_info.h, convert the task start_time to unsigned long, - * giving us a cycle time of > 580 years. - */ -static inline unsigned long -start_time_ul(const struct task_struct *t) -{ - return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; -} - -void -set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) -{ - struct task_struct *t; - unsigned long start_time = 0; - int i; - - for (i = 1; i <= 3; ++i) { - switch (i) { - case 1: - t = find_task_by_pid(pid); - if (t) - start_time = start_time_ul(t); - break; - case 2: - t = current; - break; - default: - t = idle_task(smp_processor_id()); - break; - } - - if (!t) - return; - task_thread_info(t)->sigdelayed.signo = signo; - task_thread_info(t)->sigdelayed.code = code; - task_thread_info(t)->sigdelayed.addr = addr; - task_thread_info(t)->sigdelayed.start_time = start_time; - task_thread_info(t)->sigdelayed.pid = pid; - wmb(); - set_tsk_thread_flag(t, TIF_SIGDELAYED); - } -} - -/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that - * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. - */ - -void -do_sigdelayed(void) -{ - struct siginfo siginfo; - pid_t pid; - struct task_struct *t; - - clear_thread_flag(TIF_SIGDELAYED); - memset(&siginfo, 0, sizeof(siginfo)); - siginfo.si_signo = current_thread_info()->sigdelayed.signo; - siginfo.si_code = current_thread_info()->sigdelayed.code; - siginfo.si_addr = current_thread_info()->sigdelayed.addr; - pid = current_thread_info()->sigdelayed.pid; - t = find_task_by_pid(pid); - if (!t) - return; - if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) - return; - force_sig_info(siginfo.si_signo, &siginfo, t); -} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index b681ef34a86..44e9547878a 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -70,6 +70,12 @@ #endif #ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_PERMIT_BSP_REMOVE +#define bsp_remove_ok 1 +#else +#define bsp_remove_ok 0 +#endif + /* * Store all idle threads, this can be reused instead of creating * a new thread. Also avoids complicated thread destroy functionality @@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; /* * ITC synchronization related stuff: */ -#define MASTER 0 +#define MASTER (0) #define SLAVE (SMP_CACHE_BYTES/8) #define NUM_ROUNDS 64 /* magic value */ @@ -151,6 +157,27 @@ char __initdata no_int_routing; unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ +#ifdef CONFIG_FORCE_CPEI_RETARGET +#define CPEI_OVERRIDE_DEFAULT (1) +#else +#define CPEI_OVERRIDE_DEFAULT (0) +#endif + +unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; + +static int __init +cmdl_force_cpei(char *str) +{ + int value=0; + + get_option (&str, &value); + force_cpei_retarget = value; + + return 1; +} + +__setup("force_cpei=", cmdl_force_cpei); + static int __init nointroute (char *str) { @@ -161,6 +188,27 @@ nointroute (char *str) __setup("nointroute", nointroute); +static void fix_b0_for_bsp(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpuid; + static int fix_bsp_b0 = 1; + + cpuid = smp_processor_id(); + + /* + * Cache the b0 value on the first AP that comes up + */ + if (!(fix_bsp_b0 && cpuid)) + return; + + sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; + printk ("Fixed BSP b0 value from CPU %d\n", cpuid); + + fix_bsp_b0 = 0; +#endif +} + void sync_master (void *arg) { @@ -327,8 +375,9 @@ smp_setup_percpu_timer (void) static void __devinit smp_callin (void) { - int cpuid, phys_id; + int cpuid, phys_id, itc_master; extern void ia64_init_itm(void); + extern volatile int time_keeper_id; #ifdef CONFIG_PERFMON extern void pfm_init_percpu(void); @@ -336,6 +385,7 @@ smp_callin (void) cpuid = smp_processor_id(); phys_id = hard_smp_processor_id(); + itc_master = time_keeper_id; if (cpu_online(cpuid)) { printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", @@ -343,6 +393,8 @@ smp_callin (void) BUG(); } + fix_b0_for_bsp(); + lock_ipi_calllock(); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); @@ -365,8 +417,8 @@ smp_callin (void) * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls * local_bh_enable(), which bugs out if irqs are not enabled... */ - Dprintk("Going to syncup ITC with BP.\n"); - ia64_sync_itc(0); + Dprintk("Going to syncup ITC with ITC Master.\n"); + ia64_sync_itc(itc_master); } /* @@ -572,32 +624,8 @@ void __devinit smp_prepare_boot_cpu(void) per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } -/* - * mt_info[] is a temporary store for all info returned by - * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the - * specific cpu comes. - */ -static struct { - __u32 socket_id; - __u16 core_id; - __u16 thread_id; - __u16 proc_fixed_addr; - __u8 valid; -} mt_info[NR_CPUS] __devinitdata; - #ifdef CONFIG_HOTPLUG_CPU static inline void -remove_from_mtinfo(int cpu) -{ - int i; - - for_each_cpu(i) - if (mt_info[i].valid && mt_info[i].socket_id == - cpu_data(cpu)->socket_id) - mt_info[i].valid = 0; -} - -static inline void clear_cpu_sibling_map(int cpu) { int i; @@ -626,15 +654,50 @@ remove_siblinginfo(int cpu) /* remove it from all sibling map's */ clear_cpu_sibling_map(cpu); +} + +extern void fixup_irqs(void); - /* if this cpu is the last in the core group, remove all its info - * from mt_info structure +int migrate_platform_irqs(unsigned int cpu) +{ + int new_cpei_cpu; + irq_desc_t *desc = NULL; + cpumask_t mask; + int retval = 0; + + /* + * dont permit CPEI target to removed. */ - if (last) - remove_from_mtinfo(cpu); + if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { + printk ("CPU (%d) is CPEI Target\n", cpu); + if (can_cpei_retarget()) { + /* + * Now re-target the CPEI to a different processor + */ + new_cpei_cpu = any_online_cpu(cpu_online_map); + mask = cpumask_of_cpu(new_cpei_cpu); + set_cpei_target_cpu(new_cpei_cpu); + desc = irq_descp(ia64_cpe_irq); + /* + * Switch for now, immediatly, we need to do fake intr + * as other interrupts, but need to study CPEI behaviour with + * polling before making changes. + */ + if (desc) { + desc->handler->disable(ia64_cpe_irq); + desc->handler->set_affinity(ia64_cpe_irq, mask); + desc->handler->enable(ia64_cpe_irq); + printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu); + } + } + if (!desc) { + printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); + retval = -EBUSY; + } + } + return retval; } -extern void fixup_irqs(void); /* must be called with cpucontrol mutex held */ int __cpu_disable(void) { @@ -643,8 +706,17 @@ int __cpu_disable(void) /* * dont permit boot processor for now */ - if (cpu == 0) - return -EBUSY; + if (cpu == 0 && !bsp_remove_ok) { + printk ("Your platform does not support removal of BSP\n"); + return (-EBUSY); + } + + cpu_clear(cpu, cpu_online_map); + + if (migrate_platform_irqs(cpu)) { + cpu_set(cpu, cpu_online_map); + return (-EBUSY); + } remove_siblinginfo(cpu); cpu_clear(cpu, cpu_online_map); @@ -776,40 +848,6 @@ init_smp_config(void) ia64_sal_strerror(sal_ret)); } -static inline int __devinit -check_for_mtinfo_index(void) -{ - int i; - - for_each_cpu(i) - if (!mt_info[i].valid) - return i; - - return -1; -} - -/* - * Search the mt_info to find out if this socket's cid/tid information is - * cached or not. If the socket exists, fill in the core_id and thread_id - * in cpuinfo - */ -static int __devinit -check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c) -{ - int i; - __u32 sid = c->socket_id; - - for_each_cpu(i) { - if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address - && mt_info[i].socket_id == sid) { - c->core_id = mt_info[i].core_id; - c->thread_id = mt_info[i].thread_id; - return 1; /* not a new socket */ - } - } - return 0; -} - /* * identify_siblings(cpu) gets called from identify_cpu. This populates the * information related to logical execution units in per_cpu_data structure. @@ -819,14 +857,12 @@ identify_siblings(struct cpuinfo_ia64 *c) { s64 status; u16 pltid; - u64 proc_fixed_addr; - int count, i; pal_logical_to_physical_t info; if (smp_num_cpucores == 1 && smp_num_siblings == 1) return; - if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) { + if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) { printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", status); return; @@ -835,47 +871,12 @@ identify_siblings(struct cpuinfo_ia64 *c) printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status); return; } - if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) { - printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status); - return; - } c->socket_id = (pltid << 8) | info.overview_ppid; c->cores_per_socket = info.overview_cpp; c->threads_per_core = info.overview_tpc; - count = c->num_log = info.overview_num_log; - - /* If the thread and core id information is already cached, then - * we will simply update cpu_info and return. Otherwise, we will - * do the PAL calls and cache core and thread id's of all the siblings. - */ - if (check_for_new_socket(proc_fixed_addr, c)) - return; - - for (i = 0; i < count; i++) { - int index; - - if (i && (status = ia64_pal_logical_to_phys(i, &info)) - != PAL_STATUS_SUCCESS) { - printk(KERN_ERR "ia64_pal_logical_to_phys failed" - " with %ld\n", status); - return; - } - if (info.log2_la == proc_fixed_addr) { - c->core_id = info.log1_cid; - c->thread_id = info.log1_tid; - } + c->num_log = info.overview_num_log; - index = check_for_mtinfo_index(); - /* We will not do the mt_info caching optimization in this case. - */ - if (index < 0) - continue; - - mt_info[index].valid = 1; - mt_info[index].socket_id = c->socket_id; - mt_info[index].core_id = info.log1_cid; - mt_info[index].thread_id = info.log1_tid; - mt_info[index].proc_fixed_addr = info.log2_la; - } + c->core_id = info.log1_cid; + c->thread_id = info.log1_tid; } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 307d01e15b2..49958904045 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -32,7 +32,7 @@ extern unsigned long wall_jiffies; -#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ +volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ @@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) new_itm += local_cpu_data->itm_delta; - if (smp_processor_id() == TIME_KEEPER_ID) { + if (smp_processor_id() == time_keeper_id) { /* * Here we are in the timer irq handler. We have irqs locally * disabled, but we don't know if the timer_bh is running on @@ -188,7 +188,7 @@ ia64_init_itm (void) itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; - printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, " + printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, " "ITC freq=%lu.%03luMHz", smp_processor_id(), platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); @@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = { .name = "timer" }; +void __devinit ia64_disable_timer(void) +{ + ia64_set_itv(1 << 16); +} + void __init time_init (void) { diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 6e5eea19fa6..b47476d655f 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -9,6 +9,8 @@ * 2002/08/07 Erich Focht <efocht@ess.nec.de> * Populate cpu entries in sysfs for non-numa systems as well * Intel Corporation - Ashok Raj + * 02/27/2006 Zhang, Yanmin + * Populate cpu cache entries in sysfs for cpu cache info */ #include <linux/config.h> @@ -19,6 +21,7 @@ #include <linux/init.h> #include <linux/bootmem.h> #include <linux/nodemask.h> +#include <linux/notifier.h> #include <asm/mmzone.h> #include <asm/numa.h> #include <asm/cpu.h> @@ -36,7 +39,7 @@ int arch_register_cpu(int num) parent = &sysfs_nodes[cpu_to_node(num)]; #endif /* CONFIG_NUMA */ -#ifdef CONFIG_ACPI +#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* * If CPEI cannot be re-targetted, and this is * CPEI target, then dont create the control file @@ -101,3 +104,367 @@ out: } subsys_initcall(topology_init); + + +/* + * Export cpu cache information through sysfs + */ + +/* + * A bunch of string array to get pretty printing + */ +static const char *cache_types[] = { + "", /* not used */ + "Instruction", + "Data", + "Unified" /* unified */ +}; + +static const char *cache_mattrib[]={ + "WriteThrough", + "WriteBack", + "", /* reserved */ + "" /* reserved */ +}; + +struct cache_info { + pal_cache_config_info_t cci; + cpumask_t shared_cpu_map; + int level; + int type; + struct kobject kobj; +}; + +struct cpu_cache_info { + struct cache_info *cache_leaves; + int num_cache_leaves; + struct kobject kobj; +}; + +static struct cpu_cache_info all_cpu_cache_info[NR_CPUS]; +#define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y]) + +#ifdef CONFIG_SMP +static void cache_shared_cpu_map_setup( unsigned int cpu, + struct cache_info * this_leaf) +{ + pal_cache_shared_info_t csi; + int num_shared, i = 0; + unsigned int j; + + if (cpu_data(cpu)->threads_per_core <= 1 && + cpu_data(cpu)->cores_per_socket <= 1) { + cpu_set(cpu, this_leaf->shared_cpu_map); + return; + } + + if (ia64_pal_cache_shared_info(this_leaf->level, + this_leaf->type, + 0, + &csi) != PAL_STATUS_SUCCESS) + return; + + num_shared = (int) csi.num_shared; + do { + for_each_cpu(j) + if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id + && cpu_data(j)->core_id == csi.log1_cid + && cpu_data(j)->thread_id == csi.log1_tid) + cpu_set(j, this_leaf->shared_cpu_map); + + i++; + } while (i < num_shared && + ia64_pal_cache_shared_info(this_leaf->level, + this_leaf->type, + i, + &csi) == PAL_STATUS_SUCCESS); +} +#else +static void cache_shared_cpu_map_setup(unsigned int cpu, + struct cache_info * this_leaf) +{ + cpu_set(cpu, this_leaf->shared_cpu_map); + return; +} +#endif + +static ssize_t show_coherency_line_size(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size); +} + +static ssize_t show_ways_of_associativity(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc); +} + +static ssize_t show_attributes(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, + "%s\n", + cache_mattrib[this_leaf->cci.pcci_cache_attr]); +} + +static ssize_t show_size(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024); +} + +static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf) +{ + unsigned number_of_sets = this_leaf->cci.pcci_cache_size; + number_of_sets /= this_leaf->cci.pcci_assoc; + number_of_sets /= 1 << this_leaf->cci.pcci_line_size; + + return sprintf(buf, "%u\n", number_of_sets); +} + +static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) +{ + ssize_t len; + cpumask_t shared_cpu_map; + + cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map); + len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map); + len += sprintf(buf+len, "\n"); + return len; +} + +static ssize_t show_type(struct cache_info *this_leaf, char *buf) +{ + int type = this_leaf->type + this_leaf->cci.pcci_unified; + return sprintf(buf, "%s\n", cache_types[type]); +} + +static ssize_t show_level(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->level); +} + +struct cache_attr { + struct attribute attr; + ssize_t (*show)(struct cache_info *, char *); + ssize_t (*store)(struct cache_info *, const char *, size_t count); +}; + +#ifdef define_one_ro + #undef define_one_ro +#endif +#define define_one_ro(_name) \ + static struct cache_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(level); +define_one_ro(type); +define_one_ro(coherency_line_size); +define_one_ro(ways_of_associativity); +define_one_ro(size); +define_one_ro(number_of_sets); +define_one_ro(shared_cpu_map); +define_one_ro(attributes); + +static struct attribute * cache_default_attrs[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &ways_of_associativity.attr, + &attributes.attr, + &size.attr, + &number_of_sets.attr, + &shared_cpu_map.attr, + NULL +}; + +#define to_object(k) container_of(k, struct cache_info, kobj) +#define to_attr(a) container_of(a, struct cache_attr, attr) + +static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * buf) +{ + struct cache_attr *fattr = to_attr(attr); + struct cache_info *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->show ? fattr->show(this_leaf, buf) : 0; + return ret; +} + +static struct sysfs_ops cache_sysfs_ops = { + .show = cache_show +}; + +static struct kobj_type cache_ktype = { + .sysfs_ops = &cache_sysfs_ops, + .default_attrs = cache_default_attrs, +}; + +static struct kobj_type cache_ktype_percpu_entry = { + .sysfs_ops = &cache_sysfs_ops, +}; + +static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu) +{ + if (all_cpu_cache_info[cpu].cache_leaves) { + kfree(all_cpu_cache_info[cpu].cache_leaves); + all_cpu_cache_info[cpu].cache_leaves = NULL; + } + all_cpu_cache_info[cpu].num_cache_leaves = 0; + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + + return; +} + +static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu) +{ + u64 i, levels, unique_caches; + pal_cache_config_info_t cci; + int j; + s64 status; + struct cache_info *this_cache; + int num_cache_leaves = 0; + + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return -1; + } + + this_cache=kzalloc(sizeof(struct cache_info)*unique_caches, + GFP_KERNEL); + if (this_cache == NULL) + return -ENOMEM; + + for (i=0; i < levels; i++) { + for (j=2; j >0 ; j--) { + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != + PAL_STATUS_SUCCESS) + continue; + + this_cache[num_cache_leaves].cci = cci; + this_cache[num_cache_leaves].level = i + 1; + this_cache[num_cache_leaves].type = j; + + cache_shared_cpu_map_setup(cpu, + &this_cache[num_cache_leaves]); + num_cache_leaves ++; + } + } + + all_cpu_cache_info[cpu].cache_leaves = this_cache; + all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves; + + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + + return 0; +} + +/* Add cache interface for CPU device */ +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i, j; + struct cache_info *this_object; + int retval = 0; + cpumask_t oldmask; + + if (all_cpu_cache_info[cpu].kobj.parent) + return 0; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (unlikely(retval)) + return retval; + + retval = cpu_cache_sysfs_init(cpu); + set_cpus_allowed(current, oldmask); + if (unlikely(retval < 0)) + return retval; + + all_cpu_cache_info[cpu].kobj.parent = &sys_dev->kobj; + kobject_set_name(&all_cpu_cache_info[cpu].kobj, "%s", "cache"); + all_cpu_cache_info[cpu].kobj.ktype = &cache_ktype_percpu_entry; + retval = kobject_register(&all_cpu_cache_info[cpu].kobj); + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { + this_object = LEAF_KOBJECT_PTR(cpu,i); + this_object->kobj.parent = &all_cpu_cache_info[cpu].kobj; + kobject_set_name(&(this_object->kobj), "index%1lu", i); + this_object->kobj.ktype = &cache_ktype; + retval = kobject_register(&(this_object->kobj)); + if (unlikely(retval)) { + for (j = 0; j < i; j++) { + kobject_unregister( + &(LEAF_KOBJECT_PTR(cpu,j)->kobj)); + } + kobject_unregister(&all_cpu_cache_info[cpu].kobj); + cpu_cache_sysfs_exit(cpu); + break; + } + } + return retval; +} + +/* Remove cache interface for CPU device */ +static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i; + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) + kobject_unregister(&(LEAF_KOBJECT_PTR(cpu,i)->kobj)); + + if (all_cpu_cache_info[cpu].kobj.parent) { + kobject_unregister(&all_cpu_cache_info[cpu].kobj); + memset(&all_cpu_cache_info[cpu].kobj, + 0, + sizeof(struct kobject)); + } + + cpu_cache_sysfs_exit(cpu); + + return 0; +} + +/* + * When a cpu is hot-plugged, do a check and initiate + * cache kobject if necessary + */ +static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block cache_cpu_notifier = +{ + .notifier_call = cache_cpu_callback +}; + +static int __cpuinit cache_sysfs_init(void) +{ + int i; + + for_each_online_cpu(i) { + cache_cpu_callback(&cache_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + register_cpu_notifier(&cache_cpu_notifier); + + return 0; +} + +device_initcall(cache_sysfs_init); + diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index dabd6c32641..7c1ddc8ac44 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -30,19 +30,19 @@ extern spinlock_t timerlist_lock; fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); -struct notifier_block *ia64die_chain; +ATOMIC_NOTIFIER_HEAD(ia64die_chain); int register_die_notifier(struct notifier_block *nb) { - return notifier_chain_register(&ia64die_chain, nb); + return atomic_notifier_chain_register(&ia64die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); int unregister_die_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&ia64die_chain, nb); + return atomic_notifier_chain_unregister(&ia64die_chain, nb); } EXPORT_SYMBOL_GPL(unregister_die_notifier); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 73af6267d2e..783600fe52b 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -70,34 +70,18 @@ SECTIONS __stop___ex_table = .; } - .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) - { - __start___vtop_patchlist = .; - *(.data.patch.vtop) - __end___vtop_patchlist = .; - } - - .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) + /* MCA table */ + . = ALIGN(16); + __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) { - __start___mckinley_e9_bundles = .; - *(.data.patch.mckinley_e9) - __end___mckinley_e9_bundles = .; + __start___mca_table = .; + *(__mca_table) + __stop___mca_table = .; } /* Global data */ _data = .; -#if defined(CONFIG_IA64_GENERIC) - /* Machine Vector */ - . = ALIGN(16); - .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) - { - machvec_start = .; - *(.machvec) - machvec_end = .; - } -#endif - /* Unwind info & table: */ . = ALIGN(8); .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) @@ -154,6 +138,32 @@ SECTIONS *(.initcall7.init) __initcall_end = .; } + + .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) + { + __start___vtop_patchlist = .; + *(.data.patch.vtop) + __end___vtop_patchlist = .; + } + + .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) + { + __start___mckinley_e9_bundles = .; + *(.data.patch.mckinley_e9) + __end___mckinley_e9_bundles = .; + } + +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) + { + machvec_start = .; + *(.machvec) + machvec_end = .; + } +#endif + __con_initcall_start = .; .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { *(.con_initcall.init) } diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index ac64664a180..d8536a2c22a 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -6,7 +6,7 @@ obj-y := io.o lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ - bitop.o checksum.o clear_page.o csum_partial_copy.o \ + checksum.o clear_page.o csum_partial_copy.o \ clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ flush.o ip_fast_csum.o do_csum.o \ memset.o strlen.o diff --git a/arch/ia64/lib/bitop.c b/arch/ia64/lib/bitop.c deleted file mode 100644 index 82e299c8464..00000000000 --- a/arch/ia64/lib/bitop.c +++ /dev/null @@ -1,88 +0,0 @@ -#include <linux/compiler.h> -#include <linux/types.h> -#include <asm/intrinsics.h> -#include <linux/module.h> -#include <linux/bitops.h> - -/* - * Find next zero bit in a bitmap reasonably efficiently.. - */ - -int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset) -{ - unsigned long *p = ((unsigned long *) addr) + (offset >> 6); - unsigned long result = offset & ~63UL; - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 63UL; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (64-offset); - if (size < 64) - goto found_first; - if (~tmp) - goto found_middle; - size -= 64; - result += 64; - } - while (size & ~63UL) { - if (~(tmp = *(p++))) - goto found_middle; - result += 64; - size -= 64; - } - if (!size) - return result; - tmp = *p; -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* any bits zero? */ - return result + size; /* nope */ -found_middle: - return result + ffz(tmp); -} -EXPORT_SYMBOL(__find_next_zero_bit); - -/* - * Find next bit in a bitmap reasonably efficiently.. - */ -int __find_next_bit(const void *addr, unsigned long size, unsigned long offset) -{ - unsigned long *p = ((unsigned long *) addr) + (offset >> 6); - unsigned long result = offset & ~63UL; - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 63UL; - if (offset) { - tmp = *(p++); - tmp &= ~0UL << offset; - if (size < 64) - goto found_first; - if (tmp) - goto found_middle; - size -= 64; - result += 64; - } - while (size & ~63UL) { - if ((tmp = *(p++))) - goto found_middle; - result += 64; - size -= 64; - } - if (!size) - return result; - tmp = *p; - found_first: - tmp &= ~0UL >> (64-size); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ - found_middle: - return result + __ffs(tmp); -} -EXPORT_SYMBOL(__find_next_bit); diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile index d78d20f0a0f..bb0a01a8187 100644 --- a/arch/ia64/mm/Makefile +++ b/arch/ia64/mm/Makefile @@ -2,7 +2,7 @@ # Makefile for the ia64-specific parts of the memory manager. # -obj-y := init.o fault.o tlb.o extable.o +obj-y := init.o fault.o tlb.o extable.o ioremap.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NUMA) += numa.o diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index acaaec4e468..84fd1c14c8a 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -97,7 +97,7 @@ find_max_pfn (unsigned long start, unsigned long end, void *arg) * Find a place to put the bootmap and return its starting address in * bootmap_start. This address must be page-aligned. */ -int +static int __init find_bootmap_location (unsigned long start, unsigned long end, void *arg) { unsigned long needed = *(unsigned long *)arg; @@ -141,7 +141,7 @@ find_bootmap_location (unsigned long start, unsigned long end, void *arg) * Walk the EFI memory map and find usable memory for the system, taking * into account reserved areas. */ -void +void __init find_memory (void) { unsigned long bootmap_size; @@ -176,18 +176,20 @@ find_memory (void) * * Allocate and setup per-cpu data areas. */ -void * +void * __cpuinit per_cpu_init (void) { void *cpu_data; int cpu; + static int first_time=1; /* * get_free_pages() cannot be used before cpu_init() done. BSP * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls * get_zeroed_page(). */ - if (smp_processor_id() == 0) { + if (first_time) { + first_time=0; cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); for (cpu = 0; cpu < NR_CPUS; cpu++) { @@ -226,7 +228,7 @@ count_dma_pages (u64 start, u64 end, void *arg) * Set up the page tables. */ -void +void __init paging_init (void) { unsigned long max_dma; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c87d6d1d581..ec9eeb89975 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -379,31 +379,6 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) } /** - * pgdat_insert - insert the pgdat into global pgdat_list - * @pgdat: the pgdat for a node. - */ -static void __init pgdat_insert(pg_data_t *pgdat) -{ - pg_data_t *prev = NULL, *next; - - for_each_pgdat(next) - if (pgdat->node_id < next->node_id) - break; - else - prev = next; - - if (prev) { - prev->pgdat_next = pgdat; - pgdat->pgdat_next = next; - } else { - pgdat->pgdat_next = pgdat_list; - pgdat_list = pgdat; - } - - return; -} - -/** * memory_less_nodes - allocate and initialize CPU only nodes pernode * information. */ @@ -525,15 +500,20 @@ void __init find_memory(void) * find_pernode_space() does most of this already, we just need to set * local_per_cpu_offset */ -void *per_cpu_init(void) +void __cpuinit *per_cpu_init(void) { int cpu; + static int first_time = 1; + if (smp_processor_id() != 0) return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; - for (cpu = 0; cpu < NR_CPUS; cpu++) - per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + if (first_time) { + first_time = 0; + for (cpu = 0; cpu < NR_CPUS; cpu++) + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + } return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } @@ -555,7 +535,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; @@ -740,11 +720,5 @@ void __init paging_init(void) pfn_offset, zholes_size); } - /* - * Make memory less nodes become a member of the known nodes. - */ - for_each_node_mask(node, memory_less_mask) - pgdat_insert(mem_data[node].pgdat); - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 2d13889d0a9..8d506710fdb 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -68,9 +68,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } /* - * This function checks for proper alignment of input addr and len parameters. + * Don't actually need to do any preparation, but need to make sure + * the address is in the right region. */ -int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +int prepare_hugepage_range(unsigned long addr, unsigned long len) { if (len & ~HPAGE_MASK) return -EINVAL; @@ -112,8 +113,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long floor, unsigned long ceiling) { /* - * This is called only when is_hugepage_only_range(addr,), - * and it follows that is_hugepage_only_range(end,) also. + * This is called to free hugetlb page tables. * * The offset of these addresses from the base of the hugetlb * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that @@ -125,9 +125,9 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, addr = htlbpage_to_page(addr); end = htlbpage_to_page(end); - if (is_hugepage_only_range(tlb->mm, floor, HPAGE_SIZE)) + if (REGION_NUMBER(floor) == RGN_HPAGE) floor = htlbpage_to_page(floor); - if (is_hugepage_only_range(tlb->mm, ceiling, HPAGE_SIZE)) + if (REGION_NUMBER(ceiling) == RGN_HPAGE) ceiling = htlbpage_to_page(ceiling); free_pgd_range(tlb, addr, end, floor, ceiling); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b38b6d213c1..cafa8776a53 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -109,6 +109,7 @@ lazy_mmu_prot_update (pte_t pte) { unsigned long addr; struct page *page; + unsigned long order; if (!pte_exec(pte)) return; /* not an executable page... */ @@ -119,7 +120,12 @@ lazy_mmu_prot_update (pte_t pte) if (test_bit(PG_arch_1, &page->flags)) return; /* i-cache is already coherent with d-cache */ - flush_icache_range(addr, addr + PAGE_SIZE); + if (PageCompound(page)) { + order = (unsigned long) (page[1].lru.prev); + flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT)); + } + else + flush_icache_range(addr, addr + PAGE_SIZE); set_bit(PG_arch_1, &page->flags); /* mark page as clean */ } @@ -197,7 +203,7 @@ free_initmem (void) eaddr = (unsigned long) ia64_imva(__init_end); while (addr < eaddr) { ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); + init_page_count(virt_to_page(addr)); free_page(addr); ++totalram_pages; addr += PAGE_SIZE; @@ -206,7 +212,7 @@ free_initmem (void) (__init_end - __init_begin) >> 10); } -void +void __init free_initrd_mem (unsigned long start, unsigned long end) { struct page *page; @@ -252,7 +258,7 @@ free_initrd_mem (unsigned long start, unsigned long end) continue; page = virt_to_page(start); ClearPageReserved(page); - set_page_count(page, 1); + init_page_count(page); free_page(start); ++totalram_pages; } @@ -261,7 +267,7 @@ free_initrd_mem (unsigned long start, unsigned long end) /* * This installs a clean page in the kernel's page table. */ -struct page * +static struct page * __init put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) { pgd_t *pgd; @@ -294,7 +300,7 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) return page; } -static void +static void __init setup_gate (void) { struct page *page; @@ -411,7 +417,7 @@ ia64_mmu_init (void *my_cpu_data) #ifdef CONFIG_VIRTUAL_MEM_MAP -int +int __init create_mem_map_page_table (u64 start, u64 end, void *arg) { unsigned long address, start_page, end_page; @@ -519,7 +525,7 @@ ia64_pfn_valid (unsigned long pfn) } EXPORT_SYMBOL(ia64_pfn_valid); -int +int __init find_largest_hole (u64 start, u64 end, void *arg) { u64 *max_gap = arg; @@ -535,7 +541,7 @@ find_largest_hole (u64 start, u64 end, void *arg) } #endif /* CONFIG_VIRTUAL_MEM_MAP */ -static int +static int __init count_reserved_pages (u64 start, u64 end, void *arg) { unsigned long num_reserved = 0; @@ -556,7 +562,7 @@ count_reserved_pages (u64 start, u64 end, void *arg) * purposes. */ -static int nolwsys; +static int nolwsys __initdata; static int __init nolwsys_setup (char *s) @@ -567,7 +573,7 @@ nolwsys_setup (char *s) __setup("nolwsys", nolwsys_setup); -void +void __init mem_init (void) { long reserved_pages, codesize, datasize, initsize; @@ -600,7 +606,7 @@ mem_init (void) kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); kclist_add(&kcore_kernel, _stext, _end - _stext); - for_each_pgdat(pgdat) + for_each_online_pgdat(pgdat) if (pgdat->bdata->node_bootmem_map) totalram_pages += free_all_bootmem_node(pgdat); @@ -640,7 +646,7 @@ mem_init (void) void online_page(struct page *page) { ClearPageReserved(page); - set_page_count(page, 1); + init_page_count(page); __free_page(page); totalram_pages++; num_physpages++; diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c new file mode 100644 index 00000000000..643ccc6960c --- /dev/null +++ b/arch/ia64/mm/ioremap.c @@ -0,0 +1,43 @@ +/* + * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas <bjorn.helgaas@hp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/compiler.h> +#include <linux/module.h> +#include <linux/efi.h> +#include <asm/io.h> + +static inline void __iomem * +__ioremap (unsigned long offset, unsigned long size) +{ + return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset); +} + +void __iomem * +ioremap (unsigned long offset, unsigned long size) +{ + if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB)) + return phys_to_virt(offset); + + if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC)) + return __ioremap(offset, size); + + /* + * Someday this should check ACPI resources so we + * can do the right thing for hot-plugged regions. + */ + return __ioremap(offset, size); +} +EXPORT_SYMBOL(ioremap); + +void __iomem * +ioremap_nocache (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size); +} +EXPORT_SYMBOL(ioremap_nocache); diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 6a4eec9113e..4dbbca0b5e9 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -156,17 +156,19 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, nbits = purge.max_bits; start &= ~((1UL << nbits) - 1); -# ifdef CONFIG_SMP - platform_global_tlb_purge(mm, start, end, nbits); -# else preempt_disable(); +#ifdef CONFIG_SMP + if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) { + platform_global_tlb_purge(mm, start, end, nbits); + preempt_enable(); + return; + } +#endif do { ia64_ptcl(start, (nbits<<2)); start += (1UL << nbits); } while (start < end); preempt_enable(); -# endif - ia64_srlz_i(); /* srlz.i implies srlz.d */ } EXPORT_SYMBOL(flush_tlb_range); diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 3e9b4eea741..ab9c48c8801 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile @@ -10,7 +10,8 @@ CPPFLAGS += -I$(srctree)/arch/ia64/sn/include obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ - huberror.o io_init.o iomv.o klconflib.o sn2/ + huberror.o io_init.o iomv.o klconflib.o pio_phys.o \ + sn2/ obj-$(CONFIG_IA64_GENERIC) += machvec.o obj-$(CONFIG_SGI_TIOCX) += tiocx.o obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 1f11db470d9..e952ef4f6d9 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -36,7 +36,7 @@ static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) nodepda_t *tmp_nodepda; if (nasid_to_cnodeid(nasid) == -1) - return (struct bteinfo_s *)NULL;; + return (struct bteinfo_s *)NULL; tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); return &tmp_nodepda->bte_if[interface]; diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index dfb3f290237..5101ac46264 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -13,6 +13,8 @@ #include <asm/sn/sn_feature_sets.h> #include <asm/sn/geo.h> #include <asm/sn/io.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> #include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> @@ -710,9 +712,36 @@ cnodeid_get_geoid(cnodeid_t cnode) return hubdev->hdi_geoid; } +void sn_generate_path(struct pci_bus *pci_bus, char *address) +{ + nasid_t nasid; + cnodeid_t cnode; + geoid_t geoid; + moduleid_t moduleid; + u16 bricktype; + + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnode); + moduleid = geo_module(geoid); + + sprintf(address, "module_%c%c%c%c%.2d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)), + MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid)); + + /* Tollhouse requires slot id to be displayed */ + bricktype = MODULE_GET_BTYPE(moduleid); + if ((bricktype == L1_BRICKTYPE_191010) || + (bricktype == L1_BRICKTYPE_1932)) + sprintf(address, "%s^%d", address, geo_slot(geoid)); +} + subsys_initcall(sn_pci_init); EXPORT_SYMBOL(sn_pci_fixup_slot); EXPORT_SYMBOL(sn_pci_unfixup_slot); EXPORT_SYMBOL(sn_pci_controller_fixup); EXPORT_SYMBOL(sn_bus_store_sysdata); EXPORT_SYMBOL(sn_bus_free_sysdata); +EXPORT_SYMBOL(sn_generate_path); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index c373113d073..c265e02f503 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -350,9 +350,6 @@ static void force_interrupt(int irq) static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) { u64 regval; - int irr_reg_num; - int irr_bit; - u64 irr_reg; struct pcidev_info *pcidev_info; struct pcibus_info *pcibus_info; @@ -373,23 +370,7 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) pdi_pcibus_info; regval = pcireg_intr_status_get(pcibus_info); - irr_reg_num = irq_to_vector(irq) / 64; - irr_bit = irq_to_vector(irq) % 64; - switch (irr_reg_num) { - case 0: - irr_reg = ia64_getreg(_IA64_REG_CR_IRR0); - break; - case 1: - irr_reg = ia64_getreg(_IA64_REG_CR_IRR1); - break; - case 2: - irr_reg = ia64_getreg(_IA64_REG_CR_IRR2); - break; - case 3: - irr_reg = ia64_getreg(_IA64_REG_CR_IRR3); - break; - } - if (!test_bit(irr_bit, &irr_reg)) { + if (!ia64_get_irr(irq_to_vector(irq))) { if (!test_bit(irq, pda->sn_in_service_ivecs)) { regval &= 0xff; if (sn_irq_info->irq_int_bit & regval & diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S new file mode 100644 index 00000000000..3c7d48d6ecb --- /dev/null +++ b/arch/ia64/sn/kernel/pio_phys.S @@ -0,0 +1,71 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + * + * This file contains macros used to access MMR registers via + * uncached physical addresses. + * pio_phys_read_mmr - read an MMR + * pio_phys_write_mmr - write an MMR + * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 + * Second MMR will be skipped if address is NULL + * + * Addresses passed to these routines should be uncached physical addresses + * ie., 0x80000.... + */ + + + +#include <asm/asmmacro.h> +#include <asm/page.h> + +GLOBAL_ENTRY(pio_phys_read_mmr) + .prologue + .regstk 1,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + ld8.acq r8=[r32] + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_read_mmr) + +GLOBAL_ENTRY(pio_phys_write_mmr) + .prologue + .regstk 2,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + st8.rel [r32]=r33 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_write_mmr) + +GLOBAL_ENTRY(pio_atomic_phys_write_mmrs) + .prologue + .regstk 4,0,0,0 + .body + mov r2=psr + cmp.ne p9,p0=r34,r0; + rsm psr.i | psr.dt | psr.ic + ;; + srlz.d + st8.rel [r32]=r33 +(p9) st8.rel [r34]=r35 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_atomic_phys_write_mmrs) + + diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 5b84836c217..30988dfbddf 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. */ #include <linux/config.h> @@ -327,10 +327,11 @@ sn_scan_pcdp(void) struct pcdp_interface_pci if_pci; extern struct efi efi; - pcdp = efi.hcdp; - if (! pcdp) + if (efi.hcdp == EFI_INVALID_TABLE_ADDR) return; /* no hcdp/pcdp table */ + pcdp = __va(efi.hcdp); + if (pcdp->rev < 3) return; /* only support PCDP (rev >= 3) */ @@ -498,6 +499,7 @@ void __init sn_setup(char **cmdline_p) * for sn. */ pm_power_off = ia64_sn_power_down; + current->thread.flags |= IA64_THREAD_MIGRATION; } /** @@ -660,7 +662,8 @@ void __init sn_cpu_init(void) SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; u64 *pio; pio = is_shub1() ? pio1 : pio2; - pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); + pda->pio_write_status_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; } diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index b2e1e746b47..d9d306c79f2 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void) return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; } +/** + * sn_migrate - SN-specific task migration actions + * @task: Task being migrated to new CPU + * + * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. + * Context switching user threads which have memory-mapped MMIO may cause + * PIOs to issue from seperate CPUs, thus the PIO writes must be drained + * from the previous CPU's Shub before execution resumes on the new CPU. + */ +void sn_migrate(struct task_struct *task) +{ + pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); + volatile unsigned long *adr = last_pda->pio_write_status_addr; + unsigned long val = last_pda->pio_write_status_val; + + /* Drain PIO writes from old CPU's Shub */ + while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) + != val)) + cpu_relax(); +} + void sn_tlb_migrate_finish(struct mm_struct *mm) { /* flush_tlb_mm is inefficient if more than 1 users of mm */ diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 70db21f3df2..d917afa30b2 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -110,7 +110,11 @@ static int sn_hwperf_geoid_to_cnode(char *location) if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) return -1; - for_each_node(cnode) { + /* + * FIXME: replace with cleaner for_each_XXX macro which addresses + * both compute and IO nodes once ACPI3.0 is available. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { geoid = cnodeid_get_geoid(cnode); module_id = geo_module(geoid); this_rack = MODULE_GET_RACK(module_id); @@ -605,7 +609,7 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK; if (cpu != SN_HWPERF_ARG_ANY_CPU) { - if (cpu >= num_online_cpus() || !cpu_online(cpu)) { + if (cpu >= NR_CPUS || !cpu_online(cpu)) { r = -EINVAL; goto out; } diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index c686d9c12f7..5100261310f 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -93,19 +93,22 @@ static int coherence_id_open(struct inode *inode, struct file *file) static struct proc_dir_entry *sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent, int (*openfunc)(struct inode *, struct file *), - int (*releasefunc)(struct inode *, struct file *)) + int (*releasefunc)(struct inode *, struct file *), + ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *)) { struct proc_dir_entry *e = create_proc_entry(name, 0444, parent); if (e) { - e->proc_fops = (struct file_operations *)kmalloc( - sizeof(struct file_operations), GFP_KERNEL); - if (e->proc_fops) { - memset(e->proc_fops, 0, sizeof(struct file_operations)); - e->proc_fops->open = openfunc; - e->proc_fops->read = seq_read; - e->proc_fops->llseek = seq_lseek; - e->proc_fops->release = releasefunc; + struct file_operations *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (f) { + f->open = openfunc; + f->read = seq_read; + f->llseek = seq_lseek; + f->release = releasefunc; + f->write = write; + e->proc_fops = f; } } @@ -119,31 +122,29 @@ extern int sn_topology_release(struct inode *, struct file *); void register_sn_procfs(void) { static struct proc_dir_entry *sgi_proc_dir = NULL; - struct proc_dir_entry *e; BUG_ON(sgi_proc_dir != NULL); if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) return; sn_procfs_create_entry("partition_id", sgi_proc_dir, - partition_id_open, single_release); + partition_id_open, single_release, NULL); sn_procfs_create_entry("system_serial_number", sgi_proc_dir, - system_serial_number_open, single_release); + system_serial_number_open, single_release, NULL); sn_procfs_create_entry("licenseID", sgi_proc_dir, - licenseID_open, single_release); + licenseID_open, single_release, NULL); - e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, - sn_force_interrupt_open, single_release); - if (e) - e->proc_fops->write = sn_force_interrupt_write_proc; + sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, + sn_force_interrupt_open, single_release, + sn_force_interrupt_write_proc); sn_procfs_create_entry("coherence_id", sgi_proc_dir, - coherence_id_open, single_release); + coherence_id_open, single_release, NULL); sn_procfs_create_entry("sn_topology", sgi_proc_dir, - sn_topology_open, sn_topology_release); + sn_topology_open, sn_topology_release, NULL); } #endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index 99cb28e7429..feaf1a6e810 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -369,9 +369,15 @@ static void tio_corelet_reset(nasid_t nasid, int corelet) static int is_fpga_tio(int nasid, int *bt) { - int ioboard_type; + u16 ioboard_type; + s64 rc; - ioboard_type = ia64_sn_sysctl_ioboard_get(nasid); + rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type); + if (rc) { + printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n", + rc); + return 0; + } switch (ioboard_type) { case L1_BRICKTYPE_SA: diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index cdf6856ce08..d0abddd9ffe 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -21,7 +21,6 @@ #include <linux/sched.h> #include <linux/cache.h> #include <linux/interrupt.h> -#include <linux/slab.h> #include <linux/mutex.h> #include <linux/completion.h> #include <asm/sn/bte.h> @@ -30,6 +29,31 @@ /* + * Guarantee that the kzalloc'd memory is cacheline aligned. + */ +static void * +xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kzalloc will give us cachline aligned memory by default */ + *base = kzalloc(size, flags); + if (*base == NULL) { + return NULL; + } + if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { + return *base; + } + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kzalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) { + return NULL; + } + return (void *) L1_CACHE_ALIGN((u64) *base); +} + + +/* * Set up the initial values for the XPartition Communication channels. */ static void @@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part) * Allocate all of the channel structures as a contiguous chunk of * memory. */ - part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, + part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, GFP_KERNEL); if (part->channels == NULL) { dev_err(xpc_chan, "can't get memory for channels\n"); return xpcNoMemory; } - memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS); part->nchannels = XPC_NCHANNELS; /* allocate all the required GET/PUT values */ - part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, + part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, &part->local_GPs_base); if (part->local_GPs == NULL) { kfree(part->channels); @@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part) "values\n"); return xpcNoMemory; } - memset(part->local_GPs, 0, XPC_GP_SIZE); - part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, + part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, &part->remote_GPs_base); if (part->remote_GPs == NULL) { - kfree(part->channels); - part->channels = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; dev_err(xpc_chan, "can't get memory for remote get/put " "values\n"); + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->channels); + part->channels = NULL; return xpcNoMemory; } - memset(part->remote_GPs, 0, XPC_GP_SIZE); /* allocate all the required open and close args */ - part->local_openclose_args = xpc_kmalloc_cacheline_aligned( + part->local_openclose_args = xpc_kzalloc_cacheline_aligned( XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, &part->local_openclose_args_base); if (part->local_openclose_args == NULL) { - kfree(part->channels); - part->channels = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; + dev_err(xpc_chan, "can't get memory for local connect args\n"); kfree(part->remote_GPs_base); part->remote_GPs = NULL; - dev_err(xpc_chan, "can't get memory for local connect args\n"); + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->channels); + part->channels = NULL; return xpcNoMemory; } - memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); - part->remote_openclose_args = xpc_kmalloc_cacheline_aligned( + part->remote_openclose_args = xpc_kzalloc_cacheline_aligned( XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, &part->remote_openclose_args_base); if (part->remote_openclose_args == NULL) { - kfree(part->channels); - part->channels = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; + dev_err(xpc_chan, "can't get memory for remote connect args\n"); kfree(part->local_openclose_args_base); part->local_openclose_args = NULL; - dev_err(xpc_chan, "can't get memory for remote connect args\n"); + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->channels); + part->channels = NULL; return xpcNoMemory; } - memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); xpc_initialize_channels(part, partid); @@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part) ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, part->IPI_owner, (void *) (u64) partid); if (ret != 0) { - kfree(part->channels); - part->channels = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_openclose_args_base); - part->remote_openclose_args = NULL; dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " "errno=%d\n", -ret); + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->local_openclose_args_base); + part->local_openclose_args = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->channels); + part->channels = NULL; return xpcLackOfResources; } @@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch) for (nentries = ch->local_nentries; nentries > 0; nentries--) { nbytes = nentries * ch->msg_size; - ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, + ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch->local_msgqueue_base); if (ch->local_msgqueue == NULL) { continue; } - memset(ch->local_msgqueue, 0, nbytes); nbytes = nentries * sizeof(struct xpc_notify); - ch->notify_queue = kmalloc(nbytes, GFP_KERNEL); + ch->notify_queue = kzalloc(nbytes, GFP_KERNEL); if (ch->notify_queue == NULL) { kfree(ch->local_msgqueue_base); ch->local_msgqueue = NULL; continue; } - memset(ch->notify_queue, 0, nbytes); spin_lock_irqsave(&ch->lock, irq_flags); if (nentries < ch->local_nentries) { @@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch) for (nentries = ch->remote_nentries; nentries > 0; nentries--) { nbytes = nentries * ch->msg_size; - ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, + ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch->remote_msgqueue_base); if (ch->remote_msgqueue == NULL) { continue; } - memset(ch->remote_msgqueue, 0, nbytes); spin_lock_irqsave(&ch->lock, irq_flags); if (nentries < ch->remote_nentries) { diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 8cbf1643257..99b123a6421 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -52,7 +52,6 @@ #include <linux/syscalls.h> #include <linux/cache.h> #include <linux/interrupt.h> -#include <linux/slab.h> #include <linux/delay.h> #include <linux/reboot.h> #include <linux/completion.h> diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 88a730e6cfd..94211429fd0 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE + /* + * Guarantee that the kmalloc'd memory is cacheline aligned. + */ +static void * +xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kmalloc will give us cachline aligned memory by default */ + *base = kmalloc(size, flags); + if (*base == NULL) { + return NULL; + } + if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { + return *base; + } + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kmalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) { + return NULL; + } + return (void *) L1_CACHE_ALIGN((u64) *base); +} + + +/* * Given a nasid, get the physical address of the partition's reserved page * for that nasid. This function returns 0 on any error. */ @@ -1038,13 +1063,12 @@ xpc_discovery(void) remote_vars = (struct xpc_vars *) remote_rp; - discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words, + discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words, GFP_KERNEL); if (discovered_nasids == NULL) { kfree(remote_rp_base); return; } - memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words); rp = (struct xpc_rsvd_page *) xpc_rsvd_page; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 98f716bd92f..ab1211ef017 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -74,6 +74,22 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft) return (int)ret_stuff.v0; } +u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus) +{ + s64 rc; + u16 ioboard; + nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + + rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard); + if (rc) { + printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n", + rc); + return 0; + } + + return ioboard; +} + /* * PCI Bridge Error interrupt handler. Gets invoked whenever a PCI * bridge sends an error interrupt. @@ -255,3 +271,4 @@ pcibr_init_provider(void) EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable); EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable); +EXPORT_SYMBOL_GPL(sn_ioboard_to_pci_bus); diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 7571a402552..be017691296 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -377,7 +377,7 @@ tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size) struct tioca_dmamap *ca_dmamap; void *map; unsigned long flags; - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev); tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private; diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index e52831ed93e..fa073cc4b56 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -15,6 +15,124 @@ #include <asm/sn/pcidev.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/tioce_provider.h> +#include <asm/sn/sn2/sn_hwperf.h> + +/* + * 1/26/2006 + * + * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe + * (taken from the above PV) before and after accessing tioce internal MMR's + * to avoid tioce lockups. + * + * The recipe as taken from the PV: + * + * if(mmr address < 0x45000) { + * if(mmr address == 0 or 0x80) + * mmr wrt or read address 0xc0 + * else if(mmr address == 0x148 or 0x200) + * mmr wrt or read address 0x28 + * else + * mmr wrt or read address 0x158 + * + * do desired mmr access (rd or wrt) + * + * if(mmr address == 0x100) + * mmr wrt or read address 0x38 + * mmr wrt or read address 0xb050 + * } else + * do desired mmr access + * + * According to hw, we can use reads instead of writes to the above addres + * + * Note this WAR can only to be used for accessing internal MMR's in the + * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the + * "Local CE Registers and Memories" and "PCI Compatible Config Space" address + * spaces from table 2-1 of the "CE Programmer's Reference Overview" document. + * + * All registers defined in struct tioce will meet that criteria. + */ + +static void inline +tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr) +{ + u64 mmr_base; + u64 mmr_offset; + + if (kern->ce_common->ce_rev != TIOCE_REV_A) + return; + + mmr_base = kern->ce_common->ce_pcibus.bs_base; + mmr_offset = (u64)mmr_addr - mmr_base; + + if (mmr_offset < 0x45000) { + u64 mmr_war_offset; + + if (mmr_offset == 0 || mmr_offset == 0x80) + mmr_war_offset = 0xc0; + else if (mmr_offset == 0x148 || mmr_offset == 0x200) + mmr_war_offset = 0x28; + else + mmr_war_offset = 0x158; + + readq_relaxed((void *)(mmr_base + mmr_war_offset)); + } +} + +static void inline +tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) +{ + u64 mmr_base; + u64 mmr_offset; + + if (kern->ce_common->ce_rev != TIOCE_REV_A) + return; + + mmr_base = kern->ce_common->ce_pcibus.bs_base; + mmr_offset = (u64)mmr_addr - mmr_base; + + if (mmr_offset < 0x45000) { + if (mmr_offset == 0x100) + readq_relaxed((void *)(mmr_base + 0x38)); + readq_relaxed((void *)(mmr_base + 0xb050)); + } +} + +/* load mmr contents into a variable */ +#define tioce_mmr_load(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + *(varp) = readq_relaxed(mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store variable contents into mmr */ +#define tioce_mmr_store(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(*varp, mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store immediate value into mmr */ +#define tioce_mmr_storei(kern, mmrp, val) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(val, mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* set bits (immediate value) into mmr */ +#define tioce_mmr_seti(kern, mmrp, bits) do {\ + u64 tmp; \ + tioce_mmr_load(kern, mmrp, &tmp); \ + tmp |= (bits); \ + tioce_mmr_store(kern, mmrp, &tmp); \ +} while (0) + +/* clear bits (immediate value) into mmr */ +#define tioce_mmr_clri(kern, mmrp, bits) do { \ + u64 tmp; \ + tioce_mmr_load(kern, mmrp, &tmp); \ + tmp &= ~(bits); \ + tioce_mmr_store(kern, mmrp, &tmp); \ +} while (0) /** * Bus address ranges for the 5 flavors of TIOCE DMA @@ -62,9 +180,9 @@ #define TIOCE_ATE_M40 2 #define TIOCE_ATE_M40S 3 -#define KB(x) ((x) << 10) -#define MB(x) ((x) << 20) -#define GB(x) ((x) << 30) +#define KB(x) ((u64)(x) << 10) +#define MB(x) ((u64)(x) << 20) +#define GB(x) ((u64)(x) << 30) /** * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode @@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, int last; int entries; int nates; - int pagesize; + u64 pagesize; u64 *ate_shadow; u64 *ate_reg; u64 addr; @@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, ate = ATE_MAKE(addr, pagesize); ate_shadow[i + j] = ate; - writeq(ate, &ate_reg[i + j]); + tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); addr += pagesize; } @@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr) u64 tmp; ce_kern->ce_port[port].dirmap_shadow = ct_upper; - writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + ct_upper); tmp = ce_mmr->ce_ure_dir_map[port]; dma_ok = 1; } else @@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) if (TIOCE_D32_ADDR(bus_addr)) { if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { ce_kern->ce_port[port].dirmap_shadow = 0; - writeq(0, &ce_mmr->ce_ure_dir_map[port]); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + 0); } } else { struct tioce_dmamap *map; @@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) } else if (--map->refcnt == 0) { for (i = 0; i < map->ate_count; i++) { map->ate_shadow[i] = 0; - map->ate_hw[i] = 0; + tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0); } list_del(&map->ce_dmamap_list); @@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, spin_unlock_irqrestore(&ce_kern->ce_lock, flags); dma_map_done: - if (mapaddr & barrier) + if (mapaddr && barrier) mapaddr = tioce_dma_barrier(mapaddr, 1); return mapaddr; @@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt) soft->ce_pcibus.bs_persist_segment, soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); + if (ret_stuff.v0) + panic("tioce_error_intr_handler: Fatal TIOCE error"); + return IRQ_HANDLED; } /** + * tioce_reserve_m32 - reserve M32 ate's for the indicated address range + * @tioce_kernel: TIOCE context to reserve ate's for + * @base: starting bus address to reserve + * @limit: last bus address to reserve + * + * If base/limit falls within the range of bus space mapped through the + * M32 space, reserve the resources corresponding to the range. + */ +static void +tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) +{ + int ate_index, last_ate, ps; + struct tioce *ce_mmr; + + if (!TIOCE_M32_ADDR(base)) + return; + + ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; + ps = ce_kern->ce_ate3240_pagesize; + ate_index = ATE_PAGE(base, ps); + last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1; + + if (ate_index < 64) + ate_index = 64; + + while (ate_index <= last_ate) { + u64 ate; + + ate = ATE_MAKE(0xdeadbeef, ps); + ce_kern->ce_ate3240_shadow[ate_index] = ate; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], + ate); + ate_index++; + } +} + +/** * tioce_kern_init - init kernel structures related to a given TIOCE * @tioce_common: ptr to a cached tioce_common struct that originated in prom - */ static struct tioce_kernel * + */ +static struct tioce_kernel * tioce_kern_init(struct tioce_common *tioce_common) { int i; + int ps; + int dev; u32 tmp; + unsigned int seg, bus; struct tioce *tioce_mmr; struct tioce_kernel *tioce_kern; @@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common) * here to use pci_read_config_xxx() so use the raw_pci_ops vector. */ - raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment, - tioce_common->ce_pcibus.bs_persist_busnum, - PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp); + seg = tioce_common->ce_pcibus.bs_persist_segment; + bus = tioce_common->ce_pcibus.bs_persist_busnum; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp); tioce_kern->ce_port1_secondary = (u8) tmp; /* @@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common) */ tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; - __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK); - __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE); - tioce_kern->ce_ate3240_pagesize = KB(256); + tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_PAGESIZE_MASK); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_256K_PAGESIZE); + ps = tioce_kern->ce_ate3240_pagesize = KB(256); for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { tioce_kern->ce_ate40_shadow[i] = 0; - writeq(0, &tioce_mmr->ce_ure_ate40[i]); + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0); } for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { tioce_kern->ce_ate3240_shadow[i] = 0; - writeq(0, &tioce_mmr->ce_ure_ate3240[i]); + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0); + } + + /* + * Reserve ATE's corresponding to reserved address ranges. These + * include: + * + * Memory space covered by each PPB mem base/limit register + * Memory space covered by each PPB prefetch base/limit register + * + * These bus ranges are for pio (downstream) traffic only, and so + * cannot be used for DMA. + */ + + for (dev = 1; dev <= 2; dev++) { + u64 base, limit; + + /* mem base/limit */ + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_BASE, 2, &tmp); + base = (u64)tmp << 16; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_LIMIT, 2, &tmp); + limit = (u64)tmp << 16; + limit |= 0xfffffUL; + + if (base < limit) + tioce_reserve_m32(tioce_kern, base, limit); + + /* + * prefetch mem base/limit. The tioce ppb's have 64-bit + * decoders, so read the upper portions w/o checking the + * attributes. + */ + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_BASE, 2, &tmp); + base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_BASE_UPPER32, 4, &tmp); + base |= (u64)tmp << 32; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_LIMIT, 2, &tmp); + + limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + limit |= 0xfffffUL; + + raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_LIMIT_UPPER32, 4, &tmp); + limit |= (u64)tmp << 32; + + if ((base < limit) && TIOCE_M32_ADDR(base)) + tioce_reserve_m32(tioce_kern, base, limit); } return tioce_kern; @@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) { struct pcidev_info *pcidev_info; struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; struct tioce *ce_mmr; u64 force_int_val; @@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; + + /* + * TIOCE Rev A workaround (PV 945826), force an interrupt by writing + * the TIO_INTx register directly (1/26/2006) + */ + if (ce_common->ce_rev == TIOCE_REV_A) { + u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit); + u64 status; + + tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status); + if (status & int_bit_mask) { + u64 force_irq = (1 << 8) | sn_irq_info->irq_irq; + u64 ctalk = sn_irq_info->irq_xtalkaddr; + u64 nasid, offset; + + nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT; + offset = (ctalk & CTALK_NODE_OFFSET); + HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq); + } + + return; + } /* * irq_int_bit is originally set up by prom, and holds the interrupt @@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) default: return; } - writeq(force_int_val, &ce_mmr->ce_adm_force_int); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val); } /** @@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) { struct pcidev_info *pcidev_info; struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; struct tioce *ce_mmr; int bit; u64 vector; @@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; bit = sn_irq_info->irq_int_bit; - __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); + tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; vector |= sn_irq_info->irq_xtalkaddr; - writeq(vector, &ce_mmr->ce_adm_int_dest[bit]); - __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector); + tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); tioce_force_interrupt(sn_irq_info); } @@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) static void * tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) { + int my_nasid; + cnodeid_t my_cnode, mem_cnode; struct tioce_common *tioce_common; + struct tioce_kernel *tioce_kern; + struct tioce *tioce_mmr; /* * Allocate kernel bus soft and copy from prom. @@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; - if (tioce_kern_init(tioce_common) == NULL) { + tioce_kern = tioce_kern_init(tioce_common); + if (tioce_kern == NULL) { kfree(tioce_common); return NULL; } + /* + * Clear out any transient errors before registering the error + * interrupt handler. + */ + + tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, + ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL); + if (request_irq(SGI_PCIASIC_ERROR, tioce_error_intr_handler, SA_SHIRQ, "TIOCE error", (void *)tioce_common)) @@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont tioce_common->ce_pcibus.bs_persist_segment, tioce_common->ce_pcibus.bs_persist_busnum); + /* + * identify closest nasid for memory allocations + */ + + my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base); + my_cnode = nasid_to_cnodeid(my_nasid); + + if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) { + printk(KERN_WARNING "tioce_bus_fixup: failed to find " + "closest node with MEM to TIO node %d\n", my_cnode); + mem_cnode = (cnodeid_t)-1; /* use any node */ + } + + controller->node = mem_cnode; + return tioce_common; } |