Diffstat (limited to 'arch/ia64')
-rw-r--r--	arch/ia64/Kconfig                        |  18
-rw-r--r--	arch/ia64/Kconfig.debug                  |  11
-rw-r--r--	arch/ia64/hp/sim/simserial.c             |   6
-rw-r--r--	arch/ia64/ia32/ia32_ioctl.c              |   4
-rw-r--r--	arch/ia64/kernel/kprobes.c               | 144
-rw-r--r--	arch/ia64/kernel/mca.c                   | 120
-rw-r--r--	arch/ia64/kernel/mca_drv.c               |  20
-rw-r--r--	arch/ia64/kernel/perfmon.c               |   2
-rw-r--r--	arch/ia64/kernel/process.c               |  40
-rw-r--r--	arch/ia64/kernel/setup.c                 |   7
-rw-r--r--	arch/ia64/kernel/signal.c                |  11
-rw-r--r--	arch/ia64/kernel/smpboot.c               |   1
-rw-r--r--	arch/ia64/kernel/traps.c                 |  44
-rw-r--r--	arch/ia64/lib/Makefile                   |   2
-rw-r--r--	arch/ia64/lib/swiotlb.c                  | 759
-rw-r--r--	arch/ia64/mm/discontig.c                 |  19
-rw-r--r--	arch/ia64/oprofile/Kconfig               |   6
-rw-r--r--	arch/ia64/sn/pci/pcibr/pcibr_provider.c  |   4
-rw-r--r--	arch/ia64/sn/pci/pcibr/pcibr_reg.c       |   2
19 files changed, 283 insertions, 937 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 1642375fb14..d4de8a4814b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -26,6 +26,10 @@ config MMU
 	bool
 	default y
 
+config SWIOTLB
+	bool
+	default y
+
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 	default y
@@ -187,6 +191,7 @@ config IOSAPIC
 
 config IA64_SGI_SN_XP
 	tristate "Support communication between SGI SSIs"
+	depends on IA64_GENERIC || IA64_SGI_SN2
 	select IA64_UNCACHED_ALLOCATOR
 	help
 	  An SGI machine can be divided into multiple Single System
@@ -422,8 +427,21 @@ config GENERIC_PENDING_IRQ
 
 source "arch/ia64/hp/sim/Kconfig"
 
+menu "Instrumentation Support"
+	depends on EXPERIMENTAL
+
 source "arch/ia64/oprofile/Kconfig"
 
+config KPROBES
+	bool "Kprobes (EXPERIMENTAL)"
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+endmenu
+
 source "arch/ia64/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
index fda67ac993d..de9d507ba0f 100644
--- a/arch/ia64/Kconfig.debug
+++ b/arch/ia64/Kconfig.debug
@@ -2,17 +2,6 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-config KPROBES
-	bool "Kprobes"
-	depends on DEBUG_KERNEL
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-
-
 choice
 	prompt "Physical memory granularity"
 	default IA64_GRANULE_64MB
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index b42ec37be51..19ee635eeb7 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -642,10 +642,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
 	info->event = 0;
 	info->tty = 0;
 	if (info->blocked_open) {
-		if (info->close_delay) {
-			current->state = TASK_INTERRUPTIBLE;
-			schedule_timeout(info->close_delay);
-		}
+		if (info->close_delay)
+			schedule_timeout_interruptible(info->close_delay);
 		wake_up_interruptible(&info->open_wait);
 	}
 	info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c
index 164b211f417..88739394f6d 100644
--- a/arch/ia64/ia32/ia32_ioctl.c
+++ b/arch/ia64/ia32/ia32_ioctl.c
@@ -29,10 +29,8 @@
 #define CODE
 #include "compat_ioctl.c"
 
-typedef int (* ioctl32_handler_t)(unsigned int, unsigned int, unsigned long, struct file *);
-
 #define COMPATIBLE_IOCTL(cmd)		HANDLE_IOCTL((cmd),sys_ioctl)
-#define HANDLE_IOCTL(cmd,handler)	{ (cmd), (ioctl32_handler_t)(handler), NULL },
+#define HANDLE_IOCTL(cmd,handler)	{ (cmd), (ioctl_trans_handler_t)(handler), NULL },
 #define IOCTL_TABLE_START \
 	struct ioctl_trans ioctl_start[] = {
 #define IOCTL_TABLE_END \
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 471086b808a..801eeaeaf3d 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -26,7 +26,6 @@
 #include <linux/config.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
-#include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
@@ -38,13 +37,8 @@
 
 extern void jprobe_inst_return(void);
 
-/* kprobe_status settings */
-#define KPROBE_HIT_ACTIVE	0x00000001
-#define KPROBE_HIT_SS		0x00000002
-
-static struct kprobe *current_kprobe, *kprobe_prev;
-static unsigned long kprobe_status, kprobe_status_prev;
-static struct pt_regs jprobe_saved_regs;
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 enum instruction_type {A, I, M, F, B, L, X, u};
 static enum instruction_type bundle_encoding[32][3] = {
@@ -313,21 +307,22 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
 	return 0;
 }
 
-static inline void save_previous_kprobe(void)
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	kprobe_prev = current_kprobe;
-	kprobe_status_prev = kprobe_status;
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
 }
 
-static inline void restore_previous_kprobe(void)
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	current_kprobe = kprobe_prev;
-	kprobe_status = kprobe_status_prev;
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
 }
 
-static inline void set_current_kprobe(struct kprobe *p)
+static inline void set_current_kprobe(struct kprobe *p,
+			struct kprobe_ctlblk *kcb)
 {
-	current_kprobe = p;
+	__get_cpu_var(current_kprobe) = p;
 }
 
 static void kretprobe_trampoline(void)
@@ -347,11 +342,12 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head;
 	struct hlist_node *node, *tmp;
-	unsigned long orig_ret_address = 0;
+	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address =
 		((struct fnptr *)kretprobe_trampoline)->ip;
 
-	head = kretprobe_inst_table_head(current);
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	head = kretprobe_inst_table_head(current);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -367,9 +363,9 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	 * kretprobe_trampoline
 	 */
 	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-		if (ri->task != current)
+		if (ri->task != current)
 			/* another task is sharing our hash bucket */
-			continue;
+			continue;
 
 		if (ri->rp && ri->rp->handler)
 			ri->rp->handler(ri, regs);
@@ -389,17 +385,19 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
 
 	regs->cr_iip = orig_ret_address;
 
-	unlock_kprobes();
+	reset_current_kprobe();
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
 	preempt_enable_no_resched();
 
-	/*
-	 * By returning a non-zero value, we are telling
-	 * kprobe_handler() that we have handled unlocking
-	 * and re-enabling preemption.
-	 */
-	return 1;
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
+	return 1;
 }
 
+/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
 				      struct pt_regs *regs)
 {
@@ -606,17 +604,22 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 	int ret = 0;
 	struct pt_regs *regs = args->regs;
 	kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
+	struct kprobe_ctlblk *kcb;
 
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
 	preempt_disable();
+	kcb = get_kprobe_ctlblk();
 
 	/* Handle recursion cases */
 	if (kprobe_running()) {
 		p = get_kprobe(addr);
 		if (p) {
-			if ( (kprobe_status == KPROBE_HIT_SS) &&
+			if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
 				(p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
 				ia64_psr(regs)->ss = 0;
-				unlock_kprobes();
 				goto no_kprobe;
 			}
 			/* We have reentered the pre_kprobe_handler(), since
@@ -625,17 +628,17 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 			 * just single step on the instruction of the new probe
 			 * without calling any user handlers.
 			 */
-			save_previous_kprobe();
-			set_current_kprobe(p);
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, kcb);
 			p->nmissed++;
 			prepare_ss(p, regs);
-			kprobe_status = KPROBE_REENTER;
+			kcb->kprobe_status = KPROBE_REENTER;
 			return 1;
 		} else if (args->err == __IA64_BREAK_JPROBE) {
 			/*
 			 * jprobe instrumented function just completed
 			 */
-			p = current_kprobe;
+			p = __get_cpu_var(current_kprobe);
 			if (p->break_handler && p->break_handler(p, regs)) {
 				goto ss_probe;
 			}
@@ -645,10 +648,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		}
 	}
 
-	lock_kprobes();
 	p = get_kprobe(addr);
 	if (!p) {
-		unlock_kprobes();
 		if (!is_ia64_break_inst(regs)) {
 			/*
 			 * The breakpoint instruction was removed right
@@ -665,8 +666,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		goto no_kprobe;
 	}
 
-	kprobe_status = KPROBE_HIT_ACTIVE;
-	set_current_kprobe(p);
+	set_current_kprobe(p, kcb);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 
 	if (p->pre_handler && p->pre_handler(p, regs))
 		/*
@@ -678,7 +679,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 
 ss_probe:
 	prepare_ss(p, regs);
-	kprobe_status = KPROBE_HIT_SS;
+	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
 
 no_kprobe:
@@ -688,23 +689,25 @@ no_kprobe:
 
 static int __kprobes post_kprobes_handler(struct pt_regs *regs)
 {
-	if (!kprobe_running())
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
 		return 0;
 
-	if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
-		kprobe_status = KPROBE_HIT_SSDONE;
-		current_kprobe->post_handler(current_kprobe, regs, 0);
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
 	}
 
-	resume_execution(current_kprobe, regs);
+	resume_execution(cur, regs);
 
 	/*Restore back the original saved kprobes variables and continue. */
-	if (kprobe_status == KPROBE_REENTER) {
-		restore_previous_kprobe();
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
 		goto out;
 	}
-
-	unlock_kprobes();
+	reset_current_kprobe();
 
 out:
 	preempt_enable_no_resched();
@@ -713,16 +716,15 @@ out:
 
 static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 {
-	if (!kprobe_running())
-		return 0;
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
-	if (current_kprobe->fault_handler &&
-	    current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
 		return 1;
 
-	if (kprobe_status & KPROBE_HIT_SS) {
-		resume_execution(current_kprobe, regs);
-		unlock_kprobes();
+	if (kcb->kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(cur, regs);
+		reset_current_kprobe();
 		preempt_enable_no_resched();
 	}
 
@@ -733,31 +735,42 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 		unsigned long val, void *data)
 {
 	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
 	switch(val) {
 	case DIE_BREAK:
-		if (pre_kprobes_handler(args))
-			return NOTIFY_STOP;
+		/* err is break number from ia64_bad_break() */
+		if (args->err == 0x80200 || args->err == 0x80300)
+			if (pre_kprobes_handler(args))
+				ret = NOTIFY_STOP;
 		break;
-	case DIE_SS:
-		if (post_kprobes_handler(args->regs))
-			return NOTIFY_STOP;
+	case DIE_FAULT:
+		/* err is vector number from ia64_fault() */
+		if (args->err == 36)
+			if (post_kprobes_handler(args->regs))
+				ret = NOTIFY_STOP;
 		break;
 	case DIE_PAGE_FAULT:
-		if (kprobes_fault_handler(args->regs, args->trapnr))
-			return NOTIFY_STOP;
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() &&
+			kprobes_fault_handler(args->regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		preempt_enable();
 	default:
 		break;
 	}
-	return NOTIFY_DONE;
+	return ret;
 }
 
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
 	/* save architectural state */
-	jprobe_saved_regs = *regs;
+	kcb->jprobe_saved_regs = *regs;
 
 	/* after rfi, execute the jprobe instrumented function */
 	regs->cr_iip = addr & ~0xFULL;
@@ -775,7 +788,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	*regs = jprobe_saved_regs;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	*regs = kcb->jprobe_saved_regs;
+	preempt_enable_no_resched();
 	return 1;
 }
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 52c47da1724..355af15287c 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -51,6 +51,9 @@
 *
 * 2005-08-12 Keith Owens <kaos@sgi.com>
 *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
+ *
+ * 2005-10-07 Keith Owens <kaos@sgi.com>
+ *	      Add notify_die() hooks.
 */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -58,7 +61,6 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/kallsyms.h>
 #include <linux/smp_lock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
@@ -69,6 +71,7 @@
 #include <linux/workqueue.h>
 
 #include <asm/delay.h>
+#include <asm/kdebug.h>
 #include <asm/machvec.h>
 #include <asm/meminit.h>
 #include <asm/page.h>
@@ -132,6 +135,14 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
 
 static int mca_init;
 
+
+static void inline
+ia64_mca_spin(const char *func)
+{
+	printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
+	while (1)
+		cpu_relax();
+}
 /*
  * IA64_MCA log support
  */
@@ -526,13 +537,16 @@ ia64_mca_wakeup_all(void)
 *  Outputs :   None
 */
 static irqreturn_t
-ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs)
 {
 	unsigned long flags;
 	int cpu = smp_processor_id();
 
 	/* Mask all interrupts */
 	local_irq_save(flags);
+	if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
 
 	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
 	/* Register with the SAL monarch that the slave has
@@ -540,10 +554,18 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
 	 */
 	ia64_sal_mc_rendez();
 
+	if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
+
 	/* Wait for the monarch cpu to exit. */
 	while (monarch_cpu != -1)
 	       cpu_relax();	/* spin until monarch leaves */
 
+	if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
+
 	/* Enable all interrupts */
 	local_irq_restore(flags);
 	return IRQ_HANDLED;
@@ -933,6 +955,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
 	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
 	monarch_cpu = cpu;
+	if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
 	ia64_wait_for_slaves(cpu);
 
 	/* Wakeup all the processors which are spinning in the rendezvous loop.
@@ -942,6 +967,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * spinning in SAL does not work.
 	 */
 	ia64_mca_wakeup_all();
+	if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
 
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -960,6 +988,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
 		sos->os_status = IA64_MCA_CORRECTED;
 	}
+	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, 0, 0, recover)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
 
 	set_curr_task(cpu, previous_current);
 	monarch_cpu = -1;
@@ -1188,6 +1219,37 @@ ia64_mca_cpe_poll (unsigned long dummy)
 
 #endif /* CONFIG_ACPI */
 
+static int
+default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data)
+{
+	int c;
+	struct task_struct *g, *t;
+	if (val != DIE_INIT_MONARCH_PROCESS)
+		return NOTIFY_DONE;
+	printk(KERN_ERR "Processes interrupted by INIT -");
+	for_each_online_cpu(c) {
+		struct ia64_sal_os_state *s;
+		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
+		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
+		g = s->prev_task;
+		if (g) {
+			if (g->pid)
+				printk(" %d", g->pid);
+			else
+				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
+		}
+	}
+	printk("\n\n");
+	if (read_trylock(&tasklist_lock)) {
+		do_each_thread (g, t) {
+			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+			show_stack(t, NULL);
+		} while_each_thread (g, t);
+		read_unlock(&tasklist_lock);
+	}
+	return NOTIFY_DONE;
+}
+
 /*
  * C portion of the OS INIT handler
  *
@@ -1212,8 +1274,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	static atomic_t slaves;
 	static atomic_t monarchs;
 	task_t *previous_current;
-	int cpu = smp_processor_id(), c;
-	struct task_struct *g, *t;
+	int cpu = smp_processor_id();
 
 	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
 	console_loglevel = 15;	/* make sure printks make it to console */
@@ -1253,8 +1314,17 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
 		while (monarch_cpu == -1)
 		       cpu_relax();	/* spin until monarch enters */
+		if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, 0, 0, 0)
+				== NOTIFY_STOP)
+			ia64_mca_spin(__FUNCTION__);
+		if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, 0, 0, 0)
+				== NOTIFY_STOP)
+			ia64_mca_spin(__FUNCTION__);
 		while (monarch_cpu != -1)
 		       cpu_relax();	/* spin until monarch leaves */
+		if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, 0, 0, 0)
+				== NOTIFY_STOP)
+			ia64_mca_spin(__FUNCTION__);
 		printk("Slave on cpu %d returning to normal service.\n", cpu);
 		set_curr_task(cpu, previous_current);
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
@@ -1263,6 +1333,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	}
 
 	monarch_cpu = cpu;
+	if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
 
 	/*
 	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
@@ -1273,27 +1346,16 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	printk("Delaying for 5 seconds...\n");
 	udelay(5*1000000);
 	ia64_wait_for_slaves(cpu);
-	printk(KERN_ERR "Processes interrupted by INIT -");
-	for_each_online_cpu(c) {
-		struct ia64_sal_os_state *s;
-		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
-		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
-		g = s->prev_task;
-		if (g) {
-			if (g->pid)
-				printk(" %d", g->pid);
-			else
-				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
-		}
-	}
-	printk("\n\n");
-	if (read_trylock(&tasklist_lock)) {
-		do_each_thread (g, t) {
-			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
-			show_stack(t, NULL);
-		} while_each_thread (g, t);
-		read_unlock(&tasklist_lock);
-	}
+	/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
+	 * to default_monarch_init_process() above and just print all the
+	 * tasks.
+	 */
+	if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
+	if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, 0, 0, 0)
+			== NOTIFY_STOP)
+		ia64_mca_spin(__FUNCTION__);
 	printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
 	atomic_dec(&monarchs);
 	set_curr_task(cpu, previous_current);
@@ -1462,6 +1524,10 @@ ia64_mca_init(void)
 	s64 rc;
 	struct ia64_sal_retval isrv;
 	u64 timeout = IA64_MCA_RENDEZ_TIMEOUT;	/* platform specific */
+	static struct notifier_block default_init_monarch_nb = {
+		.notifier_call = default_monarch_init_process,
+		.priority = 0/* we need to notified last */
+	};
 
 	IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__);
 
@@ -1555,6 +1621,10 @@ ia64_mca_init(void)
 			       "(status %ld)\n", rc);
 			return;
 		}
+	if (register_die_notifier(&default_init_monarch_nb)) {
+		printk(KERN_ERR "Failed to register default monarch INIT process\n");
+		return;
+	}
 
 	IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index f081c60ab20..3492e3211a4 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -88,7 +88,7 @@ mca_page_isolate(unsigned long paddr)
 	if (!ia64_phys_addr_valid(paddr))
 		return ISOLATE_NONE;
 
-	if (!pfn_valid(paddr))
+	if (!pfn_valid(paddr >> PAGE_SHIFT))
 		return ISOLATE_NONE;
 
 	/* convert physical address to physical page number */
@@ -108,6 +108,7 @@ mca_page_isolate(unsigned long paddr)
 		return ISOLATE_NG;
 
 	/* add attribute 'Reserved' and register the page */
+	get_page(p);
 	SetPageReserved(p);
 	page_isolate[num_page_isolate++] = p;
 
@@ -546,9 +547,20 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 		(pal_processor_state_info_t*)peidx_psp(peidx);
 
 	/*
-	 * We cannot recover errors with other than bus_check.
+	 * Processor recovery status must key off of the PAL recovery
+	 * status in the Processor State Parameter.
 	 */
-	if (psp->cc || psp->rc || psp->uc)
+
+	/*
+	 * The machine check is corrected.
+	 */
+	if (psp->cm == 1)
+		return 1;
+
+	/*
+	 * The error was not contained.  Software must be reset.
+	 */
+	if (psp->us || psp->ci == 0)
 		return 0;
 
 	/*
@@ -569,8 +581,6 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 		return 0;
 	if (pbci->eb && pbci->bsi > 0)
 		return 0;
-	if (psp->ci == 0)
-		return 0;
 
 	/*
 	 * This is a local MCA and estimated as recoverble external bus error.
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f7dfc107cb7..410d4804fa6 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4940,7 +4940,7 @@ abort_locked:
 	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
 
 error_args:
-	if (args_k) kfree(args_k);
+	kfree(args_k);
 
 	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 051e050359e..e92ea64d804 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -4,6 +4,9 @@
 * Copyright (C) 1998-2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 * 04/11/17 Ashok Raj	<ashok.raj@intel.com> Added CPU Hotplug Support
+ *
+ * 2005-10-07 Keith Owens <kaos@sgi.com>
+ *	      Add notify_die() hooks.
 */
 #define __KERNEL_SYSCALLS__	/* see <asm/unistd.h> */
 #include <linux/config.h>
@@ -34,6 +37,7 @@
 #include <asm/elf.h>
 #include <asm/ia32.h>
 #include <asm/irq.h>
+#include <asm/kdebug.h>
 #include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/sal.h>
@@ -197,11 +201,15 @@ void
 default_idle (void)
 {
 	local_irq_enable();
-	while (!need_resched())
-		if (can_do_pal_halt)
-			safe_halt();
-		else
+	while (!need_resched()) {
+		if (can_do_pal_halt) {
+			local_irq_disable();
+			if (!need_resched())
+				safe_halt();
+			local_irq_enable();
+		} else
 			cpu_relax();
+	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -263,16 +271,16 @@ void __attribute__((noreturn))
 cpu_idle (void)
 {
 	void (*mark_idle)(int) = ia64_mark_idle;
+	int cpu = smp_processor_id();
+	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	/* endless idle loop with no priority at all */
 	while (1) {
+		if (!need_resched()) {
+			void (*idle)(void);
 #ifdef CONFIG_SMP
-		if (!need_resched())
 			min_xtp();
 #endif
-		while (!need_resched()) {
-			void (*idle)(void);
-
 			if (__get_cpu_var(cpu_idle_state))
 				__get_cpu_var(cpu_idle_state) = 0;
 
@@ -284,17 +292,17 @@ cpu_idle (void)
 			if (!idle)
 				idle = default_idle;
 			(*idle)();
-		}
-
-		if (mark_idle)
-			(*mark_idle)(0);
-
+			if (mark_idle)
+				(*mark_idle)(0);
 #ifdef CONFIG_SMP
-		normal_xtp();
+			normal_xtp();
 #endif
+		}
+		preempt_enable_no_resched();
 		schedule();
+		preempt_disable();
 		check_pgt_cache();
-		if (cpu_is_offline(smp_processor_id()))
+		if (cpu_is_offline(cpu))
 			play_dead();
 	}
 }
@@ -804,12 +812,14 @@ cpu_halt (void)
 void
 machine_restart (char *restart_cmd)
 {
+	(void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0);
 	(*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
 }
 
 void
 machine_halt (void)
 {
+	(void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0);
 	cpu_halt();
 }
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c9388a92cf4..5add0bcf87a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -92,6 +92,13 @@ extern void efi_initialize_iomem_resources(struct resource *,
 extern char _text[], _end[], _etext[];
 
 unsigned long ia64_max_cacheline_size;
+
+int dma_get_cache_alignment(void)
+{
+	return ia64_max_cacheline_size;
+}
+EXPORT_SYMBOL(dma_get_cache_alignment);
+
 unsigned long ia64_iobase;	/* virtual address for I/O accesses */
 EXPORT_SYMBOL(ia64_iobase);
 struct io_space io_space[MAX_IO_SPACES];
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 774f34b675c..58ce07efc56 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -387,15 +387,14 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
 	     struct sigscratch *scr)
 {
 	extern char __kernel_sigtramp[];
-	unsigned long tramp_addr, new_rbs = 0;
+	unsigned long tramp_addr, new_rbs = 0, new_sp;
 	struct sigframe __user *frame;
 	long err;
 
-	frame = (void __user *) scr->pt.r12;
+	new_sp = scr->pt.r12;
 	tramp_addr = (unsigned long) __kernel_sigtramp;
-	if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) {
-		frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size)
-					 & ~(STACK_ALIGN - 1));
+	if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(new_sp) == 0) {
+		new_sp = current->sas_ss_sp + current->sas_ss_size;
 		/*
 		 * We need to check for the register stack being on the signal stack
 		 * separately, because it's switched separately (memory stack is switched
@@ -404,7 +403,7 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
 		if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
 			new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
 	}
-	frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1));
+	frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return force_sigsegv_info(sig, frame);
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 400a4898712..8f44e7d2df6 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -399,6 +399,7 @@ start_secondary (void *unused)
 	Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
 	efi_map_pal_code();
 	cpu_init();
+	preempt_disable();
 	smp_callin();
 
 	cpu_idle();
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index f970359e7ed..fba5fdd1f96 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -30,17 +30,20 @@ fpswa_interface_t *fpswa_interface;
 EXPORT_SYMBOL(fpswa_interface);
 
 struct notifier_block *ia64die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
 
-int register_die_notifier(struct notifier_block *nb)
+int
+register_die_notifier(struct notifier_block *nb)
 {
-	int err = 0;
-	unsigned long flags;
-	spin_lock_irqsave(&die_notifier_lock, flags);
-	err = notifier_chain_register(&ia64die_chain, nb);
-	spin_unlock_irqrestore(&die_notifier_lock, flags);
-	return err;
+	return notifier_chain_register(&ia64die_chain, nb);
 }
+EXPORT_SYMBOL_GPL(register_die_notifier);
+
+int
+unregister_die_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_unregister(&ia64die_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_die_notifier);
 
 void __init
 trap_init (void)
@@ -105,6 +108,7 @@ die (const char *str, struct pt_regs *regs, long err)
 	if (++die.lock_owner_depth < 3) {
 		printk("%s[%d]: %s %ld [%d]\n",
 			current->comm, current->pid, str, err, ++die_counter);
+		(void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
 		show_regs(regs);
 	} else
 		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
@@ -155,9 +159,8 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 	switch (break_num) {
 	      case 0: /* unknown error (used by GCC for __builtin_abort()) */
 		if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
-				== NOTIFY_STOP) {
+				== NOTIFY_STOP)
 			return;
-		}
 		die_if_kernel("bugcheck!", regs, break_num);
 		sig = SIGILL; code = ILL_ILLOPC;
 		break;
@@ -210,15 +213,6 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 		sig = SIGILL; code = __ILL_BNDMOD;
 		break;
 
-	      case 0x80200:
-	      case 0x80300:
-		if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP)
-				== NOTIFY_STOP) {
-			return;
-		}
-		sig = SIGTRAP; code = TRAP_BRKPT;
-		break;
-
 	      default:
		if (break_num < 0x40000 || break_num > 0x100000)
 			die_if_kernel("Bad break", regs, break_num);
@@ -226,6 +220,9 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 		if (break_num < 0x80000) {
 			sig = SIGILL; code = __ILL_BREAK;
 		} else {
+			if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP)
+					== NOTIFY_STOP)
+				return;
 			sig = SIGTRAP; code = TRAP_BRKPT;
 		}
 	}
@@ -578,12 +575,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 #endif
 		break;
 	      case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
-	      case 36:
-		if (notify_die(DIE_SS, "ss", &regs, vector,
-			       vector, SIGTRAP) == NOTIFY_STOP)
-			return;
-		siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+	      case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
 	}
+	if (notify_die(DIE_FAULT, "ia64_fault", &regs, vector, siginfo.si_code, SIGTRAP)
+			== NOTIFY_STOP)
+		return;
 	siginfo.si_signo = SIGTRAP;
 	siginfo.si_errno = 0;
 	siginfo.si_addr = (void __user *) ifa;
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
index cb1af597370..ac64664a180 100644
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -9,7 +9,7 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o			\
 	bitop.o checksum.o clear_page.o csum_partial_copy.o		\
 	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o	\
 	flush.o ip_fast_csum.o do_csum.o				\
-	memset.o strlen.o swiotlb.o
+	memset.o strlen.o
 
 lib-$(CONFIG_ITANIUM)	+= copy_page.o copy_user.o memcpy.o
 lib-$(CONFIG_MCKINLEY)	+= copy_page_mck.o memcpy_mck.o
diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c
deleted file mode 100644
index 96edcc0fdcd..00000000000
--- a/arch/ia64/lib/swiotlb.c
+++ /dev/null
@@ -1,759 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is for IA-64 platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
- * Copyright (C) 2000, 2003 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
- * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
- *			unnecessary i-cache flushing.
- * 04/07/.. ak		Better overflow handling. Assorted fixes.
- */
-
-#include <linux/cache.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ctype.h>
-
-#include <asm/io.h>
-#include <asm/pci.h>
-#include <asm/dma.h>
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-
-#define OFFSET(val,align) ((unsigned long)	\
-	                   ( (val) & ( (align) - 1)))
-
-#define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
-#define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
-
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2.  What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define IO_TLB_SEGSIZE	128
-
-/*
- * log of the size of each IO TLB slab.  The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
-#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-
-/*
- * Minimum IO TLB size to bother booting with.  Systems with mainly
- * 64bit capable cards will only lightly use the swiotlb.  If we can't
- * allocate a contiguous 1MB, we're probably in trouble anyway.
- */
-#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-
-int swiotlb_force;
-
-/*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static char *io_tlb_start, *io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-void *io_tlb_overflow_buffer;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
-
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-static unsigned char **io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
-static int __init
-setup_io_tlb_npages(char *str)
-{
-	if (isdigit(*str)) {
-		io_tlb_nslabs = simple_strtoul(str, &str, 0);
-		/* avoid tail segment of size < IO_TLB_SEGSIZE */
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-	if (*str == ',')
-		++str;
-	if (!strcmp(str, "force"))
-		swiotlb_force = 1;
-	return 1;
-}
-__setup("swiotlb=", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
-
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the PCI DMA API.
- */
-void
-swiotlb_init_with_default_size (size_t default_size)
-{
-	unsigned long i;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs *
-						     (1 << IO_TLB_SHIFT), 0x100000000);
-	if (!io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
-	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
-
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
-	for (i = 0; i < io_tlb_nslabs; i++)
- 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
-	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
-	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
-}
-
-void
-swiotlb_init (void)
-{
-	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
-}
-
-/*
- * Systems with larger DMA zones (those that don't support ISA) can
- * initialize the swiotlb later using the slab allocator if needed.
- * This should be just like above, but with some error catching.
- */
-int
-swiotlb_late_init_with_default_size (size_t default_size)
-{
-	unsigned long i, req_nslabs = io_tlb_nslabs;
-	unsigned int order;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
-	io_tlb_nslabs = SLABS_PER_PAGE << order;
-
-	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-							order);
-		if (io_tlb_start)
-			break;
-		order--;
-	}
-
-	if (!io_tlb_start)
-		goto cleanup1;
-
-	if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
-		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
-		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs = SLABS_PER_PAGE << order;
-	}
-	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
-	memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
-
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
-	if (!io_tlb_list)
-		goto cleanup2;
-
-	for (i = 0; i < io_tlb_nslabs; i++)
- 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-	io_tlb_index = 0;
-
-	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
-	                           get_order(io_tlb_nslabs * sizeof(char *)));
-	if (!io_tlb_orig_addr)
-		goto cleanup3;
-
-	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-	                                          get_order(io_tlb_overflow));
-	if (!io_tlb_overflow_buffer)
-		goto cleanup4;
-
-	printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
-	       "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
-	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
-
-	return 0;
-
-cleanup4:
-	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
-	                                                      sizeof(char *)));
-	io_tlb_orig_addr = NULL;
-cleanup3:
-	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-	                                                 sizeof(int)));
-	io_tlb_list = NULL;
-	io_tlb_end = NULL;
-cleanup2:
-	free_pages((unsigned long)io_tlb_start, order);
-	io_tlb_start = NULL;
-cleanup1:
-	io_tlb_nslabs = req_nslabs;
-	return -ENOMEM;
-}
-
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
-{
-	dma_addr_t mask = 0xffffffff;
-	/* If the device has a mask, use it, otherwise default to 32 bits */
-	if (hwdev && hwdev->dma_mask)
-		mask = *hwdev->dma_mask;
-	return (addr & ~mask) != 0;
-}
-
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-map_single(struct device *hwdev, char *buffer, size_t size, int dir)
-{
-	unsigned long flags;
-	char *dma_addr;
-	unsigned int nslots, stride, index, wrap;
-	int i;
-
-	/*
-	 * For mappings greater than a page, we limit the stride (and
-	 * hence alignment) to a page size.
-	 */
-	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	if (size > PAGE_SIZE)
-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-	else
-		stride = 1;
-
-	if (!nslots)
-		BUG();
-
-	/*
-	 * Find suitable number of IO TLB entries size that will fit this
-	 * request and allocate a buffer from that IO TLB pool.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		wrap = index = ALIGN(io_tlb_index, stride);
-
-		if (index >= io_tlb_nslabs)
-			wrap = index = 0;
-
-		do {
-			/*
-			 * If we find a slot that indicates we have 'nslots'
-			 * number of contiguous buffers, we allocate the
-			 * buffers from that slot and mark the entries as '0'
-			 * indicating unavailable.
-			 */
-			if (io_tlb_list[index] >= nslots) {
-				int count = 0;
-
-				for (i = index; i < (int) (index + nslots); i++)
-					io_tlb_list[i] = 0;
-				for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-					io_tlb_list[i] = ++count;
-				dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-				/*
-				 * Update the indices to avoid searching in
-				 * the next round.
-				 */
-				io_tlb_index = ((index + nslots) < io_tlb_nslabs
-						? (index + nslots) : 0);
-
-				goto found;
-			}
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
-		} while (index != wrap);
-
-		spin_unlock_irqrestore(&io_tlb_lock, flags);
-		return NULL;
-	}
-  found:
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-
-	/*
-	 * Save away the mapping from the original address to the DMA address.
-	 * This is needed when we sync the memory.  Then we sync the buffer if
-	 * needed.
-	 */
-	io_tlb_orig_addr[index] = buffer;
-	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		memcpy(dma_addr, buffer, size);
-
-	return dma_addr;
-}
-
-/*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
- */
-static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
-{
-	unsigned long flags;
-	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	char *buffer = io_tlb_orig_addr[index];
-
-	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		/*
-		 * bounce... copy the data back into the original buffer * and
-		 * delete the bounce buffer.
-		 */
-		memcpy(buffer, dma_addr, size);
-
-	/*
-	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contigous entries available.
-	 * While returning the entries to the free list, we merge the entries
-	 * with slots below and above the pool being returned.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
-		/*
-		 * Step 1: return the slots to the free list, merging the
-		 * slots with superceeding slots
-		 */
-		for (i = index + nslots - 1; i >= index; i--)
-			io_tlb_list[i] = ++count;
-		/*
-		 * Step 2: merge the returned slots with the preceding slots,
-		 * if available (non zero)
-		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
-	}
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-}
-
-static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
-{
-	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	char *buffer = io_tlb_orig_addr[index];
-
-	/*
-	 * bounce... copy the data back into/from the original buffer
-	 * XXX How do you handle DMA_BIDIRECTIONAL here ?
-	 */
-	if (dir == DMA_FROM_DEVICE)
-		memcpy(buffer, dma_addr, size);
-	else if (dir == DMA_TO_DEVICE)
-		memcpy(dma_addr, buffer, size);
-	else
-		BUG();
-}
-
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, gfp_t flags)
-{
-	unsigned long dev_addr;
-	void *ret;
-	int order = get_order(size);
-
-	/*
-	 * XXX fix me: the DMA API should pass us an explicit DMA mask
-	 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
-	 * bit range instead of a 16MB one).
-	 */
-	flags |= GFP_DMA;
-
-	ret = (void *)__get_free_pages(flags, order);
-	if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) {
-		/*
-		 * The allocated memory isn't reachable by the device.
-		 * Fall back on swiotlb_map_single().
-		 */
-		free_pages((unsigned long) ret, order);
-		ret = NULL;
-	}
-	if (!ret) {
-		/*
-		 * We are either out of memory or the device can't DMA
-		 * to GFP_DMA memory; fall back on
-		 * swiotlb_map_single(), which will grab memory from
-		 * the lowest available address range.
-		 */
-		dma_addr_t handle;
-		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
-		if (dma_mapping_error(handle))
-			return NULL;
-
-		ret = phys_to_virt(handle);
-	}
-
-	memset(ret, 0, size);
-	dev_addr = virt_to_phys(ret);
-
-	/* Confirm address can be DMA'd by device */
-	if (address_needs_mapping(hwdev, dev_addr)) {
-		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
-		       (unsigned long long)*hwdev->dma_mask, dev_addr);
-		panic("swiotlb_alloc_coherent: allocated memory is out of "
-		      "range for device");
-	}
-	*dma_handle = dev_addr;
-	return ret;
-}
-
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-		      dma_addr_t dma_handle)
-{
-	if (!(vaddr >= (void *)io_tlb_start
-	      && vaddr < (void *)io_tlb_end))
-		free_pages((unsigned long) vaddr, get_order(size));
-	else
-		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
-{
-	/*
-	 * Ran out of IOMMU space for this operation. This is very bad.
-	 * Unfortunately the drivers cannot handle this operation properly.
-	 * unless they check for pci_dma_mapping_error (most don't)
-	 * When the mapping is small enough return a static buffer to limit
-	 * the damage, or panic when the transfer is too big.
-	 */
-	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
-	       "device %s\n", size, dev ? dev->bus_id : "?");
-
-	if (size > io_tlb_overflow && do_panic) {
-		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Memory would be corrupted\n");
-		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Random memory would be DMAed\n");
-	}
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode.  The
- * PCI address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t
-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
-{
-	unsigned long dev_addr = virt_to_phys(ptr);
-	void *map;
-
-	if (dir == DMA_NONE)
-		BUG();
-	/*
-	 * If the pointer passed in happens to be in the device's DMA window,
-	 * we can safely return the device addr and not worry about bounce
-	 * buffering it.
-	 */
-	if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
-		return dev_addr;
-
-	/*
-	 * Oh well, have to allocate and map a bounce buffer.
-	 */
-	map = map_single(hwdev, ptr, size, dir);
-	if (!map) {
-		swiotlb_full(hwdev, size, dir, 1);
-		map = io_tlb_overflow_buffer;
-	}
-
-	dev_addr = virt_to_phys(map);
-
-	/*
-	 * Ensure that the address returned is DMA'ble
-	 */
-	if (address_needs_mapping(hwdev, dev_addr))
-		panic("map_single: bounce buffer is not DMA'ble");
-
-	return dev_addr;
-}
-
-/*
- * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
- * flush them when they get mapped into an executable vm-area.
- */
-static void
-mark_clean(void *addr, size_t size)
-{
-	unsigned long pg_addr, end;
-
-	pg_addr = PAGE_ALIGN((unsigned long) addr);
-	end = (unsigned long) addr + size;
-	while (pg_addr + PAGE_SIZE <= end) {
-		struct page *page = virt_to_page(pg_addr);
-		set_bit(PG_arch_1, &page->flags);
-		pg_addr += PAGE_SIZE;
-	}
-}
-
-/*
- * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-void
-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
-		     int dir)
-{
-	char *dma_addr = phys_to_virt(dev_addr);
-
-	if (dir == DMA_NONE)
-		BUG();
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		unmap_single(hwdev, dma_addr, size, dir);
-	else if (dir == DMA_FROM_DEVICE)
-		mark_clean(dma_addr, size);
-}
-
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
- * call this function before doing so.  At the next point you give the PCI dma
- * address back to the card, you must first perform a
- * swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-			    size_t size, int dir)
-{
-	char *dma_addr = phys_to_virt(dev_addr);
-
-	if (dir == DMA_NONE)
-		BUG();
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		sync_single(hwdev, dma_addr, size, dir);
-	else if (dir == DMA_FROM_DEVICE)
-		mark_clean(dma_addr, size);
-}
-
-void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-			       size_t size, int dir)
-{
-	char *dma_addr = phys_to_virt(dev_addr);
-
-	if (dir == DMA_NONE)
-		BUG();
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		sync_single(hwdev, dma_addr, size, dir);
-	else if (dir == DMA_FROM_DEVICE)
-		mark_clean(dma_addr, size);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
- * interface.  Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length.  They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
- * same here.
- */
-int
-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
-	       int dir)
-{
-	void *addr;
-	unsigned long dev_addr;
-	int i;
-
-	if (dir == DMA_NONE)
-		BUG();
-
-	for (i = 0; i < nelems; i++, sg++) {
-		addr = SG_ENT_VIRT_ADDRESS(sg);
-		dev_addr = virt_to_phys(addr);
-		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
-			sg->dma_address = (dma_addr_t) virt_to_phys(map_single(hwdev, addr, sg->length, dir));
-			if (!sg->dma_address) {
-				/* Don't panic here, we expect map_sg users
-				   to do proper error handling. */
-				swiotlb_full(hwdev, sg->length, dir, 0);
-				swiotlb_unmap_sg(hwdev, sg - i, i, dir);
-				sg[0].dma_length = 0;
-				return 0;
-			}
-		} else
-			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
-	}
-	return nelems;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
- */
-void
-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
-		 int dir)
-{
-	int i;
-
-	if (dir == DMA_NONE)
-		BUG();
-
-	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir);
-		else if (dir == DMA_FROM_DEVICE)
-			mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-			int nelems, int dir)
-{
-	int i;
-
-	if (dir == DMA_NONE)
-		BUG();
-
-	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			sync_single(hwdev, (void *) sg->dma_address,
-				    sg->dma_length, dir);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-			   int nelems, int dir)
-{
-	int i;
-
-	if (dir == DMA_NONE)
-		BUG();
-
-	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			sync_single(hwdev, (void *) sg->dma_address,
-				    sg->dma_length, dir);
-}
-
-int
-swiotlb_dma_mapping_error(dma_addr_t dma_addr)
-{
-	return (dma_addr == virt_to_phys(io_tlb_overflow_buffer));
-}
-
-/*
- * Return whether the given PCI device DMA address mask can be supported
- * properly.  For example, if your device can only drive the low 24-bits
- * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported (struct device *hwdev, u64 mask)
-{
-	return (virt_to_phys (io_tlb_end) - 1) <= mask;
-}
-
-EXPORT_SYMBOL(swiotlb_init);
-EXPORT_SYMBOL(swiotlb_map_single);
-EXPORT_SYMBOL(swiotlb_unmap_single);
-EXPORT_SYMBOL(swiotlb_map_sg);
-EXPORT_SYMBOL(swiotlb_unmap_sg);
-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_single_for_device);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
-EXPORT_SYMBOL(swiotlb_dma_mapping_error);
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-EXPORT_SYMBOL(swiotlb_free_coherent);
-EXPORT_SYMBOL(swiotlb_dma_supported);
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index a88cdb7232f..0f776b032d3 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -350,14 +350,12 @@ static void __init initialize_pernode_data(void)
  * for best.
  * @nid: node id
  * @pernodesize: size of this node's pernode data
- * @align: alignment to use for this node's pernode data
  */
-static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,
-	unsigned long align)
+static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
 {
 	void *ptr = NULL;
 	u8 best = 0xff;
-	int bestnode = -1, node;
+	int bestnode = -1, node, anynode = 0;
 
 	for_each_online_node(node) {
 		if (node_isset(node, memory_less_mask))
@@ -366,13 +364,15 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,
 			best = node_distance(nid, node);
 			bestnode = node;
 		}
+		anynode = node;
 	}
 
-	ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat,
-		pernodesize, align, __pa(MAX_DMA_ADDRESS));
+	if (bestnode == -1)
+		bestnode = anynode;
+
+	ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, pernodesize,
+		PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 
-	if (!ptr)
-		panic("NO memory for memory less node\n");
 	return ptr;
 }
 
@@ -413,8 +413,7 @@ static void __init memory_less_nodes(void)
 
 	for_each_node_mask(node, memory_less_mask) {
 		pernodesize = compute_pernodesize(node);
-		pernode = memory_less_node_alloc(node, pernodesize,
-			(node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024));
+		pernode = memory_less_node_alloc(node, pernodesize);
 		fill_pernode(node, __pa(pernode), pernodesize);
 	}
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
index 56e6f614b04..97271ab484d 100644
--- a/arch/ia64/oprofile/Kconfig
+++ b/arch/ia64/oprofile/Kconfig
@@ -1,7 +1,3 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
 config PROFILING
 	bool "Profiling support (EXPERIMENTAL)"
 	help
@@ -22,5 +18,3 @@ config OPROFILE
 
 	  If unsure, say N.
 
-endmenu
-
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 7b03b8084ff..1f500c81002 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -212,13 +212,13 @@ void pcibr_target_interrupt(struct sn_irq_info *sn_irq_info)
 		       pdi_pcibus_info;
 
 	/* Disable the device's IRQ */
-	pcireg_intr_enable_bit_clr(pcibus_info, bit);
+	pcireg_intr_enable_bit_clr(pcibus_info, (1 << bit));
 
 	/* Change the device's IRQ */
 	pcireg_intr_addr_addr_set(pcibus_info, bit, xtalk_addr);
 
 	/* Re-enable the device's IRQ */
-	pcireg_intr_enable_bit_set(pcibus_info, bit);
+	pcireg_intr_enable_bit_set(pcibus_info, (1 << bit));
 
 	pcibr_force_interrupt(sn_irq_info);
 }
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
index 4f718c3e93d..5d534091262 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -131,7 +131,7 @@ void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits)
 		__sn_clrq_relaxed(&ptr->tio.cp_int_enable, bits);
 		break;
 	case PCIBR_BRIDGETYPE_PIC:
-		__sn_clrq_relaxed(&ptr->pic.p_int_enable, ~bits);
+		__sn_clrq_relaxed(&ptr->pic.p_int_enable, bits);
 		break;
	default:
		panic