Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig                |  25
-rw-r--r-- | arch/x86_64/boot/install.sh        |   4
-rw-r--r-- | arch/x86_64/ia32/ia32_signal.c     |  15
-rw-r--r-- | arch/x86_64/kernel/aperture.c      |   4
-rw-r--r-- | arch/x86_64/kernel/early_printk.c  |  13
-rw-r--r-- | arch/x86_64/kernel/head64.c        |   2
-rw-r--r-- | arch/x86_64/kernel/i8259.c         |  15
-rw-r--r-- | arch/x86_64/kernel/kprobes.c       | 190
-rw-r--r-- | arch/x86_64/kernel/mpparse.c       |  25
-rw-r--r-- | arch/x86_64/kernel/process.c       |  16
-rw-r--r-- | arch/x86_64/kernel/setup.c         |  33
-rw-r--r-- | arch/x86_64/kernel/signal.c        |  30
-rw-r--r-- | arch/x86_64/kernel/time.c          |  42
-rw-r--r-- | arch/x86_64/kernel/traps.c         |  22
-rw-r--r-- | arch/x86_64/lib/delay.c            |   7
-rw-r--r-- | arch/x86_64/mm/Makefile            |   2
-rw-r--r-- | arch/x86_64/mm/fault.c             |   4
-rw-r--r-- | arch/x86_64/mm/init.c              |   9
-rw-r--r-- | arch/x86_64/mm/ioremap.c           |   2
-rw-r--r-- | arch/x86_64/mm/numa.c              |   8
-rw-r--r-- | arch/x86_64/pci/k8-bus.c           |  16
21 files changed, 365 insertions, 119 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 289f448ac89..db259757dc8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -265,7 +265,7 @@ config NUMA_EMU
 	  into virtual nodes when booted with "numa=fake=N", where N is the
 	  number of nodes. This is only useful for debugging.
 
-config DISCONTIGMEM
+config ARCH_DISCONTIGMEM_ENABLE
 	bool
 	depends on NUMA
 	default y
@@ -274,6 +274,27 @@ config NUMA
 	bool
 	default n
 
+config ARCH_DISCONTIGMEM_ENABLE
+	def_bool y
+	depends on NUMA
+
+config ARCH_DISCONTIGMEM_DEFAULT
+	def_bool y
+	depends on NUMA
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	depends on NUMA
+
+config ARCH_FLATMEM_ENABLE
+	def_bool y
+	depends on !NUMA
+
+source "mm/Kconfig"
+
+config HAVE_ARCH_EARLY_PFN_TO_NID
+	def_bool y
+
 config HAVE_DEC_LOCK
 	bool
 	depends on SMP
@@ -381,6 +402,8 @@ config SECCOMP
 
 	  If unsure, say Y. Only embedded should say N here.
 
+source kernel/Kconfig.hz
+
 endmenu
 
 #
diff --git a/arch/x86_64/boot/install.sh b/arch/x86_64/boot/install.sh
index 90f2452b3b9..f17b40dfc0f 100644
--- a/arch/x86_64/boot/install.sh
+++ b/arch/x86_64/boot/install.sh
@@ -21,8 +21,8 @@
 
 # User may have a custom install script
 
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
 
 # Default install - same as make zlilo
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index fbd09b5126c..66e2821533d 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -428,8 +428,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	return (void __user *)((rsp - frame_size) & -8UL);
 }
 
-void ia32_setup_frame(int sig, struct k_sigaction *ka,
-		      compat_sigset_t *set, struct pt_regs * regs)
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		     compat_sigset_t *set, struct pt_regs * regs)
 {
 	struct sigframe __user *frame;
 	int err = 0;
@@ -514,14 +514,15 @@ void ia32_setup_frame(int sig, struct k_sigaction *ka,
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return;
+	return 1;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
+	return 0;
 }
 
-void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			 compat_sigset_t *set, struct pt_regs * regs)
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			compat_sigset_t *set, struct pt_regs * regs)
 {
 	struct rt_sigframe __user *frame;
 	int err = 0;
@@ -613,9 +614,9 @@ void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-	return;
+	return 1;
 
 give_sigsegv:
 	force_sigsegv(sig, current);
+	return 0;
 }
-
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 504e6347499..c9a6b812e92 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -40,11 +40,7 @@ int fix_aperture __initdata = 1;
 
 static u32 __init allocate_aperture(void)
 {
-#ifdef CONFIG_DISCONTIGMEM
 	pg_data_t *nd0 = NODE_DATA(0);
-#else
-	pg_data_t *nd0 = &contig_page_data;
-#endif
 	u32 aper_size;
 	void *p;
 
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index e3a19e8ebbf..9631c747c5e 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -2,20 +2,24 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/tty.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 
 /* Simple VGA output */
 
 #ifdef __i386__
+#include <asm/setup.h>
 #define VGABASE		(__ISA_IO_base + 0xb8000)
 #else
+#include <asm/bootsetup.h>
 #define VGABASE		((void __iomem *)0xffffffff800b8000UL)
 #endif
 
-#define MAX_YPOS	25
-#define MAX_XPOS	80
+#define MAX_YPOS	max_ypos
+#define MAX_XPOS	max_xpos
 
+static int max_ypos = 25, max_xpos = 80;
 static int current_ypos = 1, current_xpos = 0;
 
 static void early_vga_write(struct console *con, const char *str, unsigned n)
@@ -196,7 +200,10 @@ int __init setup_early_printk(char *opt)
        } else if (!strncmp(buf, "ttyS", 4)) {
                early_serial_init(buf);
                early_console = &early_serial_console;
-       } else if (!strncmp(buf, "vga", 3)) {
+       } else if (!strncmp(buf, "vga", 3)
+                  && SCREEN_INFO.orig_video_isVGA == 1) {
+               max_xpos = SCREEN_INFO.orig_video_cols;
+               max_ypos = SCREEN_INFO.orig_video_lines;
                early_console = &early_vga_console;
        }
        early_console_initialized = 1;
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 0f8c78dcd38..cf6ab147a2a 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -94,7 +94,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
        s = strstr(saved_command_line, "earlyprintk=");
        if (s != NULL)
                setup_early_printk(s);
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
        s = strstr(saved_command_line, "numa=");
        if (s != NULL)
                numa_setup(s+5);
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 7873d9ba881..19eafa0aa95 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -157,14 +157,13 @@ static unsigned int startup_8259A_irq(unsigned int irq)
 }
 
 static struct hw_interrupt_type i8259A_irq_type = {
-       "XT-PIC",
-       startup_8259A_irq,
-       shutdown_8259A_irq,
-       enable_8259A_irq,
-       disable_8259A_irq,
-       mask_and_ack_8259A,
-       end_8259A_irq,
-       NULL
+       .typename = "XT-PIC",
+       .startup = startup_8259A_irq,
+       .shutdown = shutdown_8259A_irq,
+       .enable = enable_8259A_irq,
+       .disable = disable_8259A_irq,
+       .ack = mask_and_ack_8259A,
+       .end = end_8259A_irq,
 };
 
 /*
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index f77f8a0ff18..4e680f87a75 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -27,6 +27,8 @@
  *             <prasanna@in.ibm.com> adapted for x86_64
  * 2005-Mar    Roland McGrath <roland@redhat.com>
  *             Fixed to handle %rip-relative addressing mode correctly.
+ * 2005-May    Rusty Lynch <rusty.lynch@intel.com>
+ *             Added function return probes functionality
  */
 
 #include <linux/config.h>
@@ -37,18 +39,16 @@
 #include <linux/slab.h>
 #include <linux/preempt.h>
 #include <linux/moduleloader.h>
-
+#include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
 
 static DECLARE_MUTEX(kprobe_mutex);
 
-/* kprobe_status settings */
-#define KPROBE_HIT_ACTIVE      0x00000001
-#define KPROBE_HIT_SS          0x00000002
-
 static struct kprobe *current_kprobe;
 static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags;
+static struct kprobe *kprobe_prev;
+static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_rsp;
 static kprobe_opcode_t *get_insn_slot(void);
@@ -214,6 +214,21 @@ void arch_copy_kprobe(struct kprobe *p)
                BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
                *ripdisp = disp;
        }
+       p->opcode = *p->addr;
+}
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+       *p->addr = BREAKPOINT_INSTRUCTION;
+       flush_icache_range((unsigned long) p->addr,
+                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+       *p->addr = p->opcode;
+       flush_icache_range((unsigned long) p->addr,
+                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 }
 
 void arch_remove_kprobe(struct kprobe *p)
@@ -223,10 +238,29 @@ void arch_remove_kprobe(struct kprobe *p)
        down(&kprobe_mutex);
 }
 
-static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs)
+static inline void save_previous_kprobe(void)
 {
-       *p->addr = p->opcode;
-       regs->rip = (unsigned long)p->addr;
+       kprobe_prev = current_kprobe;
+       kprobe_status_prev = kprobe_status;
+       kprobe_old_rflags_prev = kprobe_old_rflags;
+       kprobe_saved_rflags_prev = kprobe_saved_rflags;
+}
+
+static inline void restore_previous_kprobe(void)
+{
+       current_kprobe = kprobe_prev;
+       kprobe_status = kprobe_status_prev;
+       kprobe_old_rflags = kprobe_old_rflags_prev;
+       kprobe_saved_rflags = kprobe_saved_rflags_prev;
+}
+
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
+{
+       current_kprobe = p;
+       kprobe_saved_rflags = kprobe_old_rflags
+               = (regs->eflags & (TF_MASK | IF_MASK));
+       if (is_IF_modifier(p->ainsn.insn))
+               kprobe_saved_rflags &= ~IF_MASK;
 }
 
 static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -240,6 +274,50 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
        regs->rip = (unsigned long)p->ainsn.insn;
 }
 
+struct task_struct *arch_get_kprobe_task(void *ptr)
+{
+       return ((struct thread_info *) (((unsigned long) ptr) &
+                                       (~(THREAD_SIZE -1))))->task;
+}
+
+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+       unsigned long *sara = (unsigned long *)regs->rsp;
+       struct kretprobe_instance *ri;
+       static void *orig_ret_addr;
+
+       /*
+        * Save the return address when the return probe hits
+        * the first time, and use it to populate the (krprobe
+        * instance)->ret_addr for subsequent return probes at
+        * the same addrress since stack address would have
+        * the kretprobe_trampoline by then.
+        */
+       if (((void*) *sara) != kretprobe_trampoline)
+               orig_ret_addr = (void*) *sara;
+
+       if ((ri = get_free_rp_inst(rp)) != NULL) {
+               ri->rp = rp;
+               ri->stack_addr = sara;
+               ri->ret_addr = orig_ret_addr;
+               add_rp_inst(ri);
+               /* Replace the return addr with trampoline addr */
+               *sara = (unsigned long) &kretprobe_trampoline;
+       } else {
+               rp->nmissed++;
+       }
+}
+
+void arch_kprobe_flush_task(struct task_struct *tk)
+{
+       struct kretprobe_instance *ri;
+       while ((ri = get_rp_inst_tsk(tk)) != NULL) {
+               *((unsigned long *)(ri->stack_addr)) =
+                                       (unsigned long) ri->ret_addr;
+               recycle_rp_inst(ri);
+       }
+}
+
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled thorough out this function.
@@ -264,9 +342,30 @@ int kprobe_handler(struct pt_regs *regs)
                        regs->eflags |= kprobe_saved_rflags;
                        unlock_kprobes();
                        goto no_kprobe;
+               } else if (kprobe_status == KPROBE_HIT_SSDONE) {
+                       /* TODO: Provide re-entrancy from
+                        * post_kprobes_handler() and avoid exception
+                        * stack corruption while single-stepping on
+                        * the instruction of the new probe.
+                        */
+                       arch_disarm_kprobe(p);
+                       regs->rip = (unsigned long)p->addr;
+                       ret = 1;
+               } else {
+                       /* We have reentered the kprobe_handler(), since
+                        * another probe was hit while within the handler.
+                        * We here save the original kprobe
+                        * variables and just single step on instruction
+                        * of the new probe without calling any user
+                        * handlers.
+                        */
+                       save_previous_kprobe();
+                       set_current_kprobe(p, regs);
+                       p->nmissed++;
+                       prepare_singlestep(p, regs);
+                       kprobe_status = KPROBE_REENTER;
+                       return 1;
                }
-               disarm_kprobe(p, regs);
-               ret = 1;
        } else {
                p = current_kprobe;
                if (p->break_handler && p->break_handler(p, regs)) {
@@ -296,11 +395,7 @@ int kprobe_handler(struct pt_regs *regs)
        }
 
        kprobe_status = KPROBE_HIT_ACTIVE;
-       current_kprobe = p;
-       kprobe_saved_rflags = kprobe_old_rflags
-               = (regs->eflags & (TF_MASK | IF_MASK));
-       if (is_IF_modifier(p->ainsn.insn))
-               kprobe_saved_rflags &= ~IF_MASK;
+       set_current_kprobe(p, regs);
 
        if (p->pre_handler && p->pre_handler(p, regs))
                /* handler has already set things up, so skip ss setup */
@@ -317,6 +412,55 @@ no_kprobe:
 }
 
 /*
+ * For function-return probes, init_kprobes() establishes a probepoint
+ * here. When a retprobed function returns, this probe is hit and
+ * trampoline_probe_handler() runs, calling the kretprobe's handler.
+ */
+ void kretprobe_trampoline_holder(void)
+ {
+       asm volatile (  ".global kretprobe_trampoline\n"
+                       "kretprobe_trampoline: \n"
+                       "nop\n");
+ }
+
+/*
+ * Called when we hit the probe point at kretprobe_trampoline
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+       struct task_struct *tsk;
+       struct kretprobe_instance *ri;
+       struct hlist_head *head;
+       struct hlist_node *node;
+       unsigned long *sara = (unsigned long *)regs->rsp - 1;
+
+       tsk = arch_get_kprobe_task(sara);
+       head = kretprobe_inst_table_head(tsk);
+
+       hlist_for_each_entry(ri, node, head, hlist) {
+               if (ri->stack_addr == sara && ri->rp) {
+                       if (ri->rp->handler)
+                               ri->rp->handler(ri, regs);
+               }
+       }
+       return 0;
+}
+
+void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
+                                               unsigned long flags)
+{
+       struct kretprobe_instance *ri;
+       /* RA already popped */
+       unsigned long *sara = ((unsigned long *)regs->rsp) - 1;
+
+       while ((ri = get_rp_inst(sara))) {
+               regs->rip = (unsigned long)ri->ret_addr;
+               recycle_rp_inst(ri);
+       }
+       regs->eflags &= ~TF_MASK;
+}
+
+/*
  * Called after single-stepping.  p->addr is the address of the
  * instruction whose first byte has been replaced by the "int 3"
  * instruction.  To avoid the SMP problems that can occur when we
@@ -401,13 +545,23 @@ int post_kprobe_handler(struct pt_regs *regs)
        if (!kprobe_running())
                return 0;
 
-       if (current_kprobe->post_handler)
+       if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
+               kprobe_status = KPROBE_HIT_SSDONE;
                current_kprobe->post_handler(current_kprobe, regs, 0);
+       }
 
-       resume_execution(current_kprobe, regs);
+       if (current_kprobe->post_handler != trampoline_post_handler)
+               resume_execution(current_kprobe, regs);
        regs->eflags |= kprobe_saved_rflags;
 
-       unlock_kprobes();
+       /* Restore the original saved kprobes variables and continue. */
+       if (kprobe_status == KPROBE_REENTER) {
+               restore_previous_kprobe();
+               goto out;
+       } else {
+               unlock_kprobes();
+       }
+out:
        preempt_enable_no_resched();
 
        /*
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 61a63be6b29..9c5aa2a790c 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -23,6 +23,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 
 #include <asm/smp.h>
 #include <asm/mtrr.h>
@@ -45,7 +46,8 @@ int acpi_found_madt;
 int apic_version [MAX_APICS];
 unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL };
+unsigned char pci_bus_to_node [256];
+EXPORT_SYMBOL(pci_bus_to_node);
 
 static int mp_current_pci_id = 0;
 /* I/O APIC entries */
@@ -904,11 +906,20 @@ void __init mp_config_acpi_legacy_irqs (void)
        return;
 }
 
+#define MAX_GSI_NUM    4096
+
 int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
 {
        int ioapic = -1;
        int ioapic_pin = 0;
        int idx, bit = 0;
+       static int pci_irq = 16;
+       /*
+        * Mapping between Global System Interrupts, which
+        * represent all possible interrupts, to the IRQs
+        * assigned to actual devices.
+        */
+       static int gsi_to_irq[MAX_GSI_NUM];
 
        if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
                return gsi;
@@ -943,11 +954,21 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
        if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
                Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
                        mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-               return gsi;
+               return gsi_to_irq[gsi];
        }
 
        mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
 
+       if (edge_level) {
+               /*
+                * For PCI devices assign IRQs in order, avoiding gaps
+                * due to unused I/O APIC pins.
+                */
+               int irq = gsi;
+               gsi = pci_irq++;
+               gsi_to_irq[irq] = gsi;
+       }
+
        io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
                edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
                active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index dce8bab4306..e59d1f9d616 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -34,6 +34,7 @@
 #include <linux/ptrace.h>
 #include <linux/utsname.h>
 #include <linux/random.h>
+#include <linux/kprobes.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -293,6 +294,14 @@ void exit_thread(void)
 {
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;
+
+       /*
+        * Remove function-return probe instances associated with this task
+        * and put them back on the free list. Do not insert an exit probe for
+        * this function, it will be disabled by kprobe_flush_task if you do.
+        */
+       kprobe_flush_task(me);
+
        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 
@@ -312,6 +321,13 @@ void flush_thread(void)
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();
 
+       /*
+        * Remove function-return probe instances associated with this task
+        * and put them back on the free list. Do not insert an exit probe for
+        * this function, it will be disabled by kprobe_flush_task if you do.
+        */
+       kprobe_flush_task(tsk);
+
        if (t->flags & _TIF_ABI_PENDING)
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
 
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 99f038ede23..000015dd5a8 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -40,6 +40,8 @@
 #include <linux/acpi.h>
 #include <linux/kallsyms.h>
 #include <linux/edd.h>
+#include <linux/mmzone.h>
+
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -348,7 +350,7 @@ static __init void parse_cmdline_early (char ** cmdline_p)
                if (!memcmp(from, "mem=", 4))
                        parse_memopt(from+4, &from);
 
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
                if (!memcmp(from, "numa=", 5))
                        numa_setup(from+5);
 #endif
@@ -377,17 +379,20 @@ static __init void parse_cmdline_early (char ** cmdline_p)
        *cmdline_p = command_line;
 }
 
-#ifndef CONFIG_DISCONTIGMEM
-static void __init contig_initmem_init(void)
+#ifndef CONFIG_NUMA
+static void __init
+contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 {
-       unsigned long bootmap_size, bootmap;
-       bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
-       bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
-       if (bootmap == -1L)
-               panic("Cannot find bootmem map of size %ld\n",bootmap_size);
-       bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
-       e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT);
-       reserve_bootmem(bootmap, bootmap_size);
+       unsigned long bootmap_size, bootmap;
+
+       memory_present(0, start_pfn, end_pfn);
+       bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
+       bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
+       if (bootmap == -1L)
+               panic("Cannot find bootmem map of size %ld\n",bootmap_size);
+       bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+       e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+       reserve_bootmem(bootmap, bootmap_size);
 }
 #endif
 
@@ -554,10 +559,10 @@ void __init setup_arch(char **cmdline_p)
        acpi_numa_init();
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
        numa_initmem_init(0, end_pfn);
 #else
-       contig_initmem_init();
+       contig_initmem_init(0, end_pfn);
 #endif
 
        /* Reserve direct mapping */
@@ -618,6 +623,8 @@ void __init setup_arch(char **cmdline_p)
                }
        }
 #endif
+
+       sparse_init();
        paging_init();
        check_ioapic();
 
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 429c0269dc4..98b7ba95d58 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -34,9 +34,9 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
-void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
                sigset_t *set, struct pt_regs * regs);
-void ia32_setup_frame(int sig, struct k_sigaction *ka,
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
                sigset_t *set, struct pt_regs * regs);
 
 asmlinkage long
@@ -238,7 +238,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
        return (void __user *)round_down(rsp - size, 16);
 }
 
-static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
                           sigset_t *set, struct pt_regs * regs)
 {
        struct rt_sigframe __user *frame;
@@ -327,20 +327,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
                current->comm, current->pid, frame, regs->rip, frame->pretcode);
 #endif
 
-       return;
+       return 1;
 
 give_sigsegv:
        force_sigsegv(sig, current);
+       return 0;
 }
 
 /*
  * OK, we're invoking a handler
  */
 
-static void
+static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
                sigset_t *oldset, struct pt_regs *regs)
 {
+       int ret;
+
 #ifdef DEBUG_SIG
        printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n",
                current->pid, sig,
@@ -384,20 +387,22 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 #ifdef CONFIG_IA32_EMULATION
        if (test_thread_flag(TIF_IA32)) {
                if (ka->sa.sa_flags & SA_SIGINFO)
-                       ia32_setup_rt_frame(sig, ka, info, oldset, regs);
+                       ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
                else
-                       ia32_setup_frame(sig, ka, oldset, regs);
+                       ret = ia32_setup_frame(sig, ka, oldset, regs);
        } else
 #endif
-       setup_rt_frame(sig, ka, info, oldset, regs);
+       ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
-       if (!(ka->sa.sa_flags & SA_NODEFER)) {
+       if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
                spin_lock_irq(&current->sighand->siglock);
                sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
                sigaddset(&current->blocked,sig);
                recalc_sigpending();
                spin_unlock_irq(&current->sighand->siglock);
        }
+
+       return ret;
 }
 
 /*
@@ -417,7 +422,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
         * kernel mode. Just return without doing anything
         * if so.
         */
-       if ((regs->cs & 3) != 3)
+       if (!user_mode(regs))
                return 1;
 
        if (try_to_freeze(0))
@@ -434,11 +439,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
                 * inside the kernel.
                 */
                if (current->thread.debugreg7)
-                       asm volatile("movq %0,%%db7"    : : "r" (current->thread.debugreg7));
+                       set_debugreg(current->thread.debugreg7, 7);
 
                /* Whee!  Actually deliver the signal.  */
-               handle_signal(signr, &info, &ka, oldset, regs);
-               return 1;
+               return handle_signal(signr, &info, &ka, oldset, regs);
        }
 
 no_signal:
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index fb8c809b4cd..66bf6ddeb0c 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -64,6 +64,7 @@ static int notsc __initdata = 0;
 unsigned int cpu_khz;                                  /* TSC clocks / usec, not used here */
 static unsigned long hpet_period;                      /* fsecs / HPET clock */
 unsigned long hpet_tick;                               /* HPET clocks / interrupt */
+static int hpet_use_timer;
 unsigned long vxtime_hz = PIT_TICK_RATE;
 int report_lost_ticks;                         /* command line option */
 unsigned long long monotonic_base;
@@ -105,7 +106,9 @@ static inline unsigned int do_gettimeoffset_tsc(void)
 
 static inline unsigned int do_gettimeoffset_hpet(void)
 {
-       return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
+       /* cap counter read to one tick to avoid inconsistencies */
+       unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
+       return (min(counter,hpet_tick) * vxtime.quot) >> 32;
 }
 
 unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
@@ -301,7 +304,7 @@ unsigned long long monotonic_clock(void)
 
                        last_offset = vxtime.last;
                        base = monotonic_base;
-                       this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+                       this_offset = hpet_readl(HPET_COUNTER);
 
                } while (read_seqretry(&xtime_lock, seq));
                offset = (this_offset - last_offset);
@@ -377,7 +380,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 
        write_seqlock(&xtime_lock);
 
-       if (vxtime.hpet_address) {
+       if (vxtime.hpet_address)
+               offset = hpet_readl(HPET_COUNTER);
+
+       if (hpet_use_timer) {
+               /* if we're using the hpet timer functionality,
+                * we can more accurately know the counter value
+                * when the timer interrupt occured.
+                */
                offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
                delay = hpet_readl(HPET_COUNTER) - offset;
        } else {
@@ -803,17 +813,18 @@ static int hpet_timer_stop_set_go(unsigned long tick)
  * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
  * and period also hpet_tick.
  */
-
-       hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+       if (hpet_use_timer) {
+               hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
                    HPET_TN_32BIT, HPET_T0_CFG);
-       hpet_writel(hpet_tick, HPET_T0_CMP);
-       hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
-
+               hpet_writel(hpet_tick, HPET_T0_CMP);
+               hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
+               cfg |= HPET_CFG_LEGACY;
+       }
 /*
  * Go!
  */
 
-       cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+       cfg |= HPET_CFG_ENABLE;
        hpet_writel(cfg, HPET_CFG);
 
        return 0;
@@ -834,8 +845,7 @@ static int hpet_init(void)
 
        id = hpet_readl(HPET_ID);
 
-       if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
-           !(id & HPET_ID_LEGSUP))
+       if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
                return -1;
 
        hpet_period = hpet_readl(HPET_PERIOD);
@@ -845,6 +855,8 @@ static int hpet_init(void)
        hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
                hpet_period;
 
+       hpet_use_timer = (id & HPET_ID_LEGSUP);
+
        return hpet_timer_stop_set_go(hpet_tick);
 }
 
@@ -901,9 +913,11 @@ void __init time_init(void)
        set_normalized_timespec(&wall_to_monotonic,
                                -xtime.tv_sec, -xtime.tv_nsec);
 
-       if (!hpet_init()) {
+       if (!hpet_init())
                vxtime_hz = (1000000000000000L + hpet_period / 2) /
                        hpet_period;
+
+       if (hpet_use_timer) {
                cpu_khz = hpet_calibrate_tsc();
                timename = "HPET";
 #ifdef CONFIG_X86_PM_TIMER
@@ -968,7 +982,7 @@ void __init time_init_gtod(void)
        if (unsynchronized_tsc())
                notsc = 1;
        if (vxtime.hpet_address && notsc) {
-               timetype = "HPET";
+               timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
                vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
                vxtime.mode = VXTIME_HPET;
                do_gettimeoffset = do_gettimeoffset_hpet;
@@ -983,7 +997,7 @@ void __init time_init_gtod(void)
                printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
 #endif
        } else {
-               timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
+               timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
                vxtime.mode = VXTIME_TSC;
        }
 
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 3dfec8fdabc..121646fc43f 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -274,7 +274,7 @@ EXPORT_SYMBOL(dump_stack);
 void show_registers(struct pt_regs *regs)
 {
        int i;
-       int in_kernel = (regs->cs & 3) == 0;
+       int in_kernel = !user_mode(regs);
        unsigned long rsp;
        const int cpu = safe_smp_processor_id();
        struct task_struct *cur = cpu_pda[cpu].pcurrent;
@@ -318,7 +318,7 @@ void handle_BUG(struct pt_regs *regs)
        struct bug_frame f;
        char tmp;
 
-       if (regs->cs & 3)
+       if (user_mode(regs))
                return;
        if (__copy_from_user(&f, (struct bug_frame *) regs->rip,
                             sizeof(struct bug_frame)))
@@ -437,7 +437,7 @@ static void do_trap(int trapnr, int signr, char *str,
        }
 #endif
 
-       if ((regs->cs & 3) != 0) {
+       if (user_mode(regs)) {
                struct task_struct *tsk = current;
 
                if (exception_trace && unhandled_signal(tsk, signr))
@@ -522,7 +522,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
        }
 #endif
 
-       if ((regs->cs & 3)!=0) {
+       if (user_mode(regs)) {
                struct task_struct *tsk = current;
 
                if (exception_trace && unhandled_signal(tsk, SIGSEGV))
@@ -638,7 +638,7 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs)
        if (eregs == (struct pt_regs *)eregs->rsp)
                ;
        /* Exception from user space */
-       else if (eregs->cs & 3)
+       else if (user_mode(eregs))
                regs = ((struct pt_regs *)current->thread.rsp0) - 1;
        /* Exception from kernel and interrupts are enabled. Move to
           kernel process stack. */
@@ -669,7 +669,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code)
        }
 #endif
 
-       asm("movq %%db6,%0" : "=r" (condition));
+       get_debugreg(condition, 6);
 
        if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
                                                SIGTRAP) == NOTIFY_STOP)
@@ -697,7 +697,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code)
                 * allowing programs to debug themselves without the ptrace()
                 * interface.
                 */
-               if ((regs->cs & 3) == 0)
+               if (!user_mode(regs))
                        goto clear_TF_reenable;
                /*
                 * Was the TF flag set by a debugger? If so, clear it now,
@@ -715,13 +715,13 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code)
        info.si_signo = SIGTRAP;
        info.si_errno = 0;
        info.si_code = TRAP_BRKPT;
-       if ((regs->cs & 3) == 0)
+       if (!user_mode(regs))
                goto clear_dr7;
 
        info.si_addr = (void __user *)regs->rip;
        force_sig_info(SIGTRAP, &info, tsk);
 clear_dr7:
-       asm volatile("movq %0,%%db7"::"r"(0UL));
+       set_debugreg(0UL, 7);
        return;
 
 clear_TF_reenable:
@@ -756,7 +756,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
        unsigned short cwd, swd;
 
        conditional_sti(regs);
-       if ((regs->cs & 3) == 0 &&
+       if (!user_mode(regs) &&
            kernel_math_error(regs, "kernel x87 math error"))
                return;
 
@@ -822,7 +822,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
        unsigned short mxcsr;
 
        conditional_sti(regs);
-       if ((regs->cs & 3) == 0 &&
+       if (!user_mode(regs) &&
            kernel_math_error(regs, "kernel simd math error"))
                return;
 
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index aed61a668a1..33a873a3c22 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <asm/delay.h>
+#include <asm/msr.h>
 
 #ifdef CONFIG_SMP
 #include <asm/smp.h>
@@ -19,6 +20,12 @@
 
 int x86_udelay_tsc = 0;                /* Delay via TSC */
 
+int read_current_timer(unsigned long *timer_value)
+{
+       rdtscll(*timer_value);
+       return 0;
+}
+
 void __delay(unsigned long loops)
 {
        unsigned bclock, now;
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile
index 66c354ad80c..1d232a87f11 100644
--- a/arch/x86_64/mm/Makefile
+++ b/arch/x86_64/mm/Makefile
@@ -4,7 +4,7 @@
 
 obj-y   := init.o fault.o ioremap.o extable.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_DISCONTIGMEM) += numa.o
+obj-$(CONFIG_NUMA) += numa.o
 obj-$(CONFIG_K8_NUMA) += k8topology.o
 obj-$(CONFIG_ACPI_NUMA) += srat.o
 
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 57d3ab15a5c..2f187986f94 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -74,7 +74,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
        instr = (unsigned char *)convert_rip_to_linear(current, regs);
        max_instr = instr + 15;
 
-       if ((regs->cs & 3) != 0 && instr >= (unsigned char *)TASK_SIZE)
+       if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
                return 0;
 
        while (scan_more && instr < max_instr) {
@@ -106,7 +106,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
                        /* Could check the LDT for lm, but for now it's good
                           enough to assume that long mode only uses well known
                           segments or kernel. */
-                       scan_more = ((regs->cs & 3) == 0) || (regs->cs == __USER_CS);
+                       scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
                        break;
 
                case 0x60:
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index dbe53b4c7e6..72e4b364ed7 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -318,7 +318,7 @@ void zap_low_mappings(void)
        flush_tlb_all();
 }
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
        {
@@ -427,13 +427,16 @@ void __init mem_init(void)
        reservedpages = 0;
 
        /* this will put all low memory onto the freelists */
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
        totalram_pages += numa_free_all_bootmem();
        tmp = 0;
        /* should count reserved pages here for all nodes */
 #else
+
+#ifdef CONFIG_FLATMEM
        max_mapnr = end_pfn;
        if (!mem_map) BUG();
+#endif
 
        totalram_pages += free_all_bootmem();
 
@@ -515,7 +518,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 {
        /* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
 #else
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 58aac23760e..6972df480d2 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -178,7 +178,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
        if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
                return (__force void __iomem *)phys_to_virt(phys_addr);
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
        /*
         * Don't allow anybody to remap normal RAM that we're using..
         */
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index fd9f25d7a6c..84cde796ecb 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -66,6 +66,13 @@ int __init compute_hash_shift(struct node *nodes, int numnodes)
        return -1;
 }
 
+#ifdef CONFIG_SPARSEMEM
+int early_pfn_to_nid(unsigned long pfn)
+{
+       return phys_to_nid(pfn << PAGE_SHIFT);
+}
+#endif
+
 /* Initialize bootmem allocator for a node */
 void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 {
@@ -80,6 +87,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
        start_pfn = start >> PAGE_SHIFT;
        end_pfn = end >> PAGE_SHIFT;
 
+       memory_present(nodeid, start_pfn, end_pfn);
        nodedata_phys = find_e820_area(start, end, pgdat_size);
        if (nodedata_phys == -1L)
                panic("Cannot find memory pgdat in node %d\n", nodeid);
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 62349c78db5..7e7d0c2a002 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -53,25 +53,11 @@ fill_mp_bus_to_cpumask(void)
                                for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus);
                                     j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
                                     j++)
-                                       pci_bus_to_cpumask[j] =
-                                               node_to_cpumask(NODE_ID(nid));
+                                       pci_bus_to_node[j] = NODE_ID(nid);
                        }
                }
        }
 
-       /* quick sanity check */
-       printed = 0;
-       for (i = 0; i < 256; i++) {
-               if (cpus_empty(pci_bus_to_cpumask[i])) {
-                       pci_bus_to_cpumask[i] = CPU_MASK_ALL;
-                       if (printed)
-                               continue;
-                       printk(KERN_ERR
-                               "k8-bus.c: some busses have empty cpu mask\n");
-                       printed = 1;
-               }
-       }
-
        return 0;
 }
 