diff options
Diffstat (limited to 'arch/x86_64/kernel')
31 files changed, 2035 insertions, 478 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 72fe60c20d3..059c88313f4 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ - dmi_scan.o pci-dma.o pci-nommu.o + pci-dma.o pci-nommu.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o @@ -43,11 +43,9 @@ CFLAGS_vsyscall.o := $(PROFILING) -g0 bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o -topology-y += ../../i386/mach-default/topology.o +topology-y += ../../i386/kernel/topology.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o -dmi_scan-y += ../../i386/kernel/dmi_scan.o - diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index e4e2b7d01f8..70b9d21ed67 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -60,7 +60,7 @@ static u32 __init allocate_aperture(void) printk("Cannot allocate aperture memory hole (%p,%uK)\n", p, aper_size>>10); if (p) - free_bootmem_node(nd0, (unsigned long)p, aper_size); + free_bootmem_node(nd0, __pa(p), aper_size); return 0; } printk("Mapping aperture over %d KB of RAM @ %lx\n", @@ -80,7 +80,7 @@ static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) printk("Aperture from %s beyond 4GB. Ignoring.\n",name); return 0; } - if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) { + if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); return 0; } @@ -161,7 +161,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) int num, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { + for (num = 0; num < 256; num++) { for (slot = 0; slot < 32; slot++) { for (func = 0; func < 8; func++) { u32 class, cap; @@ -248,7 +248,7 @@ void __init iommu_hole_init(void) /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ - } else if ((!no_iommu && end_pfn >= MAX_DMA32_PFN) || + } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || fallback_aper_force) { diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 6147770b434..100a30c4004 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -342,6 +342,7 @@ void __init init_bsp_APIC(void) void __cpuinit setup_local_APIC (void) { unsigned int value, maxlvt; + int i, j; value = apic_read(APIC_LVR); @@ -371,6 +372,25 @@ void __cpuinit setup_local_APIC (void) apic_write(APIC_TASKPRI, value); /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1<<j)) + ack_APIC_irq(); + } + } + + /* * Now that we are all set up, enable the APIC */ value = apic_read(APIC_SPIV); @@ -595,7 +615,7 @@ static int __init apic_set_verbosity(char *str) printk(KERN_WARNING "APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug", str); - return 0; + return 1; } __setup("apic=", apic_set_verbosity); @@ -708,7 +728,7 @@ static void setup_APIC_timer(unsigned int clocks) local_irq_save(flags); /* wait for irq slice */ - if (vxtime.hpet_address) { + if (vxtime.hpet_address && hpet_use_timer) { int trigger = hpet_readl(HPET_T0_CMP); while (hpet_readl(HPET_COUNTER) >= trigger) /* do nothing */ ; @@ -1117,41 +1137,42 @@ int __init APIC_init_uniprocessor (void) static __init int setup_disableapic(char *str) { disable_apic = 1; - return 0; + return 1; } static __init int setup_nolapic(char *str) { disable_apic = 1; - return 0; + return 1; } static __init int setup_noapictimer(char *str) { if (str[0] != ' ' && str[0] != 0) - return -1; + return 0; disable_apic_timer = 1; - return 0; + return 1; } static __init int setup_apicmaintimer(char *str) { apic_runs_main_timer = 1; nohpet = 1; - return 0; + return 1; } __setup("apicmaintimer", setup_apicmaintimer); static __init int setup_noapicmaintimer(char *str) { apic_runs_main_timer = -1; - return 0; + return 1; } __setup("noapicmaintimer", setup_noapicmaintimer); static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; + notsc_setup(NULL); return setup_apicmaintimer(NULL); } __setup("apicpmtimer", setup_apicpmtimer); diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 293cd71a266..1ef6028f721 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -76,11 +76,22 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) *addrp = __pa_symbol(&_end); return 1; } + + if (last >= ebda_addr && addr < ebda_addr + ebda_size) { + *addrp = ebda_addr + ebda_size; + return 1; + } + /* XXX ramdisk image here? */ return 0; } -int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) +/* + * This function checks if any part of the range <start,end> is mapped + * with type. + */ +int __meminit +e820_any_mapped(unsigned long start, unsigned long end, unsigned type) { int i; for (i = 0; i < e820.nr_map; i++) { @@ -94,6 +105,35 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) return 0; } +/* + * This function checks if the entire range <start,end> is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +{ + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of <start,end> we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + /* * Find a free area in a specific range. */ @@ -109,7 +149,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi addr = start; if (addr > ei->addr + ei->size) continue; - while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size) + while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; last = addr + size; if (last > ei->addr + ei->size) diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 6dffb498ccd..b93ef5b5198 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -17,11 +17,8 @@ #define VGABASE ((void __iomem *)0xffffffff800b8000UL) #endif -#define MAX_YPOS max_ypos -#define MAX_XPOS max_xpos - static int max_ypos = 25, max_xpos = 80; -static int current_ypos = 1, current_xpos = 0; +static int current_ypos = 25, current_xpos = 0; static void early_vga_write(struct console *con, const char *str, unsigned n) { @@ -29,26 +26,26 @@ static void early_vga_write(struct console *con, const char *str, unsigned n) int i, k, j; while ((c = *str++) != '\0' && n-- > 0) { - if (current_ypos >= MAX_YPOS) { + if (current_ypos >= max_ypos) { /* scroll 1 line up */ - for (k = 1, j = 0; k < MAX_YPOS; k++, j++) { - for (i = 0; i < MAX_XPOS; i++) { - writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), - VGABASE + 2*(MAX_XPOS*j + i)); + for (k = 1, j = 0; k < max_ypos; k++, j++) { + for (i = 0; i < max_xpos; i++) { + writew(readw(VGABASE+2*(max_xpos*k+i)), + VGABASE + 2*(max_xpos*j + i)); } } - for (i = 0; i < MAX_XPOS; i++) - writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); - current_ypos = MAX_YPOS-1; + for (i = 0; i < max_xpos; i++) + writew(0x720, VGABASE + 2*(max_xpos*j + i)); + current_ypos = max_ypos-1; } if (c == '\n') { current_xpos = 0; current_ypos++; } else if (c != '\r') { writew(((0x7 << 8) | (unsigned short) c), - VGABASE + 2*(MAX_XPOS*current_ypos + + VGABASE + 2*(max_xpos*current_ypos + current_xpos++)); - if (current_xpos >= MAX_XPOS) { + if (current_xpos >= max_xpos) { current_xpos = 0; current_ypos++; } @@ -63,7 +60,7 @@ static struct console early_vga_console = { .index = -1, }; -/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ +/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ static int early_serial_base = 0x3f8; /* ttyS0 */ @@ -83,30 +80,30 @@ static int early_serial_base = 0x3f8; /* ttyS0 */ #define DLL 0 /* Divisor Latch Low */ #define DLH 1 /* Divisor latch High */ -static int early_serial_putc(unsigned char ch) -{ - unsigned timeout = 0xffff; - while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) +static int early_serial_putc(unsigned char ch) +{ + unsigned timeout = 0xffff; + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) cpu_relax(); outb(ch, early_serial_base + TXR); return timeout ? 0 : -1; -} +} static void early_serial_write(struct console *con, const char *s, unsigned n) { - while (*s && n-- > 0) { - early_serial_putc(*s); - if (*s == '\n') - early_serial_putc('\r'); - s++; - } -} + while (*s && n-- > 0) { + early_serial_putc(*s); + if (*s == '\n') + early_serial_putc('\r'); + s++; + } +} #define DEFAULT_BAUD 9600 static __init void early_serial_init(char *s) { - unsigned char c; + unsigned char c; unsigned divisor; unsigned baud = DEFAULT_BAUD; char *e; @@ -115,7 +112,7 @@ static __init void early_serial_init(char *s) ++s; if (*s) { - unsigned port; + unsigned port; if (!strncmp(s,"0x",2)) { early_serial_base = simple_strtoul(s, &e, 16); } else { @@ -139,16 +136,16 @@ static __init void early_serial_init(char *s) outb(0x3, early_serial_base + MCR); /* DTR + RTS */ if (*s) { - baud = simple_strtoul(s, &e, 0); - if (baud == 0 || s == e) + baud = simple_strtoul(s, &e, 0); + if (baud == 0 || s == e) baud = DEFAULT_BAUD; - } - - divisor = 115200 / baud; - c = inb(early_serial_base + LCR); - outb(c | DLAB, early_serial_base + LCR); - outb(divisor & 0xff, early_serial_base + DLL); - outb((divisor >> 8) & 0xff, early_serial_base + DLH); + } + + divisor = 115200 / baud; + c = inb(early_serial_base + LCR); + outb(c | DLAB, early_serial_base + LCR); + outb(divisor & 0xff, early_serial_base + DLL); + outb((divisor >> 8) & 0xff, early_serial_base + DLH); outb(c & ~DLAB, early_serial_base + LCR); } @@ -205,67 +202,68 @@ struct console *early_console = &early_vga_console; static int early_console_initialized = 0; void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; +{ + char buf[512]; + int n; va_list ap; - va_start(ap,fmt); + va_start(ap,fmt); n = vscnprintf(buf,512,fmt,ap); early_console->write(early_console,buf,n); - va_end(ap); -} + va_end(ap); +} static int __initdata keep_early; -int __init setup_early_printk(char *opt) -{ +int __init setup_early_printk(char *opt) +{ char *space; - char buf[256]; + char buf[256]; if (early_console_initialized) - return -1; + return 1; - strlcpy(buf,opt,sizeof(buf)); - space = strchr(buf, ' '); + strlcpy(buf,opt,sizeof(buf)); + space = strchr(buf, ' '); if (space) - *space = 0; + *space = 0; if (strstr(buf,"keep")) - keep_early = 1; + keep_early = 1; - if (!strncmp(buf, "serial", 6)) { + if (!strncmp(buf, "serial", 6)) { early_serial_init(buf + 6); early_console = &early_serial_console; - } else if (!strncmp(buf, "ttyS", 4)) { + } else if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf); - early_console = &early_serial_console; + early_console = &early_serial_console; } else if (!strncmp(buf, "vga", 3) && SCREEN_INFO.orig_video_isVGA == 1) { max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; - early_console = &early_vga_console; + current_ypos = SCREEN_INFO.orig_y; + early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); early_console = &simnow_console; keep_early = 1; } early_console_initialized = 1; - register_console(early_console); + register_console(early_console); return 0; } void __init disable_early_printk(void) -{ +{ if (!early_console_initialized || !early_console) return; if (!keep_early) { printk("disabling early console\n"); unregister_console(early_console); early_console_initialized = 0; - } else { + } else { printk("keeping early console\n"); } -} +} __setup("earlyprintk=", setup_early_printk); diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index b150c87a08c..586b34c00c4 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -180,6 +180,10 @@ rff_trace: * * XXX if we had a free scratch register we could save the RSP into the stack frame * and report it properly in ps. Unfortunately we haven't. + * + * When user can change the frames always force IRET. That is because + * it deals with uncanonical addresses better. SYSRET has trouble + * with them due to bugs in both AMD and Intel CPUs. */ ENTRY(system_call) @@ -254,7 +258,10 @@ sysret_signal: xorl %esi,%esi # oldset -> arg2 call ptregscall_common 1: movl $_TIF_NEED_RESCHED,%edi - jmp sysret_check + /* Use IRET because user could have changed frame. This + works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ + cli + jmp int_with_check badsys: movq $-ENOSYS,RAX-ARGOFFSET(%rsp) @@ -274,13 +281,9 @@ tracesys: ja 1f movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) - movq %rax,RAX-ARGOFFSET(%rsp) -1: SAVE_REST - movq %rsp,%rdi - call syscall_trace_leave - RESTORE_TOP_OF_STACK %rbx - RESTORE_REST - jmp ret_from_sys_call +1: movq %rax,RAX-ARGOFFSET(%rsp) + /* Use IRET because user could have changed frame */ + jmp int_ret_from_sys_call CFI_ENDPROC /* @@ -408,25 +411,9 @@ ENTRY(stub_execve) CFI_ADJUST_CFA_OFFSET -8 CFI_REGISTER rip, r11 SAVE_REST - movq %r11, %r15 - CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 call sys_execve - GET_THREAD_INFO(%rcx) - bt $TIF_IA32,threadinfo_flags(%rcx) - CFI_REMEMBER_STATE - jc exec_32bit RESTORE_TOP_OF_STACK %r11 - movq %r15, %r11 - CFI_REGISTER rip, r11 - RESTORE_REST - pushq %r11 - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rip, 0 - ret - -exec_32bit: - CFI_RESTORE_STATE movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call @@ -553,7 +540,8 @@ iret_label: /* force a signal here? this matches i386 behaviour */ /* running with kernel gs */ bad_iret: - movq $-9999,%rdi /* better code? */ + movq $11,%rdi /* SIGSEGV */ + sti jmp do_exit .previous diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist new file mode 100644 index 00000000000..2bcebdc3eed --- /dev/null +++ b/arch/x86_64/kernel/functionlist @@ -0,0 +1,1286 @@ +*(.text.flush_thread) +*(.text.check_poison_obj) +*(.text.copy_page) +*(.text.__set_personality) +*(.text.gart_map_sg) +*(.text.kmem_cache_free) +*(.text.find_get_page) +*(.text._raw_spin_lock) +*(.text.ide_outb) +*(.text.unmap_vmas) +*(.text.copy_page_range) +*(.text.kprobe_handler) +*(.text.__handle_mm_fault) +*(.text.__d_lookup) +*(.text.copy_user_generic) +*(.text.__link_path_walk) +*(.text.get_page_from_freelist) +*(.text.kmem_cache_alloc) +*(.text.drive_cmd_intr) +*(.text.ia32_setup_sigcontext) +*(.text.huge_pte_offset) +*(.text.do_page_fault) +*(.text.page_remove_rmap) +*(.text.release_pages) +*(.text.ide_end_request) +*(.text.__mutex_lock_slowpath) +*(.text.__find_get_block) +*(.text.kfree) +*(.text.vfs_read) +*(.text._raw_spin_unlock) +*(.text.free_hot_cold_page) +*(.text.fget_light) +*(.text.schedule) +*(.text.memcmp) +*(.text.touch_atime) +*(.text.__might_sleep) +*(.text.__down_read_trylock) +*(.text.arch_pick_mmap_layout) +*(.text.find_vma) +*(.text.__make_request) +*(.text.do_generic_mapping_read) +*(.text.mutex_lock_interruptible) +*(.text.__generic_file_aio_read) +*(.text._atomic_dec_and_lock) +*(.text.__wake_up_bit) +*(.text.add_to_page_cache) +*(.text.cache_alloc_debugcheck_after) +*(.text.vm_normal_page) +*(.text.mutex_debug_check_no_locks_freed) +*(.text.net_rx_action) +*(.text.__find_first_zero_bit) +*(.text.put_page) +*(.text._raw_read_lock) +*(.text.__delay) +*(.text.dnotify_parent) +*(.text.do_path_lookup) +*(.text.do_sync_read) +*(.text.do_lookup) +*(.text.bit_waitqueue) +*(.text.file_read_actor) +*(.text.strncpy_from_user) +*(.text.__pagevec_lru_add_active) +*(.text.fget) +*(.text.dput) +*(.text.__strnlen_user) +*(.text.inotify_inode_queue_event) +*(.text.rw_verify_area) +*(.text.ide_intr) +*(.text.inotify_dentry_parent_queue_event) +*(.text.permission) +*(.text.memscan) +*(.text.hpet_rtc_interrupt) +*(.text.do_mmap_pgoff) +*(.text.current_fs_time) +*(.text.vfs_getattr) +*(.text.kmem_flagcheck) +*(.text.mark_page_accessed) +*(.text.free_pages_and_swap_cache) +*(.text.generic_fillattr) +*(.text.__block_prepare_write) +*(.text.__set_page_dirty_nobuffers) +*(.text.link_path_walk) +*(.text.find_get_pages_tag) +*(.text.ide_do_request) +*(.text.__alloc_pages) +*(.text.generic_permission) +*(.text.mod_page_state_offset) +*(.text.free_pgd_range) +*(.text.generic_file_buffered_write) +*(.text.number) +*(.text.ide_do_rw_disk) +*(.text.__brelse) +*(.text.__mod_page_state_offset) +*(.text.rotate_reclaimable_page) +*(.text.find_vma_prepare) +*(.text.find_vma_prev) +*(.text.lru_cache_add_active) +*(.text.__kmalloc_track_caller) +*(.text.smp_invalidate_interrupt) +*(.text.handle_IRQ_event) +*(.text.__find_get_block_slow) +*(.text.do_wp_page) +*(.text.do_select) +*(.text.set_user_nice) +*(.text.sys_read) +*(.text.do_munmap) +*(.text.csum_partial) +*(.text.__do_softirq) +*(.text.may_open) +*(.text.getname) +*(.text.get_empty_filp) +*(.text.__fput) +*(.text.remove_mapping) +*(.text.filp_ctor) +*(.text.poison_obj) +*(.text.unmap_region) +*(.text.test_set_page_writeback) +*(.text.__do_page_cache_readahead) +*(.text.sock_def_readable) +*(.text.ide_outl) +*(.text.shrink_zone) +*(.text.rb_insert_color) +*(.text.get_request) +*(.text.sys_pread64) +*(.text.spin_bug) +*(.text.ide_outsl) +*(.text.mask_and_ack_8259A) +*(.text.filemap_nopage) +*(.text.page_add_file_rmap) +*(.text.find_lock_page) +*(.text.tcp_poll) +*(.text.__mark_inode_dirty) +*(.text.file_ra_state_init) +*(.text.generic_file_llseek) +*(.text.__pagevec_lru_add) +*(.text.page_cache_readahead) +*(.text.n_tty_receive_buf) +*(.text.zonelist_policy) +*(.text.vma_adjust) +*(.text.test_clear_page_dirty) +*(.text.sync_buffer) +*(.text.do_exit) +*(.text.__bitmap_weight) +*(.text.alloc_pages_current) +*(.text.get_unused_fd) +*(.text.zone_watermark_ok) +*(.text.cpuset_update_task_memory_state) +*(.text.__bitmap_empty) +*(.text.sys_munmap) +*(.text.__inode_dir_notify) +*(.text.__generic_file_aio_write_nolock) +*(.text.__pte_alloc) +*(.text.sys_select) +*(.text.vm_acct_memory) +*(.text.vfs_write) +*(.text.__lru_add_drain) +*(.text.prio_tree_insert) +*(.text.generic_file_aio_read) +*(.text.vma_merge) +*(.text.block_write_full_page) +*(.text.__page_set_anon_rmap) +*(.text.apic_timer_interrupt) +*(.text.release_console_sem) +*(.text.sys_write) +*(.text.sys_brk) +*(.text.dup_mm) +*(.text.read_current_timer) +*(.text.ll_rw_block) +*(.text.blk_rq_map_sg) +*(.text.dbg_userword) +*(.text.__block_commit_write) +*(.text.cache_grow) +*(.text.copy_strings) +*(.text.release_task) +*(.text.do_sync_write) +*(.text.unlock_page) +*(.text.load_elf_binary) +*(.text.__follow_mount) +*(.text.__getblk) +*(.text.do_sys_open) +*(.text.current_kernel_time) +*(.text.call_rcu) +*(.text.write_chan) +*(.text.vsnprintf) +*(.text.dummy_inode_setsecurity) +*(.text.submit_bh) +*(.text.poll_freewait) +*(.text.bio_alloc_bioset) +*(.text.skb_clone) +*(.text.page_waitqueue) +*(.text.__mutex_lock_interruptible_slowpath) +*(.text.get_index) +*(.text.csum_partial_copy_generic) +*(.text.bad_range) +*(.text.remove_vma) +*(.text.cp_new_stat) +*(.text.alloc_arraycache) +*(.text.test_clear_page_writeback) +*(.text.strsep) +*(.text.open_namei) +*(.text._raw_read_unlock) +*(.text.get_vma_policy) +*(.text.__down_write_trylock) +*(.text.find_get_pages) +*(.text.tcp_rcv_established) +*(.text.generic_make_request) +*(.text.__block_write_full_page) +*(.text.cfq_set_request) +*(.text.sys_inotify_init) +*(.text.split_vma) +*(.text.__mod_timer) +*(.text.get_options) +*(.text.vma_link) +*(.text.mpage_writepages) +*(.text.truncate_complete_page) +*(.text.tcp_recvmsg) +*(.text.sigprocmask) +*(.text.filemap_populate) +*(.text.sys_close) +*(.text.inotify_dev_queue_event) +*(.text.do_task_stat) +*(.text.__dentry_open) +*(.text.unlink_file_vma) +*(.text.__pollwait) +*(.text.packet_rcv_spkt) +*(.text.drop_buffers) +*(.text.free_pgtables) +*(.text.generic_file_direct_write) +*(.text.copy_process) +*(.text.netif_receive_skb) +*(.text.dnotify_flush) +*(.text.print_bad_pte) +*(.text.anon_vma_unlink) +*(.text.sys_mprotect) +*(.text.sync_sb_inodes) +*(.text.find_inode_fast) +*(.text.dummy_inode_readlink) +*(.text.putname) +*(.text.init_smp_flush) +*(.text.dbg_redzone2) +*(.text.sk_run_filter) +*(.text.may_expand_vm) +*(.text.generic_file_aio_write) +*(.text.find_next_zero_bit) +*(.text.file_kill) +*(.text.audit_getname) +*(.text.arch_unmap_area_topdown) +*(.text.alloc_page_vma) +*(.text.tcp_transmit_skb) +*(.text.rb_next) +*(.text.dbg_redzone1) +*(.text.generic_file_mmap) +*(.text.vfs_fstat) +*(.text.sys_time) +*(.text.page_lock_anon_vma) +*(.text.get_unmapped_area) +*(.text.remote_llseek) +*(.text.__up_read) +*(.text.fd_install) +*(.text.eventpoll_init_file) +*(.text.dma_alloc_coherent) +*(.text.create_empty_buffers) +*(.text.__mutex_unlock_slowpath) +*(.text.dup_fd) +*(.text.d_alloc) +*(.text.tty_ldisc_try) +*(.text.sys_stime) +*(.text.__rb_rotate_right) +*(.text.d_validate) +*(.text.rb_erase) +*(.text.path_release) +*(.text.memmove) +*(.text.invalidate_complete_page) +*(.text.clear_inode) +*(.text.cache_estimate) +*(.text.alloc_buffer_head) +*(.text.smp_call_function_interrupt) +*(.text.flush_tlb_others) +*(.text.file_move) +*(.text.balance_dirty_pages_ratelimited) +*(.text.vma_prio_tree_add) +*(.text.timespec_trunc) +*(.text.mempool_alloc) +*(.text.iget_locked) +*(.text.d_alloc_root) +*(.text.cpuset_populate_dir) +*(.text.anon_vma_prepare) +*(.text.sys_newstat) +*(.text.alloc_page_interleave) +*(.text.__path_lookup_intent_open) +*(.text.__pagevec_free) +*(.text.inode_init_once) +*(.text.free_vfsmnt) +*(.text.__user_walk_fd) +*(.text.cfq_idle_slice_timer) +*(.text.sys_mmap) +*(.text.sys_llseek) +*(.text.prio_tree_remove) +*(.text.filp_close) +*(.text.file_permission) +*(.text.vma_prio_tree_remove) +*(.text.tcp_ack) +*(.text.nameidata_to_filp) +*(.text.sys_lseek) +*(.text.percpu_counter_mod) +*(.text.igrab) +*(.text.__bread) +*(.text.alloc_inode) +*(.text.filldir) +*(.text.__rb_rotate_left) +*(.text.irq_affinity_write_proc) +*(.text.init_request_from_bio) +*(.text.find_or_create_page) +*(.text.tty_poll) +*(.text.tcp_sendmsg) +*(.text.ide_wait_stat) +*(.text.free_buffer_head) +*(.text.flush_signal_handlers) +*(.text.tcp_v4_rcv) +*(.text.nr_blockdev_pages) +*(.text.locks_remove_flock) +*(.text.__iowrite32_copy) +*(.text.do_filp_open) +*(.text.try_to_release_page) +*(.text.page_add_new_anon_rmap) +*(.text.kmem_cache_size) +*(.text.eth_type_trans) +*(.text.try_to_free_buffers) +*(.text.schedule_tail) +*(.text.proc_lookup) +*(.text.no_llseek) +*(.text.kfree_skbmem) +*(.text.do_wait) +*(.text.do_mpage_readpage) +*(.text.vfs_stat_fd) +*(.text.tty_write) +*(.text.705) +*(.text.sync_page) +*(.text.__remove_shared_vm_struct) +*(.text.__kfree_skb) +*(.text.sock_poll) +*(.text.get_request_wait) +*(.text.do_sigaction) +*(.text.do_brk) +*(.text.tcp_event_data_recv) +*(.text.read_chan) +*(.text.pipe_writev) +*(.text.__emul_lookup_dentry) +*(.text.rtc_get_rtc_time) +*(.text.print_objinfo) +*(.text.file_update_time) +*(.text.do_signal) +*(.text.disable_8259A_irq) +*(.text.blk_queue_bounce) +*(.text.__anon_vma_link) +*(.text.__vma_link) +*(.text.vfs_rename) +*(.text.sys_newlstat) +*(.text.sys_newfstat) +*(.text.sys_mknod) +*(.text.__show_regs) +*(.text.iput) +*(.text.get_signal_to_deliver) +*(.text.flush_tlb_page) +*(.text.debug_mutex_wake_waiter) +*(.text.copy_thread) +*(.text.clear_page_dirty_for_io) +*(.text.buffer_io_error) +*(.text.vfs_permission) +*(.text.truncate_inode_pages_range) +*(.text.sys_recvfrom) +*(.text.remove_suid) +*(.text.mark_buffer_dirty) +*(.text.local_bh_enable) +*(.text.get_zeroed_page) +*(.text.get_vmalloc_info) +*(.text.flush_old_exec) +*(.text.dummy_inode_permission) +*(.text.__bio_add_page) +*(.text.prio_tree_replace) +*(.text.notify_change) +*(.text.mntput_no_expire) +*(.text.fput) +*(.text.__end_that_request_first) +*(.text.wake_up_bit) +*(.text.unuse_mm) +*(.text.skb_release_data) +*(.text.shrink_icache_memory) +*(.text.sched_balance_self) +*(.text.__pmd_alloc) +*(.text.pipe_poll) +*(.text.normal_poll) +*(.text.__free_pages) +*(.text.follow_mount) +*(.text.cdrom_start_packet_command) +*(.text.blk_recount_segments) +*(.text.bio_put) +*(.text.__alloc_skb) +*(.text.__wake_up) +*(.text.vm_stat_account) +*(.text.sys_fcntl) +*(.text.sys_fadvise64) +*(.text._raw_write_unlock) +*(.text.__pud_alloc) +*(.text.alloc_page_buffers) +*(.text.vfs_llseek) +*(.text.sockfd_lookup) +*(.text._raw_write_lock) +*(.text.put_compound_page) +*(.text.prune_dcache) +*(.text.pipe_readv) +*(.text.mempool_free) +*(.text.make_ahead_window) +*(.text.lru_add_drain) +*(.text.constant_test_bit) +*(.text.__clear_user) +*(.text.arch_unmap_area) +*(.text.anon_vma_link) +*(.text.sys_chroot) +*(.text.setup_arg_pages) +*(.text.radix_tree_preload) +*(.text.init_rwsem) +*(.text.generic_osync_inode) +*(.text.generic_delete_inode) +*(.text.do_sys_poll) +*(.text.dev_queue_xmit) +*(.text.default_llseek) +*(.text.__writeback_single_inode) +*(.text.vfs_ioctl) +*(.text.__up_write) +*(.text.unix_poll) +*(.text.sys_rt_sigprocmask) +*(.text.sock_recvmsg) +*(.text.recalc_bh_state) +*(.text.__put_unused_fd) +*(.text.process_backlog) +*(.text.locks_remove_posix) +*(.text.lease_modify) +*(.text.expand_files) +*(.text.end_buffer_read_nobh) +*(.text.d_splice_alias) +*(.text.debug_mutex_init_waiter) +*(.text.copy_from_user) +*(.text.cap_vm_enough_memory) +*(.text.show_vfsmnt) +*(.text.release_sock) +*(.text.pfifo_fast_enqueue) +*(.text.half_md4_transform) +*(.text.fs_may_remount_ro) +*(.text.do_fork) +*(.text.copy_hugetlb_page_range) +*(.text.cache_free_debugcheck) +*(.text.__tcp_select_window) +*(.text.task_handoff_register) +*(.text.sys_open) +*(.text.strlcpy) +*(.text.skb_copy_datagram_iovec) +*(.text.set_up_list3s) +*(.text.release_open_intent) +*(.text.qdisc_restart) +*(.text.n_tty_chars_in_buffer) +*(.text.inode_change_ok) +*(.text.__downgrade_write) +*(.text.debug_mutex_unlock) +*(.text.add_timer_randomness) +*(.text.sock_common_recvmsg) +*(.text.set_bh_page) +*(.text.printk_lock) +*(.text.path_release_on_umount) +*(.text.ip_output) +*(.text.ide_build_dmatable) +*(.text.__get_user_8) +*(.text.end_buffer_read_sync) +*(.text.__d_path) +*(.text.d_move) +*(.text.del_timer) +*(.text.constant_test_bit) +*(.text.blockable_page_cache_readahead) +*(.text.tty_read) +*(.text.sys_readlink) +*(.text.sys_faccessat) +*(.text.read_swap_cache_async) +*(.text.pty_write_room) +*(.text.page_address_in_vma) +*(.text.kthread) +*(.text.cfq_exit_io_context) +*(.text.__tcp_push_pending_frames) +*(.text.sys_pipe) +*(.text.submit_bio) +*(.text.pid_revalidate) +*(.text.page_referenced_file) +*(.text.lock_sock) +*(.text.get_page_state_node) +*(.text.generic_block_bmap) +*(.text.do_setitimer) +*(.text.dev_queue_xmit_nit) +*(.text.copy_from_read_buf) +*(.text.__const_udelay) +*(.text.console_conditional_schedule) +*(.text.wake_up_new_task) +*(.text.wait_for_completion_interruptible) +*(.text.tcp_rcv_rtt_update) +*(.text.sys_mlockall) +*(.text.set_fs_altroot) +*(.text.schedule_timeout) +*(.text.nr_free_pagecache_pages) +*(.text.nf_iterate) +*(.text.mapping_tagged) +*(.text.ip_queue_xmit) +*(.text.ip_local_deliver) +*(.text.follow_page) +*(.text.elf_map) +*(.text.dummy_file_permission) +*(.text.dispose_list) +*(.text.dentry_open) +*(.text.dentry_iput) +*(.text.bio_alloc) +*(.text.alloc_skb_from_cache) +*(.text.wait_on_page_bit) +*(.text.vfs_readdir) +*(.text.vfs_lstat) +*(.text.seq_escape) +*(.text.__posix_lock_file) +*(.text.mm_release) +*(.text.kref_put) +*(.text.ip_rcv) +*(.text.__iget) +*(.text.free_pages) +*(.text.find_mergeable_anon_vma) +*(.text.find_extend_vma) +*(.text.dummy_inode_listsecurity) +*(.text.bio_add_page) +*(.text.__vm_enough_memory) +*(.text.vfs_stat) +*(.text.tty_paranoia_check) +*(.text.tcp_read_sock) +*(.text.tcp_data_queue) +*(.text.sys_uname) +*(.text.sys_renameat) +*(.text.__strncpy_from_user) +*(.text.__mutex_init) +*(.text.__lookup_hash) +*(.text.kref_get) +*(.text.ip_route_input) +*(.text.__insert_inode_hash) +*(.text.do_sock_write) +*(.text.blk_done_softirq) +*(.text.__wake_up_sync) +*(.text.__vma_link_rb) +*(.text.tty_ioctl) +*(.text.tracesys) +*(.text.sys_getdents) +*(.text.sys_dup) +*(.text.stub_execve) +*(.text.sha_transform) +*(.text.radix_tree_tag_clear) +*(.text.put_unused_fd) +*(.text.put_files_struct) +*(.text.mpage_readpages) +*(.text.may_delete) +*(.text.kmem_cache_create) +*(.text.ip_mc_output) +*(.text.interleave_nodes) +*(.text.groups_search) +*(.text.generic_drop_inode) +*(.text.generic_commit_write) +*(.text.fcntl_setlk) +*(.text.exit_mmap) +*(.text.end_page_writeback) +*(.text.__d_rehash) +*(.text.debug_mutex_free_waiter) +*(.text.csum_ipv6_magic) +*(.text.count) +*(.text.cleanup_rbuf) +*(.text.check_spinlock_acquired_node) +*(.text.can_vma_merge_after) +*(.text.bio_endio) +*(.text.alloc_pidmap) +*(.text.write_ldt) +*(.text.vmtruncate_range) +*(.text.vfs_create) +*(.text.__user_walk) +*(.text.update_send_head) +*(.text.unmap_underlying_metadata) +*(.text.tty_ldisc_deref) +*(.text.tcp_setsockopt) +*(.text.tcp_send_ack) +*(.text.sys_pause) +*(.text.sys_gettimeofday) +*(.text.sync_dirty_buffer) +*(.text.strncmp) +*(.text.release_posix_timer) +*(.text.proc_file_read) +*(.text.prepare_to_wait) +*(.text.locks_mandatory_locked) +*(.text.interruptible_sleep_on_timeout) +*(.text.inode_sub_bytes) +*(.text.in_group_p) +*(.text.hrtimer_try_to_cancel) +*(.text.filldir64) +*(.text.fasync_helper) +*(.text.dummy_sb_pivotroot) +*(.text.d_lookup) +*(.text.d_instantiate) +*(.text.__d_find_alias) +*(.text.cpu_idle_wait) +*(.text.cond_resched_lock) +*(.text.chown_common) +*(.text.blk_congestion_wait) +*(.text.activate_page) +*(.text.unlock_buffer) +*(.text.tty_wakeup) +*(.text.tcp_v4_do_rcv) +*(.text.tcp_current_mss) +*(.text.sys_openat) +*(.text.sys_fchdir) +*(.text.strnlen_user) +*(.text.strnlen) +*(.text.strchr) +*(.text.sock_common_getsockopt) +*(.text.skb_checksum) +*(.text.remove_wait_queue) +*(.text.rb_replace_node) +*(.text.radix_tree_node_ctor) +*(.text.pty_chars_in_buffer) +*(.text.profile_hit) +*(.text.prio_tree_left) +*(.text.pgd_clear_bad) +*(.text.pfifo_fast_dequeue) +*(.text.page_referenced) +*(.text.open_exec) +*(.text.mmput) +*(.text.mm_init) +*(.text.__ide_dma_off_quietly) +*(.text.ide_dma_intr) +*(.text.hrtimer_start) +*(.text.get_io_context) +*(.text.__get_free_pages) +*(.text.find_first_zero_bit) +*(.text.file_free_rcu) +*(.text.dummy_socket_sendmsg) +*(.text.do_unlinkat) +*(.text.do_arch_prctl) +*(.text.destroy_inode) +*(.text.can_vma_merge_before) +*(.text.block_sync_page) +*(.text.block_prepare_write) +*(.text.bio_init) +*(.text.arch_ptrace) +*(.text.wake_up_inode) +*(.text.wait_on_retry_sync_kiocb) +*(.text.vma_prio_tree_next) +*(.text.tcp_rcv_space_adjust) +*(.text.__tcp_ack_snd_check) +*(.text.sys_utime) +*(.text.sys_recvmsg) +*(.text.sys_mremap) +*(.text.sys_bdflush) +*(.text.sleep_on) +*(.text.set_page_dirty_lock) +*(.text.seq_path) +*(.text.schedule_timeout_interruptible) +*(.text.sched_fork) +*(.text.rt_run_flush) +*(.text.profile_munmap) +*(.text.prepare_binprm) +*(.text.__pagevec_release_nonlru) +*(.text.m_show) +*(.text.lookup_mnt) +*(.text.__lookup_mnt) +*(.text.lock_timer_base) +*(.text.is_subdir) +*(.text.invalidate_bh_lru) +*(.text.init_buffer_head) +*(.text.ifind_fast) +*(.text.ide_dma_start) +*(.text.__get_page_state) +*(.text.flock_to_posix_lock) +*(.text.__find_symbol) +*(.text.do_futex) +*(.text.do_execve) +*(.text.dirty_writeback_centisecs_handler) +*(.text.dev_watchdog) +*(.text.can_share_swap_page) +*(.text.blkdev_put) +*(.text.bio_get_nr_vecs) +*(.text.xfrm_compile_policy) +*(.text.vma_prio_tree_insert) +*(.text.vfs_lstat_fd) +*(.text.__user_path_lookup_open) +*(.text.thread_return) +*(.text.tcp_send_delayed_ack) +*(.text.sock_def_error_report) +*(.text.shrink_slab) +*(.text.serial_out) +*(.text.seq_read) +*(.text.secure_ip_id) +*(.text.search_binary_handler) +*(.text.proc_pid_unhash) +*(.text.pagevec_lookup) +*(.text.new_inode) +*(.text.memcpy_toiovec) +*(.text.locks_free_lock) +*(.text.__lock_page) +*(.text.__lock_buffer) +*(.text.load_module) +*(.text.is_bad_inode) +*(.text.invalidate_inode_buffers) +*(.text.insert_vm_struct) +*(.text.inode_setattr) +*(.text.inode_add_bytes) +*(.text.ide_read_24) +*(.text.ide_get_error_location) +*(.text.ide_do_drive_cmd) +*(.text.get_locked_pte) +*(.text.get_filesystem_list) +*(.text.generic_file_open) +*(.text.follow_down) +*(.text.find_next_bit) +*(.text.__find_first_bit) +*(.text.exit_mm) +*(.text.exec_keys) +*(.text.end_buffer_write_sync) +*(.text.end_bio_bh_io_sync) +*(.text.dummy_socket_shutdown) +*(.text.d_rehash) +*(.text.d_path) +*(.text.do_ioctl) +*(.text.dget_locked) +*(.text.copy_thread_group_keys) +*(.text.cdrom_end_request) +*(.text.cap_bprm_apply_creds) +*(.text.blk_rq_bio_prep) +*(.text.__bitmap_intersects) +*(.text.bio_phys_segments) +*(.text.bio_free) +*(.text.arch_get_unmapped_area_topdown) +*(.text.writeback_in_progress) +*(.text.vfs_follow_link) +*(.text.tcp_rcv_state_process) +*(.text.tcp_check_space) +*(.text.sys_stat) +*(.text.sys_rt_sigreturn) +*(.text.sys_rt_sigaction) +*(.text.sys_remap_file_pages) +*(.text.sys_pwrite64) +*(.text.sys_fchownat) +*(.text.sys_fchmodat) +*(.text.strncat) +*(.text.strlcat) +*(.text.strcmp) +*(.text.steal_locks) +*(.text.sock_create) +*(.text.sk_stream_rfree) +*(.text.sk_stream_mem_schedule) +*(.text.skip_atoi) +*(.text.sk_alloc) +*(.text.show_stat) +*(.text.set_fs_pwd) +*(.text.set_binfmt) +*(.text.pty_unthrottle) +*(.text.proc_symlink) +*(.text.pipe_release) +*(.text.pageout) +*(.text.n_tty_write_wakeup) +*(.text.n_tty_ioctl) +*(.text.nr_free_zone_pages) +*(.text.migration_thread) +*(.text.mempool_free_slab) +*(.text.meminfo_read_proc) +*(.text.max_sane_readahead) +*(.text.lru_cache_add) +*(.text.kill_fasync) +*(.text.kernel_read) +*(.text.invalidate_mapping_pages) +*(.text.inode_has_buffers) +*(.text.init_once) +*(.text.inet_sendmsg) +*(.text.idedisk_issue_flush) +*(.text.generic_file_write) +*(.text.free_more_memory) +*(.text.__free_fdtable) +*(.text.filp_dtor) +*(.text.exit_sem) +*(.text.exit_itimers) +*(.text.error_interrupt) +*(.text.end_buffer_async_write) +*(.text.eligible_child) +*(.text.elf_map) +*(.text.dump_task_regs) +*(.text.dummy_task_setscheduler) +*(.text.dummy_socket_accept) +*(.text.dummy_file_free_security) +*(.text.__down_read) +*(.text.do_sock_read) +*(.text.do_sigaltstack) +*(.text.do_mremap) +*(.text.current_io_context) +*(.text.cpu_swap_callback) +*(.text.copy_vma) +*(.text.cap_bprm_set_security) +*(.text.blk_insert_request) +*(.text.bio_map_kern_endio) +*(.text.bio_hw_segments) +*(.text.bictcp_cong_avoid) +*(.text.add_interrupt_randomness) +*(.text.wait_for_completion) +*(.text.version_read_proc) +*(.text.unix_write_space) +*(.text.tty_ldisc_ref_wait) +*(.text.tty_ldisc_put) +*(.text.try_to_wake_up) +*(.text.tcp_v4_tw_remember_stamp) +*(.text.tcp_try_undo_dsack) +*(.text.tcp_may_send_now) +*(.text.sys_waitid) +*(.text.sys_sched_getparam) +*(.text.sys_getppid) +*(.text.sys_getcwd) +*(.text.sys_dup2) +*(.text.sys_chmod) +*(.text.sys_chdir) +*(.text.sprintf) +*(.text.sock_wfree) +*(.text.sock_aio_write) +*(.text.skb_drop_fraglist) +*(.text.skb_dequeue) +*(.text.set_close_on_exec) +*(.text.set_brk) +*(.text.seq_puts) +*(.text.SELECT_DRIVE) +*(.text.sched_exec) +*(.text.return_EIO) +*(.text.remove_from_page_cache) +*(.text.rcu_start_batch) +*(.text.__put_task_struct) +*(.text.proc_pid_readdir) +*(.text.proc_get_inode) +*(.text.prepare_to_wait_exclusive) +*(.text.pipe_wait) +*(.text.pipe_new) +*(.text.pdflush_operation) +*(.text.__pagevec_release) +*(.text.pagevec_lookup_tag) +*(.text.packet_rcv) +*(.text.n_tty_set_room) +*(.text.nr_free_pages) +*(.text.__net_timestamp) +*(.text.mpage_end_io_read) +*(.text.mod_timer) +*(.text.__memcpy) +*(.text.mb_cache_shrink_fn) +*(.text.lock_rename) +*(.text.kstrdup) +*(.text.is_ignored) +*(.text.int_very_careful) +*(.text.inotify_inode_is_dead) +*(.text.inotify_get_cookie) +*(.text.inode_get_bytes) +*(.text.init_timer) +*(.text.init_dev) +*(.text.inet_getname) +*(.text.ide_map_sg) +*(.text.__ide_dma_end) +*(.text.hrtimer_get_remaining) +*(.text.get_task_mm) +*(.text.get_random_int) +*(.text.free_pipe_info) +*(.text.filemap_write_and_wait_range) +*(.text.exit_thread) +*(.text.enter_idle) +*(.text.end_that_request_first) +*(.text.end_8259A_irq) +*(.text.dummy_file_alloc_security) +*(.text.do_group_exit) +*(.text.debug_mutex_init) +*(.text.cpuset_exit) +*(.text.cpu_idle) +*(.text.copy_semundo) +*(.text.copy_files) +*(.text.chrdev_open) +*(.text.cdrom_transfer_packet_command) +*(.text.cdrom_mode_sense) +*(.text.blk_phys_contig_segment) +*(.text.blk_get_queue) +*(.text.bio_split) +*(.text.audit_alloc) +*(.text.anon_pipe_buf_release) +*(.text.add_wait_queue_exclusive) +*(.text.add_wait_queue) +*(.text.acct_process) +*(.text.account) +*(.text.zeromap_page_range) +*(.text.yield) +*(.text.writeback_acquire) +*(.text.worker_thread) +*(.text.wait_on_page_writeback_range) +*(.text.__wait_on_buffer) +*(.text.vscnprintf) +*(.text.vmalloc_to_pfn) +*(.text.vgacon_save_screen) +*(.text.vfs_unlink) +*(.text.vfs_rmdir) +*(.text.unregister_md_personality) +*(.text.unlock_new_inode) +*(.text.unix_stream_sendmsg) +*(.text.unix_stream_recvmsg) +*(.text.unhash_process) +*(.text.udp_v4_lookup_longway) +*(.text.tty_ldisc_flush) +*(.text.tty_ldisc_enable) +*(.text.tty_hung_up_p) +*(.text.tty_buffer_free_all) +*(.text.tso_fragment) +*(.text.try_to_del_timer_sync) +*(.text.tcp_v4_err) +*(.text.tcp_unhash) +*(.text.tcp_seq_next) +*(.text.tcp_select_initial_window) +*(.text.tcp_sacktag_write_queue) +*(.text.tcp_cwnd_validate) +*(.text.sys_vhangup) +*(.text.sys_uselib) +*(.text.sys_symlink) +*(.text.sys_signal) +*(.text.sys_poll) +*(.text.sys_mount) +*(.text.sys_kill) +*(.text.sys_ioctl) +*(.text.sys_inotify_add_watch) +*(.text.sys_getuid) +*(.text.sys_getrlimit) +*(.text.sys_getitimer) +*(.text.sys_getgroups) +*(.text.sys_ftruncate) +*(.text.sysfs_lookup) +*(.text.sys_exit_group) +*(.text.stub_fork) +*(.text.sscanf) +*(.text.sock_map_fd) +*(.text.sock_get_timestamp) +*(.text.__sock_create) +*(.text.smp_call_function_single) +*(.text.sk_stop_timer) +*(.text.skb_copy_and_csum_datagram) +*(.text.__skb_checksum_complete) +*(.text.single_next) +*(.text.sigqueue_alloc) +*(.text.shrink_dcache_parent) +*(.text.select_idle_routine) +*(.text.run_workqueue) +*(.text.run_local_timers) +*(.text.remove_inode_hash) +*(.text.remove_dquot_ref) +*(.text.register_binfmt) +*(.text.read_cache_pages) +*(.text.rb_last) +*(.text.pty_open) +*(.text.proc_root_readdir) +*(.text.proc_pid_flush) +*(.text.proc_pident_lookup) +*(.text.proc_fill_super) +*(.text.proc_exe_link) +*(.text.posix_locks_deadlock) +*(.text.pipe_iov_copy_from_user) +*(.text.opost) +*(.text.nf_register_hook) +*(.text.netif_rx_ni) +*(.text.m_start) +*(.text.mpage_writepage) +*(.text.mm_alloc) +*(.text.memory_open) +*(.text.mark_buffer_async_write) +*(.text.lru_add_drain_all) +*(.text.locks_init_lock) +*(.text.locks_delete_lock) +*(.text.lock_hrtimer_base) +*(.text.load_script) +*(.text.__kill_fasync) +*(.text.ip_mc_sf_allow) +*(.text.__ioremap) +*(.text.int_with_check) +*(.text.int_sqrt) +*(.text.install_thread_keyring) +*(.text.init_page_buffers) +*(.text.inet_sock_destruct) +*(.text.idle_notifier_register) +*(.text.ide_execute_command) +*(.text.ide_end_drive_cmd) +*(.text.__ide_dma_host_on) +*(.text.hrtimer_run_queues) +*(.text.hpet_mask_rtc_irq_bit) +*(.text.__get_zone_counts) +*(.text.get_zone_counts) +*(.text.get_write_access) +*(.text.get_fs_struct) +*(.text.get_dirty_limits) +*(.text.generic_readlink) +*(.text.free_hot_page) +*(.text.finish_wait) +*(.text.find_inode) +*(.text.find_first_bit) +*(.text.__filemap_fdatawrite_range) +*(.text.__filemap_copy_from_user_iovec) +*(.text.exit_aio) +*(.text.elv_set_request) +*(.text.elv_former_request) +*(.text.dup_namespace) +*(.text.dupfd) +*(.text.dummy_socket_getsockopt) +*(.text.dummy_sb_post_mountroot) +*(.text.dummy_quotactl) +*(.text.dummy_inode_rename) +*(.text.__do_SAK) +*(.text.do_pipe) +*(.text.do_fsync) +*(.text.d_instantiate_unique) +*(.text.d_find_alias) +*(.text.deny_write_access) +*(.text.dentry_unhash) +*(.text.d_delete) +*(.text.datagram_poll) +*(.text.cpuset_fork) +*(.text.cpuid_read) +*(.text.copy_namespace) +*(.text.cond_resched) +*(.text.check_version) +*(.text.__change_page_attr) +*(.text.cfq_slab_kill) +*(.text.cfq_completed_request) +*(.text.cdrom_pc_intr) +*(.text.cdrom_decode_status) +*(.text.cap_capset_check) +*(.text.blk_put_request) +*(.text.bio_fs_destructor) +*(.text.bictcp_min_cwnd) +*(.text.alloc_chrdev_region) +*(.text.add_element) +*(.text.acct_update_integrals) +*(.text.write_boundary_block) +*(.text.writeback_release) +*(.text.writeback_inodes) +*(.text.wake_up_state) +*(.text.__wake_up_locked) +*(.text.wake_futex) +*(.text.wait_task_inactive) +*(.text.__wait_on_freeing_inode) +*(.text.wait_noreap_copyout) +*(.text.vmstat_start) +*(.text.vgacon_do_font_op) +*(.text.vfs_readv) +*(.text.vfs_quota_sync) +*(.text.update_queue) +*(.text.unshare_files) +*(.text.unmap_vm_area) +*(.text.unix_socketpair) +*(.text.unix_release_sock) +*(.text.unix_detach_fds) +*(.text.unix_create1) +*(.text.unix_bind) +*(.text.udp_sendmsg) +*(.text.udp_rcv) +*(.text.udp_queue_rcv_skb) +*(.text.uart_write) +*(.text.uart_startup) +*(.text.uart_open) +*(.text.tty_vhangup) +*(.text.tty_termios_baud_rate) +*(.text.tty_release) +*(.text.tty_ldisc_ref) +*(.text.throttle_vm_writeout) +*(.text.058) +*(.text.tcp_xmit_probe_skb) +*(.text.tcp_v4_send_check) +*(.text.tcp_v4_destroy_sock) +*(.text.tcp_sync_mss) +*(.text.tcp_snd_test) +*(.text.tcp_slow_start) +*(.text.tcp_send_fin) +*(.text.tcp_rtt_estimator) +*(.text.tcp_parse_options) +*(.text.tcp_ioctl) +*(.text.tcp_init_tso_segs) +*(.text.tcp_init_cwnd) +*(.text.tcp_getsockopt) +*(.text.tcp_fin) +*(.text.tcp_connect) +*(.text.tcp_cong_avoid) +*(.text.__tcp_checksum_complete_user) +*(.text.task_dumpable) +*(.text.sys_wait4) +*(.text.sys_utimes) +*(.text.sys_symlinkat) +*(.text.sys_socketpair) +*(.text.sys_rmdir) +*(.text.sys_readahead) +*(.text.sys_nanosleep) +*(.text.sys_linkat) +*(.text.sys_fstat) +*(.text.sysfs_readdir) +*(.text.sys_execve) +*(.text.sysenter_tracesys) +*(.text.sys_chown) +*(.text.stub_clone) +*(.text.strrchr) +*(.text.strncpy) +*(.text.stopmachine_set_state) +*(.text.sock_sendmsg) +*(.text.sock_release) +*(.text.sock_fasync) +*(.text.sock_close) +*(.text.sk_stream_write_space) +*(.text.sk_reset_timer) +*(.text.skb_split) +*(.text.skb_recv_datagram) +*(.text.skb_queue_tail) +*(.text.sk_attach_filter) +*(.text.si_swapinfo) +*(.text.simple_strtoll) +*(.text.set_termios) +*(.text.set_task_comm) +*(.text.set_shrinker) +*(.text.set_normalized_timespec) +*(.text.set_brk) +*(.text.serial_in) +*(.text.seq_printf) +*(.text.secure_dccp_sequence_number) +*(.text.rwlock_bug) +*(.text.rt_hash_code) +*(.text.__rta_fill) +*(.text.__request_resource) +*(.text.relocate_new_kernel) +*(.text.release_thread) +*(.text.release_mem) +*(.text.rb_prev) +*(.text.rb_first) +*(.text.random_poll) +*(.text.__put_super_and_need_restart) +*(.text.pty_write) +*(.text.ptrace_stop) +*(.text.proc_self_readlink) +*(.text.proc_root_lookup) +*(.text.proc_root_link) +*(.text.proc_pid_make_inode) +*(.text.proc_pid_attr_write) +*(.text.proc_lookupfd) +*(.text.proc_delete_inode) +*(.text.posix_same_owner) +*(.text.posix_block_lock) +*(.text.poll_initwait) +*(.text.pipe_write) +*(.text.pipe_read_fasync) +*(.text.pipe_ioctl) +*(.text.pdflush) +*(.text.pci_user_read_config_dword) +*(.text.page_readlink) +*(.text.null_lseek) +*(.text.nf_hook_slow) +*(.text.netlink_sock_destruct) +*(.text.netlink_broadcast) +*(.text.neigh_resolve_output) +*(.text.name_to_int) +*(.text.mwait_idle) +*(.text.mutex_trylock) +*(.text.mutex_debug_check_no_locks_held) +*(.text.m_stop) +*(.text.mpage_end_io_write) +*(.text.mpage_alloc) +*(.text.move_page_tables) +*(.text.mounts_open) +*(.text.__memset) +*(.text.memcpy_fromiovec) +*(.text.make_8259A_irq) +*(.text.lookup_user_key_possessed) +*(.text.lookup_create) +*(.text.locks_insert_lock) +*(.text.locks_alloc_lock) +*(.text.kthread_should_stop) +*(.text.kswapd) +*(.text.kobject_uevent) +*(.text.kobject_get_path) +*(.text.kobject_get) +*(.text.klist_children_put) +*(.text.__ip_route_output_key) +*(.text.ip_flush_pending_frames) +*(.text.ip_compute_csum) +*(.text.ip_append_data) +*(.text.ioc_set_batching) +*(.text.invalidate_inode_pages) +*(.text.__invalidate_device) +*(.text.install_arg_page) +*(.text.in_sched_functions) +*(.text.inotify_unmount_inodes) +*(.text.init_once) +*(.text.init_cdrom_command) +*(.text.inet_stream_connect) +*(.text.inet_sk_rebuild_header) +*(.text.inet_csk_addr2sockaddr) +*(.text.inet_create) +*(.text.ifind) +*(.text.ide_setup_dma) +*(.text.ide_outsw) +*(.text.ide_fixstring) +*(.text.ide_dma_setup) +*(.text.ide_cdrom_packet) +*(.text.ide_cd_put) +*(.text.ide_build_sglist) +*(.text.i8259A_shutdown) +*(.text.hung_up_tty_ioctl) +*(.text.hrtimer_nanosleep) +*(.text.hrtimer_init) +*(.text.hrtimer_cancel) +*(.text.hash_futex) +*(.text.group_send_sig_info) +*(.text.grab_cache_page_nowait) +*(.text.get_wchan) +*(.text.get_stack) +*(.text.get_page_state) +*(.text.getnstimeofday) +*(.text.get_node) +*(.text.get_kprobe) +*(.text.generic_unplug_device) +*(.text.free_task) +*(.text.frag_show) +*(.text.find_next_zero_string) +*(.text.filp_open) +*(.text.fillonedir) +*(.text.exit_io_context) +*(.text.exit_idle) +*(.text.exact_lock) +*(.text.eth_header) +*(.text.dummy_unregister_security) +*(.text.dummy_socket_post_create) +*(.text.dummy_socket_listen) +*(.text.dummy_quota_on) +*(.text.dummy_inode_follow_link) +*(.text.dummy_file_receive) +*(.text.dummy_file_mprotect) +*(.text.dummy_file_lock) +*(.text.dummy_file_ioctl) +*(.text.dummy_bprm_post_apply_creds) +*(.text.do_writepages) +*(.text.__down_interruptible) +*(.text.do_notify_resume) +*(.text.do_acct_process) +*(.text.del_timer_sync) +*(.text.default_rebuild_header) +*(.text.d_callback) +*(.text.dcache_readdir) +*(.text.ctrl_dumpfamily) +*(.text.cpuset_rmdir) +*(.text.copy_strings_kernel) +*(.text.con_write_room) +*(.text.complete_all) +*(.text.collect_sigign_sigcatch) +*(.text.clear_user) +*(.text.check_unthrottle) +*(.text.cdrom_release) +*(.text.cdrom_newpc_intr) +*(.text.cdrom_ioctl) +*(.text.cdrom_check_status) +*(.text.cdev_put) +*(.text.cdev_add) +*(.text.cap_ptrace) +*(.text.cap_bprm_secureexec) +*(.text.cache_alloc_refill) +*(.text.bmap) +*(.text.blk_run_queue) +*(.text.blk_queue_dma_alignment) +*(.text.blk_ordered_req_seq) +*(.text.blk_backing_dev_unplug) +*(.text.__bitmap_subset) +*(.text.__bitmap_and) +*(.text.bio_unmap_user) +*(.text.__bforget) +*(.text.bd_forget) +*(.text.bad_pipe_w) +*(.text.bad_get_user) +*(.text.audit_free) +*(.text.anon_vma_ctor) +*(.text.anon_pipe_buf_map) +*(.text.alloc_sock_iocb) +*(.text.alloc_fdset) +*(.text.aio_kick_handler) +*(.text.__add_entropy_words) +*(.text.add_disk_randomness) diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 692c737fedd..6df05e6034f 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -26,6 +26,7 @@ */ .text + .section .bootstrap.text .code32 .globl startup_32 /* %bx: 1 if coming from smp trampoline on secondary cpu */ @@ -192,7 +193,8 @@ startup_64: movq initial_code(%rip),%rax jmp *%rax - /* SMP bootup changes these two */ + /* SMP bootup changes these two */ + .align 8 .globl initial_code initial_code: .quad x86_64_start_kernel @@ -213,6 +215,11 @@ ENTRY(early_idt_handler) cmpl $2,early_recursion_flag(%rip) jz 1f call dump_stack +#ifdef CONFIG_KALLSYMS + leaq early_idt_ripmsg(%rip),%rdi + movq 8(%rsp),%rsi # get rip again + call __print_symbol +#endif 1: hlt jmp 1b early_recursion_flag: @@ -220,6 +227,8 @@ early_recursion_flag: early_idt_msg: .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n" +early_idt_ripmsg: + .asciz "RIP %s\n" .code32 ENTRY(no_long_mode) @@ -230,7 +239,7 @@ ENTRY(no_long_mode) .org 0xf00 .globl pGDT32 pGDT32: - .word gdt_end-cpu_gdt_table + .word gdt_end-cpu_gdt_table-1 .long cpu_gdt_table-__START_KERNEL_map .org 0xf10 @@ -286,8 +295,6 @@ NEXT_PAGE(level2_kernel_pgt) /* Module mapping starts here */ .fill 492,8,0 -NEXT_PAGE(empty_zero_page) - NEXT_PAGE(level3_physmem_pgt) .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ .fill 511,8,0 @@ -330,7 +337,7 @@ ENTRY(boot_level4_pgt) .align 16 .globl cpu_gdt_descr cpu_gdt_descr: - .word gdt_end-cpu_gdt_table + .word gdt_end-cpu_gdt_table-1 gdt: .quad cpu_gdt_table #ifdef CONFIG_SMP @@ -345,7 +352,8 @@ gdt: * Also sysret mandates a special GDT layout */ -.align PAGE_SIZE + .section .data.page_aligned, "aw" + .align PAGE_SIZE /* The TLS descriptors are currently at a different place compared to i386. Hopefully nobody expects them at a fixed place (Wine?) */ @@ -371,9 +379,12 @@ gdt_end: /* zero the remaining page */ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 -ENTRY(idt_table) - .rept 256 - .quad 0 - .quad 0 - .endr + .section .bss, "aw", @nobits + .align L1_CACHE_BYTES +ENTRY(idt_table) + .skip 256 * 16 + .section .bss.page_aligned, "aw", @nobits + .align PAGE_SIZE +ENTRY(empty_zero_page) + .skip PAGE_SIZE diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 4282d72b2a2..9cc7031b715 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -30,6 +30,9 @@ #include <linux/mc146818rtc.h> #include <linux/acpi.h> #include <linux/sysdev.h> +#ifdef CONFIG_ACPI +#include <acpi/acpi_bus.h> +#endif #include <asm/io.h> #include <asm/smp.h> @@ -47,6 +50,8 @@ static int no_timer_check; int disable_timer_pin_1 __initdata; +int timer_over_8254 __initdata = 0; + /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -248,10 +253,36 @@ static int __init enable_ioapic_setup(char *str) __setup("noapic", disable_ioapic_setup); __setup("apic", enable_ioapic_setup); +static int __init setup_disable_8254_timer(char *s) +{ + timer_over_8254 = -1; + return 1; +} +static int __init setup_enable_8254_timer(char *s) +{ + timer_over_8254 = 2; + return 1; +} + +__setup("disable_8254_timer", setup_disable_8254_timer); +__setup("enable_8254_timer", setup_enable_8254_timer); + #include <asm/pci-direct.h> #include <linux/pci_ids.h> #include <linux/pci.h> + +#ifdef CONFIG_ACPI + +static int nvidia_hpet_detected __initdata; + +static int __init nvidia_hpet_check(unsigned long phys, unsigned long size) +{ + nvidia_hpet_detected = 1; + return 0; +} +#endif + /* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC off. Check for an Nvidia or VIA PCI bridge and turn it off. Use pci direct infrastructure because this runs before the PCI subsystem. @@ -260,6 +291,8 @@ __setup("apic", enable_ioapic_setup); And another hack to disable the IOMMU on VIA chipsets. + ... and others. Really should move this somewhere else. + Kludge-O-Rama. */ void __init check_ioapic(void) { @@ -289,31 +322,43 @@ void __init check_ioapic(void) force_iommu) && !iommu_aperture_allowed) { printk(KERN_INFO - "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n"); + "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n"); iommu_aperture_disabled = 1; } #endif return; case PCI_VENDOR_ID_NVIDIA: #ifdef CONFIG_ACPI - /* All timer overrides on Nvidia - seem to be wrong. Skip them. */ - acpi_skip_timer_override = 1; - printk(KERN_INFO - "Nvidia board detected. Ignoring ACPI timer override.\n"); + /* + * All timer overrides on Nvidia are + * wrong unless HPET is enabled. + */ + nvidia_hpet_detected = 0; + acpi_table_parse(ACPI_HPET, + nvidia_hpet_check); + if (nvidia_hpet_detected == 0) { + acpi_skip_timer_override = 1; + printk(KERN_INFO "Nvidia board " + "detected. Ignoring ACPI " + "timer override.\n"); + } #endif /* RED-PEN skip them on mptables too? */ return; + + /* This should be actually default, but + for 2.6.16 let's do it for ATI only where + it's really needed. */ case PCI_VENDOR_ID_ATI: - if (apic_runs_main_timer != 0) - break; + if (timer_over_8254 == 1) { + timer_over_8254 = 0; printk(KERN_INFO - "ATI board detected. Using APIC/PM timer.\n"); - apic_runs_main_timer = 1; - nohpet = 1; + "ATI board detected. Disabling timer routing over 8254.\n"); + } return; } + /* No multi-function device? */ type = read_pci_config_byte(num,slot,func, PCI_HEADER_TYPE); @@ -1752,11 +1797,15 @@ static inline void unlock_ExtINT_logic(void) spin_unlock_irqrestore(&ioapic_lock, flags); } +int timer_uses_ioapic_pin_0; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. + * + * FIXME: really need to revamp this for modern platforms only. */ static inline void check_timer(void) { @@ -1779,13 +1828,17 @@ static inline void check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - enable_8259A_irq(0); + if (timer_over_8254 > 0) + enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; + if (pin1 == 0) + timer_uses_ioapic_pin_0 = 1; + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); @@ -1820,7 +1873,7 @@ static inline void check_timer(void) */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); if (timer_irq_works()) { - printk("works.\n"); + apic_printk(APIC_VERBOSE," works.\n"); nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -1832,9 +1885,9 @@ static inline void check_timer(void) */ clear_IO_APIC_pin(apic2, pin2); } - printk(" failed.\n"); + apic_printk(APIC_VERBOSE," failed.\n"); - if (nmi_watchdog) { + if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = 0; } @@ -1847,7 +1900,7 @@ static inline void check_timer(void) enable_8259A_irq(0); if (timer_irq_works()) { - apic_printk(APIC_QUIET, " works.\n"); + apic_printk(APIC_VERBOSE," works.\n"); return; } apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 30d2a1e545f..d8bd0b345b1 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -38,9 +38,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; j<NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "CPU%d ",j); + for_each_online_cpu(j) + seq_printf(p, "CPU%d ",j); seq_putc(p, '\n'); } @@ -53,10 +52,8 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for (j=0; j<NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", - kstat_cpu(j).irqs[i]); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif seq_printf(p, " %14s", irq_desc[i].handler->typename); @@ -68,15 +65,13 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 8b866a8572c..fa1d19ca700 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -37,10 +37,12 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/preempt.h> +#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/pgtable.h> #include <asm/kdebug.h> +#include <asm/uaccess.h> void jprobe_return_end(void); static void __kprobes arch_copy_kprobe(struct kprobe *p); @@ -51,7 +53,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* * returns non-zero if opcode modifies the interrupt flag. */ -static inline int is_IF_modifier(kprobe_opcode_t *insn) +static __always_inline int is_IF_modifier(kprobe_opcode_t *insn) { switch (*insn) { case 0xfa: /* cli */ @@ -82,7 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) * If it does, return the address of the 32-bit displacement word. * If not, return null. */ -static inline s32 *is_riprel(u8 *insn) +static s32 __kprobes *is_riprel(u8 *insn) { #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -222,12 +224,12 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); + mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); + mutex_unlock(&kprobe_mutex); } -static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -235,7 +237,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; } -static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; kcb->kprobe_status = kcb->prev_kprobe.status; @@ -243,7 +245,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; } -static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = p; @@ -512,13 +514,13 @@ static void __kprobes resume_execution(struct kprobe *p, *tos = orig_rip + (*tos - copy_rip); break; case 0xff: - if ((*insn & 0x30) == 0x10) { + if ((insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ /* Fix return addr; rip is correct. */ next_rip = regs->rip; *tos = orig_rip + (*tos - copy_rip); - } else if (((*insn & 0x31) == 0x20) || /* jmp near, absolute indirect */ - ((*insn & 0x31) == 0x21)) { /* jmp far, absolute indirect */ + } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ + ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ /* rip is correct. */ next_rip = regs->rip; } @@ -578,16 +580,62 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + const struct exception_table_entry *fixup; - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs, kcb); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the rip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->rip = (unsigned long)cur->addr; regs->eflags |= kcb->kprobe_old_rflags; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + fixup = search_exception_tables(regs->rip); + if (fixup) { + regs->rip = fixup->fixup; + return 1; + } + + /* + * fixup() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; } return 0; } @@ -601,6 +649,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 89fab51e20f..25ac8a3faae 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -140,7 +140,7 @@ static void load_segments(void) "\tmovl %0,%%ss\n" "\tmovl %0,%%fs\n" "\tmovl %0,%%gs\n" - : : "a" (__KERNEL_DS) + : : "a" (__KERNEL_DS) : "memory" ); } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index b8b9529fa89..c69fc43cee7 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -29,6 +29,8 @@ #define MISC_MCELOG_MINOR 227 #define NR_BANKS 6 +atomic_t mce_entry; + static int mce_dont_init; /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, @@ -139,8 +141,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start) static int mce_available(struct cpuinfo_x86 *c) { - return test_bit(X86_FEATURE_MCE, &c->x86_capability) && - test_bit(X86_FEATURE_MCA, &c->x86_capability); + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) @@ -173,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) int i; int panicm_found = 0; + atomic_inc(&mce_entry); + if (regs) notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); if (!banks) - return; + goto out2; memset(&m, 0, sizeof(struct mce)); m.cpu = safe_smp_processor_id(); @@ -267,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) out: /* Last thing done in the machine check exception to clear state. */ wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); } /* @@ -502,7 +507,7 @@ static struct miscdevice mce_log_device = { static int __init mcheck_disable(char *str) { mce_dont_init = 1; - return 0; + return 1; } /* mce=off disables machine check. Note you can reenable it later @@ -522,7 +527,7 @@ static int __init mcheck_enable(char *str) get_option(&str, &tolerant); else printk("mce= argument %s ignored. Please use /sys", str); - return 0; + return 1; } __setup("nomce", mcheck_disable); @@ -624,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) #endif /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int +static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d3ad7d81266..d13b241ad09 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -482,7 +482,7 @@ static void threshold_remove_device(unsigned int cpu) #endif /* get notified when a cpu comes on/off */ -static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, +static int threshold_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { /* cpu was unsigned int to begin with */ diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index dc49bfb6db0..083da7e606b 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -106,11 +106,11 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) { int cpu; unsigned char ver; - static int found_bsp=0; + cpumask_t tmp_map; if (!(m->mpc_cpuflag & CPU_ENABLED)) { disabled_cpus++; @@ -133,8 +133,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m) return; } - cpu = num_processors++; - + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + #if MAX_APICS < 255 if ((int)m->mpc_apicid > MAX_APICS) { printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", @@ -160,12 +162,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) * entry is BSP, and so on. */ cpu = 0; - - bios_cpu_apicid[0] = m->mpc_apicid; - x86_cpu_to_apicid[0] = m->mpc_apicid; - found_bsp = 1; - } else - cpu = num_processors - found_bsp; + } bios_cpu_apicid[cpu] = m->mpc_apicid; x86_cpu_to_apicid[cpu] = m->mpc_apicid; @@ -288,9 +285,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) memcpy(str,mpc->mpc_productid,12); str[12]=0; - printk(KERN_INFO "Product ID: %s ",str); + printk("Product ID: %s ",str); - printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic); + printk("APIC at: 0x%X\n",mpc->mpc_lapic); /* save the local APIC address, it might be non-default */ if (!acpi_lapic) @@ -691,7 +688,7 @@ void __init mp_register_lapic_address ( } -void __init mp_register_lapic ( +void __cpuinit mp_register_lapic ( u8 id, u8 enabled) { @@ -971,7 +968,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) */ int irq = gsi; if (gsi < MAX_GSI_NUM) { - if (gsi > 15) + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) gsi = pci_irq++; /* * Don't assign IRQ used by ACPI SCI diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 8be407a1f62..4e6357fe0ec 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -34,6 +34,7 @@ #include <asm/proto.h> #include <asm/kdebug.h> #include <asm/local.h> +#include <asm/mce.h> /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: @@ -162,9 +163,7 @@ int __init check_nmi_watchdog (void) local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) - continue; + for_each_online_cpu(cpu) { if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { endflag = 1; printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", @@ -236,6 +235,7 @@ static void enable_lapic_nmi_watchdog(void) { if (nmi_active < 0) { nmi_watchdog = NMI_LOCAL_APIC; + touch_nmi_watchdog(); setup_apic_nmi_watchdog(); } } @@ -456,15 +456,17 @@ static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog (void) { - int i; + if (nmi_watchdog > 0) { + unsigned cpu; - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for (i = 0; i < NR_CPUS; i++) - per_cpu(nmi_touch, i) = 1; + /* + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. + */ + for_each_present_cpu (cpu) + per_cpu(nmi_touch, cpu) = 1; + } touch_softlockup_watchdog(); } @@ -479,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) __get_cpu_var(nmi_touch) = 0; touched = 1; } +#ifdef CONFIG_X86_MCE + /* Could check oops_in_progress here too, but it's safer + not too */ + if (atomic_read(&mce_entry) > 0) + touched = 1; +#endif if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... @@ -533,6 +541,7 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { + vmalloc_sync_all(); rcu_assign_pointer(nmi_callback, callback); } diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 4ed391edd47..a9275c9557c 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -48,10 +48,16 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { struct page *page; int node; +#ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) node = pcibus_to_node(to_pci_dev(dev)->bus); else +#endif node = numa_node_id(); + + if (node < first_node(node_online_map)) + node = first_node(node_online_map); + page = alloc_pages_node(node, gfp, order); return page ? page_address(page) : NULL; } @@ -73,6 +79,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_mask == 0) dma_mask = 0xffffffff; + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; + /* Kludge to make it bug-to-bug compatible with i386. i386 uses the normal dma_mask for alloc_coherent. */ dma_mask &= *dev->dma_mask; diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 2fe23a6c361..82a7c9bfdfa 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -65,9 +65,7 @@ static u32 gart_unmapped_entry; #define for_all_nb(dev) \ dev = NULL; \ - while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\ - if (dev->bus->number == 0 && \ - (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31)) + while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL) static struct pci_dev *northbridges[MAX_NB]; static u32 northbridge_flush_word[MAX_NB]; @@ -114,10 +112,6 @@ static unsigned long alloc_iommu(int size) static void free_iommu(unsigned long offset, int size) { unsigned long flags; - if (size == 1) { - clear_bit(offset, iommu_gart_bitmap); - return; - } spin_lock_irqsave(&iommu_bitmap_lock, flags); __clear_bit_string(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); @@ -148,9 +142,12 @@ static void flush_gart(struct device *dev) if (!northbridges[i]) continue; /* Make sure the hardware actually executed the flush. */ - do { + for (;;) { pci_read_config_dword(northbridges[i], 0x9c, &w); - } while (w & 1); + if (!(w & 1)) + break; + cpu_relax(); + } } if (!flushed) printk("nothing to flush?\n"); @@ -228,11 +225,6 @@ static inline int need_iommu(struct device *dev, unsigned long addr, size_t size int mmu = high; if (force_iommu) mmu = 1; - if (no_iommu) { - if (high) - panic("PCI-DMA: high address but no IOMMU.\n"); - mmu = 0; - } return mmu; } @@ -241,11 +233,6 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t u64 mask = *dev->dma_mask; int high = addr + size >= mask; int mmu = high; - if (no_iommu) { - if (high) - panic("PCI-DMA: high address but no IOMMU.\n"); - mmu = 0; - } return mmu; } @@ -379,7 +366,7 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; - } + } } BUG_ON(iommu_page - iommu_start != pages); return 0; @@ -634,28 +621,30 @@ static int __init pci_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) { - no_iommu = 1; + if (swiotlb) return -1; - } - + if (no_iommu || (!force_iommu && end_pfn <= MAX_DMA32_PFN) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - no_iommu = 1; - no_iommu_init(); printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " - "but IOMMU not compiled in.\n" - KERN_ERR "WARNING 32bit PCI may malfunction.\n" - KERN_ERR "You might want to enable " - "CONFIG_GART_IOMMU\n"); + "but IOMMU not available.\n" + KERN_ERR "WARNING 32bit PCI may malfunction.\n"); } return -1; } + i = 0; + for_all_nb(dev) + i++; + if (i > MAX_NB) { + printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i); + return -1; + } + printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); aper_size = info.aper_size * 1024 * 1024; iommu_size = check_iommu_size(info.aper_base, aper_size); diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index 44adcc2d5e5..1f6ecc62061 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c @@ -12,9 +12,10 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { if (hwdev && bus + size > *hwdev->dma_mask) { - printk(KERN_ERR - "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", - name, (long long)bus, size, (long long)*hwdev->dma_mask); + if (*hwdev->dma_mask >= 0xffffffffULL) + printk(KERN_ERR + "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", + name, (long long)bus, size, (long long)*hwdev->dma_mask); return 0; } return 1; diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c index 5c51d10408a..bf421ed2680 100644 --- a/arch/x86_64/kernel/pmtimer.c +++ b/arch/x86_64/kernel/pmtimer.c @@ -68,7 +68,7 @@ int pmtimer_mark_offset(void) offset_delay = delta % (USEC_PER_SEC / HZ); rdtscll(tsc); - vxtime.last_tsc = tsc - offset_delay * cpu_khz; + vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000; /* don't calculate delay for first run, or if we've got less then a tick */ @@ -86,7 +86,7 @@ static unsigned pmtimer_wait_tick(void) for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK; a == b; b = inl(pmtmr_ioport) & ACPI_PM_MASK) - ; + cpu_relax(); return b; } @@ -97,6 +97,7 @@ void pmtimer_wait(unsigned us) a = pmtimer_wait_tick(); do { b = inl(pmtmr_ioport); + cpu_relax(); } while (cyc2us(b - a) < us); } @@ -120,7 +121,7 @@ unsigned int do_gettimeoffset_pm(void) static int __init nopmtimer_setup(char *s) { pmtmr_ioport = 0; - return 0; + return 1; } __setup("nopmtimer", nopmtimer_setup); diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 22a05dec81a..fb903e65e07 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -35,8 +35,8 @@ #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/random.h> -#include <linux/kprobes.h> #include <linux/notifier.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -66,24 +66,17 @@ EXPORT_SYMBOL(boot_option_idle_override); void (*pm_idle)(void); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); -static struct notifier_block *idle_notifier; -static DEFINE_SPINLOCK(idle_notifier_lock); +static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) { - unsigned long flags; - spin_lock_irqsave(&idle_notifier_lock, flags); - notifier_chain_register(&idle_notifier, n); - spin_unlock_irqrestore(&idle_notifier_lock, flags); + atomic_notifier_chain_register(&idle_notifier, n); } EXPORT_SYMBOL_GPL(idle_notifier_register); void idle_notifier_unregister(struct notifier_block *n) { - unsigned long flags; - spin_lock_irqsave(&idle_notifier_lock, flags); - notifier_chain_unregister(&idle_notifier, n); - spin_unlock_irqrestore(&idle_notifier_lock, flags); + atomic_notifier_chain_unregister(&idle_notifier, n); } EXPORT_SYMBOL(idle_notifier_unregister); @@ -93,13 +86,13 @@ static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; void enter_idle(void) { __get_cpu_var(idle_state) = CPU_IDLE; - notifier_call_chain(&idle_notifier, IDLE_START, NULL); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { __get_cpu_var(idle_state) = CPU_NOT_IDLE; - notifier_call_chain(&idle_notifier, IDLE_END, NULL); + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } /* Called from interrupts to signify idle end */ @@ -114,7 +107,7 @@ void exit_idle(void) * We use this if we don't have any better * idle routine.. */ -void default_idle(void) +static void default_idle(void) { local_irq_enable(); @@ -353,13 +346,6 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(me); - if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); @@ -508,7 +494,7 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r) +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) /* * switch_to(x,y) should switch tasks from x to y. @@ -527,8 +513,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); - unlazy_fpu(prev_p); - /* * Reload esp0, LDT and the page table pointer: */ @@ -586,11 +570,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) } /* - * Switch the PDA context. + * Switch the PDA and FPU contexts. */ prev->userrsp = read_pda(oldrsp); write_pda(oldrsp, next->userrsp); write_pda(pcurrent, next_p); + + /* This must be here to ensure both math_state_restore() and + kernel_fpu_begin() work consistently. + And the AMD workaround requires it to be after DS reload. */ + unlazy_fpu(prev_p); write_pda(kernelstack, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); @@ -794,10 +783,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) } case ARCH_GET_GS: { unsigned long base; + unsigned gsindex; if (task->thread.gsindex == GS_TLS_SEL) base = read_32bit_tls(task, GS_TLS); - else if (doit) - rdmsrl(MSR_KERNEL_GS_BASE, base); + else if (doit) { + asm("movl %%gs,%0" : "=r" (gsindex)); + if (gsindex) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gs; + } else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index 53205622351..2d50024c9f3 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -274,11 +274,6 @@ static int putreg(struct task_struct *child, return -EIO; value &= 0xffff; break; - case offsetof(struct user_regs_struct, rip): - /* Check if the new RIP address is canonical */ - if (value >= TASK_SIZE_OF(child)) - return -EIO; - break; } put_stack_long(child, regno - sizeof(struct pt_regs), value); return 0; @@ -420,9 +415,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case offsetof(struct user, u_debugreg[7]): /* See arch/i386/kernel/ptrace.c for an explanation of * this awkward check.*/ - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1) + data &= ~DR_CONTROL_RESERVED; + for(i=0; i<4; i++) + if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) break; if (i == 4) { child->thread.debugreg7 = data; @@ -605,12 +600,12 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) if (unlikely(current->audit_context)) { if (test_thread_flag(TIF_IA32)) { - audit_syscall_entry(current, AUDIT_ARCH_I386, + audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_rax, regs->rbx, regs->rcx, regs->rdx, regs->rsi); } else { - audit_syscall_entry(current, AUDIT_ARCH_X86_64, + audit_syscall_entry(AUDIT_ARCH_X86_64, regs->orig_rax, regs->rdi, regs->rsi, regs->rdx, regs->r10); @@ -621,7 +616,7 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) asmlinkage void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax); + audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); if ((test_thread_flag(TIF_SYSCALL_TRACE) || test_thread_flag(TIF_SINGLESTEP)) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 9435ab7d6fb..655b9192eeb 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -46,6 +46,7 @@ #include <linux/cpufreq.h> #include <linux/dmi.h> #include <linux/dma-mapping.h> +#include <linux/ctype.h> #include <asm/mtrr.h> #include <asm/uaccess.h> @@ -67,6 +68,7 @@ #include <asm/swiotlb.h> #include <asm/sections.h> #include <asm/gart-mapping.h> +#include <asm/dmi.h> /* * Machine setup.. @@ -91,6 +93,12 @@ int bootloader_type; unsigned long saved_video_mode; +/* + * Early DMI memory + */ +int dmi_alloc_index; +char dmi_alloc_data[DMI_MAX_DATA]; + /* * Setup options */ @@ -270,6 +278,13 @@ static void __init probe_roms(void) } } +/* Check for full argument with no trailing characters */ +static int fullarg(char *p, char *arg) +{ + int l = strlen(arg); + return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l])); +} + static __init void parse_cmdline_early (char ** cmdline_p) { char c = ' ', *to = command_line, *from = COMMAND_LINE; @@ -293,10 +308,10 @@ static __init void parse_cmdline_early (char ** cmdline_p) #endif #ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter init */ - if (!memcmp(from, "acpi=off", 8)) + if (fullarg(from,"acpi=off")) disable_acpi(); - if (!memcmp(from, "acpi=force", 10)) { + if (fullarg(from, "acpi=force")) { /* add later when we do DMI horrors: */ acpi_force = 1; acpi_disabled = 0; @@ -304,52 +319,49 @@ static __init void parse_cmdline_early (char ** cmdline_p) /* acpi=ht just means: do ACPI MADT parsing at bootup, but don't enable the full ACPI interpreter */ - if (!memcmp(from, "acpi=ht", 7)) { + if (fullarg(from, "acpi=ht")) { if (!acpi_force) disable_acpi(); acpi_ht = 1; } - else if (!memcmp(from, "pci=noacpi", 10)) + else if (fullarg(from, "pci=noacpi")) acpi_disable_pci(); - else if (!memcmp(from, "acpi=noirq", 10)) + else if (fullarg(from, "acpi=noirq")) acpi_noirq_set(); - else if (!memcmp(from, "acpi_sci=edge", 13)) + else if (fullarg(from, "acpi_sci=edge")) acpi_sci_flags.trigger = 1; - else if (!memcmp(from, "acpi_sci=level", 14)) + else if (fullarg(from, "acpi_sci=level")) acpi_sci_flags.trigger = 3; - else if (!memcmp(from, "acpi_sci=high", 13)) + else if (fullarg(from, "acpi_sci=high")) acpi_sci_flags.polarity = 1; - else if (!memcmp(from, "acpi_sci=low", 12)) + else if (fullarg(from, "acpi_sci=low")) acpi_sci_flags.polarity = 3; /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { + else if (fullarg(from, "acpi=strict")) { acpi_strict = 1; } #ifdef CONFIG_X86_IO_APIC - else if (!memcmp(from, "acpi_skip_timer_override", 24)) + else if (fullarg(from, "acpi_skip_timer_override")) acpi_skip_timer_override = 1; #endif #endif - if (!memcmp(from, "disable_timer_pin_1", 19)) + if (fullarg(from, "disable_timer_pin_1")) disable_timer_pin_1 = 1; - if (!memcmp(from, "enable_timer_pin_1", 18)) + if (fullarg(from, "enable_timer_pin_1")) disable_timer_pin_1 = -1; - if (!memcmp(from, "nolapic", 7) || - !memcmp(from, "disableapic", 11)) + if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) { + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); disable_apic = 1; + } - /* Don't confuse with noapictimer */ - if (!memcmp(from, "noapic", 6) && - (from[6] == ' ' || from[6] == 0)) + if (fullarg(from, "noapic")) skip_ioapic_setup = 1; - /* Make sure to not confuse with apic= */ - if (!memcmp(from, "apic", 4) && - (from[4] == ' ' || from[4] == 0)) { + if (fullarg(from,"apic")) { skip_ioapic_setup = 0; ioapic_force = 1; } @@ -388,7 +400,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) iommu_setup(from+6); } - if (!memcmp(from,"oops=panic", 10)) + if (fullarg(from,"oops=panic")) panic_on_oops = 1; if (!memcmp(from, "noexec=", 7)) @@ -423,6 +435,12 @@ static __init void parse_cmdline_early (char ** cmdline_p) else if(!memcmp(from, "elfcorehdr=", 11)) elfcorehdr_addr = memparse(from+11, &from); #endif + +#ifdef CONFIG_HOTPLUG_CPU + else if (!memcmp(from, "additional_cpus=", 16)) + setup_additional_cpus(from+16); +#endif + next_char: c = *(from++); if (!c) @@ -524,7 +542,7 @@ void __init alternative_instructions(void) static int __init noreplacement_setup(char *s) { no_replacement = 1; - return 0; + return 1; } __setup("noreplacement", noreplacement_setup); @@ -553,17 +571,28 @@ static inline void copy_edd(void) #endif #define EBDA_ADDR_POINTER 0x40E -static void __init reserve_ebda_region(void) + +unsigned __initdata ebda_addr; +unsigned __initdata ebda_size; + +static void discover_ebda(void) { - unsigned int addr; - /** + /* * there is a real-mode segmented pointer pointing to the * 4K EBDA area at 0x40E */ - addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER); - addr <<= 4; - if (addr) - reserve_bootmem_generic(addr, PAGE_SIZE); + ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; + ebda_addr <<= 4; + + ebda_size = *(unsigned short *)(unsigned long)ebda_addr; + + /* Round EBDA up to pages */ + if (ebda_size == 0) + ebda_size = 1; + ebda_size <<= 10; + ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); + if (ebda_size > 64*1024) + ebda_size = 64*1024; } void __init setup_arch(char **cmdline_p) @@ -605,11 +634,16 @@ void __init setup_arch(char **cmdline_p) * we are rounding upwards: */ end_pfn = e820_end_of_ram(); + num_physpages = end_pfn; /* for pfn_valid */ check_efer(); + discover_ebda(); + init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); + dmi_scan_machine(); + zap_low_mappings(0); #ifdef CONFIG_ACPI @@ -648,7 +682,8 @@ void __init setup_arch(char **cmdline_p) reserve_bootmem_generic(0, PAGE_SIZE); /* reserve ebda region */ - reserve_ebda_region(); + if (ebda_addr) + reserve_bootmem_generic(ebda_addr, ebda_size); #ifdef CONFIG_SMP /* @@ -702,6 +737,12 @@ void __init setup_arch(char **cmdline_p) check_ioapic(); + /* + * set this early, so we dont allocate cpu0 + * if MADT list doesnt list BSP first + * mpparse.c/MP_processor_info() allocates logical cpu numbers. + */ + cpu_set(0, cpu_present_map); #ifdef CONFIG_ACPI /* * Read APIC and some other early information from ACPI tables. @@ -830,7 +871,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) unsigned bits; #ifdef CONFIG_NUMA int node = 0; - unsigned apicid = phys_proc_id[cpu]; + unsigned apicid = hard_smp_processor_id(); #endif bits = 0; @@ -840,7 +881,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) /* Low order bits define the core id (index of core in socket) */ cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); /* Convert the APIC ID into the socket ID */ - phys_proc_id[cpu] >>= bits; + phys_proc_id[cpu] = phys_pkg_id(bits); #ifdef CONFIG_NUMA node = phys_proc_id[cpu]; @@ -866,8 +907,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) } numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_max_cores, node, cpu_core_id[cpu]); + printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", + cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]); #endif #endif } @@ -903,6 +944,10 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); + r = get_model_name(c); if (!r) { switch (c->x86) { @@ -921,8 +966,6 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) { c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_max_cores & (c->x86_max_cores - 1)) - c->x86_max_cores = 1; amd_detect_cmp(c); } @@ -939,7 +982,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id(0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -1009,7 +1051,7 @@ static void srat_detect_node(void) for now. */ node = apicid_to_node[hard_smp_processor_id()]; if (node == NUMA_NO_NODE) - node = 0; + node = first_node(node_online_map); numa_set_node(cpu, node); if (acpi_numa > 0) @@ -1148,6 +1190,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } + c->apicid = phys_pkg_id(0); + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -1255,7 +1299,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1338,8 +1382,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { int i; for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, &c->x86_capability) && - x86_cap_flags[i] != NULL ) + if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); } @@ -1397,10 +1440,22 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -static int __init run_dmi_scan(void) +#ifdef CONFIG_INPUT_PCSPKR +#include <linux/platform_device.h> +static __init int add_pcspkr(void) { - dmi_scan_machine(); - return 0; -} -core_initcall(run_dmi_scan); + struct platform_device *pd; + int ret; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); +#endif diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 70f1bb808a2..8a691fa6d39 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -33,7 +33,7 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; -struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); @@ -55,11 +55,11 @@ int __init nonx_setup(char *str) do_not_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } - return 0; + return 1; } __setup("noexec=", nonx_setup); /* parsed early actually */ -int force_personality32 = READ_IMPLIES_EXEC; +int force_personality32 = 0; /* noexec32=on|off Control non executable heap for 32bit processes. @@ -74,7 +74,7 @@ static int __init nonx32_setup(char *str) force_personality32 &= ~READ_IMPLIES_EXEC; else if (!strcmp(str, "off")) force_personality32 |= READ_IMPLIES_EXEC; - return 0; + return 1; } __setup("noexec32=", nonx32_setup); @@ -248,7 +248,7 @@ void __cpuinit cpu_init (void) switch (v + 1) { #if DEBUG_STKSZ > EXCEPTION_STKSZ case DEBUG_STACK: - cpu_pda[cpu].debugstack = (unsigned long)estacks; + cpu_pda(cpu)->debugstack = (unsigned long)estacks; estacks += DEBUG_STKSZ; break; #endif @@ -281,12 +281,12 @@ void __cpuinit cpu_init (void) * Clear all 6 debug registers: */ - set_debug(0UL, 0); - set_debug(0UL, 1); - set_debug(0UL, 2); - set_debug(0UL, 3); - set_debug(0UL, 6); - set_debug(0UL, 7); + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); fpu_init(); } diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 5876df116c9..e5f5ce7909a 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -443,9 +443,6 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) if (!user_mode(regs)) return 1; - if (try_to_freeze()) - goto no_signal; - if (!oldset) oldset = ¤t->blocked; @@ -463,7 +460,6 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) return handle_signal(signr, &info, &ka, oldset, regs); } - no_signal: /* Did we come from a system call? */ if ((long)regs->orig_rax >= 0) { /* Restart the system call - no handlers present */ diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 19ef012b1f1..4a6628b14d9 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -75,7 +75,7 @@ static inline void leave_mm(int cpu) { if (read_pda(mmu_state) == TLBSTATE_OK) BUG(); - clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } @@ -85,7 +85,7 @@ static inline void leave_mm(int cpu) * [cpu0: the cpu that switches] * 1) switch_mm() either 1a) or 1b) * 1a) thread switch to a different mm - * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask); + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); * Stop ipi delivery for the old mm. This is not synchronized with * the other cpus, but smp_invalidate_interrupt ignore flush ipis * for the wrong mm, and in the worst case we perform a superfluous @@ -95,7 +95,7 @@ static inline void leave_mm(int cpu) * was in lazy tlb mode. * 1a3) update cpu active_mm * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask); + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); * Now the other cpus will send tlb flush ipis. * 1a4) change cr3. * 1b) thread switch without mm change diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 67e4e28f4df..71a7222cf9c 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; /* core ID of each logical CPU */ u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +/* Last level cache ID of each logical CPU */ +u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -350,7 +353,7 @@ static void __cpuinit tsc_sync_wait(void) static __init int notscsync_setup(char *s) { notscsync = 1; - return 0; + return 1; } __setup("notscsync", notscsync_setup); @@ -445,6 +448,18 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); @@ -1152,8 +1176,6 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif - time_init_gtod(); - check_nmi_watchdog(); } @@ -1244,7 +1266,7 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } -static __init int setup_additional_cpus(char *s) +__init int setup_additional_cpus(char *s) { return get_option(&s, &additional_cpus); } diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index dba7237be5c..7392570f975 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -48,6 +48,8 @@ static void cpufreq_delayed_get(void); extern void i8254_timer_resume(void); extern int using_apic_timer; +static char *time_init_gtod(void); + DEFINE_SPINLOCK(rtc_lock); DEFINE_SPINLOCK(i8253_lock); @@ -59,7 +61,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ -static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ +int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -86,7 +88,8 @@ static inline unsigned int do_gettimeoffset_tsc(void) unsigned long t; unsigned long x; t = get_cycles_sync(); - if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ + if (t < vxtime.last_tsc) + t = vxtime.last_tsc; /* hack */ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; return x; } @@ -176,8 +179,9 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - /* Assume the lock function has either no stack frame or only a single word. - This checks if the address on the stack looks like a kernel text address. + /* Assume the lock function has either no stack frame or only a single + word. This checks if the address on the stack looks like a kernel + text address. There is a small window for false hits, but in that case the tick is just accounted to the spinlock function. Better would be to write these functions in assembler again @@ -240,17 +244,10 @@ static void set_rtc_mmss(unsigned long nowtime) real_minutes += 30; /* correct for half hour time zone */ real_minutes %= 60; -#if 0 - /* AMD 8111 is a really bad time keeper and hits this regularly. - It probably was an attempt to avoid screwing up DST, but ignore - that for now. */ if (abs(real_minutes - cmos_minutes) >= 30) { printk(KERN_WARNING "time.c: can't update CMOS clock " "from %d to %d\n", cmos_minutes, real_minutes); - } else -#endif - - { + } else { BIN_TO_BCD(real_seconds); BIN_TO_BCD(real_minutes); CMOS_WRITE(real_seconds, RTC_SECONDS); @@ -291,8 +288,7 @@ unsigned long long monotonic_clock(void) this_offset = hpet_readl(HPET_COUNTER); } while (read_seqretry(&xtime_lock, seq)); offset = (this_offset - last_offset); - offset *=(NSEC_PER_SEC/HZ)/hpet_tick; - return base + offset; + offset *= (NSEC_PER_SEC/HZ) / hpet_tick; } else { do { seq = read_seqbegin(&xtime_lock); @@ -301,47 +297,46 @@ unsigned long long monotonic_clock(void) base = monotonic_base; } while (read_seqretry(&xtime_lock, seq)); this_offset = get_cycles_sync(); - offset = (this_offset - last_offset)*1000/cpu_khz; - return base + offset; + offset = (this_offset - last_offset)*1000 / cpu_khz; } + return base + offset; } EXPORT_SYMBOL(monotonic_clock); static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) { - static long lost_count; - static int warned; - - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING - "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " + static long lost_count; + static int warned; + if (report_lost_ticks) { + printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost); + print_symbol("rip %s)\n", regs->rip); + } + + if (lost_count == 1000 && !warned) { + printk(KERN_WARNING "warning: many lost ticks.\n" + KERN_WARNING "Your time source seems to be instable or " "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. */ - warned = 1; - } else - lost_count++; + print_symbol("rip %s\n", regs->rip); + if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { + printk(KERN_WARNING "Falling back to HPET\n"); + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - + hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); + vxtime.mode = VXTIME_HPET; + do_gettimeoffset = do_gettimeoffset_hpet; + } + /* else should fall back to PIT, but code missing. */ + warned = 1; + } else + lost_count++; #ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - (like going into thermal throttle) - Give cpufreq a change to catch up. */ - if ((lost_count+1) % 25 == 0) { - cpufreq_delayed_get(); - } + /* In some cases the CPU can change frequency without us noticing + Give cpufreq a change to catch up. */ + if ((lost_count+1) % 25 == 0) + cpufreq_delayed_get(); #endif } @@ -349,7 +344,7 @@ void main_timer_handler(struct pt_regs *regs) { static unsigned long rtc_update = 0; unsigned long tsc; - int delay, offset = 0, lost = 0; + int delay = 0, offset = 0, lost = 0; /* * Here we are in the timer irq handler. We have irqs locally disabled (so we @@ -370,7 +365,7 @@ void main_timer_handler(struct pt_regs *regs) */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; delay = hpet_readl(HPET_COUNTER) - offset; - } else { + } else if (!pmtmr_ioport) { spin_lock(&i8253_lock); outb_p(0x00, 0x43); delay = inb_p(0x40); @@ -472,7 +467,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -static unsigned int cyc2ns_scale; +static unsigned int cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) @@ -509,43 +504,32 @@ unsigned long long sched_clock(void) static unsigned long get_cmos_time(void) { - unsigned int timeout = 1000000, year, mon, day, hour, min, sec; - unsigned char uip = 0, this = 0; + unsigned int year, mon, day, hour, min, sec; unsigned long flags; - -/* - * The Linux interpretation of the CMOS clock register contents: When the - * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the - * second which has precisely just started. Waiting for this can take up to 1 - * second, we timeout approximately after 2.4 seconds on a machine with - * standard 8.3 MHz ISA bus. - */ + unsigned extyear = 0; spin_lock_irqsave(&rtc_lock, flags); - while (timeout && (!uip || this)) { - uip |= this; - this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; - timeout--; - } - - /* - * Here we are safe to assume the registers won't change for a whole - * second, so we just go ahead and read them. - */ - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); + do { + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); +#ifdef CONFIG_ACPI + if (acpi_fadt.revision >= FADT2_REVISION_ID && + acpi_fadt.century) + extyear = CMOS_READ(acpi_fadt.century); +#endif + } while (sec != CMOS_READ(RTC_SECONDS)); spin_unlock_irqrestore(&rtc_lock, flags); /* * We know that x86-64 always uses BCD format, no need to check the * config register. - */ + */ BCD_TO_BIN(sec); BCD_TO_BIN(min); @@ -554,11 +538,17 @@ static unsigned long get_cmos_time(void) BCD_TO_BIN(mon); BCD_TO_BIN(year); - /* - * x86-64 systems only exists since 2002. - * This will work up to Dec 31, 2100 - */ - year += 2000; + if (extyear) { + BCD_TO_BIN(extyear); + year += extyear; + printk(KERN_INFO "Extended CMOS year: %d\n", extyear); + } else { + /* + * x86-64 systems only exists since 2002. + * This will work up to Dec 31, 2100 + */ + year += 2000; + } return mktime(year, mon, day, hour, min, sec); } @@ -601,7 +591,8 @@ static void cpufreq_delayed_get(void) cpufreq_delayed_issched = 1; if (!warned) { warned = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); + printk(KERN_DEBUG + "Losing some ticks... checking if CPU frequency changed.\n"); } schedule_work(&cpufreq_delayed_get_work); } @@ -624,9 +615,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; + lpj = &cpu_data[freq->cpu].loops_per_jiffy; #else - lpj = &boot_cpu_data.loops_per_jiffy; + lpj = &boot_cpu_data.loops_per_jiffy; #endif if (!ref_freq) { @@ -735,7 +726,7 @@ static __init int late_hpet_init(void) unsigned int ntimer; if (!vxtime.hpet_address) - return -1; + return 0; memset(&hd, 0, sizeof (hd)); @@ -763,9 +754,8 @@ static __init int late_hpet_init(void) int i; hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); - - for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; - timer++, i++) + timer = &hpet->hpet_timers[2]; + for (i = 2; i < ntimer; timer++, i++) hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; @@ -898,6 +888,7 @@ static struct irqaction irq0 = { void __init time_init(void) { char *timename; + char *gtod; #ifdef HPET_HACK_ENABLE_DANGEROUS if (!vxtime.hpet_address) { @@ -921,12 +912,13 @@ void __init time_init(void) -xtime.tv_sec, -xtime.tv_nsec); if (!hpet_init()) - vxtime_hz = (1000000000000000L + hpet_period / 2) / - hpet_period; + vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; else vxtime.hpet_address = 0; if (hpet_use_timer) { + /* set tick_nsec to use the proper rate for HPET */ + tick_nsec = TICK_NSEC_HPET; cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER @@ -942,21 +934,19 @@ void __init time_init(void) timename = "PIT"; } - printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n", - vxtime_hz / 1000000, vxtime_hz % 1000000, timename); + vxtime.mode = VXTIME_TSC; + gtod = time_init_gtod(); + + printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", + vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - vxtime.mode = VXTIME_TSC; vxtime.quot = (1000000L << 32) / vxtime_hz; vxtime.tsc_quot = (1000L << 32) / cpu_khz; vxtime.last_tsc = get_cycles_sync(); setup_irq(0, &irq0); set_cyc2ns_scale(cpu_khz); - -#ifndef CONFIG_SMP - time_init_gtod(); -#endif } /* @@ -978,9 +968,9 @@ __cpuinit int unsynchronized_tsc(void) } /* - * Decide after all CPUs are booted what mode gettimeofday should use. + * Decide what mode gettimeofday should use. */ -void __init time_init_gtod(void) +__init static char *time_init_gtod(void) { char *timetype; @@ -988,7 +978,10 @@ void __init time_init_gtod(void) notsc = 1; if (vxtime.hpet_address && notsc) { timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; #ifdef CONFIG_X86_PM_TIMER @@ -1005,8 +998,7 @@ void __init time_init_gtod(void) timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } - - printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); + return timetype; } __setup("report_lost_ticks", time_setup); @@ -1316,16 +1308,15 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) static int __init nohpet_setup(char *s) { nohpet = 1; - return 0; + return 1; } __setup("nohpet", nohpet_setup); - -static int __init notsc_setup(char *s) +int __init notsc_setup(char *s) { notsc = 1; - return 0; + return 1; } __setup("notsc", notsc_setup); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index ee1b2da9e5e..cea335e8746 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -30,6 +30,7 @@ #include <linux/moduleparam.h> #include <linux/nmi.h> #include <linux/kprobes.h> +#include <linux/kexec.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -47,8 +48,6 @@ #include <asm/proto.h> #include <asm/nmi.h> -extern struct gate_struct idt_table[256]; - asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); @@ -71,18 +70,20 @@ asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); -struct notifier_block *die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + vmalloc_sync_all(); + return atomic_notifier_chain_register(&die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&die_chain, nb); } +EXPORT_SYMBOL(unregister_die_notifier); static inline void conditional_sti(struct pt_regs *regs) { @@ -90,6 +91,22 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void preempt_conditional_sti(struct pt_regs *regs) +{ + preempt_disable(); + if (regs->eflags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void preempt_conditional_cli(struct pt_regs *regs) +{ + if (regs->eflags & X86_EFLAGS_IF) + local_irq_disable(); + /* Make sure to not schedule here because we could be running + on an exception stack. */ + preempt_enable_no_resched(); +} + static int kstack_depth_to_print = 10; #ifdef CONFIG_KALLSYMS @@ -108,7 +125,7 @@ int printk_address(unsigned long address) if (!modname) modname = delim = ""; return printk("<%016lx>{%s%s%s%s%+ld}", - address,delim,modname,delim,symname,offset); + address, delim, modname, delim, symname, offset); } #else int printk_address(unsigned long address) @@ -320,13 +337,12 @@ void show_registers(struct pt_regs *regs) show_stack(NULL, (unsigned long*)rsp); printk("\nCode: "); - if(regs->rip < PAGE_OFFSET) + if (regs->rip < PAGE_OFFSET) goto bad; - for(i=0;i<20;i++) - { + for (i=0; i<20; i++) { unsigned char c; - if(__get_user(c, &((unsigned char*)regs->rip)[i])) { + if (__get_user(c, &((unsigned char*)regs->rip)[i])) { bad: printk(" Bad RIP value."); break; @@ -371,6 +387,7 @@ void out_of_line_bug(void) static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; +static unsigned int die_nest_count; unsigned __kprobes long oops_begin(void) { @@ -385,6 +402,7 @@ unsigned __kprobes long oops_begin(void) else spin_lock(&die_lock); } + die_nest_count++; die_owner = cpu; console_verbose(); bust_spinlocks(1); @@ -395,7 +413,13 @@ void __kprobes oops_end(unsigned long flags) { die_owner = -1; bust_spinlocks(0); - spin_unlock_irqrestore(&die_lock, flags); + die_nest_count--; + if (die_nest_count) + /* We still own the lock */ + local_irq_restore(flags); + else + /* Nest count reaches zero, release the lock. */ + spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) panic("Oops"); } @@ -420,6 +444,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err) printk(KERN_ALERT "RIP "); printk_address(regs->rip); printk(" RSP <%016lx>\n", regs->rsp); + if (kexec_should_crash(current)) + crash_kexec(regs); } void die(const char * str, struct pt_regs * regs, long err) @@ -442,10 +468,14 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs) */ printk(str, safe_smp_processor_id()); show_registers(regs); + if (kexec_should_crash(current)) + crash_kexec(regs); if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); oops_end(flags); + nmi_exit(); + local_irq_enable(); do_exit(SIGSEGV); } @@ -455,8 +485,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, { struct task_struct *tsk = current; - conditional_sti(regs); - tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; @@ -465,7 +493,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, printk(KERN_INFO "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, str, - regs->rip,regs->rsp,error_code); + regs->rip, regs->rsp, error_code); if (info) force_sig_info(signr, info, tsk); @@ -479,9 +507,9 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->rip); - if (fixup) { + if (fixup) regs->rip = fixup->fixup; - } else + else die(str, regs, error_code); return; } @@ -493,6 +521,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ } @@ -507,6 +536,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ } @@ -520,7 +550,17 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR(18, SIGSEGV, "reserved", reserved) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) + +/* Runs on IST stack */ +asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) +{ + if (notify_die(DIE_TRAP, "stack segment", regs, error_code, + 12, SIGBUS) == NOTIFY_STOP) + return; + preempt_conditional_sti(regs); + do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); +} asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) { @@ -554,7 +594,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, printk(KERN_INFO "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, - regs->rip,regs->rsp,error_code); + regs->rip, regs->rsp, error_code); force_sig(SIGSEGV, tsk); return; @@ -654,8 +694,9 @@ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } + preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - return; + preempt_conditional_cli(regs); } /* Help handler running on IST stack to switch back to user stack @@ -693,7 +734,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, SIGTRAP) == NOTIFY_STOP) return; - conditional_sti(regs); + preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -738,11 +779,13 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, clear_dr7: set_debugreg(0UL, 7); + preempt_conditional_cli(regs); return; clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->eflags &= ~TF_MASK; + preempt_conditional_cli(regs); } static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) @@ -958,14 +1001,14 @@ void __init trap_init(void) static int __init oops_dummy(char *s) { panic_on_oops = 1; - return -1; + return 1; } __setup("oops=", oops_dummy); static int __init kstack_setup(char *s) { kstack_depth_to_print = simple_strtoul(s,NULL,0); - return 0; + return 1; } __setup("kstack=", kstack_setup); diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 74db0062d4a..b81f473c4a1 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -20,6 +20,12 @@ SECTIONS phys_startup_64 = startup_64 - LOAD_OFFSET; _text = .; /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + /* First the code that has to be first for bootstrapping */ + *(.bootstrap.text) + /* Then all the functions that are "hot" in profiles, to group them + onto the same hugetlb entry */ + #include "functionlist" + /* Then the rest */ *(.text) SCHED_TEXT LOCK_TEXT @@ -59,7 +65,7 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 3496abc8d37..1def21c9f7c 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -102,8 +102,6 @@ EXPORT_SYMBOL(cpu_callout_map); EXPORT_SYMBOL(screen_info); #endif -EXPORT_SYMBOL(get_wchan); - EXPORT_SYMBOL(rtc_lock); EXPORT_SYMBOL_GPL(set_nmi_callback); @@ -114,7 +112,6 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); #undef memcpy #undef memset #undef memmove -#undef strlen extern void * memset(void *,int,__kernel_size_t); extern size_t strlen(const char *); @@ -123,7 +120,6 @@ extern void * memcpy(void *,const void *,__kernel_size_t); extern void * __memcpy(void *,const void *,__kernel_size_t); EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); @@ -143,16 +139,12 @@ EXPORT_SYMBOL(rwsem_down_write_failed_thunk); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(die_chain); -EXPORT_SYMBOL(register_die_notifier); #ifdef CONFIG_SMP EXPORT_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(smp_num_siblings); #endif -extern void do_softirq_thunk(void); -EXPORT_SYMBOL(do_softirq_thunk); - #ifdef CONFIG_BUG EXPORT_SYMBOL(out_of_line_bug); #endif |