diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 391 | ||||
-rw-r--r-- | arch/x86/kernel/kgdb.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 28 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 31 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 231 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 73 |
13 files changed, 645 insertions, 164 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f3477bb8456..b67efd1cf59 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o +obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..51d959528b1 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c @@ -0,0 +1,391 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include <linux/irqflags.h> +#include <linux/notifier.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kdebug.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/debugreg.h> + +/* Unmasked kernel DR7 value */ +static unsigned long kdr7; + +/* + * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. + * Used to clear and verify the status of bits corresponding to DR0 - DR3 + */ +static const unsigned long dr7_masks[HBP_NUM] = { + 0x000f0003, /* LEN0, R/W0, G0, L0 */ + 0x00f0000c, /* LEN1, R/W1, G1, L1 */ + 0x0f000030, /* LEN2, R/W2, G2, L2 */ + 0xf00000c0 /* LEN3, R/W3, G3, L3 */ +}; + + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | + DR_GLOBAL_SLOWDOWN; + return bp_info; +} + +void arch_update_kernel_hw_breakpoint(void *unused) +{ + struct hw_breakpoint *bp; + int i, cpu = get_cpu(); + unsigned long temp_kdr7 = 0; + + /* Don't allow debug exceptions while we update the registers */ + set_debugreg(0UL, 7); + + for (i = hbp_kernel_pos; i < HBP_NUM; i++) { + per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; + if (bp) { + temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); + set_debugreg(bp->info.address, i); + } + } + + /* No need to set DR6. Update the debug registers with kernel-space + * breakpoint values from kdr7 and user-space requests from the + * current process + */ + kdr7 = temp_kdr7; + set_debugreg(kdr7 | current->thread.debugreg7, 7); + put_cpu(); +} + +/* + * Install the thread breakpoints in their debug registers. + */ +void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + + switch (hbp_kernel_pos) { + case 4: + set_debugreg(thread->debugreg[3], 3); + case 3: + set_debugreg(thread->debugreg[2], 2); + case 2: + set_debugreg(thread->debugreg[1], 1); + case 1: + set_debugreg(thread->debugreg[0], 0); + default: + break; + } + + /* No need to set DR6 */ + set_debugreg((kdr7 | thread->debugreg7), 7); +} + +/* + * Install the debug register values for just the kernel, no thread. + */ +void arch_uninstall_thread_hw_breakpoint() +{ + /* Clear the user-space portion of debugreg7 by setting only kdr7 */ + set_debugreg(kdr7, 7); + +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case HW_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case HW_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case HW_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +{ + /* + * User-space requests will always have the address field populated + * Symbol names from user-space are rejected + */ + if (tsk && bp->info.name) + return -EINVAL; + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (bp->info.name) + bp->info.address = (unsigned long) + kallsyms_lookup_name(bp->info.name); + if (bp->info.address) + return 0; + return -EINVAL; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk) +{ + unsigned int align; + int ret = -EINVAL; + + switch (bp->info.type) { + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + case HW_BREAKPOINT_EXECUTE: + if ((!arch_check_va_in_userspace(bp->info.address, + bp->info.len)) && + bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) + return ret; + break; + case HW_BREAKPOINT_WRITE: + break; + case HW_BREAKPOINT_RW: + break; + default: + return ret; + } + + switch (bp->info.len) { + case HW_BREAKPOINT_LEN_1: + align = 0; + break; + case HW_BREAKPOINT_LEN_2: + align = 1; + break; + case HW_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->triggered) + ret = arch_store_info(bp, tsk); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. + */ + if (bp->info.address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(bp->info.address, + bp->info.len)) + return -EFAULT; + } + return 0; +} + +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + struct hw_breakpoint *bp = thread->hbp[pos]; + + thread->debugreg7 &= ~dr7_masks[pos]; + if (bp) { + thread->debugreg[pos] = bp->info.address; + thread->debugreg7 |= encode_dr7(pos, bp->info.len, + bp->info.type); + } else + thread->debugreg[pos] = 0; +} + +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *thread = &(tsk->thread); + + thread->debugreg7 = 0; + for (i = 0; i < HBP_NUM; i++) + thread->debugreg[i] = 0; +} + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap<n> set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). + * + * NOTIFY_STOP returned for all other cases + * + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct hw_breakpoint *bp; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + /* Lazy debug register switching */ + if (!test_tsk_thread_flag(current, TIF_DEBUG)) + arch_uninstall_thread_hw_breakpoint(); + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + /* + * Find the corresponding hw_breakpoint structure and + * invoke its triggered callback. + */ + if (i >= hbp_kernel_pos) + bp = per_cpu(this_hbp_kernel[i], cpu); + else { + bp = current->thread.hbp[i]; + if (bp) + rc = NOTIFY_DONE; + } + /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); + /* + * bp can be NULL due to lazy debug register switching + * or due to the delay between updates of hbp_kernel_pos + * and this_hbp_kernel. + */ + if (!bp) + continue; + + (bp->triggered)(bp, args->regs); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); + put_cpu(); + return rc; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3..34e86b67550 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,7 @@ #include <linux/smp.h> #include <linux/nmi.h> +#include <asm/debugreg.h> #include <asm/apicdef.h> #include <asm/system.h> @@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b00..b5b1848c533 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -54,6 +54,7 @@ #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/alternative.h> +#include <asm/debugreg.h> void jprobe_return_end(void); @@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; ret = NOTIFY_STOP; + } break; case DIE_GPF: /* diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d0013..c843f8406da 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -25,6 +25,7 @@ #include <asm/desc.h> #include <asm/system.h> #include <asm/cacheflush.h> +#include <asm/debugreg.h> static void set_idt(void *newidt, __u16 limit) { @@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e9..4a8bb82248a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/debugreg.h> static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994dd6a4a2a..fc6e4b773fc 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -17,6 +17,8 @@ #include <asm/uaccess.h> #include <asm/i387.h> #include <asm/ds.h> +#include <asm/debugreg.h> +#include <asm/hw_breakpoint.h> unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -48,6 +50,8 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -108,12 +112,8 @@ void flush_thread(void) clear_tsk_thread_flag(tsk, TIF_DEBUG); - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -195,16 +195,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524984a..00a8fe4c58b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -58,6 +58,8 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> +#include <asm/hw_breakpoint.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -262,7 +264,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->thread.io_bitmap_ptr = NULL; tsk = current; + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); @@ -282,10 +290,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); +out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -424,6 +435,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb5407b9..89c46f1259d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,6 +52,8 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> +#include <asm/hw_breakpoint.h> asmlinkage extern void ret_from_fork(void); @@ -245,6 +247,8 @@ void release_thread(struct task_struct *dead_task) BUG(); } } + if (unlikely(dead_task->thread.debugreg7)) + flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -300,12 +304,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; + p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(me, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -344,6 +354,9 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); + return err; } @@ -489,6 +502,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); + return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c1..b457f78b7db 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -34,6 +34,7 @@ #include <asm/prctl.h> #include <asm/proto.h> #include <asm/ds.h> +#include <asm/hw_breakpoint.h> #include <trace/syscall.h> @@ -136,11 +137,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -265,15 +261,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -464,95 +451,159 @@ static int genregs_set(struct task_struct *target, } /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, + unsigned *type) { - switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; - } - return 0; + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) { + struct thread_struct *thread = &(current->thread); int i; - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < hbp_kernel_pos; i++) + /* + * We will check bp->info.address against the address stored in + * thread's hbp structure and not debugreg[i]. This is to ensure + * that the corresponding bit for 'i' in DR7 register is enabled + */ + if (bp->info.address == thread->hbp[i]->info.address) + break; - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + thread->debugreg6 |= (DR_TRAP0 << i); +} - switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7 = thread->debugreg7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct hw_breakpoint *bp; + + data &= ~DR_CONTROL_RESERVED; +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->hbp[i]; + + if (!enabled) { + if (bp) { + /* Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + unregister_user_hw_breakpoint(tsk, bp); + kfree(bp); + } + continue; + } + if (!bp) { + rc = -ENOMEM; + bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (bp) { + bp->info.address = thread->debugreg[i]; + bp->triggered = ptrace_triggered; + bp->info.len = len; + bp->info.type = type; + rc = register_user_hw_breakpoint(tsk, bp); + if (rc) + kfree(bp); + } + } else + rc = modify_user_hw_breakpoint(tsk, bp); + if (rc) + break; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; + + if (n < HBP_NUM) + val = thread->debugreg[n]; + else if (n == 6) + val = thread->debugreg6; + else if (n == 7) + val = thread->debugreg7; + return val; +} - case 7: - /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) - */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; +/* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + tsk->thread.debugreg6 = val; + goto ret_path; } + if (n < HBP_NUM) { + if (thread->hbp[n]) { + if (arch_check_va_in_userspace(val, + thread->hbp[n]->info.len) == 0) { + rc = -EIO; + goto ret_path; + } + thread->hbp[n]->info.address = val; + } + thread->debugreg[n] = val; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); - return 0; +ret_path: + return rc; } /* diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94..0f89a4f20db 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* - * Re-enable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if (current->thread.debugreg7) - set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda69ee6..dee0f3d814a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -63,6 +63,7 @@ #include <asm/apic.h> #include <asm/setup.h> #include <asm/uv/uv.h> +#include <asm/debugreg.h> #include <linux/mc146818rtc.h> #include <asm/smpboot_hooks.h> @@ -326,6 +327,7 @@ notrace static void __cpuinit start_secondary(void *unused) setup_secondary_clock(); wmb(); + load_debug_registers(); cpu_idle(); } @@ -1254,6 +1256,7 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5f935f0d586..286d64eba31 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -530,77 +530,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); /* Catch kmemcheck conditions first of all! */ - if (condition & DR_STEP && kmemcheck_trap(regs)) + if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) + /* Store the virtualized DR6 value */ + tsk->thread.debugreg6 = dr6; + + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; + if (regs->flags & X86_VM_MASK) { + handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, 1); + return; } -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). + * Single-stepping through system calls: ignore any exceptions in + * kernel space, but re-enable TF when returning to user mode. + * + * We already checked v86 mode above, so we can check for kernel mode + * by just checking the CPL of CS. */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if ((dr6 & DR_STEP) && !user_mode(regs)) { + tsk->thread.debugreg6 &= ~DR_STEP; + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); + si_code = get_si_code(tsk->thread.debugreg6); + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); - return; -#ifdef CONFIG_X86_32 -debug_vm86: - /* reenable preemption: handle_vm86_trap() might sleep */ - dec_preempt_count(); - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); return; } |