diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 13 | ||||
-rw-r--r-- | arch/x86_64/Kconfig.debug | 10 | ||||
-rw-r--r-- | arch/x86_64/kernel/kprobes.c | 183 | ||||
-rw-r--r-- | arch/x86_64/kernel/ptrace.c | 43 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/x86_64/oprofile/Kconfig | 6 |
6 files changed, 105 insertions, 152 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 21afa69a086..4cce2f6f170 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -532,8 +532,21 @@ source "drivers/firmware/Kconfig" source fs/Kconfig +menu "Instrumentation Support" + depends on EXPERIMENTAL + source "arch/x86_64/oprofile/Kconfig" +config KPROBES + bool "Kprobes (EXPERIMENTAL)" + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". +endmenu + source "arch/x86_64/Kconfig.debug" source "security/Kconfig" diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index 9cf1410d2f5..d584ecc27ea 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug @@ -33,16 +33,6 @@ config IOMMU_DEBUG options. See Documentation/x86_64/boot-options.txt for more details. -config KPROBES - bool "Kprobes" - depends on DEBUG_KERNEL - help - Kprobes allows you to trap at almost any kernel address and - execute a callback function. register_kprobe() establishes - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - config IOMMU_LEAK bool "IOMMU leak tracing" depends on DEBUG_KERNEL diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 76a28b007be..dddeb678b44 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -34,7 +34,6 @@ #include <linux/config.h> #include <linux/kprobes.h> #include <linux/ptrace.h> -#include <linux/spinlock.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/preempt.h> @@ -44,17 +43,10 @@ #include <asm/kdebug.h> static DECLARE_MUTEX(kprobe_mutex); - -static struct kprobe *current_kprobe; -static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags; -static struct kprobe *kprobe_prev; -static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; -static struct pt_regs jprobe_saved_regs; -static long *jprobe_saved_rsp; void jprobe_return_end(void); -/* copy of the kernel stack at the probe fire time */ -static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* * returns non-zero if opcode modifies the interrupt flag. @@ -236,29 +228,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) up(&kprobe_mutex); } -static inline void save_previous_kprobe(void) +static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { - kprobe_prev = current_kprobe; - kprobe_status_prev = kprobe_status; - kprobe_old_rflags_prev = kprobe_old_rflags; - kprobe_saved_rflags_prev = kprobe_saved_rflags; + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags; + kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; } -static inline void restore_previous_kprobe(void) +static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - current_kprobe = kprobe_prev; - kprobe_status = kprobe_status_prev; - kprobe_old_rflags = kprobe_old_rflags_prev; - kprobe_saved_rflags = kprobe_saved_rflags_prev; + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags; + kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; } -static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { - current_kprobe = p; - kprobe_saved_rflags = kprobe_old_rflags + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags = (regs->eflags & (TF_MASK | IF_MASK)); if (is_IF_modifier(p->ainsn.insn)) - kprobe_saved_rflags &= ~IF_MASK; + kcb->kprobe_saved_rflags &= ~IF_MASK; } static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -272,6 +265,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } +/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -292,32 +286,30 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, } } -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled thorough out this function. - */ int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); + struct kprobe_ctlblk *kcb; - /* We're in an interrupt, but this is clear and BUG()-safe. */ + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Check we're not actually recursing */ if (kprobe_running()) { - /* We *are* holding lock here, so this is safe. - Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS && + if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; - regs->eflags |= kprobe_saved_rflags; - unlock_kprobes(); + regs->eflags |= kcb->kprobe_saved_rflags; goto no_kprobe; - } else if (kprobe_status == KPROBE_HIT_SSDONE) { + } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { /* TODO: Provide re-entrancy from * post_kprobes_handler() and avoid exception * stack corruption while single-stepping on @@ -325,6 +317,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs) */ arch_disarm_kprobe(p); regs->rip = (unsigned long)p->addr; + reset_current_kprobe(); ret = 1; } else { /* We have reentered the kprobe_handler(), since @@ -334,27 +327,24 @@ int __kprobes kprobe_handler(struct pt_regs *regs) * of the new probe without calling any user * handlers. */ - save_previous_kprobe(); - set_current_kprobe(p, regs); + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); p->nmissed++; prepare_singlestep(p, regs); - kprobe_status = KPROBE_REENTER; + kcb->kprobe_status = KPROBE_REENTER; return 1; } } else { - p = current_kprobe; + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } } - /* If it's not ours, can't be delete race, (we hold lock). */ goto no_kprobe; } - lock_kprobes(); p = get_kprobe(addr); if (!p) { - unlock_kprobes(); if (*addr != BREAKPOINT_INSTRUCTION) { /* * The breakpoint instruction was removed right @@ -372,8 +362,8 @@ int __kprobes kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - kprobe_status = KPROBE_HIT_ACTIVE; - set_current_kprobe(p, regs); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -381,7 +371,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs) ss_probe: prepare_singlestep(p, regs); - kprobe_status = KPROBE_HIT_SS; + kcb->kprobe_status = KPROBE_HIT_SS; return 1; no_kprobe: @@ -409,9 +399,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) struct kretprobe_instance *ri = NULL; struct hlist_head *head; struct hlist_node *node, *tmp; - unsigned long orig_ret_address = 0; + unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); /* @@ -450,13 +441,14 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); regs->rip = orig_ret_address; - unlock_kprobes(); + reset_current_kprobe(); + spin_unlock_irqrestore(&kretprobe_lock, flags); preempt_enable_no_resched(); /* * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption. + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) */ return 1; } @@ -483,7 +475,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, + struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = (unsigned long *)regs->rsp; unsigned long next_rip = 0; @@ -498,7 +491,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) switch (*insn) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); - *tos |= kprobe_old_rflags; + *tos |= kcb->kprobe_old_rflags; break; case 0xc3: /* ret/lret */ case 0xcb: @@ -537,30 +530,28 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) } } -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. And we hold kprobe lock. - */ int __kprobes post_kprobe_handler(struct pt_regs *regs) { - if (!kprobe_running()) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) return 0; - if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { - kprobe_status = KPROBE_HIT_SSDONE; - current_kprobe->post_handler(current_kprobe, regs, 0); + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); } - resume_execution(current_kprobe, regs); - regs->eflags |= kprobe_saved_rflags; + resume_execution(cur, regs, kcb); + regs->eflags |= kcb->kprobe_saved_rflags; /* Restore the original saved kprobes variables and continue. */ - if (kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); goto out; - } else { - unlock_kprobes(); } + reset_current_kprobe(); out: preempt_enable_no_resched(); @@ -575,18 +566,19 @@ out: return 1; } -/* Interrupts disabled, kprobe_lock held. */ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { - if (current_kprobe->fault_handler - && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; - if (kprobe_status & KPROBE_HIT_SS) { - resume_execution(current_kprobe, regs); - regs->eflags |= kprobe_old_rflags; + if (kcb->kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs, kcb); + regs->eflags |= kcb->kprobe_old_rflags; - unlock_kprobes(); + reset_current_kprobe(); preempt_enable_no_resched(); } return 0; @@ -599,39 +591,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; break; case DIE_DEBUG: if (post_kprobe_handler(args->regs)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; break; case DIE_GPF: - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) - return NOTIFY_STOP; - break; case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - return NOTIFY_DONE; + return ret; } int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - jprobe_saved_regs = *regs; - jprobe_saved_rsp = (long *) regs->rsp; - addr = (unsigned long)jprobe_saved_rsp; + kcb->jprobe_saved_regs = *regs; + kcb->jprobe_saved_rsp = (long *) regs->rsp; + addr = (unsigned long)(kcb->jprobe_saved_rsp); /* * As Linus pointed out, gcc assumes that the callee * owns the argument space and could overwrite it, e.g. @@ -639,7 +633,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * we also save and restore enough stack bytes to cover * the argument area. */ - memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); + memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, + MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; regs->rip = (unsigned long)(jp->entry); return 1; @@ -647,36 +642,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) void __kprobes jprobe_return(void) { - preempt_enable_no_resched(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + asm volatile (" xchg %%rbx,%%rsp \n" " int3 \n" " .globl jprobe_return_end \n" " jprobe_return_end: \n" " nop \n"::"b" - (jprobe_saved_rsp):"memory"); + (kcb->jprobe_saved_rsp):"memory"); } int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->rip - 1); - unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; + unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); struct jprobe *jp = container_of(p, struct jprobe, kp); if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if ((long *)regs->rsp != jprobe_saved_rsp) { + if ((long *)regs->rsp != kcb->jprobe_saved_rsp) { struct pt_regs *saved_regs = - container_of(jprobe_saved_rsp, struct pt_regs, rsp); + container_of(kcb->jprobe_saved_rsp, + struct pt_regs, rsp); printk("current rsp %p does not match saved rsp %p\n", - (long *)regs->rsp, jprobe_saved_rsp); + (long *)regs->rsp, kcb->jprobe_saved_rsp); printk("Saved registers for jprobe %p\n", jp); show_registers(saved_regs); printk("Current registers\n"); show_registers(regs); BUG(); } - *regs = jprobe_saved_regs; - memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, + *regs = kcb->jprobe_saved_regs; + memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); + preempt_enable_no_resched(); return 1; } return 0; diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index bbf64b59a21..a87b6cebe80 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -313,48 +313,11 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno) } -asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; long i, ret; unsigned ui; - /* This lock_kernel fixes a subtle race with suid exec */ - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -608,10 +571,6 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); return ret; } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 658a81b33f3..4b5b088ec10 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -65,8 +65,6 @@ int smp_num_siblings = 1; /* Package ID of each logical CPU */ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; -EXPORT_SYMBOL(phys_proc_id); -EXPORT_SYMBOL(cpu_core_id); /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; diff --git a/arch/x86_64/oprofile/Kconfig b/arch/x86_64/oprofile/Kconfig index 5ade19801b9..d8a84088471 100644 --- a/arch/x86_64/oprofile/Kconfig +++ b/arch/x86_64/oprofile/Kconfig @@ -1,7 +1,3 @@ - -menu "Profiling support" - depends on EXPERIMENTAL - config PROFILING bool "Profiling support (EXPERIMENTAL)" help @@ -19,5 +15,3 @@ config OPROFILE If unsure, say N. -endmenu - |