From ad9e39c70f46c5e17b1ed5912e8693454fec1455 Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Wed, 6 Feb 2008 13:57:46 -0800
Subject: [IA64] Wire up timerfd_{create,settime,gettime} syscalls

Add ia64 hooks for the new syscalls that were added in commit
4d672e7ac79b5ec5cdc90e450823441e20464691

Signed-off-by: Tony Luck
---
 arch/ia64/kernel/entry.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index f5d3efbfbed..3c331c464b4 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1573,7 +1573,7 @@ sys_call_table:
 	data8 sys_fchmodat
 	data8 sys_faccessat
 	data8 sys_pselect6
-	data8 sys_ppoll
+	data8 sys_ppoll				// 1295
 	data8 sys_unshare
 	data8 sys_splice
 	data8 sys_set_robust_list
@@ -1588,5 +1588,8 @@ sys_call_table:
 	data8 sys_signalfd
 	data8 sys_ni_syscall
 	data8 sys_eventfd
+	data8 sys_timerfd_create		// 1310
+	data8 sys_timerfd_settime
+	data8 sys_timerfd_gettime

 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
--
cgit v1.2.3

From 5aa92ffda1b6244b4a248df0b95c07d183ab96d2 Mon Sep 17 00:00:00 2001
From: Petr Tesarik
Date: Wed, 12 Dec 2007 15:21:16 +0100
Subject: [IA64] Rename TIF_PERFMON_WORK back to TIF_NOTIFY_RESUME

The RSE synchronization will need a TIF_ flag, but all work-to-be-done
bits are already used, so we have to multiplex TIF_NOTIFY_RESUME again.

Signed-off-by: Shaohua Li
Signed-off-by: Petr Tesarik
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/perfmon.c | 21 +++------------------
 arch/ia64/kernel/process.c |  9 +++++++++
 2 files changed, 12 insertions(+), 18 deletions(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 78acd9fe97e..f6b99719f10 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -585,21 +585,6 @@ pfm_put_task(struct task_struct *task)
 	if (task != current) put_task_struct(task);
 }

-static inline void
-pfm_set_task_notify(struct task_struct *task)
-{
-	struct thread_info *info;
-
-	info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
-	set_bit(TIF_PERFMON_WORK, &info->flags);
-}
-
-static inline void
-pfm_clear_task_notify(void)
-{
-	clear_thread_flag(TIF_PERFMON_WORK);
-}
-
 static inline void
 pfm_reserve_page(unsigned long a)
 {
@@ -3724,7 +3709,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)

 		PFM_SET_WORK_PENDING(task, 1);

-		pfm_set_task_notify(task);
+		tsk_set_notify_resume(task);

 		/*
 		 * XXX: send reschedule if task runs on another CPU
@@ -5082,7 +5067,7 @@ pfm_handle_work(void)

 	PFM_SET_WORK_PENDING(current, 0);

-	pfm_clear_task_notify();
+	tsk_clear_notify_resume(current);

 	regs = task_pt_regs(current);
@@ -5450,7 +5435,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 		 * when coming from ctxsw, current still points to the
 		 * previous task, therefore we must work with task and not current.
 		 */
-		pfm_set_task_notify(task);
+		tsk_set_notify_resume(task);
 	}
 	/*
 	 * defer until state is changed (shorten spin window). the context is locked
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 7377d323131..5c9efe62656 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -157,6 +157,15 @@ show_regs (struct pt_regs *regs)
 		show_stack(NULL, NULL);
 }

+void tsk_clear_notify_resume(struct task_struct *tsk)
+{
+#ifdef CONFIG_PERFMON
+	if (tsk->thread.pfm_needs_checking)
+		return;
+#endif
+	clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
+}
+
 void
 do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall)
 {
--
cgit v1.2.3
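
For illustration, the flag-sharing scheme this patch sets up can be modeled
in ordinary user-space C. This is a sketch only (the struct and function
names below are invented, not kernel API): the shared notify bit can be set
at any time, but the clear path must first check whether any consumer still
has pending work, which is exactly what tsk_clear_notify_resume() does above.

	#include <stdbool.h>
	#include <stdio.h>

	struct task {
		bool notify_resume;	 /* the one shared TIF_NOTIFY_RESUME bit */
		bool pfm_needs_checking; /* perfmon still has work queued */
		bool restore_rse;	 /* RSE sync still has work queued */
	};

	static void set_notify_resume(struct task *t)
	{
		t->notify_resume = true;
	}

	/* Clear the shared bit only when no consumer still needs it. */
	static void clear_notify_resume(struct task *t)
	{
		if (t->pfm_needs_checking || t->restore_rse)
			return;
		t->notify_resume = false;
	}

	int main(void)
	{
		struct task t = { .pfm_needs_checking = true };

		set_notify_resume(&t);		/* perfmon queues work */
		clear_notify_resume(&t);	/* refused: work still pending */
		t.pfm_needs_checking = false;
		clear_notify_resume(&t);	/* now actually clears */
		printf("notify_resume=%d\n", t.notify_resume);
		return 0;
	}
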
From 3b2ce0b17824c42bc2e46f7dd903b4acf5e9fff9 Mon Sep 17 00:00:00 2001
From: Petr Tesarik
Date: Wed, 12 Dec 2007 15:23:34 +0100
Subject: [IA64] Synchronize kernel RSE to user-space and back

This is the base kernel patch for the ptrace RSE bug, basically a
backport of the utrace RSE patch sent out several weeks ago.

When a thread is stopped (ptraced), the debugger might change the
thread's user stack (change memory directly), and we must keep the RSE
state stored in the kernel from overriding the user stack (user space's
RSE is newer than the kernel's in that case). To work around the issue,
we copy the kernel RSE to the user RSE before the task is stopped, so
the user RSE has updated data. We then copy the user RSE back to the
kernel after the task is resumed from the traced stop, and the kernel
will use the newer RSE to return to user.

Signed-off-by: Shaohua Li
Signed-off-by: Petr Tesarik
CC: Roland McGrath
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/process.c |  6 ++++
 arch/ia64/kernel/ptrace.c  | 82 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 5c9efe62656..be6c6f7be02 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -163,6 +163,8 @@ void tsk_clear_notify_resume(struct task_struct *tsk)
 	if (tsk->thread.pfm_needs_checking)
 		return;
 #endif
+	if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE))
+		return;
 	clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
 }

@@ -184,6 +186,10 @@ do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall
 	/* deal with pending signal delivery */
 	if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
 		ia64_do_signal(scr, in_syscall);
+
+	/* copy user rbs to kernel rbs */
+	if (unlikely(test_thread_flag(TIF_RESTORE_RSE)))
+		ia64_sync_krbs();
 }

 static int pal_halt = 1;
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 2e96f17b2f3..2de5a524a0e 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -547,6 +547,72 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
 	return 0;
 }

+static long
+ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw,
+		unsigned long user_rbs_start, unsigned long user_rbs_end)
+{
+	unsigned long addr, val;
+	long ret;
+
+	/* now copy word for word from user rbs to kernel rbs: */
+	for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
+		if (access_process_vm(child, addr, &val, sizeof(val), 0)
+		    != sizeof(val))
+			return -EIO;
+
+		ret = ia64_poke(child, sw, user_rbs_end, addr, val);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *,
+		unsigned long, unsigned long);
+
+static void do_sync_rbs(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	unsigned long urbs_end;
+	syncfunc_t fn = arg;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+	pt = task_pt_regs(info->task);
+	urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL);
+
+	fn(info->task, info->sw, pt->ar_bspstore, urbs_end);
+}
+
+/*
+ * When a thread is stopped (ptraced), the debugger might change its user
+ * stack (change memory directly); we must keep the RSE state stored in the
+ * kernel from overriding the user stack (user space's RSE is newer in that
+ * case). To work around this, we copy the kernel RSE to the user RSE before
+ * the task is stopped, so the user RSE has updated data. We then copy the
+ * user RSE back to the kernel after the task is resumed from the traced
+ * stop, and the kernel uses the newer RSE to return to user. TIF_RESTORE_RSE
+ * indicates that we need to synchronize the user RSE to the kernel.
+ */
+void ia64_ptrace_stop(void)
+{
+	if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE))
+		return;
+	tsk_set_notify_resume(current);
+	unw_init_running(do_sync_rbs, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to read back the register backing store.
+ */
+void ia64_sync_krbs(void)
+{
+	clear_tsk_thread_flag(current, TIF_RESTORE_RSE);
+	tsk_clear_notify_resume(current);
+
+	unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
+}
+
 static inline int
 thread_matches (struct task_struct *thread, unsigned long addr)
 {
@@ -1422,6 +1488,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
 	struct task_struct *child;
 	struct switch_stack *sw;
 	long ret;
+	struct unw_frame_info info;

 	lock_kernel();
 	ret = -EPERM;
@@ -1453,6 +1520,8 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)

 	if (request == PTRACE_ATTACH) {
 		ret = ptrace_attach(child);
+		if (!ret)
+			arch_ptrace_attach(child);
 		goto out_tsk;
 	}

@@ -1481,6 +1550,11 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
 		/* write the word at location addr */
 		urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
 		ret = ia64_poke(child, sw, urbs_end, addr, data);
+
+		/* Make sure user RBS has the latest data */
+		unw_init_from_blocked_task(&info, child);
+		do_sync_rbs(&info, ia64_sync_user_rbs);
+
 		goto out_tsk;

 	      case PTRACE_PEEKUSR:
@@ -1634,6 +1708,10 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 	    && (current->ptrace & PT_PTRACED))
 		syscall_trace();

+	/* copy user rbs to kernel rbs */
+	if (test_thread_flag(TIF_RESTORE_RSE))
+		ia64_sync_krbs();
+
 	if (unlikely(current->audit_context)) {
 		long syscall;
 		int arch;
@@ -1671,4 +1749,8 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 	    || test_thread_flag(TIF_SINGLESTEP))
 	    && (current->ptrace & PT_PTRACED))
 		syscall_trace();
+
+	/* copy user rbs to kernel rbs */
+	if (test_thread_flag(TIF_RESTORE_RSE))
+		ia64_sync_krbs();
 }
--
cgit v1.2.3
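
The problem this patch solves is easiest to see from the tracer's side. The
following standalone Linux tracer is a sketch (the target address and the
0xdeadbeef value are placeholders): it pokes one word into a stopped tracee.
If that word lands in the tracee's register backing store, it only survives
because the patched kernel flushed its RSE state to user space at the stop
and reads the backing store back on resume (ia64_sync_krbs).

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	int main(int argc, char **argv)
	{
		pid_t pid;
		unsigned long addr;

		if (argc != 3) {
			fprintf(stderr, "usage: %s <pid> <addr>\n", argv[0]);
			return 1;
		}
		pid = (pid_t) atol(argv[1]);
		addr = strtoul(argv[2], NULL, 0);

		if (ptrace(PTRACE_ATTACH, pid, 0, 0) == -1) {
			perror("PTRACE_ATTACH");
			return 1;
		}
		waitpid(pid, NULL, 0);	/* tracee is now in a traced stop */

		/* Overwrite one word of the tracee; on ia64 this may fall
		 * inside the register backing store. */
		if (ptrace(PTRACE_POKEDATA, pid, (void *) addr,
			   (void *) 0xdeadbeefUL) == -1)
			perror("PTRACE_POKEDATA");

		/* On detach the tracee resumes; the kernel must now use the
		 * updated user RBS rather than its stale in-kernel copy. */
		ptrace(PTRACE_DETACH, pid, 0, 0);
		return 0;
	}
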
From aa91a2e90044b88228bdb0620e771f2ea7798804 Mon Sep 17 00:00:00 2001
From: Petr Tesarik
Date: Wed, 12 Dec 2007 15:24:25 +0100
Subject: [IA64] Synchronize RBS on PTRACE_ATTACH

When attaching to a stopped process, the RSE must be explicitly
synced to user-space, so the debugger can read the correct values.

Signed-off-by: Petr Tesarik
CC: Roland McGrath
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/ptrace.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 2de5a524a0e..331d6768b5d 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -613,6 +613,63 @@ void ia64_sync_krbs(void)
 	unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
 }

+/*
+ * After PTRACE_ATTACH, a thread's register backing store area in user
+ * space is assumed to contain correct data whenever the thread is
+ * stopped.  arch_ptrace_stop takes care of this on tracing stops.
+ * But if the child was already stopped for job control when we attach
+ * to it, then it might not ever get into ptrace_stop by the time we
+ * want to examine the user memory containing the RBS.
+ */
+void
+ptrace_attach_sync_user_rbs (struct task_struct *child)
+{
+	int stopped = 0;
+	struct unw_frame_info info;
+
+	/*
+	 * If the child is in TASK_STOPPED, we need to change that to
+	 * TASK_TRACED momentarily while we operate on it.  This ensures
+	 * that the child won't be woken up and return to user mode while
+	 * we are doing the sync.  (It can only be woken up for SIGKILL.)
+	 */
+
+	read_lock(&tasklist_lock);
+	if (child->signal) {
+		spin_lock_irq(&child->sighand->siglock);
+		if (child->state == TASK_STOPPED &&
+		    !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
+			tsk_set_notify_resume(child);
+
+			child->state = TASK_TRACED;
+			stopped = 1;
+		}
+		spin_unlock_irq(&child->sighand->siglock);
+	}
+	read_unlock(&tasklist_lock);
+
+	if (!stopped)
+		return;
+
+	unw_init_from_blocked_task(&info, child);
+	do_sync_rbs(&info, ia64_sync_user_rbs);
+
+	/*
+	 * Now move the child back into TASK_STOPPED if it should be in a
+	 * job control stop, so that SIGCONT can be used to wake it up.
+	 */
+	read_lock(&tasklist_lock);
+	if (child->signal) {
+		spin_lock_irq(&child->sighand->siglock);
+		if (child->state == TASK_TRACED &&
+		    (child->signal->flags & SIGNAL_STOP_STOPPED)) {
+			child->state = TASK_STOPPED;
+		}
+		spin_unlock_irq(&child->sighand->siglock);
+	}
+	read_unlock(&tasklist_lock);
+}
+
 static inline int
 thread_matches (struct task_struct *thread, unsigned long addr)
 {
--
cgit v1.2.3
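
The corner case being fixed can be reproduced with a small tracer sketch
(standalone Linux C; the peeked address is a placeholder and will usually
just report an error): the child parks itself in a job-control stop before
anyone traces it, so it never passes through a tracing stop before the
debugger starts reading its memory. On ia64, the attach itself must
therefore trigger the RBS flush.

	#include <errno.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		long word;
		pid_t pid = fork();

		if (pid == 0) {		/* child: enter a job-control stop */
			raise(SIGSTOP);
			_exit(0);
		}
		waitpid(pid, NULL, WUNTRACED);	/* child is stopped, untraced */

		/* On ia64 this lands in ptrace_attach_sync_user_rbs(), which
		 * pushes the kernel's RSE state out to the child's backing
		 * store before we look at it. */
		if (ptrace(PTRACE_ATTACH, pid, 0, 0) == -1)
			perror("PTRACE_ATTACH");

		errno = 0;
		word = ptrace(PTRACE_PEEKDATA, pid, (void *) 0x1000, 0);
		printf("peeked %lx (errno %d)\n", word, errno);

		kill(pid, SIGKILL);
		return 0;
	}
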
From 427639354ff346710012b53e1ceed5e3f3200e0c Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Fri, 8 Feb 2008 11:53:09 -0800
Subject: [IA64] Simplify cpu_idle_wait

This is just Venki's patch[*] for x86 ported to ia64.

* http://marc.info/?l=linux-kernel&m=120249201318159&w=2

Acked-by: Venkatesh Pallipadi
Signed-off-by: Tony Luck
---
 arch/ia64/kernel/process.c | 44 +++++++++++++++----------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index be6c6f7be02..49937a383b2 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -52,7 +52,6 @@
 #include "sigframe.h"

 void (*ia64_mark_idle)(int);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
@@ -254,33 +253,23 @@ static inline void play_dead(void)
 }
 #endif /* CONFIG_HOTPLUG_CPU */

-void cpu_idle_wait(void)
+static void do_nothing(void *unused)
 {
-	unsigned int cpu, this_cpu = get_cpu();
-	cpumask_t map;
-	cpumask_t tmp = current->cpus_allowed;
-
-	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-	put_cpu();
-
-	cpus_clear(map);
-	for_each_online_cpu(cpu) {
-		per_cpu(cpu_idle_state, cpu) = 1;
-		cpu_set(cpu, map);
-	}
-
-	__get_cpu_var(cpu_idle_state) = 0;
+}

-	wmb();
-	do {
-		ssleep(1);
-		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
-				cpu_clear(cpu, map);
-		}
-		cpus_and(map, map, cpu_online_map);
-	} while (!cpus_empty(map));
-	set_cpus_allowed(current, tmp);
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+	smp_mb();
+	/* kick all the CPUs so that they exit out of pm_idle */
+	smp_call_function(do_nothing, NULL, 0, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);

@@ -308,9 +297,6 @@ cpu_idle (void)
 #ifdef CONFIG_SMP
 			min_xtp();
 #endif
-			if (__get_cpu_var(cpu_idle_state))
-				__get_cpu_var(cpu_idle_state) = 0;
-			rmb();
 			if (mark_idle)
 				(*mark_idle)(1);
--
cgit v1.2.3
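
The contract of the rewritten cpu_idle_wait() is clearest from the caller's
side. Below is a kernel-style sketch, not a complete module; my_new_idle and
switch_idle_handler are invented names, while pm_idle and cpu_idle_wait()
are the real ia64 hooks:

	extern void (*pm_idle)(void);
	extern void cpu_idle_wait(void);

	static void my_new_idle(void)
	{
		/* ... some low-power wait ... */
	}

	static void switch_idle_handler(void)
	{
		pm_idle = my_new_idle;	/* publish the new handler */

		/*
		 * cpu_idle_wait() issues a full memory barrier and then an
		 * IPI to every CPU. Each CPU drops out of the old handler
		 * and re-reads pm_idle on its next trip around the idle
		 * loop, so once this call returns, the old handler can be
		 * freed or unloaded safely.
		 */
		cpu_idle_wait();
	}
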
From 785285fc8bc7f846ab68a063a8bf5a009d67725d Mon Sep 17 00:00:00 2001
From: Russ Anderson
Date: Tue, 5 Feb 2008 17:12:32 -0600
Subject: [IA64] Fix large MCA bootmem allocation

The MCA code allocates bootmem memory for NR_CPUS, regardless of
how many cpus the system actually has. This change allocates memory
only for cpus that actually exist.

On my test system with NR_CPUS = 1024, reserved memory was reduced
by 130944k.

Before: Memory: 27886976k/28111168k available (8282k code, 242304k reserved, 5928k data, 1792k init)
After:  Memory: 28017920k/28111168k available (8282k code, 111360k reserved, 5928k data, 1792k init)

Signed-off-by: Russ Anderson
Signed-off-by: Tony Luck
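
As a consistency check on those numbers (an editorial inference, not part of
the original log): the drop in reserved memory is 242304k - 111360k =
130944k, and 130944k / 1023 = 128k exactly. That is consistent with a
128 KiB struct ia64_mca_cpu no longer being reserved for each of the 1023
cpus beyond the single one whose structure is still carved out of bootmem
when the boot banner prints (NR_CPUS = 1024).
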
---
 arch/ia64/kernel/mca.c | 55 ++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 29 deletions(-)

(limited to 'arch/ia64')

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 846e7e036b1..6e17aed5313 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -17,7 +17,7 @@
  * Copyright (C) 2000 Intel
  * Copyright (C) Chuck Fleckenstein
  *
- * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
+ * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc.
  * Copyright (C) Vijay Chander
  *
 * Copyright (C) 2006 FUJITSU LIMITED
@@ -1762,11 +1762,8 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 /* Caller prevents this from being called after init */
 static void * __init_refok mca_bootmem(void)
 {
-	void *p;
-
-	p = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS +
-			  KERNEL_STACK_SIZE);
-	return (void *)ALIGN((unsigned long)p, KERNEL_STACK_SIZE);
+	return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
+			    KERNEL_STACK_SIZE, 0);
 }

 /* Do per-CPU MCA-related initialization. */
@@ -1774,33 +1771,33 @@ void __cpuinit
 ia64_mca_cpu_init(void *cpu_data)
 {
 	void *pal_vaddr;
+	void *data;
+	long sz = sizeof(struct ia64_mca_cpu);
+	int cpu = smp_processor_id();
 	static int first_time = 1;

-	if (first_time) {
-		void *mca_data;
-		int cpu;
-
-		first_time = 0;
-		mca_data = mca_bootmem();
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			format_mca_init_stack(mca_data,
-					offsetof(struct ia64_mca_cpu, mca_stack),
-					"MCA", cpu);
-			format_mca_init_stack(mca_data,
-					offsetof(struct ia64_mca_cpu, init_stack),
-					"INIT", cpu);
-			__per_cpu_mca[cpu] = __pa(mca_data);
-			mca_data += sizeof(struct ia64_mca_cpu);
-		}
-	}
-
 	/*
-	 * The MCA info structure was allocated earlier and its
-	 * physical address saved in __per_cpu_mca[cpu].  Copy that
-	 * address * to ia64_mca_data so we can access it as a per-CPU
-	 * variable.
+	 * Structure will already be allocated if cpu has been online,
+	 * then offlined.
 	 */
-	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
+	if (__per_cpu_mca[cpu]) {
+		data = __va(__per_cpu_mca[cpu]);
+	} else {
+		if (first_time) {
+			data = mca_bootmem();
+			first_time = 0;
+		} else
+			data = page_address(alloc_pages_node(numa_node_id(),
+					GFP_KERNEL, get_order(sz)));
+		if (!data)
+			panic("Could not allocate MCA memory for cpu %d\n",
+					cpu);
+	}
+	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack),
+		"MCA", cpu);
+	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack),
+		"INIT", cpu);
+	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data);

 	/*
 	 * Stash away a copy of the PTE needed to map the per-CPU page.
--
cgit v1.2.3
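
The hot-add path above sizes its allocation with get_order(sz). As a
standalone model of what that computes (assumptions: 16 KiB pages, a common
ia64 configuration, and the 128 KiB structure size inferred earlier; neither
value is stated in the patch, and get_order below is a user-space
re-implementation of the kernel helper):

	#include <stdio.h>

	#define PAGE_SHIFT 14			/* 16 KiB pages */
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	/* Smallest order such that (PAGE_SIZE << order) >= size. */
	static int get_order(unsigned long size)
	{
		int order = 0;

		size = (size - 1) >> PAGE_SHIFT;
		while (size) {
			order++;
			size >>= 1;
		}
		return order;
	}

	int main(void)
	{
		unsigned long sz = 128 * 1024;	/* assumed sizeof(struct ia64_mca_cpu) */

		printf("order %d -> %lu byte allocation\n",
		       get_order(sz), PAGE_SIZE << get_order(sz));
		return 0;
	}
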