diff options
Diffstat (limited to 'arch/s390')
70 files changed, 1847 insertions, 294 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2eca5fe0e75..a14dba0e4d6 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -82,6 +82,11 @@ config S390 select USE_GENERIC_SMP_HELPERS if SMP select HAVE_SYSCALL_WRAPPERS select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FTRACE_SYSCALLS + select HAVE_DYNAMIC_FTRACE + select HAVE_FUNCTION_GRAPH_TRACER select HAVE_DEFAULT_NO_SPIN_MUTEXES select HAVE_OPROFILE select HAVE_KPROBES @@ -343,6 +348,9 @@ config ARCH_ENABLE_MEMORY_HOTPLUG config ARCH_ENABLE_MEMORY_HOTREMOVE def_bool y +config ARCH_HIBERNATION_POSSIBLE + def_bool y if 64BIT + source "mm/Kconfig" comment "I/O subsystem configuration" @@ -567,6 +575,30 @@ bool "s390 guest support for KVM (EXPERIMENTAL)" the KVM hypervisor. This will add detection for KVM as well as a virtio transport. If KVM is detected, the virtio console will be the default console. + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. 
+ +endmenu + +menu "Power Management" + +source "kernel/power/Kconfig" + endmenu source "net/Kconfig" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 578c61f15a4..0ff387cebf8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -88,7 +88,9 @@ LDFLAGS_vmlinux := -e start head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ + arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ \ + arch/s390/power/ + libs-y += arch/s390/lib/ drivers-y += drivers/s390/ drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 1dfc7100c7e..264528e4f58 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -5,7 +5,7 @@ * Exports appldata_register_ops() and appldata_unregister_ops() for the * data gathering modules. * - * Copyright IBM Corp. 2003, 2008 + * Copyright IBM Corp. 2003, 2009 * * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ @@ -26,6 +26,8 @@ #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/workqueue.h> +#include <linux/suspend.h> +#include <linux/platform_device.h> #include <asm/appldata.h> #include <asm/timer.h> #include <asm/uaccess.h> @@ -41,6 +43,9 @@ #define TOD_MICRO 0x01000 /* nr. 
of TOD clock units for 1 microsecond */ + +static struct platform_device *appldata_pdev; + /* * /proc entries (sysctl) */ @@ -86,6 +91,7 @@ static atomic_t appldata_expire_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(appldata_timer_lock); static int appldata_interval = APPLDATA_CPU_INTERVAL; static int appldata_timer_active; +static int appldata_timer_suspended = 0; /* * Work queue @@ -475,6 +481,93 @@ void appldata_unregister_ops(struct appldata_ops *ops) /********************** module-ops management <END> **************************/ +/**************************** suspend / resume *******************************/ +static int appldata_freeze(struct device *dev) +{ + struct appldata_ops *ops; + int rc; + struct list_head *lh; + + get_online_cpus(); + spin_lock(&appldata_timer_lock); + if (appldata_timer_active) { + __appldata_vtimer_setup(APPLDATA_DEL_TIMER); + appldata_timer_suspended = 1; + } + spin_unlock(&appldata_timer_lock); + put_online_cpus(); + + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + ops = list_entry(lh, struct appldata_ops, list); + if (ops->active == 1) { + rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) + pr_err("Stopping the data collection for %s " + "failed with rc=%d\n", ops->name, rc); + } + } + mutex_unlock(&appldata_ops_mutex); + return 0; +} + +static int appldata_restore(struct device *dev) +{ + struct appldata_ops *ops; + int rc; + struct list_head *lh; + + get_online_cpus(); + spin_lock(&appldata_timer_lock); + if (appldata_timer_suspended) { + __appldata_vtimer_setup(APPLDATA_ADD_TIMER); + appldata_timer_suspended = 0; + } + spin_unlock(&appldata_timer_lock); + put_online_cpus(); + + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + ops = list_entry(lh, struct appldata_ops, list); + if (ops->active == 1) { + ops->callback(ops->data); // init record + rc = appldata_diag(ops->record_nr, + 
APPLDATA_START_INTERVAL_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) { + pr_err("Starting the data collection for %s " + "failed with rc=%d\n", ops->name, rc); + } + } + } + mutex_unlock(&appldata_ops_mutex); + return 0; +} + +static int appldata_thaw(struct device *dev) +{ + return appldata_restore(dev); +} + +static struct dev_pm_ops appldata_pm_ops = { + .freeze = appldata_freeze, + .thaw = appldata_thaw, + .restore = appldata_restore, +}; + +static struct platform_driver appldata_pdrv = { + .driver = { + .name = "appldata", + .owner = THIS_MODULE, + .pm = &appldata_pm_ops, + }, +}; +/************************* suspend / resume <END> ****************************/ + + /******************************* init / exit *********************************/ static void __cpuinit appldata_online_cpu(int cpu) @@ -531,11 +624,23 @@ static struct notifier_block __cpuinitdata appldata_nb = { */ static int __init appldata_init(void) { - int i; + int i, rc; + + rc = platform_driver_register(&appldata_pdrv); + if (rc) + return rc; + appldata_pdev = platform_device_register_simple("appldata", -1, NULL, + 0); + if (IS_ERR(appldata_pdev)) { + rc = PTR_ERR(appldata_pdev); + goto out_driver; + } appldata_wq = create_singlethread_workqueue("appldata"); - if (!appldata_wq) - return -ENOMEM; + if (!appldata_wq) { + rc = -ENOMEM; + goto out_device; + } get_online_cpus(); for_each_online_cpu(i) @@ -547,6 +652,12 @@ static int __init appldata_init(void) appldata_sysctl_header = register_sysctl_table(appldata_dir_table); return 0; + +out_device: + platform_device_unregister(appldata_pdev); +out_driver: + platform_driver_unregister(&appldata_pdrv); + return rc; } __initcall(appldata_init); diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index de432f2de2d..fca9dffcc66 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -275,6 +275,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, #define 
smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() -#include <asm-generic/atomic.h> +#include <asm-generic/atomic-long.h> #endif /* __KERNEL__ */ #endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/s390/include/asm/bitsperlong.h b/arch/s390/include/asm/bitsperlong.h new file mode 100644 index 00000000000..6b235aea9c6 --- /dev/null +++ b/arch/s390/include/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_S390_BITSPERLONG_H +#define __ASM_S390_BITSPERLONG_H + +#ifndef __s390x__ +#define __BITS_PER_LONG 32 +#else +#define __BITS_PER_LONG 64 +#endif + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_S390_BITSPERLONG_H */ + diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index ba007d8df94..2a541955117 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -1,11 +1,9 @@ /* - * include/asm-s390/ccwdev.h - * include/asm-s390x/ccwdev.h + * Copyright IBM Corp. 2002, 2009 * - * Copyright (C) 2002 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Arnd Bergmann <arndb@de.ibm.com> + * Author(s): Arnd Bergmann <arndb@de.ibm.com> * - * Interface for CCW device drivers + * Interface for CCW device drivers */ #ifndef _S390_CCWDEV_H_ #define _S390_CCWDEV_H_ @@ -104,6 +102,11 @@ struct ccw_device { * @set_offline: called when setting device offline * @notify: notify driver of device state changes * @shutdown: called at device shutdown + * @prepare: prepare for pm state transition + * @complete: undo work done in @prepare + * @freeze: callback for freezing during hibernation snapshotting + * @thaw: undo work done in @freeze + * @restore: callback for restoring after hibernation * @driver: embedded device driver structure * @name: device driver name */ @@ -116,6 +119,11 @@ struct ccw_driver { int (*set_offline) (struct ccw_device *); int (*notify) (struct ccw_device *, int); void (*shutdown) (struct ccw_device *); + int (*prepare) (struct ccw_device *); + void (*complete) 
(struct ccw_device *); + int (*freeze)(struct ccw_device *); + int (*thaw) (struct ccw_device *); + int (*restore)(struct ccw_device *); struct device_driver driver; char *name; }; @@ -184,6 +192,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) extern struct ccw_device *ccw_device_probe_console(void); +extern int ccw_device_force_console(void); // FIXME: these have to go extern int _ccw_device_get_subchannel_number(struct ccw_device *); diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index a27f68985a7..c79c1e787b8 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -38,6 +38,11 @@ struct ccwgroup_device { * @set_online: function called when device is set online * @set_offline: function called when device is set offline * @shutdown: function called when device is shut down + * @prepare: prepare for pm state transition + * @complete: undo work done in @prepare + * @freeze: callback for freezing during hibernation snapshotting + * @thaw: undo work done in @freeze + * @restore: callback for restoring after hibernation * @driver: embedded driver structure */ struct ccwgroup_driver { @@ -51,6 +56,11 @@ struct ccwgroup_driver { int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); void (*shutdown)(struct ccwgroup_device *); + int (*prepare) (struct ccwgroup_device *); + void (*complete) (struct ccwgroup_device *); + int (*freeze)(struct ccwgroup_device *); + int (*thaw) (struct ccwgroup_device *); + int (*restore)(struct ccwgroup_device *); struct device_driver driver; }; diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index de065b32381..01a08020bc0 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -5,6 +5,7 @@ */ #include <linux/types.h> #include <linux/sched.h> +#include <linux/thread_info.h> #define 
PSW32_MASK_PER 0x40000000UL #define PSW32_MASK_DAT 0x04000000UL @@ -163,12 +164,28 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } +#ifdef CONFIG_COMPAT + +static inline int is_compat_task(void) +{ + return test_thread_flag(TIF_31BIT); +} + +#else + +static inline int is_compat_task(void) +{ + return 0; +} + +#endif + static inline void __user *compat_alloc_user_space(long len) { unsigned long stack; stack = KSTK_ESP(current); - if (test_thread_flag(TIF_31BIT)) + if (is_compat_task()) stack &= 0x7fffffffUL; return (void __user *) (stack - len); } diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h deleted file mode 100644 index d60a2eefb17..00000000000 --- a/arch/s390/include/asm/cpu.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * include/asm-s390/cpu.h - * - * Copyright IBM Corp. 2007 - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - */ - -#ifndef _ASM_S390_CPU_H_ -#define _ASM_S390_CPU_H_ - -#include <linux/types.h> -#include <linux/percpu.h> -#include <linux/spinlock.h> - -struct s390_idle_data { - spinlock_t lock; - unsigned long long idle_count; - unsigned long long idle_enter; - unsigned long long idle_time; -}; - -DECLARE_PER_CPU(struct s390_idle_data, s390_idle); - -void vtime_start_cpu(void); - -static inline void s390_idle_check(void) -{ - if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) - vtime_start_cpu(); -} - -#endif /* _ASM_S390_CPU_H_ */ diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 941384fbd39..ec917d42ee6 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -9,6 +9,9 @@ #ifndef _S390_CPUTIME_H #define _S390_CPUTIME_H +#include <linux/types.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> #include <asm/div64.h> /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. 
*/ @@ -174,8 +177,24 @@ cputime64_to_clock_t(cputime64_t cputime) return __div(cputime, 4096000000ULL / USER_HZ); } +struct s390_idle_data { + spinlock_t lock; + unsigned long long idle_count; + unsigned long long idle_enter; + unsigned long long idle_time; +}; + +DECLARE_PER_CPU(struct s390_idle_data, s390_idle); + +void vtime_start_cpu(void); cputime64_t s390_get_idle_time(int cpu); #define arch_idle_time(cpu) s390_get_idle_time(cpu) +static inline void s390_idle_check(void) +{ + if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) + vtime_start_cpu(); +} + #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 5a5bc75e19d..96c14a9102b 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -2,7 +2,28 @@ #define _ASM_S390_FTRACE_H #ifndef __ASSEMBLY__ + extern void _mcount(void); +extern unsigned long ftrace_dyn_func; + +struct dyn_arch_ftrace { }; + +#define MCOUNT_ADDR ((long)_mcount) + +#ifdef CONFIG_64BIT +#define MCOUNT_OFFSET_RET 18 +#define MCOUNT_INSN_SIZE 24 +#define MCOUNT_OFFSET 14 +#else +#define MCOUNT_OFFSET_RET 26 +#define MCOUNT_INSN_SIZE 30 +#define MCOUNT_OFFSET 8 #endif +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr - MCOUNT_OFFSET; +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h index fd157464822..94ec3ee0798 100644 --- a/arch/s390/include/asm/kmap_types.h +++ b/arch/s390/include/asm/kmap_types.h @@ -2,22 +2,7 @@ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H -enum km_type { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_TYPE_NR -}; +#include <asm-generic/kmap_types.h> #endif #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/kvm_host.h 
b/arch/s390/include/asm/kvm_host.h index 54ea39f96ec..a27d0d5a6f8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,6 +13,8 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H +#include <linux/hrtimer.h> +#include <linux/interrupt.h> #include <linux/kvm_host.h> #include <asm/debug.h> #include <asm/cpuid.h> @@ -210,7 +212,8 @@ struct kvm_vcpu_arch { s390_fp_regs guest_fpregs; unsigned int guest_acrs[NUM_ACRS]; struct kvm_s390_local_interrupt local_int; - struct timer_list ckc_timer; + struct hrtimer ckc_timer; + struct tasklet_struct tasklet; union { cpuid_t cpu_id; u64 stidp_data; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 3aeca492b14..5046ad6b7a6 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -30,6 +30,7 @@ #define __LC_SUBCHANNEL_NR 0x00ba #define __LC_IO_INT_PARM 0x00bc #define __LC_IO_INT_WORD 0x00c0 +#define __LC_STFL_FAC_LIST 0x00c8 #define __LC_MCCK_CODE 0x00e8 #define __LC_DUMP_REIPL 0x0e00 @@ -67,6 +68,7 @@ #define __LC_CPUID 0x02b0 #define __LC_INT_CLOCK 0x02c8 #define __LC_MACHINE_FLAGS 0x02d8 +#define __LC_FTRACE_FUNC 0x02dc #define __LC_IRB 0x0300 #define __LC_PFAULT_INTPARM 0x0080 #define __LC_CPU_TIMER_SAVE_AREA 0x00d8 @@ -112,6 +114,7 @@ #define __LC_INT_CLOCK 0x0340 #define __LC_VDSO_PER_CPU 0x0350 #define __LC_MACHINE_FLAGS 0x0358 +#define __LC_FTRACE_FUNC 0x0360 #define __LC_IRB 0x0380 #define __LC_PASTE 0x03c0 #define __LC_PFAULT_INTPARM 0x11b8 @@ -280,7 +283,8 @@ struct _lowcore __u64 int_clock; /* 0x02c8 */ __u64 clock_comparator; /* 0x02d0 */ __u32 machine_flags; /* 0x02d8 */ - __u8 pad_0x02dc[0x0300-0x02dc]; /* 0x02dc */ + __u32 ftrace_func; /* 0x02dc */ + __u8 pad_0x02e0[0x0300-0x02e0]; /* 0x02e0 */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -385,7 +389,8 @@ struct _lowcore __u64 clock_comparator; /* 0x0348 */ __u64 vdso_per_cpu_data; /* 0x0350 */ __u64 machine_flags; /* 0x0358 */ - __u8
pad_0x0360[0x0380-0x0360]; /* 0x0360 */ + __u64 ftrace_func; /* 0x0360 */ + __u8 pad_0x0368[0x0380-0x0368]; /* 0x0368 */ /* Interrupt response block. */ __u8 irb[64]; /* 0x0380 */ diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h index da01432e8f4..f63fe7b431e 100644 --- a/arch/s390/include/asm/mman.h +++ b/arch/s390/include/asm/mman.h @@ -9,7 +9,7 @@ #ifndef __S390_MMAN_H__ #define __S390_MMAN_H__ -#include <asm-generic/mman.h> +#include <asm-generic/mman-common.h> #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 32e8f6aa438..3e3594d01f8 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -150,7 +150,7 @@ void arch_alloc_page(struct page *page, int order); VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include <asm-generic/memory_model.h> -#include <asm-generic/page.h> +#include <asm-generic/getorder.h> #define __HAVE_ARCH_GATE_AREA 1 diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5caddd4f7be..60a7b1a1702 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -112,12 +112,15 @@ extern char empty_zero_page[PAGE_SIZE]; * effect, this also makes sure that 64 bit module code cannot be used * as system call address. 
*/ + +extern unsigned long VMALLOC_START; + #ifndef __s390x__ -#define VMALLOC_START 0x78000000UL +#define VMALLOC_SIZE (96UL << 20) #define VMALLOC_END 0x7e000000UL #define VMEM_MAP_END 0x80000000UL #else /* __s390x__ */ -#define VMALLOC_START 0x3e000000000UL +#define VMALLOC_SIZE (1UL << 30) #define VMALLOC_END 0x3e040000000UL #define VMEM_MAP_END 0x40000000000UL #endif /* __s390x__ */ diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h new file mode 100644 index 00000000000..781a9cf9b00 --- /dev/null +++ b/arch/s390/include/asm/seccomp.h @@ -0,0 +1,16 @@ +#ifndef _ASM_S390_SECCOMP_H +#define _ASM_S390_SECCOMP_H + +#include <linux/unistd.h> + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_sigreturn + +#define __NR_seccomp_read_32 __NR_read +#define __NR_seccomp_write_32 __NR_write +#define __NR_seccomp_exit_32 __NR_exit +#define __NR_seccomp_sigreturn_32 __NR_sigreturn + +#endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h index f6cfddb278c..cdf5cb2fe03 100644 --- a/arch/s390/include/asm/signal.h +++ b/arch/s390/include/asm/signal.h @@ -115,7 +115,7 @@ typedef unsigned long sigset_t; #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include <asm-generic/signal.h> +#include <asm-generic/signal-defs.h> #ifdef __KERNEL__ struct old_sigaction { diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index f3861b09ebb..c9af0d19c7a 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -122,8 +122,10 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lp) #define __raw_write_can_lock(x) ((x)->lock == 0) extern void _raw_read_lock_wait(raw_rwlock_t *lp); +extern void _raw_read_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags); extern int _raw_read_trylock_retry(raw_rwlock_t *lp); extern void 
_raw_write_lock_wait(raw_rwlock_t *lp); +extern void _raw_write_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags); extern int _raw_write_trylock_retry(raw_rwlock_t *lp); static inline void __raw_read_lock(raw_rwlock_t *rw) @@ -134,6 +136,14 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) _raw_read_lock_wait(rw); } +static inline void __raw_read_lock_flags(raw_rwlock_t *rw, unsigned long flags) +{ + unsigned int old; + old = rw->lock & 0x7fffffffU; + if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old) + _raw_read_lock_wait_flags(rw, flags); +} + static inline void __raw_read_unlock(raw_rwlock_t *rw) { unsigned int old, cmp; @@ -151,6 +161,12 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) _raw_write_lock_wait(rw); } +static inline void __raw_write_lock_flags(raw_rwlock_t *rw, unsigned long flags) +{ + if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) + _raw_write_lock_wait_flags(rw, flags); +} + static inline void __raw_write_unlock(raw_rwlock_t *rw) { _raw_compare_and_swap(&rw->lock, 0x80000000, 0); @@ -172,9 +188,6 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) return _raw_write_trylock_retry(rw); } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) - #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h index 1f34580e67a..dc75c616eaf 100644 --- a/arch/s390/include/asm/suspend.h +++ b/arch/s390/include/asm/suspend.h @@ -1,5 +1,10 @@ #ifndef __ASM_S390_SUSPEND_H #define __ASM_S390_SUSPEND_H +static inline int arch_prepare_suspend(void) +{ + return 0; +} + #endif diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 2429b87eb28..e0a73d3eb83 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -12,6 +12,7 @@ #ifndef _ASM_SYSCALL_H #define _ASM_SYSCALL_H 1 
+#include <linux/sched.h> #include <asm/ptrace.h> static inline long syscall_get_nr(struct task_struct *task, diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 3a8b26eb1f2..4fb83c1cdb7 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -1,11 +1,7 @@ /* - * include/asm-s390/system.h + * Copyright IBM Corp. 1999, 2009 * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * - * Derived from "include/asm-i386/system.h" + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ #ifndef __ASM_SYSTEM_H @@ -469,6 +465,20 @@ extern psw_t sysc_restore_trace_psw; extern psw_t io_restore_trace_psw; #endif +static inline int tprot(unsigned long addr) +{ + int rc = -EFAULT; + + asm volatile( + " tprot 0(%1),0\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "a" (addr) : "cc"); + return rc; +} + #endif /* __KERNEL__ */ #endif diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h index 67f66278f53..bc3a35cefc9 100644 --- a/arch/s390/include/asm/termios.h +++ b/arch/s390/include/asm/termios.h @@ -60,7 +60,7 @@ struct termio { #define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) #define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) -#include <asm-generic/termios.h> +#include <asm-generic/termios-base.h> #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 461f2abd2e6..925bcc64903 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -83,14 +83,16 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define 
TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTART_SVC 4 /* restart svc with new svc number */ -#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SINGLE_STEP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ +#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ +#define TIF_SECCOMP 10 /* secure computing */ +#define TIF_SYSCALL_FTRACE 11 /* ftrace syscall instrumentation */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -99,15 +101,17 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ #define TIF_FREEZE 21 /* thread is freezing for suspend */ -#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_RESTART_SVC (1<<TIF_RESTART_SVC) -#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) #define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING) +#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1<<TIF_SECCOMP) +#define _TIF_SYSCALL_FTRACE (1<<TIF_SYSCALL_FTRACE) #define _TIF_USEDFPU (1<<TIF_USEDFPU) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h index 3dc3fc22881..04d6b95a89c 100644 --- a/arch/s390/include/asm/types.h +++ b/arch/s390/include/asm/types.h @@ -28,12 +28,6 @@ typedef __signed__ long saddr_t; */ #ifdef __KERNEL__ -#ifndef __s390x__ -#define 
BITS_PER_LONG 32 -#else -#define BITS_PER_LONG 64 -#endif - #ifndef __ASSEMBLY__ typedef u64 dma64_addr_t; diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 0235970278f..8377e91533d 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -131,7 +131,7 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) #define put_user(x, ptr) \ ({ \ - might_sleep(); \ + might_fault(); \ __put_user(x, ptr); \ }) @@ -180,7 +180,7 @@ extern int __put_user_bad(void) __attribute__((noreturn)); #define get_user(x, ptr) \ ({ \ - might_sleep(); \ + might_fault(); \ __get_user(x, ptr); \ }) @@ -231,7 +231,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); + might_fault(); if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); return n; @@ -282,7 +282,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); else @@ -299,7 +299,7 @@ __copy_in_user(void __user *to, const void __user *from, unsigned long n) static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (__access_ok(from,n) && __access_ok(to,n)) n = __copy_in_user(to, from, n); return n; @@ -312,7 +312,7 @@ static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; - might_sleep(); + might_fault(); if (access_ok(VERIFY_READ, src, 1)) res = uaccess.strncpy_from_user(count, src, dst); return res; @@ -321,7 +321,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) static inline unsigned 
long strnlen_user(const char __user * src, unsigned long n) { - might_sleep(); + might_fault(); return uaccess.strnlen_user(n, src); } @@ -354,7 +354,7 @@ __clear_user(void __user *to, unsigned long n) static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { - might_sleep(); + might_fault(); if (access_ok(VERIFY_WRITE, to, n)) n = uaccess.clear_user(n, to); return n; diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index f0f19e6ace6..c80602d7c88 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -267,7 +267,9 @@ #define __NR_epoll_create1 327 #define __NR_preadv 328 #define __NR_pwritev 329 -#define NR_syscalls 330 +#define __NR_rt_tgsigqueueinfo 330 +#define __NR_perf_counter_open 331 +#define NR_syscalls 332 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 228e3105ded..c75ed43b1a1 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -3,8 +3,9 @@ # ifdef CONFIG_FUNCTION_TRACER -# Do not trace early boot code +# Don't trace early setup code and tracing code CFLAGS_REMOVE_early.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg endif # @@ -22,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ - vdso.o vtime.o sysinfo.o nmi.o + vdso.o vtime.o sysinfo.o nmi.o sclp.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) @@ -41,6 +42,8 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o # Kexec part S390_KEXEC_OBJS := 
machine_kexec.o crash.o diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index fb38af6316b..88a83366819 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1823,3 +1823,20 @@ compat_sys_pwritev_wrapper: llgfr %r5,%r5 # u32 llgfr %r6,%r6 # u32 jg compat_sys_pwritev # branch to system call + + .globl compat_sys_rt_tgsigqueueinfo_wrapper +compat_sys_rt_tgsigqueueinfo_wrapper: + lgfr %r2,%r2 # compat_pid_t + lgfr %r3,%r3 # compat_pid_t + lgfr %r4,%r4 # int + llgtr %r5,%r5 # struct compat_siginfo * + jg compat_sys_rt_tgsigqueueinfo # branch to system call + + .globl sys_perf_counter_open_wrapper +sys_perf_counter_open_wrapper: + llgtr %r2,%r2 # const struct perf_counter_attr * + lgfr %r3,%r3 # pid_t + lgfr %r4,%r4 # int + lgfr %r5,%r5 # int + llgfr %r6,%r6 # unsigned long + jg sys_perf_counter_open # branch to system call diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cf09948faad..f9b144049dc 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -1,7 +1,7 @@ /* * arch/s390/kernel/early.c * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp.
2007, 2009 * Author(s): Hongjie Yang <hongjie@us.ibm.com>, * Heiko Carstens <heiko.carstens@de.ibm.com> */ @@ -11,6 +11,7 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/ctype.h> +#include <linux/ftrace.h> #include <linux/lockdep.h> #include <linux/module.h> #include <linux/pfn.h> @@ -209,7 +210,7 @@ static noinline __init void detect_machine_type(void) machine_flags |= MACHINE_FLAG_VM; } -static __init void early_pgm_check_handler(void) +static void early_pgm_check_handler(void) { unsigned long addr; const struct exception_table_entry *fixup; @@ -221,7 +222,7 @@ static __init void early_pgm_check_handler(void) S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE; } -static noinline __init void setup_lowcore_early(void) +void setup_lowcore_early(void) { psw_t psw; @@ -410,5 +411,8 @@ void __init startup_init(void) sclp_facilities_detect(); detect_memory_layout(memory_chunk); S390_lowcore.machine_flags = machine_flags; +#ifdef CONFIG_DYNAMIC_FTRACE + S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; +#endif lockdep_on(); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f3e27593421..c4c80a22bc1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -53,6 +53,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) +_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ + _TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -265,7 +267,7 @@ sysc_do_restart: sth %r7,SP_SVCNR(%r15) sll %r7,2 # svc number *4 l %r8,BASED(.Lsysc_table) - tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) + tm __TI_flags+2(%r9),_TIF_SYSCALL l %r8,0(%r7,%r8) # get system call addr. 
bnz BASED(sysc_tracesys) basr %r14,%r8 # call sys_xxxx @@ -405,7 +407,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) + tm __TI_flags+2(%r9),_TIF_SYSCALL bz BASED(sysc_return) l %r1,BASED(.Ltrace_exit) la %r2,SP_PTREGS(%r15) # load pt_regs @@ -1107,6 +1109,7 @@ cleanup_io_leave_insn: .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esa + .globl sys_call_table sys_call_table: #include "syscalls.S" #undef SYSCALL diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 84a105838e0..f6618e9e15e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -56,6 +56,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) +_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ + _TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8) #define BASED(name) name-system_call(%r13) @@ -260,7 +262,7 @@ sysc_do_restart: larl %r10,sys_call_table_emu # use 31 bit emulation system calls sysc_noemu: #endif - tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) + tm __TI_flags+6(%r9),_TIF_SYSCALL lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys basr %r14,%r8 # call sys_xxxx @@ -391,7 +393,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) + tm __TI_flags+6(%r9),_TIF_SYSCALL jz sysc_return la %r2,SP_PTREGS(%r15) # load pt_regs larl %r14,sysc_return # return point is sysc_return @@ -1058,6 +1060,7 @@ cleanup_io_leave_insn: .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esame + .globl sys_call_table sys_call_table: #include "syscalls.S" #undef SYSCALL diff --git a/arch/s390/kernel/ftrace.c 
b/arch/s390/kernel/ftrace.c new file mode 100644 index 00000000000..82ddfd3a75a --- /dev/null +++ b/arch/s390/kernel/ftrace.c @@ -0,0 +1,260 @@ +/* + * Dynamic function tracer architecture backend. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/ftrace.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <trace/syscall.h> +#include <asm/lowcore.h> + +#ifdef CONFIG_DYNAMIC_FTRACE + +void ftrace_disable_code(void); +void ftrace_disable_return(void); +void ftrace_call_code(void); +void ftrace_nop_code(void); + +#define FTRACE_INSN_SIZE 4 + +#ifdef CONFIG_64BIT + +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " j 0f\n" + " .word 0x0024\n" + " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n" + " basr %r14,%r1\n" + "ftrace_disable_return:\n" + " lg %r14,8(15)\n" + " lgr %r0,%r0\n" + "0:\n"); + +asm( + " .align 4\n" + "ftrace_nop_code:\n" + " j .+"__stringify(MCOUNT_INSN_SIZE)"\n"); + +asm( + " .align 4\n" + "ftrace_call_code:\n" + " stg %r14,8(%r15)\n"); + +#else /* CONFIG_64BIT */ + +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " j 0f\n" + " l %r1,"__stringify(__LC_FTRACE_FUNC)"\n" + " basr %r14,%r1\n" + "ftrace_disable_return:\n" + " l %r14,4(%r15)\n" + " j 0f\n" + " bcr 0,%r7\n" + " bcr 0,%r7\n" + " bcr 0,%r7\n" + " bcr 0,%r7\n" + " bcr 0,%r7\n" + " bcr 0,%r7\n" + "0:\n"); + +asm( + " .align 4\n" + "ftrace_nop_code:\n" + " j .+"__stringify(MCOUNT_INSN_SIZE)"\n"); + +asm( + " .align 4\n" + "ftrace_call_code:\n" + " st %r14,4(%r15)\n"); + +#endif /* CONFIG_64BIT */ + +static int ftrace_modify_code(unsigned long ip, + void *old_code, int old_size, + void *new_code, int new_size) +{ + unsigned char replaced[MCOUNT_INSN_SIZE]; + + /* + * Note: Due to modules code can disappear and change. + * We need to protect against faulting as well as code + * changing. We do this by using the probe_kernel_* + * functions. 
+ * This however is just a simple sanity check. + */ + if (probe_kernel_read(replaced, (void *)ip, old_size)) + return -EFAULT; + if (memcmp(replaced, old_code, old_size) != 0) + return -EINVAL; + if (probe_kernel_write((void *)ip, new_code, new_size)) + return -EPERM; + return 0; +} + +static int ftrace_make_initial_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + return ftrace_modify_code(rec->ip, + ftrace_call_code, FTRACE_INSN_SIZE, + ftrace_disable_code, MCOUNT_INSN_SIZE); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + if (addr == MCOUNT_ADDR) + return ftrace_make_initial_nop(mod, rec, addr); + return ftrace_modify_code(rec->ip, + ftrace_call_code, FTRACE_INSN_SIZE, + ftrace_nop_code, FTRACE_INSN_SIZE); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + return ftrace_modify_code(rec->ip, + ftrace_nop_code, FTRACE_INSN_SIZE, + ftrace_call_code, FTRACE_INSN_SIZE); +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + ftrace_dyn_func = (unsigned long)func; + return 0; +} + +int __init ftrace_dyn_arch_init(void *data) +{ + *(unsigned long *)data = 0; + return 0; +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Patch the kernel code at ftrace_graph_caller location: + * The instruction there is branch relative on condition. The condition mask + * is either all ones (always branch aka disable ftrace_graph_caller) or all + * zeroes (nop aka enable ftrace_graph_caller). + * Instruction format for brc is a7m4xxxx where m is the condition mask. 
+ */ +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned short opcode = 0xa704; + + return probe_kernel_write(ftrace_graph_caller, &opcode, sizeof(opcode)); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + unsigned short opcode = 0xa7f4; + + return probe_kernel_write(ftrace_graph_caller, &opcode, sizeof(opcode)); +} + +static inline unsigned long ftrace_mcount_call_adjust(unsigned long addr) +{ + return addr - (ftrace_disable_return - ftrace_disable_code); +} + +#else /* CONFIG_DYNAMIC_FTRACE */ + +static inline unsigned long ftrace_mcount_call_adjust(unsigned long addr) +{ + return addr - MCOUNT_OFFSET_RET; +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +/* + * Hook the return address and push it in the stack of return addresses + * in current thread info. + */ +unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent) +{ + struct ftrace_graph_ent trace; + + /* Nmi's are currently unsupported. */ + if (unlikely(in_nmi())) + goto out; + if (unlikely(atomic_read(&current->tracing_graph_pause))) + goto out; + if (ftrace_push_return_trace(parent, ip, &trace.depth) == -EBUSY) + goto out; + trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN; + /* Only trace if the calling function expects to. 
*/ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + goto out; + } + parent = (unsigned long)return_to_handler; +out: + return parent; +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_FTRACE_SYSCALLS + +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; +extern unsigned int sys_call_table[]; + +static struct syscall_metadata **syscalls_metadata; + +struct syscall_metadata *syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + +static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup(syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + if (start->name && !strcmp(start->name + 3, str + 3)) + return start; + } + return NULL; +} + +void arch_init_ftrace_syscalls(void) +{ + struct syscall_metadata *meta; + int i; + static atomic_t refs; + + if (atomic_inc_return(&refs) != 1) + goto out; + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, + GFP_KERNEL); + if (!syscalls_metadata) + goto out; + for (i = 0; i < NR_syscalls; i++) { + meta = find_syscall_meta((unsigned long)sys_call_table[i]); + syscalls_metadata[i] = meta; + } + return; +out: + atomic_dec(&refs); +} +#endif diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 22596d70fc2..ec688234852 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head.S - * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 
1999,2009 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -64,7 +62,7 @@ __HEAD .org 0x100 # # subroutine for loading from tape -# Paramters: +# Parameters: # R1 = device number # R2 = load address .Lloader: @@ -479,27 +477,58 @@ startup:basr %r13,0 # get base mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) mvc __LC_EXIT_TIMER(8),5f-.LPG0(%r13) #ifndef CONFIG_MARCH_G5 - # check processor version against MARCH_{G5,Z900,Z990,Z9_109,Z10} - stidp __LC_CPUID # store cpuid - lhi %r0,(3f-2f) / 2 - la %r1,2f-.LPG0(%r13) -0: clc __LC_CPUID+4(2),0(%r1) - jne 3f - lpsw 1f-.LPG0(13) # machine type not good enough, crash + # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} + xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST + stfl __LC_STFL_FAC_LIST # store facility list + tm __LC_STFL_FAC_LIST,0x01 # stfle available ? + jz 0f + la %r0,0 + .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended +0: l %r0,__LC_STFL_FAC_LIST + n %r0,2f+8-.LPG0(%r13) + cl %r0,2f+8-.LPG0(%r13) + jne 1f + l %r0,__LC_STFL_FAC_LIST+4 + n %r0,2f+12-.LPG0(%r13) + cl %r0,2f+12-.LPG0(%r13) + je 3f +1: l %r15,.Lstack-.LPG0(%r13) + ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE + ahi %r15,-96 + la %r2,.Lals_string-.LPG0(%r13) + l %r3,.Lsclp_print-.LPG0(%r13) + basr %r14,%r3 + lpsw 2f-.LPG0(%r13) # machine type not good enough, crash +.Lals_string: + .asciz "The Linux kernel requires more recent processor hardware" +.Lsclp_print: + .long _sclp_print_early +.Lstack: + .long init_thread_union .align 16 -1: .long 0x000a0000,0x00000000 -2: +2: .long 0x000a0000,0x8badcccc +#if defined(CONFIG_64BIT) #if defined(CONFIG_MARCH_Z10) - .short 0x9672, 0x2064, 0x2066, 0x2084, 0x2086, 0x2094, 0x2096 + .long 0xc100efe3, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) - .short 0x9672, 0x2064, 0x2066, 0x2084, 0x2086 + .long 0xc100efc3, 0x00000000 #elif defined(CONFIG_MARCH_Z990) - .short 0x9672, 0x2064, 0x2066 + .long 0xc0002000, 0x00000000 #elif 
defined(CONFIG_MARCH_Z900) - .short 0x9672 + .long 0xc0000000, 0x00000000 +#endif +#else +#if defined(CONFIG_MARCH_Z10) + .long 0x8100c880, 0x00000000 +#elif defined(CONFIG_MARCH_Z9_109) + .long 0x8100c880, 0x00000000 +#elif defined(CONFIG_MARCH_Z990) + .long 0x80002000, 0x00000000 +#elif defined(CONFIG_MARCH_Z900) + .long 0x80000000, 0x00000000 +#endif #endif -3: la %r1,2(%r1) - brct %r0,0b +3: #endif l %r13,4f-.LPG0(%r13) diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index 7db95c0b869..fe787f9e5f3 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -18,10 +18,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - /* * Initial thread structure. * diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index a01cf0284db..9bb2f6241d9 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -25,9 +25,9 @@ #include <linux/preempt.h> #include <linux/stop_machine.h> #include <linux/kdebug.h> +#include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/sections.h> -#include <asm/uaccess.h> #include <linux/module.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -155,35 +155,8 @@ void __kprobes get_instruction_type(struct arch_specific_insn *ainsn) static int __kprobes swap_instruction(void *aref) { struct ins_replace_args *args = aref; - u32 *addr; - u32 instr; - int err = -EFAULT; - /* - * Text segment is read-only, hence we use stura to bypass dynamic - * address translation to exchange the instruction. Since stura - * always operates on four bytes, but we only want to exchange two - * bytes do some calculations to get things right. In addition we - * shall not cross any page boundaries (vmalloc area!) when writing - * the new instruction. 
- */ - addr = (u32 *)((unsigned long)args->ptr & -4UL); - if ((unsigned long)args->ptr & 2) - instr = ((*addr) & 0xffff0000) | args->new; - else - instr = ((*addr) & 0x0000ffff) | args->new << 16; - - asm volatile( - " lra %1,0(%1)\n" - "0: stura %2,%1\n" - "1: la %0,0\n" - "2:\n" - EX_TABLE(0b,2b) - : "+d" (err) - : "a" (addr), "d" (instr) - : "memory", "cc"); - - return err; + return probe_kernel_write(args->ptr, &args->new, sizeof(args->new)); } void __kprobes arch_arm_kprobe(struct kprobe *p) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 80641224a09..2a0a5e97ba8 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2009 * * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, * @@ -7,36 +7,64 @@ #include <asm/asm-offsets.h> -#ifndef CONFIG_64BIT -.globl _mcount + .globl ftrace_stub +ftrace_stub: + br %r14 + +#ifdef CONFIG_64BIT + +#ifdef CONFIG_DYNAMIC_FTRACE + + .globl _mcount _mcount: - stm %r0,%r5,8(%r15) - st %r14,56(%r15) - lr %r1,%r15 - ahi %r15,-96 - l %r3,100(%r15) - la %r2,0(%r14) - st %r1,__SF_BACKCHAIN(%r15) - la %r3,0(%r3) - bras %r14,0f - .long ftrace_trace_function -0: l %r14,0(%r14) - l %r14,0(%r14) - basr %r14,%r14 - ahi %r15,96 - lm %r0,%r5,8(%r15) - l %r14,56(%r15) br %r14 -.globl ftrace_stub -ftrace_stub: + .globl ftrace_caller +ftrace_caller: + larl %r1,function_trace_stop + icm %r1,0xf,0(%r1) + bnzr %r14 + stmg %r2,%r5,32(%r15) + stg %r14,112(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + lgr %r2,%r14 + lg %r3,168(%r15) + larl %r14,ftrace_dyn_func + lg %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_caller +ftrace_graph_caller: + # This unconditional branch gets runtime patched. Change only if + # you know what you are doing. See ftrace_enable_ftrace_graph_caller(). 
+ j 0f + lg %r2,272(%r15) + lg %r3,168(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,168(%r15) +0: +#endif + aghi %r15,160 + lmg %r2,%r5,32(%r15) + lg %r14,112(%r15) br %r14 -#else /* CONFIG_64BIT */ + .data + .globl ftrace_dyn_func +ftrace_dyn_func: + .quad ftrace_stub + .previous + +#else /* CONFIG_DYNAMIC_FTRACE */ -.globl _mcount + .globl _mcount _mcount: - stmg %r0,%r5,16(%r15) + larl %r1,function_trace_stop + icm %r1,0xf,0(%r1) + bnzr %r14 + stmg %r2,%r5,32(%r15) stg %r14,112(%r15) lgr %r1,%r15 aghi %r15,-160 @@ -46,13 +74,143 @@ _mcount: larl %r14,ftrace_trace_function lg %r14,0(%r14) basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + lg %r2,272(%r15) + lg %r3,168(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,168(%r15) +#endif aghi %r15,160 - lmg %r0,%r5,16(%r15) + lmg %r2,%r5,32(%r15) lg %r14,112(%r15) br %r14 -.globl ftrace_stub -ftrace_stub: +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + + .globl return_to_handler +return_to_handler: + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,160 + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + br %r14 + +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#else /* CONFIG_64BIT */ + +#ifdef CONFIG_DYNAMIC_FTRACE + + .globl _mcount +_mcount: + br %r14 + + .globl ftrace_caller +ftrace_caller: + stm %r2,%r5,16(%r15) + bras %r1,2f +0: .long ftrace_trace_function +1: .long function_trace_stop +2: l %r2,1b-0b(%r1) + icm %r2,0xf,0(%r2) + jnz 3f + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + l %r3,100(%r15) + la %r2,0(%r14) + st %r0,__SF_BACKCHAIN(%r15) + la %r3,0(%r3) + l %r14,0b-0b(%r1) + l %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_caller +ftrace_graph_caller: + # This unconditional branch gets runtime patched. Change only if + # you know what you are doing. See ftrace_enable_ftrace_graph_caller(). 
+ j 1f + bras %r1,0f + .long prepare_ftrace_return +0: l %r2,152(%r15) + l %r4,0(%r1) + l %r3,100(%r15) + basr %r14,%r4 + st %r2,100(%r15) +1: +#endif + ahi %r15,96 + l %r14,56(%r15) +3: lm %r2,%r5,16(%r15) br %r14 + .data + .globl ftrace_dyn_func +ftrace_dyn_func: + .long ftrace_stub + .previous + +#else /* CONFIG_DYNAMIC_FTRACE */ + + .globl _mcount +_mcount: + stm %r2,%r5,16(%r15) + bras %r1,2f +0: .long ftrace_trace_function +1: .long function_trace_stop +2: l %r2,1b-0b(%r1) + icm %r2,0xf,0(%r2) + jnz 3f + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + l %r3,100(%r15) + la %r2,0(%r14) + st %r0,__SF_BACKCHAIN(%r15) + la %r3,0(%r3) + l %r14,0b-0b(%r1) + l %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + bras %r1,0f + .long prepare_ftrace_return +0: l %r2,152(%r15) + l %r4,0(%r1) + l %r3,100(%r15) + basr %r14,%r4 + st %r2,100(%r15) +#endif + ahi %r15,96 + l %r14,56(%r15) +3: lm %r2,%r5,16(%r15) + br %r14 + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + + .globl return_to_handler +return_to_handler: + stm %r2,%r5,16(%r15) + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + st %r0,__SF_BACKCHAIN(%r15) + bras %r1,0f + .long ftrace_return_to_handler +0: l %r2,0b-0b(%r1) + basr %r14,%r2 + lr %r14,%r2 + ahi %r15,96 + lm %r2,%r5,16(%r15) + br %r14 + +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + #endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 9872999c66d..559af0d0787 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -1,6 +1,7 @@ /* - * Copyright IBM Corp. 2008 - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + * Copyright IBM Corp. 
2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ #include <linux/kernel.h> @@ -9,20 +10,6 @@ #include <asm/sclp.h> #include <asm/setup.h> -static inline int tprot(unsigned long addr) -{ - int rc = -EFAULT; - - asm volatile( - " tprot 0(%1),0\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "a" (addr) : "cc"); - return rc; -} - #define ADDR2G (1ULL << 31) static void find_memory_chunks(struct mem_chunk chunk[]) diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index eed4a00cb67..ab2e3ed28ab 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -56,8 +56,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } static void diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 28cf196ba77..015e27da40e 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -16,7 +16,7 @@ #include <asm/lowcore.h> #include <asm/smp.h> #include <asm/etr.h> -#include <asm/cpu.h> +#include <asm/cputime.h> #include <asm/nmi.h> #include <asm/crw.h> diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index a3acd8e60af..355f7a30c3f 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/elfcore.h> #include <linux/kernel_stat.h> #include <linux/syscalls.h> +#include <asm/compat.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -204,7 +205,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, save_fp_regs(&p->thread.fp_regs); /* Set a new TLS ? 
*/ if (clone_flags & CLONE_SETTLS) { - if (test_thread_flag(TIF_31BIT)) { + if (is_compat_task()) { p->thread.acrs[0] = (unsigned int) regs->gprs[6]; } else { p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 75c496f4f16..490b39934d6 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -36,7 +36,9 @@ #include <linux/elf.h> #include <linux/regset.h> #include <linux/tracehook.h> - +#include <linux/seccomp.h> +#include <trace/syscall.h> +#include <asm/compat.h> #include <asm/segment.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -69,7 +71,7 @@ FixPerRegisters(struct task_struct *task) if (per_info->single_step) { per_info->control_regs.bits.starting_addr = 0; #ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) + if (is_compat_task()) per_info->control_regs.bits.ending_addr = 0x7fffffffUL; else #endif @@ -482,8 +484,7 @@ static int peek_user_compat(struct task_struct *child, { __u32 tmp; - if (!test_thread_flag(TIF_31BIT) || - (addr & 3) || addr > sizeof(struct user) - 3) + if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) return -EIO; tmp = __peek_user_compat(child, addr); @@ -584,8 +585,7 @@ static int __poke_user_compat(struct task_struct *child, static int poke_user_compat(struct task_struct *child, addr_t addr, addr_t data) { - if (!test_thread_flag(TIF_31BIT) || - (addr & 3) || addr > sizeof(struct user32) - 3) + if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user32) - 3) return -EIO; return __poke_user_compat(child, addr, data); @@ -642,6 +642,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { long ret; + /* Do the secure computing check first. */ + secure_computing(regs->gprs[2]); + /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. 
@@ -659,8 +662,11 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) ret = -1; } + if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + ftrace_syscall_enter(regs); + if (unlikely(current->audit_context)) - audit_syscall_entry(test_thread_flag(TIF_31BIT) ? + audit_syscall_entry(is_compat_task() ? AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, regs->gprs[2], regs->orig_gpr2, regs->gprs[3], regs->gprs[4], @@ -674,6 +680,9 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); + if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) + ftrace_syscall_exit(regs); + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); } diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index a0d2d55d7fb..0de305b598c 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -10,10 +10,11 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/ftrace.h> #include <linux/errno.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> -#include <asm/cpu.h> +#include <asm/cputime.h> #include <asm/lowcore.h> #include <asm/s390_ext.h> #include <asm/irq_regs.h> @@ -112,7 +113,7 @@ int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler, return 0; } -void do_extint(struct pt_regs *regs, unsigned short code) +void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) { ext_int_info_t *p; int index; diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S new file mode 100644 index 00000000000..20639dfe0c4 --- /dev/null +++ b/arch/s390/kernel/sclp.S @@ -0,0 +1,327 @@ +/* + * Mini SCLP driver. + * + * Copyright IBM Corp. 
2004,2009 + * + * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +LC_EXT_NEW_PSW = 0x58 # addr of ext int handler +LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter +LC_EXT_INT_CODE = 0x86 # addr of ext int code + +# +# Subroutine which waits synchronously until either an external interruption +# or a timeout occurs. +# +# Parameters: +# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds +# +# Returns: +# R2 = 0 on interrupt, 2 on timeout +# R3 = external interruption parameter if R2=0 +# + +.section ".init.text","ax" + +_sclp_wait_int: + stm %r6,%r15,24(%r15) # save registers + basr %r13,0 # get base register +.LbaseS1: + ahi %r15,-96 # create stack frame + la %r8,LC_EXT_NEW_PSW # register int handler + mvc .LoldpswS1-.LbaseS1(8,%r13),0(%r8) + mvc 0(8,%r8),.LextpswS1-.LbaseS1(%r13) + lhi %r6,0x0200 # cr mask for ext int (cr0.54) + ltr %r2,%r2 + jz .LsetctS1 + ahi %r6,0x0800 # cr mask for clock int (cr0.52) + stck .LtimeS1-.LbaseS1(%r13) # initiate timeout + al %r2,.LtimeS1-.LbaseS1(%r13) + st %r2,.LtimeS1-.LbaseS1(%r13) + sckc .LtimeS1-.LbaseS1(%r13) + +.LsetctS1: + stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts + l %r0,.LctlS1-.LbaseS1(%r13) + lhi %r1,~(0x200 | 0x800) # clear old values + nr %r1,%r0 + or %r1,%r6 # set new value + st %r1,.LctlS1-.LbaseS1(%r13) + lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) + st %r0,.LctlS1-.LbaseS1(%r13) + lhi %r2,2 # return code for timeout +.LloopS1: + lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt +.LwaitS1: + lh %r7,LC_EXT_INT_CODE + chi %r7,0x1004 # timeout? + je .LtimeoutS1 + chi %r7,0x2401 # service int? 
+ jne .LloopS1 + sr %r2,%r2 + l %r3,LC_EXT_INT_PARAM +.LtimeoutS1: + lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting + # restore old handler + mvc 0(8,%r8),.LoldpswS1-.LbaseS1(%r13) + lm %r6,%r15,120(%r15) # restore registers + br %r14 # return to caller + + .align 8 +.LoldpswS1: + .long 0, 0 # old ext int PSW +.LextpswS1: + .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int +.LwaitpswS1: + .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int +.LtimeS1: + .quad 0 # current time +.LctlS1: + .long 0 # CT0 contents + +# +# Subroutine to synchronously issue a service call. +# +# Parameters: +# R2 = command word +# R3 = sccb address +# +# Returns: +# R2 = 0 on success, 1 on failure +# R3 = sccb response code if R2 = 0 +# + +_sclp_servc: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + lr %r6,%r2 # save command word + lr %r7,%r3 # save sccb address +.LretryS2: + lhi %r2,1 # error return code + .insn rre,0xb2200000,%r6,%r7 # servc + brc 1,.LendS2 # exit if not operational + brc 8,.LnotbusyS2 # go on if not busy + sr %r2,%r2 # wait until no longer busy + bras %r14,_sclp_wait_int + j .LretryS2 # retry +.LnotbusyS2: + sr %r2,%r2 # wait until result + bras %r14,_sclp_wait_int + sr %r2,%r2 + lh %r3,6(%r7) +.LendS2: + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +# +# Subroutine to set up the SCLP interface. +# +# Parameters: +# R2 = 0 to activate, non-zero to deactivate +# +# Returns: +# R2 = 0 on success, non-zero on failure +# + +_sclp_setup: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + basr %r13,0 # get base register +.LbaseS3: + l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb + mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13) + ltr %r2,%r2 # initialization? 
+ jz .LdoinitS3 # go ahead + # clear masks + xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6) +.LdoinitS3: + l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word + lr %r3,%r6 # get sccb address + bras %r14,_sclp_servc # issue service call + ltr %r2,%r2 # servc successful? + jnz .LerrorS3 + chi %r3,0x20 # write mask successful? + jne .LerrorS3 + # check masks + la %r2,.LinitmaskS3-.LinitsccbS3(%r6) + l %r1,0(%r2) # receive mask ok? + n %r1,12(%r2) + cl %r1,0(%r2) + jne .LerrorS3 + l %r1,4(%r2) # send mask ok? + n %r1,8(%r2) + cl %r1,4(%r2) + sr %r2,%r2 + je .LendS3 +.LerrorS3: + lhi %r2,1 # error return code +.LendS3: + lm %r6,%r15,120(%r15) # restore registers + br %r14 +.LwritemaskS3: + .long 0x00780005 # SCLP command for write mask +.LinitsccbS3: + .word .LinitendS3-.LinitsccbS3 + .byte 0,0,0,0 + .word 0 + .word 0 + .word 4 +.LinitmaskS3: + .long 0x80000000 + .long 0x40000000 + .long 0 + .long 0 +.LinitendS3: + +# +# Subroutine which prints a given text to the SCLP console. +# +# Parameters: +# R2 = address of nil-terminated ASCII text +# +# Returns: +# R2 = 0 on success, 1 on failure +# + +_sclp_print: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + basr %r13,0 # get base register +.LbaseS4: + l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb + mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13) + la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr + sr %r0,%r0 + l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table +.LinitmtoS4: + # initialize mto + mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13) + lhi %r6,.LmtoendS4-.LmtoS4 # current mto length +.LloopS4: + ic %r0,0(%r2) # get character + ahi %r2,1 + ltr %r0,%r0 # end of string? + jz .LfinalizemtoS4 + chi %r0,0x15 # end of line (NL)? 
+ jz .LfinalizemtoS4 + stc %r0,0(%r6,%r7) # copy to mto + la %r11,0(%r6,%r7) + tr 0(1,%r11),0(%r10) # translate to EBCDIC + ahi %r6,1 + j .LloopS4 +.LfinalizemtoS4: + sth %r6,0(%r7) # update mto length + lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length + ar %r9,%r6 + sth %r9,.LmdbS4-.LwritesccbS4(%r8) + lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length + ar %r9,%r6 + sth %r9,.LevbufS4-.LwritesccbS4(%r8) + lh %r9,0(%r8) # update sccb length + ar %r9,%r6 + sth %r9,0(%r8) + ar %r7,%r6 # update current mto address + ltr %r0,%r0 # more characters? + jnz .LinitmtoS4 + l %r2,.LwritedataS4-.LbaseS4(%r13)# write data + lr %r3,%r8 + bras %r14,_sclp_servc + ltr %r2,%r2 # servc successful? + jnz .LendS4 + chi %r3,0x20 # write data successful? + je .LendS4 + lhi %r2,1 # error return code +.LendS4: + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +# +# Function which prints a given text to the SCLP console. +# +# Parameters: +# R2 = address of nil-terminated ASCII text +# +# Returns: +# R2 = 0 on success, 1 on failure +# + + .globl _sclp_print_early +_sclp_print_early: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + lr %r10,%r2 # save string pointer + lhi %r2,0 + bras %r14,_sclp_setup # enable console + ltr %r2,%r2 + jnz .LendS5 + lr %r2,%r10 + bras %r14,_sclp_print # print string + ltr %r2,%r2 + jnz .LendS5 + lhi %r2,1 + bras %r14,_sclp_setup # disable console +.LendS5: + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +.LwritedataS4: + .long 0x00760005 # SCLP command for write data +.LwritesccbS4: + # sccb + .word .LmtoS4-.LwritesccbS4 + .byte 0 + .byte 0,0,0 + .word 0 + + # evbuf +.LevbufS4: + .word .LmtoS4-.LevbufS4 + .byte 0x02 + .byte 0 + .word 0 + +.LmdbS4: + # mdb + .word .LmtoS4-.LmdbS4 + .word 1 + .long 0xd4c4c240 + .long 1 + + # go +.LgoS4: + .word .LmtoS4-.LgoS4 + .word 1 + .long 0 + .byte 0,0,0,0,0,0,0,0 + .byte 0,0,0 + .byte 0 + .byte 0,0,0,0,0,0,0 + .byte 0 + .word 0 + .byte 0,0,0,0,0,0,0,0,0,0 + .byte 
0,0,0,0,0,0,0,0 + .byte 0,0,0,0,0,0,0,0 + +.LmtoS4: + .word .LmtoendS4-.LmtoS4 + .word 4 + .word 0x1000 + .byte 0 + .byte 0,0,0 +.LmtoendS4: + + # Global constants +.LsccbS0: + .long _sclp_work_area +.Lascebc: + .long _ascebc +.previous + +.section ".init.data","a" + .balign 4096 +_sclp_work_area: + .fill 4096 +.previous diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7402b6a39ea..9717717c6fe 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -42,6 +42,7 @@ #include <linux/ctype.h> #include <linux/reboot.h> #include <linux/topology.h> +#include <linux/ftrace.h> #include <asm/ipl.h> #include <asm/uaccess.h> @@ -442,6 +443,7 @@ setup_lowcore(void) lc->steal_timer = S390_lowcore.steal_timer; lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; + lc->ftrace_func = S390_lowcore.ftrace_func; set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 3cf74c3ccb6..062bd64e65f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -26,6 +26,7 @@ #include <linux/binfmts.h> #include <linux/tracehook.h> #include <linux/syscalls.h> +#include <linux/compat.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> @@ -482,7 +483,7 @@ void do_signal(struct pt_regs *regs) /* Whee! Actually deliver the signal. */ int ret; #ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) { + if (is_compat_task()) { ret = handle_signal32(signr, &ka, &info, oldset, regs); } else diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a985a3ba440..fd8e3111a4e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,7 +1,7 @@ /* * arch/s390/kernel/smp.c * - * Copyright IBM Corp. 1999,2007 + * Copyright IBM Corp. 
1999, 2009 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * Heiko Carstens (heiko.carstens@de.ibm.com) @@ -47,7 +47,7 @@ #include <asm/timer.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include <asm/cpu.h> +#include <asm/cputime.h> #include <asm/vdso.h> #include "entry.h" @@ -572,6 +572,7 @@ int __cpuinit __cpu_up(unsigned int cpu) cpu_lowcore->cpu_nr = cpu; cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; cpu_lowcore->machine_flags = S390_lowcore.machine_flags; + cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func; eieio(); while (signal_processor(cpu, sigp_restart) == sigp_busy) @@ -1030,6 +1031,42 @@ out: static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show, dispatching_store); +/* + * If the resume kernel runs on another cpu than the suspended kernel, + * we have to switch the cpu IDs in the logical map. + */ +void smp_switch_boot_cpu_in_resume(u32 resume_phys_cpu_id, + struct _lowcore *suspend_lowcore) +{ + int cpu, suspend_cpu_id, resume_cpu_id; + u32 suspend_phys_cpu_id; + + suspend_phys_cpu_id = __cpu_logical_map[suspend_lowcore->cpu_nr]; + suspend_cpu_id = suspend_lowcore->cpu_nr; + + for_each_present_cpu(cpu) { + if (__cpu_logical_map[cpu] == resume_phys_cpu_id) { + resume_cpu_id = cpu; + goto found; + } + } + panic("Could not find resume cpu in logical map.\n"); + +found: + printk("Resume cpu ID: %i/%i\n", resume_phys_cpu_id, resume_cpu_id); + printk("Suspend cpu ID: %i/%i\n", suspend_phys_cpu_id, suspend_cpu_id); + + __cpu_logical_map[resume_cpu_id] = suspend_phys_cpu_id; + __cpu_logical_map[suspend_cpu_id] = resume_phys_cpu_id; + + lowcore_ptr[suspend_cpu_id]->cpu_addr = resume_phys_cpu_id; +} + +u32 smp_get_phys_cpu_id(void) +{ + return __cpu_logical_map[smp_processor_id()]; +} + static int __init topology_init(void) { int cpu; diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2c7739fe70b..ad1acd20038 100644 --- 
a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -338,3 +338,5 @@ SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) +SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */ +SYSCALL(sys_perf_counter_open,sys_perf_counter_open,sys_perf_counter_open_wrapper) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index ef596d02057..215330a2c12 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -70,7 +70,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); /* * Scheduler clock - returns current time in nanosec units. */ -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { return ((get_clock_xt() - sched_clock_base_cc) * 125) >> 9; } @@ -95,12 +95,6 @@ void tod_to_timeval(__u64 todval, struct timespec *xtime) xtime->tv_nsec = ((todval * 1000) >> 12); } -#ifdef CONFIG_PROFILING -#define s390_do_profile() profile_tick(CPU_PROFILING) -#else -#define s390_do_profile() do { ; } while(0) -#endif /* CONFIG_PROFILING */ - void clock_comparator_work(void) { struct clock_event_device *cd; @@ -109,7 +103,6 @@ void clock_comparator_work(void) set_clock_comparator(S390_lowcore.clock_comparator); cd = &__get_cpu_var(comparators); cd->event_handler(cd); - s390_do_profile(); } /* diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 89b2e7f1b7a..45e1708b70f 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -22,7 +22,7 @@ #include <linux/elf.h> #include <linux/security.h> #include <linux/bootmem.h> - +#include <linux/compat.h> #include <asm/pgtable.h> #include <asm/system.h> #include <asm/processor.h> @@ -53,8 +53,19 @@ unsigned int __read_mostly vdso_enabled = 1; static int __init vdso_setup(char *s) { - vdso_enabled = simple_strtoul(s, 
NULL, 0); - return 1; + unsigned long val; + int rc; + + rc = 0; + if (strncmp(s, "on", 3) == 0) + vdso_enabled = 1; + else if (strncmp(s, "off", 4) == 0) + vdso_enabled = 0; + else { + rc = strict_strtoul(s, 0, &val); + vdso_enabled = rc ? 0 : !!val; + } + return !rc; } __setup("vdso=", vdso_setup); @@ -203,7 +214,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) { + if (is_compat_task()) { vdso_pagelist = vdso32_pagelist; vdso_pages = vdso32_pages; } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 89399b8756c..a53db23ee09 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -34,6 +34,7 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT *(.fixup) *(.gnu.warning) } :text = 0x0700 diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c87f59bd824..c8eb7255332 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -23,7 +23,7 @@ #include <asm/s390_ext.h> #include <asm/timer.h> #include <asm/irq_regs.h> -#include <asm/cpu.h> +#include <asm/cputime.h> static ext_int_info_t ext_int_info_timer; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9d19803111b..98997ccba50 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu) static int handle_validity(struct kvm_vcpu *vcpu) { int viwhy = vcpu->arch.sie_block->ipb >> 16; + int rc; + vcpu->stat.exit_validity++; - if (viwhy == 0x37) { - fault_in_pages_writeable((char __user *) - vcpu->kvm->arch.guest_origin + - vcpu->arch.sie_block->prefix, - PAGE_SIZE); - return 0; - } - VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", - viwhy); - return -ENOTSUPP; + if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix + <= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){ + rc 
= fault_in_pages_writeable((char __user *) + vcpu->kvm->arch.guest_origin + + vcpu->arch.sie_block->prefix, + 2*PAGE_SIZE); + if (rc) + /* user will receive sigsegv, exit to user */ + rc = -ENOTSUPP; + } else + rc = -ENOTSUPP; + + if (rc) + VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", + viwhy); + return rc; } static int handle_instruction(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0189356fe20..f04f5301b1b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -12,6 +12,8 @@ #include <asm/lowcore.h> #include <asm/uaccess.h> +#include <linux/hrtimer.h> +#include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/signal.h> #include "kvm-s390.h" @@ -299,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) } if ((!rc) && atomic_read(&fi->active)) { - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); list_for_each_entry(inti, &fi->list, list) if (__interrupt_is_deliverable(vcpu, inti)) { rc = 1; break; } - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); } if ((!rc) && (vcpu->arch.sie_block->ckc < @@ -318,6 +320,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) return rc; } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { return 0; @@ -355,14 +363,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return 0; } - sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; + sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9; - vcpu->arch.ckc_timer.expires = jiffies + sltime; - - add_timer(&vcpu->arch.ckc_timer); - VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime); + hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); + VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: - spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + 
spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); add_wait_queue(&vcpu->arch.local_int.wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && @@ -371,33 +377,46 @@ no_timer: !signal_pending(current)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + spin_unlock(&vcpu->arch.local_int.float_int->lock); vcpu_put(vcpu); schedule(); vcpu_load(vcpu); - spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); - del_timer(&vcpu->arch.ckc_timer); + spin_unlock(&vcpu->arch.local_int.float_int->lock); + hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); return 0; } -void kvm_s390_idle_wakeup(unsigned long data) +void kvm_s390_tasklet(unsigned long parm) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - spin_lock_bh(&vcpu->arch.local_int.lock); + spin_lock(&vcpu->arch.local_int.lock); vcpu->arch.local_int.timer_due = 1; if (waitqueue_active(&vcpu->arch.local_int.wq)) wake_up_interruptible(&vcpu->arch.local_int.wq); - spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock(&vcpu->arch.local_int.lock); } +/* + * low level hrtimer wake routine. Because this runs in hardirq context + * we schedule a tasklet to do the real work. 
+ */ +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + tasklet_schedule(&vcpu->arch.tasklet); + + return HRTIMER_NORESTART; +} void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { @@ -436,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) if (atomic_read(&fi->active)) { do { deliver = 0; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); list_for_each_entry_safe(inti, n, &fi->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); @@ -447,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } if (list_empty(&fi->list)) atomic_set(&fi->active, 0); - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -512,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); list_add_tail(&inti->list, &fi->list); atomic_set(&fi->active, 1); sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); @@ -529,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, if (waitqueue_active(&li->wq)) wake_up_interruptible(&li->wq); spin_unlock_bh(&li->lock); - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f4d56e9939c..c18b21d6991 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -15,6 +15,7 @@ #include <linux/compiler.h> #include <linux/err.h> #include <linux/fs.h> +#include <linux/hrtimer.h> #include <linux/init.h> #include <linux/kvm.h> #include <linux/kvm_host.h> @@ -195,6 +196,10 @@ out_nokvm: void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == + (__u64) vcpu->arch.sie_block) + 
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + smp_mb(); free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); kfree(vcpu); @@ -283,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; vcpu->arch.sie_block->ecb = 2; vcpu->arch.sie_block->eca = 0xC1002001U; - setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, - (unsigned long) vcpu); + hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, + (unsigned long) vcpu); + vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; return 0; @@ -307,19 +314,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->icpua = id; BUG_ON(!kvm->arch.sca); - BUG_ON(kvm->arch.sca->cpu[id].sda); - kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + if (!kvm->arch.sca->cpu[id].sda) + kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + else + BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; spin_lock_init(&vcpu->arch.local_int.lock); INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; - spin_lock_bh(&kvm->arch.float_int.lock); + spin_lock(&kvm->arch.float_int.lock); kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; init_waitqueue_head(&vcpu->arch.local_int.wq); vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - spin_unlock_bh(&kvm->arch.float_int.lock); + spin_unlock(&kvm->arch.float_int.lock); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) @@ -478,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); + /* verify, that memory has been registered */ + if (!vcpu->kvm->arch.guest_memsize) { + vcpu_put(vcpu); + return 
-EINVAL; + } + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -497,7 +512,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) BUG(); } - might_sleep(); + might_fault(); do { __vcpu_run(vcpu); @@ -657,6 +672,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_memory_slot old, int user_alloc) { + int i; + /* A few sanity checks. We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a page boundary in userland and which has to end at a page boundary. @@ -664,7 +681,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, vmas. It is okay to mmap() and munmap() stuff in this slot after doing this call at any time */ - if (mem->slot) + if (mem->slot || kvm->arch.guest_memsize) return -EINVAL; if (mem->guest_phys_addr) @@ -676,15 +693,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (mem->memory_size & (PAGE_SIZE - 1)) return -EINVAL; + if (!user_alloc) + return -EINVAL; + + /* lock all vcpus */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (!kvm->vcpus[i]) + continue; + if (!mutex_trylock(&kvm->vcpus[i]->mutex)) + goto fail_out; + } + kvm->arch.guest_origin = mem->userspace_addr; kvm->arch.guest_memsize = mem->memory_size; - /* FIXME: we do want to interrupt running CPUs and update their memory - configuration now to avoid race conditions. But hey, changing the - memory layout while virtual CPUs are running is usually bad - programming practice. 
*/ + /* update sie control blocks, and unlock all vcpus */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm->vcpus[i]->arch.sie_block->gmsor = + kvm->arch.guest_origin; + kvm->vcpus[i]->arch.sie_block->gmslm = + kvm->arch.guest_memsize + + kvm->arch.guest_origin + + VIRTIODESCSPACE - 1ul; + mutex_unlock(&kvm->vcpus[i]->mutex); + } + } return 0; + +fail_out: + for (; i >= 0; i--) + mutex_unlock(&kvm->vcpus[i]->mutex); + return -EINVAL; } void kvm_arch_flush_shadow(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 00bbe69b78d..748fee87232 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -14,6 +14,7 @@ #ifndef ARCH_S390_KVM_S390_H #define ARCH_S390_KVM_S390_H +#include <linux/hrtimer.h> #include <linux/kvm.h> #include <linux/kvm_host.h> @@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) } int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); -void kvm_s390_idle_wakeup(unsigned long data); +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); +void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4b88834b8dd..93ecd06e1a7 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) int cpus = 0; int n; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); for (n = 0; n < KVM_MAX_VCPUS; n++) if (fi->local_int[n]) cpus++; - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); /* deal with other level 3 hypervisors */ if (stsi(mem, 3, 2, 2) == -ENOSYS) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index f27dbedf086..36678835034 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 
cpu_addr, if (cpu_addr >= KVM_MAX_VCPUS) return 3; /* not operational */ - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); if (fi->local_int[cpu_addr] == NULL) rc = 3; /* not operational */ else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) @@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, *reg |= SIGP_STAT_STOPPED; rc = 1; /* status stored */ } - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); return rc; @@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EMERGENCY; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; if (li == NULL) { rc = 3; /* not operational */ @@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) spin_unlock_bh(&li->lock); rc = 0; /* order accepted */ unlock: - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); return rc; } @@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) inti->type = KVM_S390_SIGP_STOP; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; if (li == NULL) { rc = 3; /* not operational */ @@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) spin_unlock_bh(&li->lock); rc = 0; /* order accepted */ unlock: - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); return rc; } @@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, if (!inti) return 2; /* busy */ - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { @@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, out_li: spin_unlock_bh(&li->lock); out_fi: - 
spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); return rc; } diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index e41f4008afc..f7e0d30250b 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -124,6 +124,27 @@ void _raw_read_lock_wait(raw_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_lock_wait); +void _raw_read_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags) +{ + unsigned int old; + int count = spin_retry; + + local_irq_restore(flags); + while (1) { + if (count-- <= 0) { + _raw_yield(); + count = spin_retry; + } + if (!__raw_read_can_lock(rw)) + continue; + old = rw->lock & 0x7fffffffU; + local_irq_disable(); + if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + return; + } +} +EXPORT_SYMBOL(_raw_read_lock_wait_flags); + int _raw_read_trylock_retry(raw_rwlock_t *rw) { unsigned int old; @@ -157,6 +178,25 @@ void _raw_write_lock_wait(raw_rwlock_t *rw) } EXPORT_SYMBOL(_raw_write_lock_wait); +void _raw_write_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags) +{ + int count = spin_retry; + + local_irq_restore(flags); + while (1) { + if (count-- <= 0) { + _raw_yield(); + count = spin_retry; + } + if (!__raw_write_can_lock(rw)) + continue; + local_irq_disable(); + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + return; + } +} +EXPORT_SYMBOL(_raw_write_lock_wait_flags); + int _raw_write_trylock_retry(raw_rwlock_t *rw) { int count = spin_retry; diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 2a745813454..db05661ac89 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux s390-specific parts of the memory manager. 
# -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PAGE_STATES) += page-states.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 833e8366c35..220a152c836 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -19,6 +19,7 @@ #include <linux/ptrace.h> #include <linux/mman.h> #include <linux/mm.h> +#include <linux/compat.h> #include <linux/smp.h> #include <linux/kdebug.h> #include <linux/smp_lock.h> @@ -239,7 +240,7 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs, up_read(&mm->mmap_sem); clear_tsk_thread_flag(current, TIF_SINGLE_STEP); #ifdef CONFIG_COMPAT - compat = test_tsk_thread_flag(current, TIF_31BIT); + compat = is_compat_task(); if (compat && instruction == 0x0a77) sys32_sigreturn(); else if (compat && instruction == 0x0aad) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c new file mode 100644 index 00000000000..81756271dc4 --- /dev/null +++ b/arch/s390/mm/maccess.c @@ -0,0 +1,61 @@ +/* + * Access kernel memory without faulting -- s390 specific implementation. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/system.h> + +/* + * This function writes to kernel memory bypassing DAT and possible + * write protection. It copies one to four bytes from src to dst + * using the stura instruction. + * Returns the number of bytes copied or -EFAULT. 
+ */ +static long probe_kernel_write_odd(void *dst, void *src, size_t size) +{ + unsigned long count, aligned; + int offset, mask; + int rc = -EFAULT; + + aligned = (unsigned long) dst & ~3UL; + offset = (unsigned long) dst & 3; + count = min_t(unsigned long, 4 - offset, size); + mask = (0xf << (4 - count)) & 0xf; + mask >>= offset; + asm volatile( + " bras 1,0f\n" + " icm 0,0,0(%3)\n" + "0: l 0,0(%1)\n" + " lra %1,0(%1)\n" + "1: ex %2,0(1)\n" + "2: stura 0,%1\n" + " la %0,0\n" + "3:\n" + EX_TABLE(0b,3b) EX_TABLE(1b,3b) EX_TABLE(2b,3b) + : "+d" (rc), "+a" (aligned) + : "a" (mask), "a" (src) : "cc", "memory", "0", "1"); + return rc ? rc : count; +} + +long probe_kernel_write(void *dst, void *src, size_t size) +{ + long copied = 0; + + while (size) { + copied = probe_kernel_write_odd(dst, src, size); + if (copied < 0) + break; + dst += copied; + src += copied; + size -= copied; + } + return copied < 0 ? -EFAULT : 0; +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index e008d236cc1..f4558ccf02b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -28,6 +28,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <asm/pgalloc.h> +#include <asm/compat.h> /* * Top of mmap area (just below the process stack). @@ -55,7 +56,7 @@ static inline int mmap_is_legacy(void) /* * Force standard allocation for 64 bit programs. 
*/ - if (!test_thread_flag(TIF_31BIT)) + if (!is_compat_task()) return 1; #endif return sysctl_legacy_va_layout || @@ -91,7 +92,7 @@ EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); int s390_mmap_check(unsigned long addr, unsigned long len) { - if (!test_thread_flag(TIF_31BIT) && + if (!is_compat_task() && len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) return crst_table_upgrade(current->mm, 1UL << 53); return 0; @@ -108,8 +109,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr, area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); if (!(area & ~PAGE_MASK)) return area; - if (area == -ENOMEM && - !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) { + if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { /* Upgrade the page table to 4 levels and retry. */ rc = crst_table_upgrade(mm, 1UL << 53); if (rc) @@ -131,8 +131,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); if (!(area & ~PAGE_MASK)) return area; - if (area == -ENOMEM && - !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) { + if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { /* Upgrade the page table to 4 levels and retry. */ rc = crst_table_upgrade(mm, 1UL << 53); if (rc) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index be6c1cf4ad5..56566720798 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,7 +1,5 @@ /* - * arch/s390/mm/pgtable.c - * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 
2007,2009 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -53,6 +51,18 @@ void clear_table_pgstes(unsigned long *table) #endif +unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; +EXPORT_SYMBOL(VMALLOC_START); + +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; + VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK; + return 0; +} +early_param("vmalloc", parse_vmalloc); + unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); @@ -303,3 +313,22 @@ int s390_enable_sie(void) return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); + +#ifdef CONFIG_DEBUG_PAGEALLOC +#ifdef CONFIG_HIBERNATION +bool kernel_page_present(struct page *page) +{ + unsigned long addr; + int cc; + + addr = page_to_phys(page); + asm("lra %1,0(%1)\n" + "ipm %0\n" + "srl %0,28" + :"=d"(cc),"+a"(addr)::"cc"); + return cc == 0; +} + +#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ + diff --git a/arch/s390/power/Makefile b/arch/s390/power/Makefile new file mode 100644 index 00000000000..973bb45a8fe --- /dev/null +++ b/arch/s390/power/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for s390 PM support +# + +obj-$(CONFIG_HIBERNATION) += suspend.o +obj-$(CONFIG_HIBERNATION) += swsusp.o +obj-$(CONFIG_HIBERNATION) += swsusp_64.o +obj-$(CONFIG_HIBERNATION) += swsusp_asm64.o diff --git a/arch/s390/power/suspend.c b/arch/s390/power/suspend.c new file mode 100644 index 00000000000..b3351eceebb --- /dev/null +++ b/arch/s390/power/suspend.c @@ -0,0 +1,40 @@ +/* + * Suspend support specific for s390. + * + * Copyright IBM Corp. 
2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/suspend.h> +#include <linux/reboot.h> +#include <linux/pfn.h> +#include <asm/sections.h> +#include <asm/ipl.h> + +/* + * References to section boundaries + */ +extern const void __nosave_begin, __nosave_end; + +/* + * check if given pfn is in the 'nosave' or in the read only NSS section + */ +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) + >> PAGE_SHIFT; + unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; + unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); + + if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) + return 1; + if (pfn >= stext_pfn && pfn <= eshared_pfn) { + if (ipl_info.type == IPL_TYPE_NSS) + return 1; + } else if ((tprot(pfn * PAGE_SIZE) && pfn > 0)) + return 1; + return 0; +} diff --git a/arch/s390/power/swsusp.c b/arch/s390/power/swsusp.c new file mode 100644 index 00000000000..e6a4fe9f5f2 --- /dev/null +++ b/arch/s390/power/swsusp.c @@ -0,0 +1,30 @@ +/* + * Support for suspend and resume on s390 + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + * + */ + + +/* + * save CPU registers before creating a hibernation image and before + * restoring the memory state from it + */ +void save_processor_state(void) +{ + /* implementation contained in the + * swsusp_arch_suspend function + */ +} + +/* + * restore the contents of CPU registers + */ +void restore_processor_state(void) +{ + /* implementation contained in the + * swsusp_arch_resume function + */ +} diff --git a/arch/s390/power/swsusp_64.c b/arch/s390/power/swsusp_64.c new file mode 100644 index 00000000000..9516a517d72 --- /dev/null +++ b/arch/s390/power/swsusp_64.c @@ -0,0 +1,17 @@ +/* + * Support for suspend and resume on s390 + * + * Copyright IBM Corp. 
2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + * + */ + +#include <asm/system.h> +#include <linux/interrupt.h> + +void do_after_copyback(void) +{ + mb(); +} + diff --git a/arch/s390/power/swsusp_asm64.S b/arch/s390/power/swsusp_asm64.S new file mode 100644 index 00000000000..3c74e7d827c --- /dev/null +++ b/arch/s390/power/swsusp_asm64.S @@ -0,0 +1,199 @@ +/* + * S390 64-bit swsusp implementation + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> + +/* + * Save register context in absolute 0 lowcore and call swsusp_save() to + * create in-memory kernel image. The context is saved in the designated + * "store status" memory locations (see POP). + * We return from this function twice. The first time during the suspend to + * disk process. The second time via the swsusp_arch_resume() function + * (see below) in the resume process. + * This function runs with disabled interrupts. 
+ */ + .section .text + .align 2 + .globl swsusp_arch_suspend +swsusp_arch_suspend: + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Switch off lowcore protection */ + stctg %c0,%c0,__SF_EMPTY(%r15) + ni __SF_EMPTY+4(%r15),0xef + lctlg %c0,%c0,__SF_EMPTY(%r15) + + /* Store prefix register on stack */ + stpx __SF_EMPTY(%r15) + + /* Setup base register for lowcore (absolute 0) */ + llgf %r1,__SF_EMPTY(%r15) + + /* Get pointer to save area */ + aghi %r1,0x1000 + + /* Store registers */ + mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ + stfpc 0x31c(%r1) /* store fpu control */ + std 0,0x200(%r1) /* store f0 */ + std 1,0x208(%r1) /* store f1 */ + std 2,0x210(%r1) /* store f2 */ + std 3,0x218(%r1) /* store f3 */ + std 4,0x220(%r1) /* store f4 */ + std 5,0x228(%r1) /* store f5 */ + std 6,0x230(%r1) /* store f6 */ + std 7,0x238(%r1) /* store f7 */ + std 8,0x240(%r1) /* store f8 */ + std 9,0x248(%r1) /* store f9 */ + std 10,0x250(%r1) /* store f10 */ + std 11,0x258(%r1) /* store f11 */ + std 12,0x260(%r1) /* store f12 */ + std 13,0x268(%r1) /* store f13 */ + std 14,0x270(%r1) /* store f14 */ + std 15,0x278(%r1) /* store f15 */ + stam %a0,%a15,0x340(%r1) /* store access registers */ + stctg %c0,%c15,0x380(%r1) /* store control registers */ + stmg %r0,%r15,0x280(%r1) /* store general registers */ + + stpt 0x328(%r1) /* store timer */ + stckc 0x330(%r1) /* store clock comparator */ + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + /* Setup lowcore */ + brasl %r14,setup_lowcore_early + + /* Save image */ + brasl %r14,swsusp_save + + /* Switch on lowcore protection */ + stctg %c0,%c0,__SF_EMPTY(%r15) + oi __SF_EMPTY+4(%r15),0x10 + lctlg %c0,%c0,__SF_EMPTY(%r15) + + /* Restore prefix register and return */ + lghi %r1,0x1000 + spx 
0x318(%r1) + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 + +/* + * Restore saved memory image to correct place and restore register context. + * Then we return to the function that called swsusp_arch_suspend(). + * swsusp_arch_resume() runs with disabled interrupts. + */ + .globl swsusp_arch_resume +swsusp_arch_resume: + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Save boot cpu number */ + brasl %r14,smp_get_phys_cpu_id + lgr %r10,%r2 + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Switch off lowcore protection */ + stctg %c0,%c0,__SF_EMPTY(%r15) + ni __SF_EMPTY+4(%r15),0xef + lctlg %c0,%c0,__SF_EMPTY(%r15) + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + /* Restore saved image */ + larl %r1,restore_pblist + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 2f +0: + lg %r2,8(%r1) + lg %r4,0(%r1) + lghi %r3,PAGE_SIZE + lghi %r5,PAGE_SIZE +1: + mvcle %r2,%r4,0 + jo 1b + lg %r1,16(%r1) + ltgr %r1,%r1 + jnz 0b +2: + ptlb /* flush tlb */ + + /* Restore registers */ + lghi %r13,0x1000 /* %r13 = pointer to save area */ + + spt 0x328(%r13) /* reprogram timer */ + //sckc 0x330(%r13) /* set clock comparator */ + + lctlg %c0,%c15,0x380(%r13) /* load control registers */ + lam %a0,%a15,0x340(%r13) /* load access registers */ + + lfpc 0x31c(%r13) /* load fpu control */ + ld 0,0x200(%r13) /* load f0 */ + ld 1,0x208(%r13) /* load f1 */ + ld 2,0x210(%r13) /* load f2 */ + ld 3,0x218(%r13) /* load f3 */ + ld 4,0x220(%r13) /* load f4 */ + ld 5,0x228(%r13) /* load f5 */ + ld 6,0x230(%r13) /* load f6 */ + ld 7,0x238(%r13) /* load f7 */ + ld 8,0x240(%r13) /* load f8 */ + ld 9,0x248(%r13) /* load f9 */ + ld 10,0x250(%r13) /* load f10 */ + ld 11,0x258(%r13) /* load f11 */ + ld 12,0x260(%r13) /* load f12 */ + ld 13,0x268(%r13) /* load f13 */ + ld 14,0x270(%r13) /* load f14 */ + ld 15,0x278(%r13) /* load f15 */ + + /* Load old stack */ + 
lg %r15,0x2f8(%r13) + + /* Pointer to save area */ + lghi %r13,0x1000 + + /* Switch CPUs */ + lgr %r2,%r10 /* get cpu id */ + llgf %r3,0x318(%r13) + brasl %r14,smp_switch_boot_cpu_in_resume + + /* Restore prefix register */ + spx 0x318(%r13) + + /* Switch on lowcore protection */ + stctg %c0,%c0,__SF_EMPTY(%r15) + oi __SF_EMPTY+4(%r15),0x10 + lctlg %c0,%c0,__SF_EMPTY(%r15) + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Return 0 */ + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 |