From 250cf776f74b5932a1977d0489cae9206e2351dd Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 28 Oct 2008 11:10:15 +0100 Subject: [S390] pgtables: Fix race in enable_sie vs. page table ops The current enable_sie code sets the mm->context.pgstes bit to tell dup_mm that the new mm should have extended page tables. This bit is also used by the s390 specific page table primitives to decide about the page table layout - which means context.pgstes has two meanings. This can cause any kind of bugs. For example - e.g. shrink_zone can call ptep_clear_flush_young while enable_sie is running. ptep_clear_flush_young will test for context.pgstes. Since enable_sie changed that value of the old struct mm without changing the page table layout ptep_clear_flush_young will do the wrong thing. The solution is to split pgstes into two bits - one for the allocation - one for the current state Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 3 ++- arch/s390/include/asm/mmu_context.h | 19 ++++++++++++++++--- arch/s390/include/asm/pgtable.h | 8 ++++---- arch/s390/mm/pgtable.c | 16 ++++++++-------- 4 files changed, 30 insertions(+), 16 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 5dd5e7b3476..d2b4ff83147 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -7,7 +7,8 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; int noexec; - int pgstes; + int has_pgste; /* The mmu context has extended page tables */ + int alloc_pgste; /* cloned contexts will have extended page tables */ } mm_context_t; #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 4c2fbf48c9c..28ec870655a 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -20,12 +20,25 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm->context.pgstes) { + if (current->mm->context.alloc_pgste) { + /* + * alloc_pgste indicates, that any NEW context will be created + * with extended page tables. The old context is unchanged. The + * page table allocation and the page table operations will + * look at has_pgste to distinguish normal and extended page + * tables. The only way to create extended page tables is to + * set alloc_pgste and then create a new context (e.g. dup_mm). + * The page table allocation is called after init_new_context + * and if has_pgste is set, it will create extended page + * tables. + */ mm->context.noexec = 0; - mm->context.pgstes = 1; + mm->context.has_pgste = 1; + mm->context.alloc_pgste = 1; } else { mm->context.noexec = s390_noexec; - mm->context.pgstes = 0; + mm->context.has_pgste = 0; + mm->context.alloc_pgste = 0; } mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1a928f84afd..7fc76133b3e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -679,7 +679,7 @@ static inline void pmd_clear(pmd_t *pmd) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (mm->context.pgstes) + if (mm->context.has_pgste) ptep_rcp_copy(ptep); pte_val(*ptep) = _PAGE_TYPE_EMPTY; if (mm->context.noexec) @@ -763,7 +763,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, struct page *page; unsigned int skey; - if (!mm->context.pgstes) + if (!mm->context.has_pgste) return -EINVAL; rcp_lock(ptep); pgste = (unsigned long *) (ptep + PTRS_PER_PTE); @@ -794,7 +794,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, int young; unsigned long *pgste; - if (!vma->vm_mm->context.pgstes) + if (!vma->vm_mm->context.has_pgste) return 0; physpage = pte_val(*ptep) & PAGE_MASK; pgste = (unsigned long *) (ptep + PTRS_PER_PTE); @@ -844,7 +844,7 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) static inline void ptep_invalidate(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - if (mm->context.pgstes) { + if (mm->context.has_pgste) { rcp_lock(ptep); __ptep_ipte(address, ptep); ptep_rcp_copy(ptep); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3d98ba82ea6..ef3635b52fc 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; spin_lock(&mm->page_table_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -186,7 +186,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pgtable_page_ctor(page); page->flags &= ~FRAG_MASK; table = (unsigned long *) page_to_phys(page); - if (mm->context.pgstes) + if (mm->context.has_pgste) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); @@ -210,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock(&mm->page_table_lock); @@ -257,7 +257,7 @@ int s390_enable_sie(void) struct mm_struct *mm, *old_mm; /* Do we have pgstes? if yes, we are done */ - if (tsk->mm->context.pgstes) + if (tsk->mm->context.has_pgste) return 0; /* lets check if we are allowed to replace the mm */ @@ -269,14 +269,14 @@ int s390_enable_sie(void) } task_unlock(tsk); - /* we copy the mm with pgstes enabled */ - tsk->mm->context.pgstes = 1; + /* we copy the mm and let dup_mm create the page tables with_pgstes */ + tsk->mm->context.alloc_pgste = 1; mm = dup_mm(tsk); - tsk->mm->context.pgstes = 0; + tsk->mm->context.alloc_pgste = 0; if (!mm) return -ENOMEM; - /* Now lets check again if somebody attached ptrace etc */ + /* Now lets check again if something happened */ task_lock(tsk); if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { -- cgit v1.2.3 From da5aae7036692fa8d03da1b705c76fd750ed9e38 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Oct 2008 11:10:16 +0100 Subject: [S390] Fix sysdev class file creation. Use sysdev_class_create_file() to create create sysdev class attributes instead of sysfs_create_file(). Using sysfs_create_file() wasn't a very good idea since the show and store functions have a different amount of parameters for sysfs files and sysdev class files. In particular the pointer to the buffer is the last argument and therefore accesses to random memory regions happened. Still worked surprisingly well until we got a kernel panic. Cc: stable@kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 9e8b1f9b8f4..b5595688a47 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1119,9 +1119,7 @@ out: return rc; } -static ssize_t __ref rescan_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, +static ssize_t __ref rescan_store(struct sysdev_class *class, const char *buf, size_t count) { int rc; @@ -1129,12 +1127,10 @@ static ssize_t __ref rescan_store(struct sys_device *dev, rc = smp_rescan_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store); +static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t dispatching_show(struct sys_device *dev, - struct sysdev_attribute *attr, - char *buf) +static ssize_t dispatching_show(struct sysdev_class *class, char *buf) { ssize_t count; @@ -1144,9 +1140,8 @@ static ssize_t dispatching_show(struct sys_device *dev, return count; } -static ssize_t dispatching_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t count) +static ssize_t dispatching_store(struct sysdev_class *dev, const char *buf, + size_t count) { int val, rc; char delim; @@ -1168,7 +1163,8 @@ out: put_online_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store); +static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); static int __init topology_init(void) { @@ -1178,13 +1174,11 @@ static int __init topology_init(void) register_cpu_notifier(&smp_cpu_nb); #ifdef CONFIG_HOTPLUG_CPU - rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_rescan.attr); + rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan); if (rc) return rc; #endif - rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_dispatching.attr); + rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching); if (rc) return rc; for_each_present_cpu(cpu) { -- cgit v1.2.3 From 13f8b7c5e6fa13622592042f3b5aa88ba785cec2 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 28 Oct 2008 11:10:18 +0100 Subject: [S390] appldata: unsigned ops->size cannot be negative unsigned ops->size cannot be negative Signed-off-by: Roel Kluin Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index a7f8979fb92..a06a47cdd5e 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -424,7 +424,7 @@ out: */ int appldata_register_ops(struct appldata_ops *ops) { - if ((ops->size > APPLDATA_MAX_REC_SIZE) || (ops->size < 0)) + if (ops->size > APPLDATA_MAX_REC_SIZE) return -EINVAL; ops->ctl_table = kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL); -- cgit v1.2.3 From 46e7951f9431b5e6bfbeb3044fdb4b522f58101c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Oct 2008 11:10:20 +0100 Subject: [S390] Change default IPL method to IPL_VM. allyesconfig and allmodconfig built kernels have a tape IPL record. A the vmreader record makes much more sense, since hardly anybody will ever IPL a kernel from tape. So change the default. As I side effect I can test these kernels without fiddling around with the kernel config ;) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 70b7645ce74..5f5c29269fb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -384,7 +384,7 @@ config IPL choice prompt "IPL method generated into head.S" depends on IPL - default IPL_TAPE + default IPL_VM help Select "tape" if you want to IPL the image from a Tape. -- cgit v1.2.3 From 7f5a8ba6b0297ca941f43f8f5cbf0e5c8c4dd916 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Oct 2008 11:10:21 +0100 Subject: [S390] No more 4kb stacks. We got a stack overflow with a small stack configuration on a 32 bit system. It just looks like as 4kb isn't enough and too dangerous. So lets get rid of 4kb stacks on 32 bit. But one thing I completely dislike about the call trace below is that just for debugging or tracing purposes sprintf gets called (cio_start_key): /* process condition code */ sprintf(dbf_txt, "ccode:%d", ccode); CIO_TRACE_EVENT(4, dbf_txt); But maybe its just me who thinks that this could be done better. <4>Kernel stack overflow. <4>Modules linked in: dm_multipath sunrpc bonding qeth_l2 dm_mod qeth ccwgroup vmur <4>CPU: 1 Not tainted 2.6.27-30.x.20081015-s390default #1 <4>Process httpd (pid: 3807, task: 20ae2df8, ksp: 1666fb78) <4>Krnl PSW : 040c0000 8027098a (number+0xe/0x348) <4> R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0 <4>Krnl GPRS: 00d43318 0027097c 1666f277 9666f270 <4> 00000000 00000000 0000000a ffffffff <4> 9666f270 1666f228 1666f277 1666f098 <4> 00000002 80270982 80271016 1666f098 <4>Krnl Code: 8027097e: f0340dd0a7f1 srp 3536(4,%r0),2033(%r10),4 <4> 80270984: 0f00 clcl %r0,%r0 <4> 80270986: a7840001 brc 8,80270988 <4> >8027098a: 18ef lr %r14,%r15 <4> 8027098c: a7faff68 ahi %r15,-152 <4> 80270990: 18bf lr %r11,%r15 <4> 80270992: 18a2 lr %r10,%r2 <4> 80270994: 1893 lr %r9,%r3 Modified calltrace with annotated stackframe size of each function: stackframe size | 0 304 vsnprintf+850 [0x271016] 1 72 sprintf+74 [0x271522] 2 56 cio_start_key+262 [0x2d4c16] 3 56 ccw_device_start_key+222 [0x2dfe92] 4 56 ccw_device_start+40 [0x2dff28] 5 48 raw3215_start_io+104 [0x30b0f8] 6 56 raw3215_write+494 [0x30ba0a] 7 40 con3215_write+68 [0x30bafc] 8 40 __call_console_drivers+146 [0x12b0fa] 9 32 _call_console_drivers+102 [0x12b192] 10 64 release_console_sem+268 [0x12b614] 11 168 vprintk+462 [0x12bca6] 12 72 printk+68 [0x12bfd0] 13 256 __print_symbol+50 [0x15a882] 14 56 __show_trace+162 [0x103d06] 15 32 show_trace+224 [0x103e70] 16 48 show_stack+152 [0x103f20] 17 56 dump_stack+126 [0x104612] 18 96 __alloc_pages_internal+592 [0x175004] 19 80 cache_alloc_refill+776 [0x196f3c] 20 40 __kmalloc+258 [0x1972ae] 21 40 __alloc_skb+94 [0x328086] 22 32 pskb_copy+50 [0x328252] 23 32 skb_realloc_headroom+110 [0x328a72] 24 104 qeth_l2_hard_start_xmit+378 [0x7803bfde] 25 56 dev_hard_start_xmit+450 [0x32ef6e] 26 56 __qdisc_run+390 [0x3425d6] 27 48 dev_queue_xmit+410 [0x331e06] 28 40 ip_finish_output+308 [0x354ac8] 29 56 ip_output+218 [0x355b6e] 30 24 ip_local_out+56 [0x354584] 31 120 ip_queue_xmit+300 [0x355cec] 32 96 tcp_transmit_skb+812 [0x367da8] 33 40 tcp_push_one+158 [0x369fda] 34 112 tcp_sendmsg+852 [0x35d5a0] 35 240 sock_sendmsg+164 [0x32035c] 36 56 kernel_sendmsg+86 [0x32064a] 37 88 sock_no_sendpage+98 [0x322b22] 38 104 tcp_sendpage+70 [0x35cc1e] 39 48 sock_sendpage+74 [0x31eb66] 40 64 pipe_to_sendpage+102 [0x1c4b2e] 41 64 __splice_from_pipe+120 [0x1c5340] 42 72 splice_from_pipe+90 [0x1c57e6] 43 56 generic_splice_sendpage+38 [0x1c5832] 44 48 do_splice_from+104 [0x1c4c38] 45 48 direct_splice_actor+52 [0x1c4c88] 46 80 splice_direct_to_actor+180 [0x1c4f80] 47 72 do_splice_direct+70 [0x1c5112] 48 64 do_sendfile+360 [0x19de18] 49 72 sys_sendfile64+126 [0x19df32] 50 336 sysc_do_restart+18 [0x111a1a] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 14 ++++++-------- arch/s390/include/asm/thread_info.h | 5 ----- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5f5c29269fb..8116a3328a1 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -241,19 +241,17 @@ config PACK_STACK Say Y if you are unsure. config SMALL_STACK - bool "Use 4kb/8kb for kernel stack instead of 8kb/16kb" - depends on PACK_STACK && !LOCKDEP + bool "Use 8kb for kernel stack instead of 16kb" + depends on PACK_STACK && 64BIT && !LOCKDEP help If you say Y here and the compiler supports the -mkernel-backchain - option the kernel will use a smaller kernel stack size. For 31 bit - the reduced size is 4kb instead of 8kb and for 64 bit it is 8kb - instead of 16kb. This allows to run more thread on a system and - reduces the pressure on the memory management for higher order - page allocations. + option the kernel will use a smaller kernel stack size. The reduced + size is 8kb instead of 16kb. This allows to run more threads on a + system and reduces the pressure on the memory management for higher + order page allocations. Say N if you are unsure. - config CHECK_STACK bool "Detect kernel stack overflow" help diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index de3fad60c68..c1eaf9604da 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -15,13 +15,8 @@ * Size of kernel stack for each process */ #ifndef __s390x__ -#ifndef __SMALL_STACK #define THREAD_ORDER 1 #define ASYNC_ORDER 1 -#else -#define THREAD_ORDER 0 -#define ASYNC_ORDER 0 -#endif #else /* __s390x__ */ #ifndef __SMALL_STACK #define THREAD_ORDER 2 -- cgit v1.2.3 From ea4bfdf52a5a84492cce881baadc5fab36adeade Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 28 Oct 2008 11:10:22 +0100 Subject: [S390] s390: Fix build for !CONFIG_S390_GUEST + CONFIG_VIRTIO_CONSOLE The s390 kernel does not compile if virtio console is enabled, but guest support is disabled: LD .tmp_vmlinux1 arch/s390/kernel/built-in.o: In function `setup_arch': /space/linux-2.5/arch/s390/kernel/setup.c:773: undefined reference to `s390_virtio_console_init' The fix is related to commit 99e65c92f2bbf84f43766a8bf701e36817d62822 Author: Christian Borntraeger Date: Fri Jul 25 15:50:04 2008 +0200 KVM: s390: Fix guest kconfig Which changed the build process to build kvm_virtio.c only if CONFIG_S390_GUEST is set. We must ifdef the prototype in the header file accordingly. Reported-by: Heiko Carstens Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kvm_virtio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index 146100224de..c13568b9351 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -52,7 +52,7 @@ struct kvm_vqconfig { #ifdef __KERNEL__ /* early virtio console setup */ -#ifdef CONFIG_VIRTIO_CONSOLE +#ifdef CONFIG_S390_GUEST extern void s390_virtio_console_init(void); #else static inline void s390_virtio_console_init(void) -- cgit v1.2.3 From fb2e7c5e33b341699f139b2ed972dca0a463a670 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 14 Nov 2008 18:18:00 +0100 Subject: [S390] Fix range for add_active_range() in setup_memory() add_active_range() expects start_pfn + size as end_pfn value, i.e. not the pfn of the last page frame but the one behind that. We used the pfn of the last page frame so far, which can lead to a BUG_ON in move_freepages(), when the kernelcore parameter is specified (page_zone(start_page) != page_zone(end_page)). Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 62122bad1e3..400b040df7f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -604,13 +604,13 @@ setup_memory(void) if (memory_chunk[i].type != CHUNK_READ_WRITE) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); - end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size) - 1; + end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); end_chunk = min(end_chunk, end_pfn); if (start_chunk >= end_chunk) continue; add_active_range(0, start_chunk, end_chunk); pfn = max(start_chunk, start_pfn); - for (; pfn <= end_chunk; pfn++) + for (; pfn < end_chunk; pfn++) page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); } -- cgit v1.2.3 From af4c68740e848019d8d14c52704ed8eacceddac6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:03 +0100 Subject: [S390] lockdep: fix compile bug arch/s390/kernel/built-in.o: In function `cleanup_io_leave_insn': mem_detect.c:(.text+0x10592): undefined reference to `lockdep_sys_exit' Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index ed500ef799b..5f0c4fba87c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1116,6 +1116,8 @@ cleanup_io_leave_insn: .Ltrace_irq_on: .long trace_hardirqs_on .Ltrace_irq_off: .long trace_hardirqs_off +#endif +#ifdef CONFIG_LOCKDEP .Llockdep_sys_exit: .long lockdep_sys_exit #endif -- cgit v1.2.3 From 632448f65001c4935ed0d3bb362017d773da2eca Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:04 +0100 Subject: [S390] ftrace: disable tracing on idle psw Disable tracing on idle psw. Otherwise it would give us huge preempt off times for idle. Which is rather pointless. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 3e2c05cb6a8..04f8c67a610 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -136,9 +136,12 @@ static void default_idle(void) return; } trace_hardirqs_on(); + /* Don't trace preempt off for idle. */ + stop_critical_timings(); /* Wait for external, I/O or machine check interrupt. */ __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT); + start_critical_timings(); } void cpu_idle(void) -- cgit v1.2.3 From 50bec4ce5d36ebf96189dcc54e20c7fce4bf61bf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:05 +0100 Subject: [S390] ftrace: fix kernel stack backchain walking With CONFIG_IRQSOFF_TRACER the trace_hardirqs_off() function includes a call to __builtin_return_address(1). But we calltrace_hardirqs_off() from early entry code. There we have just a single stack frame. So this results in a kernel stack backchain walk that would walk beyond the kernel stack. Following the NULL terminated backchain this results in a lowcore read access. To fix this we simply call trace_hardirqs_off_caller() and pass the current instruction pointer. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 18 +++++++++++------- arch/s390/kernel/entry64.S | 11 +++++++---- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5f0c4fba87c..08844fc24a2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -61,22 +61,25 @@ STACK_SIZE = 1 << STACK_SHIFT #ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON - l %r1,BASED(.Ltrace_irq_on) + basr %r2,%r0 + l %r1,BASED(.Ltrace_irq_on_caller) basr %r14,%r1 .endm .macro TRACE_IRQS_OFF - l %r1,BASED(.Ltrace_irq_off) + basr %r2,%r0 + l %r1,BASED(.Ltrace_irq_off_caller) basr %r14,%r1 .endm .macro TRACE_IRQS_CHECK + basr %r2,%r0 tm SP_PSW(%r15),0x03 # irqs enabled? jz 0f - l %r1,BASED(.Ltrace_irq_on) + l %r1,BASED(.Ltrace_irq_on_caller) basr %r14,%r1 j 1f -0: l %r1,BASED(.Ltrace_irq_off) +0: l %r1,BASED(.Ltrace_irq_off_caller) basr %r14,%r1 1: .endm @@ -1113,9 +1116,10 @@ cleanup_io_leave_insn: .Lschedtail: .long schedule_tail .Lsysc_table: .long sys_call_table #ifdef CONFIG_TRACE_IRQFLAGS -.Ltrace_irq_on: .long trace_hardirqs_on -.Ltrace_irq_off: - .long trace_hardirqs_off +.Ltrace_irq_on_caller: + .long trace_hardirqs_on_caller +.Ltrace_irq_off_caller: + .long trace_hardirqs_off_caller #endif #ifdef CONFIG_LOCKDEP .Llockdep_sys_exit: diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index d7ce150453f..41aca06682a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -61,19 +61,22 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ #ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON - brasl %r14,trace_hardirqs_on + basr %r2,%r0 + brasl %r14,trace_hardirqs_on_caller .endm .macro TRACE_IRQS_OFF - brasl %r14,trace_hardirqs_off + basr %r2,%r0 + brasl %r14,trace_hardirqs_off_caller .endm .macro TRACE_IRQS_CHECK + basr %r2,%r0 tm SP_PSW(%r15),0x03 # irqs enabled? jz 0f - brasl %r14,trace_hardirqs_on + brasl %r14,trace_hardirqs_on_caller j 1f -0: brasl %r14,trace_hardirqs_off +0: brasl %r14,trace_hardirqs_off_caller 1: .endm #else -- cgit v1.2.3 From 74af283102b358b0da545460d0d176f473e110f6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Nov 2008 18:18:07 +0100 Subject: [S390] cpu topology: fix locking cpu_coregroup_map used to grab a mutex on s390 since it was only called from process context. Since c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 "block: add support for IO CPU affinity" this is not true anymore. It now also gets called from softirq context. To prevent possible deadlocks change this in architecture code and use a spinlock instead of a mutex. Cc: stable@kernel.org Cc: Jens Axboe Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 632b13e1005..a947899dcba 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -65,18 +65,21 @@ static int machine_has_topology_irq; static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); +/* topology_lock protects the core linked list */ +static DEFINE_SPINLOCK(topology_lock); cpumask_t cpu_core_map[NR_CPUS]; cpumask_t cpu_coregroup_map(unsigned int cpu) { struct core_info *core = &core_info; + unsigned long flags; cpumask_t mask; cpus_clear(mask); if (!machine_has_topology) return cpu_present_map; - mutex_lock(&smp_cpu_state_mutex); + spin_lock_irqsave(&topology_lock, flags); while (core) { if (cpu_isset(cpu, core->mask)) { mask = core->mask; @@ -84,7 +87,7 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) } core = core->next; } - mutex_unlock(&smp_cpu_state_mutex); + spin_unlock_irqrestore(&topology_lock, flags); if (cpus_empty(mask)) mask = cpumask_of_cpu(cpu); return mask; @@ -133,7 +136,7 @@ static void tl_to_cores(struct tl_info *info) union tl_entry *tle, *end; struct core_info *core = &core_info; - mutex_lock(&smp_cpu_state_mutex); + spin_lock_irq(&topology_lock); clear_cores(); tle = info->tle; end = (union tl_entry *)((unsigned long)info + info->length); @@ -157,7 +160,7 @@ static void tl_to_cores(struct tl_info *info) } tle = next_tle(tle); } - mutex_unlock(&smp_cpu_state_mutex); + spin_unlock_irq(&topology_lock); } static void topology_update_polarization_simple(void) -- cgit v1.2.3 From d2f019fe40e8fecd822f87bc759f74925a5c31d6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Nov 2008 18:18:09 +0100 Subject: [S390] fix s390x_newuname The uname system call for 64 bit compares current->personality without masking the upper 16 bits. If e.g. READ_IMPLIES_EXEC is set the result of a uname system call will always be s390x even if the process uses the s390 personality. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sys_s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 5fdb799062b..4fe952e557a 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -198,7 +198,7 @@ asmlinkage long s390x_newuname(struct new_utsname __user *name) { int ret = sys_newuname(name); - if (current->personality == PER_LINUX32 && !ret) { + if (personality(current->personality) == PER_LINUX32 && !ret) { ret = copy_to_user(name->machine, "s390\0\0\0\0", 8); if (ret) ret = -EFAULT; } -- cgit v1.2.3