From 12b101555f4a67db67a66966a516075bd477741f Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Fri, 21 Mar 2008 15:01:50 -0700 Subject: [IPV4]: Fix null dereference in ip_defrag Been seeing occasional panics in my testing of 2.6.25-rc in ip_defrag. Offending line in ip_defrag is here: net = skb->dev->nd_net where dev is NULL. Bisected the problem down to commit ac18e7509e7df327e30d6e073a787d922eaf211d ([NETNS][FRAGS]: Make the inet_frag_queue lookup work in namespaces). Below patch (idea from Patrick McHardy) fixes the problem for me. Signed-off-by: Phil Oester Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a2e92f9709d..3b2e5adca83 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -568,7 +568,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); - net = skb->dev->nd_net; + net = skb->dev ? skb->dev->nd_net : skb->dst->dev->nd_net; /* Start by cleaning up the memory. */ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) ip_evictor(net); -- cgit v1.2.3 From 1233823b0847190976d69a86d7bb1287992ba2c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Mar 2008 15:40:47 -0700 Subject: [SCTP]: Fix build warnings with IPV6 disabled. Introduced by 270637abff0cdf848b910b9f96ad342e1da61c66 ("[SCTP]: Fix a race between module load and protosw access") Reported by Gabriel C: In file included from net/sctp/sm_statetable.c:50: include/net/sctp/sctp.h: In function 'sctp_v6_pf_init': include/net/sctp/sctp.h:392: warning: 'return' with a value, in function returning void In file included from net/sctp/sm_statefuns.c:62: include/net/sctp/sctp.h: In function 'sctp_v6_pf_init': include/net/sctp/sctp.h:392: warning: 'return' with a value, in function returning void ... Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 57ed3e323d9..ea806732b08 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -389,7 +389,7 @@ void sctp_v6_del_protocol(void); #else /* #ifdef defined(CONFIG_IPV6) */ -static inline void sctp_v6_pf_init(void) { return 0; } +static inline void sctp_v6_pf_init(void) { return; } static inline void sctp_v6_pf_exit(void) { return; } static inline int sctp_v6_protosw_init(void) { return 0; } static inline void sctp_v6_protosw_exit(void) { return; } -- cgit v1.2.3 From 7512cbf6efc97644812f137527a54b8e92b6a90a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 21 Mar 2008 15:58:52 -0700 Subject: [DLCI]: Fix tiny race between module unload and sock_ioctl. This is a narrow pedantry :) but the dlci_ioctl_hook check and call should not be parted with the mutex lock. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/socket.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index b6d35cd72a5..9d3fbfbc853 100644 --- a/net/socket.c +++ b/net/socket.c @@ -909,11 +909,10 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!dlci_ioctl_hook) request_module("dlci"); - if (dlci_ioctl_hook) { - mutex_lock(&dlci_ioctl_mutex); + mutex_lock(&dlci_ioctl_mutex); + if (dlci_ioctl_hook) err = dlci_ioctl_hook(cmd, argp); - mutex_unlock(&dlci_ioctl_mutex); - } + mutex_unlock(&dlci_ioctl_mutex); break; default: err = sock->ops->ioctl(sock, cmd, arg); -- cgit v1.2.3 From 64658743fdd40021e3ac91e8ff260ad06578dd23 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Mar 2008 17:01:38 -0700 Subject: [SPARC64]: Remove most limitations to kernel image size. Currently kernel images are limited to 8MB in size, and this causes problems especially when enabling features that take up a lot of kernel image space such as lockdep. The code now will align the kernel image size up to 4MB and map that many locked TLB entries. So, the only practical limitation is the number of available locked TLB entries which is 16 on Cheetah and 64 on pre-Cheetah sparc64 cpus. Niagara cpus don't actually have hw locked TLB entry support. Rather, the hypervisor transparently provides support for "locked" TLB entries since it runs with physical addressing and does the initial TLB miss processing. Fully utilizing this change requires some help from SILO, a patch for which will be submitted to the maintainer. Essentially, SILO will only currently map up to 8MB for the kernel image and that needs to be increased. Note that neither this patch nor the SILO bits will help with network booting. The openfirmware code will only map up to a certain amount of kernel image during a network boot and there isn't much we can do about that other than to implement a layered network booting facility. Solaris has this, and calls it "wanboot" and we may implement something similar at some point. Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 8 +- arch/sparc64/kernel/smp.c | 17 ++-- arch/sparc64/kernel/trampoline.S | 188 +++++++++++++-------------------------- arch/sparc64/mm/init.c | 38 +++----- include/asm-sparc64/hvtramp.h | 2 +- include/asm-sparc64/spitfire.h | 2 + 6 files changed, 96 insertions(+), 159 deletions(-) diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 44b105c04dd..34f8ff57c56 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -288,8 +288,12 @@ sun4v_chip_type: /* Leave arg2 as-is, prom_mmu_ihandle_cache */ mov -1, %l3 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: mode (-1 default) - sethi %hi(8 * 1024 * 1024), %l3 - stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: size (8MB) + /* 4MB align the kernel image size. */ + set (_end - KERNBASE), %l3 + set ((4 * 1024 * 1024) - 1), %l4 + add %l3, %l4, %l3 + andn %l3, %l4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: roundup(ksize, 4MB) sethi %hi(KERNBASE), %l3 stx %l3, [%sp + 2047 + 128 + 0x38] ! arg5: vaddr (KERNBASE) stx %g0, [%sp + 2047 + 128 + 0x40] !
arg6: empty diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index cc454731d87..5a1126b363a 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -284,14 +284,17 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) { extern unsigned long sparc64_ttable_tl0; extern unsigned long kern_locked_tte_data; - extern int bigkernel; struct hvtramp_descr *hdesc; unsigned long trampoline_ra; struct trap_per_cpu *tb; u64 tte_vaddr, tte_data; unsigned long hv_err; + int i; - hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL); + hdesc = kzalloc(sizeof(*hdesc) + + (sizeof(struct hvtramp_mapping) * + num_kernel_image_mappings - 1), + GFP_KERNEL); if (!hdesc) { printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " "hvtramp_descr.\n"); @@ -299,7 +302,7 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) } hdesc->cpu = cpu; - hdesc->num_mappings = (bigkernel ? 2 : 1); + hdesc->num_mappings = num_kernel_image_mappings; tb = &trap_block[cpu]; tb->hdesc = hdesc; @@ -312,13 +315,11 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) tte_vaddr = (unsigned long) KERNBASE; tte_data = kern_locked_tte_data; - hdesc->maps[0].vaddr = tte_vaddr; - hdesc->maps[0].tte = tte_data; - if (bigkernel) { + for (i = 0; i < hdesc->num_mappings; i++) { + hdesc->maps[i].vaddr = tte_vaddr; + hdesc->maps[i].tte = tte_data; tte_vaddr += 0x400000; tte_data += 0x400000; - hdesc->maps[1].vaddr = tte_vaddr; - hdesc->maps[1].tte = tte_data; } trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 4ae2e525d68..56ff5521134 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -105,7 +105,7 @@ startup_continue: wr %g2, 0, %tick_cmpr /* Call OBP by hand to lock KERNBASE into i/d tlbs. - * We lock 2 consequetive entries if we are 'bigkernel'. + * We lock 'num_kernel_image_mappings' consequetive entries. */ sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 @@ -119,6 +119,29 @@ startup_continue: add %l2, -(192 + 128), %sp flushw + /* Setup the loop variables: + * %l3: VADDR base + * %l4: TTE base + * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings' + * %l6: Number of TTE entries to map + * %l7: Highest TTE entry number, we count down + */ + sethi %hi(KERNBASE), %l3 + sethi %hi(kern_locked_tte_data), %l4 + ldx [%l4 + %lo(kern_locked_tte_data)], %l4 + clr %l5 + sethi %hi(num_kernel_image_mappings), %l6 + lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 + add %l6, 1, %l6 + + mov 15, %l7 + BRANCH_IF_ANY_CHEETAH(g1,g5,2f) + + mov 63, %l7 +2: + +3: + /* Lock into I-MMU */ sethi %hi(call_method), %g2 or %g2, %lo(call_method), %g2 stx %g2, [%sp + 2047 + 128 + 0x00] @@ -132,63 +155,26 @@ startup_continue: sethi %hi(prom_mmu_ihandle_cache), %g2 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 stx %g2, [%sp + 2047 + 128 + 0x20] - sethi %hi(KERNBASE), %g2 - stx %g2, [%sp + 2047 + 128 + 0x28] - sethi %hi(kern_locked_tte_data), %g2 - ldx [%g2 + %lo(kern_locked_tte_data)], %g2 - stx %g2, [%sp + 2047 + 128 + 0x30] - - mov 15, %g2 - BRANCH_IF_ANY_CHEETAH(g1,g5,1f) - mov 63, %g2 -1: - stx %g2, [%sp + 2047 + 128 + 0x38] - sethi %hi(p1275buf), %g2 - or %g2, %lo(p1275buf), %g2 - ldx [%g2 + 0x08], %o1 - call %o1 - add %sp, (2047 + 128), %o0 + /* Each TTE maps 4MB, convert index to offset. 
*/ + sllx %l5, 22, %g1 - sethi %hi(bigkernel), %g2 - lduw [%g2 + %lo(bigkernel)], %g2 - brz,pt %g2, do_dtlb - nop + add %l3, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR + add %l4, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE - sethi %hi(call_method), %g2 - or %g2, %lo(call_method), %g2 - stx %g2, [%sp + 2047 + 128 + 0x00] - mov 5, %g2 - stx %g2, [%sp + 2047 + 128 + 0x08] - mov 1, %g2 - stx %g2, [%sp + 2047 + 128 + 0x10] - sethi %hi(itlb_load), %g2 - or %g2, %lo(itlb_load), %g2 - stx %g2, [%sp + 2047 + 128 + 0x18] - sethi %hi(prom_mmu_ihandle_cache), %g2 - lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 - stx %g2, [%sp + 2047 + 128 + 0x20] - sethi %hi(KERNBASE + 0x400000), %g2 - stx %g2, [%sp + 2047 + 128 + 0x28] - sethi %hi(kern_locked_tte_data), %g2 - ldx [%g2 + %lo(kern_locked_tte_data)], %g2 - sethi %hi(0x400000), %g1 - add %g2, %g1, %g2 - stx %g2, [%sp + 2047 + 128 + 0x30] - - mov 14, %g2 - BRANCH_IF_ANY_CHEETAH(g1,g5,1f) - - mov 62, %g2 -1: + /* TTE index is highest minus loop index. */ + sub %l7, %l5, %g2 stx %g2, [%sp + 2047 + 128 + 0x38] + sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 ldx [%g2 + 0x08], %o1 call %o1 add %sp, (2047 + 128), %o0 -do_dtlb: + /* Lock into D-MMU */ sethi %hi(call_method), %g2 or %g2, %lo(call_method), %g2 stx %g2, [%sp + 2047 + 128 + 0x00] @@ -202,65 +188,30 @@ do_dtlb: sethi %hi(prom_mmu_ihandle_cache), %g2 lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 stx %g2, [%sp + 2047 + 128 + 0x20] - sethi %hi(KERNBASE), %g2 - stx %g2, [%sp + 2047 + 128 + 0x28] - sethi %hi(kern_locked_tte_data), %g2 - ldx [%g2 + %lo(kern_locked_tte_data)], %g2 - stx %g2, [%sp + 2047 + 128 + 0x30] - mov 15, %g2 - BRANCH_IF_ANY_CHEETAH(g1,g5,1f) + /* Each TTE maps 4MB, convert index to offset. */ + sllx %l5, 22, %g1 - mov 63, %g2 -1: + add %l3, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR + add %l4, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE + /* TTE index is highest minus loop index. 
*/ + sub %l7, %l5, %g2 stx %g2, [%sp + 2047 + 128 + 0x38] + sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 ldx [%g2 + 0x08], %o1 call %o1 add %sp, (2047 + 128), %o0 - sethi %hi(bigkernel), %g2 - lduw [%g2 + %lo(bigkernel)], %g2 - brz,pt %g2, do_unlock + add %l5, 1, %l5 + cmp %l5, %l6 + bne,pt %xcc, 3b nop - sethi %hi(call_method), %g2 - or %g2, %lo(call_method), %g2 - stx %g2, [%sp + 2047 + 128 + 0x00] - mov 5, %g2 - stx %g2, [%sp + 2047 + 128 + 0x08] - mov 1, %g2 - stx %g2, [%sp + 2047 + 128 + 0x10] - sethi %hi(dtlb_load), %g2 - or %g2, %lo(dtlb_load), %g2 - stx %g2, [%sp + 2047 + 128 + 0x18] - sethi %hi(prom_mmu_ihandle_cache), %g2 - lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 - stx %g2, [%sp + 2047 + 128 + 0x20] - sethi %hi(KERNBASE + 0x400000), %g2 - stx %g2, [%sp + 2047 + 128 + 0x28] - sethi %hi(kern_locked_tte_data), %g2 - ldx [%g2 + %lo(kern_locked_tte_data)], %g2 - sethi %hi(0x400000), %g1 - add %g2, %g1, %g2 - stx %g2, [%sp + 2047 + 128 + 0x30] - - mov 14, %g2 - BRANCH_IF_ANY_CHEETAH(g1,g5,1f) - - mov 62, %g2 -1: - - stx %g2, [%sp + 2047 + 128 + 0x38] - sethi %hi(p1275buf), %g2 - or %g2, %lo(p1275buf), %g2 - ldx [%g2 + 0x08], %o1 - call %o1 - add %sp, (2047 + 128), %o0 - -do_unlock: sethi %hi(prom_entry_lock), %g2 stb %g0, [%g2 + %lo(prom_entry_lock)] membar #StoreStore | #StoreLoad @@ -269,47 +220,36 @@ do_unlock: nop niagara_lock_tlb: + sethi %hi(KERNBASE), %l3 + sethi %hi(kern_locked_tte_data), %l4 + ldx [%l4 + %lo(kern_locked_tte_data)], %l4 + clr %l5 + sethi %hi(num_kernel_image_mappings), %l6 + lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 + add %l6, 1, %l6 + +1: mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 - sethi %hi(KERNBASE), %o0 + sllx %l5, 22, %g2 + add %l3, %g2, %o0 clr %o1 - sethi %hi(kern_locked_tte_data), %o2 - ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + add %l4, %g2, %o2 mov HV_MMU_IMMU, %o3 ta HV_FAST_TRAP mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 - sethi %hi(KERNBASE), %o0 + sllx %l5, 22, %g2 + add %l3, %g2, %o0 clr %o1 - sethi %hi(kern_locked_tte_data), %o2 - ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + add %l4, %g2, %o2 mov HV_MMU_DMMU, %o3 ta HV_FAST_TRAP - sethi %hi(bigkernel), %g2 - lduw [%g2 + %lo(bigkernel)], %g2 - brz,pt %g2, after_lock_tlb + add %l5, 1, %l5 + cmp %l5, %l6 + bne,pt %xcc, 1b nop - mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 - sethi %hi(KERNBASE + 0x400000), %o0 - clr %o1 - sethi %hi(kern_locked_tte_data), %o2 - ldx [%o2 + %lo(kern_locked_tte_data)], %o2 - sethi %hi(0x400000), %o3 - add %o2, %o3, %o2 - mov HV_MMU_IMMU, %o3 - ta HV_FAST_TRAP - - mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 - sethi %hi(KERNBASE + 0x400000), %o0 - clr %o1 - sethi %hi(kern_locked_tte_data), %o2 - ldx [%o2 + %lo(kern_locked_tte_data)], %o2 - sethi %hi(0x400000), %o3 - add %o2, %o3, %o2 - mov HV_MMU_DMMU, %o3 - ta HV_FAST_TRAP - after_lock_tlb: wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate wr %g0, 0, %fprs diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index b5c30416fda..466fd6cffac 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -166,7 +166,7 @@ unsigned long sparc64_kern_pri_context __read_mostly; unsigned long sparc64_kern_pri_nuc_bits __read_mostly; unsigned long sparc64_kern_sec_context __read_mostly; -int bigkernel = 0; +int num_kernel_image_mappings; #ifdef CONFIG_DEBUG_DCFLUSH atomic_t dcpage_flushes = ATOMIC_INIT(0); @@ -572,7 +572,7 @@ static unsigned long kern_large_tte(unsigned long paddr); static void __init remap_kernel(void) { unsigned long phys_page, tte_vaddr, tte_data; - int tlb_ent = sparc64_highest_locked_tlbent(); + int i, tlb_ent = 
sparc64_highest_locked_tlbent(); tte_vaddr = (unsigned long) KERNBASE; phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; @@ -582,27 +582,20 @@ static void __init remap_kernel(void) /* Now lock us into the TLBs via Hypervisor or OBP. */ if (tlb_type == hypervisor) { - hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); - hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); - if (bigkernel) { - tte_vaddr += 0x400000; - tte_data += 0x400000; + for (i = 0; i < num_kernel_image_mappings; i++) { hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); + tte_vaddr += 0x400000; + tte_data += 0x400000; } } else { - prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); - prom_itlb_load(tlb_ent, tte_data, tte_vaddr); - if (bigkernel) { - tlb_ent -= 1; - prom_dtlb_load(tlb_ent, - tte_data + 0x400000, - tte_vaddr + 0x400000); - prom_itlb_load(tlb_ent, - tte_data + 0x400000, - tte_vaddr + 0x400000); + for (i = 0; i < num_kernel_image_mappings; i++) { + prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr); + prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr); + tte_vaddr += 0x400000; + tte_data += 0x400000; } - sparc64_highest_unlocked_tlb_ent = tlb_ent - 1; + sparc64_highest_unlocked_tlb_ent = tlb_ent - i; } if (tlb_type == cheetah_plus) { sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 | @@ -1352,12 +1345,9 @@ void __init paging_init(void) shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); real_end = (unsigned long)_end; - if ((real_end > ((unsigned long)KERNBASE + 0x400000))) - bigkernel = 1; - if ((real_end > ((unsigned long)KERNBASE + 0x800000))) { - prom_printf("paging_init: Kernel > 8MB, too large.\n"); - prom_halt(); - } + num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << 22); + printk("Kernel: Using %d locked TLB entries for main kernel image.\n", + num_kernel_image_mappings); /* Set kernel pgd to upper alias so physical page computations * work. diff --git a/include/asm-sparc64/hvtramp.h b/include/asm-sparc64/hvtramp.h index c7dd6ad056d..b2b9b947b3a 100644 --- a/include/asm-sparc64/hvtramp.h +++ b/include/asm-sparc64/hvtramp.h @@ -16,7 +16,7 @@ struct hvtramp_descr { __u64 fault_info_va; __u64 fault_info_pa; __u64 thread_reg; - struct hvtramp_mapping maps[2]; + struct hvtramp_mapping maps[1]; }; extern void hv_cpu_startup(unsigned long hvdescr_pa); diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h index 63b7040e813..985ea7e3199 100644 --- a/include/asm-sparc64/spitfire.h +++ b/include/asm-sparc64/spitfire.h @@ -63,6 +63,8 @@ extern void cheetah_enable_pcache(void); SPITFIRE_HIGHEST_LOCKED_TLBENT : \ CHEETAH_HIGHEST_LOCKED_TLBENT) +extern int num_kernel_image_mappings; + /* The data cache is write through, so this just invalidates the * specified line. */ -- cgit v1.2.3 From 69d1506731168d6845a76a303b2c45f7c05f3f2c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 22 Mar 2008 15:47:05 -0700 Subject: [TCP]: Let skbs grow over a page on fast peers While testing the virtio-net driver on KVM with TSO I noticed that TSO performance with a 1500 MTU is significantly worse compared to the performance of non-TSO with a 16436 MTU. The packet dump shows that most of the packets sent are smaller than a page. Looking at the code this actually is quite obvious as it always stops extending the packet if it's the first packet yet to be sent and if it's larger than the MSS.
Since each extension is bound by the page size, this means that (given a 1500 MTU) we're very unlikely to construct packets greater than a page, provided that the receiver and the path are fast enough so that packets can always be sent immediately. The fix is also quite obvious. The push calls inside the loop are just an optimisation so that we don't end up doing all the sending at the end of the loop. Therefore there is no specific reason why it has to do so at MSS boundaries. For TSO, the most natural extension of this optimisation is to do the pushing once the skb exceeds the TSO size goal. This is what the patch does and testing with KVM shows that the TSO performance with a 1500 MTU easily surpasses that of a 16436 MTU and indeed the packet sizes sent are generally larger than 16436. I don't see any obvious downsides for slower peers or connections, but it would be prudent to test this extensively to ensure that those cases don't regress. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 071e83a894a..39b629ac240 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -735,7 +735,7 @@ new_segment: if (!(psize -= copy)) goto out; - if (skb->len < mss_now || (flags & MSG_OOB)) + if (skb->len < size_goal || (flags & MSG_OOB)) continue; if (forced_push(tp)) { @@ -981,7 +981,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < mss_now || (flags & MSG_OOB)) + if (skb->len < size_goal || (flags & MSG_OOB)) continue; if (forced_push(tp)) { -- cgit v1.2.3 From 6440cc9e0f48ade57af7be28008cbfa6a991f287 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 22 Mar 2008 17:59:58 -0700 Subject: [IPV4] fib_trie: fix warning from rcu_assign_poinger This gets rid of a warning caused by the test in rcu_assign_pointer. I tried to fix rcu_assign_pointer, but that devolved into a long set of discussions about doing it right that came to no real solution. Since the test in rcu_assign_pointer for constant NULL would never succeed in fib_trie, just open code instead. Signed-off-by: Stephen Hemminger Acked-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1ff446d0fa8..f6cdc012eec 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -177,10 +177,13 @@ static inline struct tnode *node_parent_rcu(struct node *node) return rcu_dereference(ret); } +/* Same as rcu_assign_pointer + * but that macro() assumes that value is a pointer. + */ static inline void node_set_parent(struct node *node, struct tnode *ptr) { - rcu_assign_pointer(node->parent, - (unsigned long)ptr | NODE_TYPE(node)); + smp_wmb(); + node->parent = (unsigned long)ptr | NODE_TYPE(node); } static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) -- cgit v1.2.3 From 421f099bc555c5f1516fdf5060de1d6bb5f51002 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 Mar 2008 18:04:16 -0700 Subject: [IPV6] net/ipv6/ndisc.c: remove unused variable The variable hlen is initialized but never used otherwise. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ type T; identifier i; constant C; @@ ( extern T i; | - T i; <+... when != i - i = C; ...+> ) // Signed-off-by: Julia Lawall Signed-off-by: David S.
Miller --- net/ipv6/ndisc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0d33a7d3212..51557c27a0c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1420,7 +1420,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, u8 *opt; int rd_len; int err; - int hlen; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; dev = skb->dev; @@ -1491,7 +1490,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - hlen = 0; skb_reserve(buff, LL_RESERVED_SPACE(dev)); ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, -- cgit v1.2.3 From 53a6201fdfa04accc91ea1a7accce8e8bc37ef8e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 Mar 2008 18:05:33 -0700 Subject: [9P] net/9p/trans_fd.c: remove unused variable The variable cb is initialized but never used otherwise. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ type T; identifier i; constant C; @@ ( extern T i; | - T i; <+... when != i - i = C; ...+> ) // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/9p/trans_fd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 1aa9d517539..4e8d4e724b9 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -861,7 +861,6 @@ static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) static void p9_mux_flush_cb(struct p9_req *freq, void *a) { - p9_conn_req_callback cb; int tag; struct p9_conn *m; struct p9_req *req, *rreq, *rptr; @@ -872,7 +871,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a) freq->tcall->params.tflush.oldtag); spin_lock(&m->lock); - cb = NULL; tag = freq->tcall->params.tflush.oldtag; req = NULL; list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { -- cgit v1.2.3 From 2572c149a2f52232ce690ddb9c6fd0c90ffd61cd Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Sun, 23 Mar 2008 03:07:45 -0700 Subject: BNX2X: prevent ethtool from setting port type On 10GBaseT boards setting the type to TP will cause the driver to try to configure 1GBaseT. Since there are currently no boards that support setting of the port type, disable this for now. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- drivers/net/bnx2x.c | 36 ++---------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/drivers/net/bnx2x.c b/drivers/net/bnx2x.c index 8af142ccf37..de32b3fba32 100644 --- a/drivers/net/bnx2x.c +++ b/drivers/net/bnx2x.c @@ -63,8 +63,8 @@ #include "bnx2x.h" #include "bnx2x_init.h" -#define DRV_MODULE_VERSION "1.40.22" -#define DRV_MODULE_RELDATE "2007/11/27" +#define DRV_MODULE_VERSION "1.42.3" +#define DRV_MODULE_RELDATE "2008/3/9" #define BNX2X_BC_VER 0x040200 /* Time in jiffies before concluding the transmitter is hung. 
*/ @@ -8008,38 +8008,6 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->duplex, cmd->port, cmd->phy_address, cmd->transceiver, cmd->autoneg, cmd->maxtxpkt, cmd->maxrxpkt); - switch (cmd->port) { - case PORT_TP: - if (!(bp->supported & SUPPORTED_TP)) { - DP(NETIF_MSG_LINK, "TP not supported\n"); - return -EINVAL; - } - - if (bp->phy_flags & PHY_XGXS_FLAG) { - bnx2x_link_reset(bp); - bnx2x_link_settings_supported(bp, SWITCH_CFG_1G); - bnx2x_phy_deassert(bp); - } - break; - - case PORT_FIBRE: - if (!(bp->supported & SUPPORTED_FIBRE)) { - DP(NETIF_MSG_LINK, "FIBRE not supported\n"); - return -EINVAL; - } - - if (!(bp->phy_flags & PHY_XGXS_FLAG)) { - bnx2x_link_reset(bp); - bnx2x_link_settings_supported(bp, SWITCH_CFG_10G); - bnx2x_phy_deassert(bp); - } - break; - - default: - DP(NETIF_MSG_LINK, "Unknown port type\n"); - return -EINVAL; - } - if (cmd->autoneg == AUTONEG_ENABLE) { if (!(bp->supported & SUPPORTED_Autoneg)) { DP(NETIF_MSG_LINK, "Aotoneg not supported\n"); -- cgit v1.2.3 From da990a2402aeaee84837f29054c4628eb02f7493 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 23 Mar 2008 03:35:12 -0700 Subject: [SUNGEM]: Fix NAPI assertion failure. As reported by Johannes Berg: I started getting this warning with recent kernels: [ 773.908927] ------------[ cut here ]------------ [ 773.908954] Badness at net/core/dev.c:2204 ... If we loop more than once in gem_poll(), we'll use more than the real budget in our gem_rx() calls, thus eventually trigger the caller's assertions in net_rx_action(). Subtract "work_done" from "budget" for the second arg to gem_rx() to fix the bug. Signed-off-by: David S. Miller --- drivers/net/sungem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 97212799c51..4291458955e 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -912,7 +912,7 @@ static int gem_poll(struct napi_struct *napi, int budget) * rx ring - must call napi_disable(), which * schedule_timeout()'s if polling is already disabled. */ - work_done += gem_rx(gp, budget); + work_done += gem_rx(gp, budget - work_done); if (work_done >= budget) return work_done; -- cgit v1.2.3 From dbee0d3f4603b9d0e56234a0743321fe4dad31ca Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Sun, 23 Mar 2008 21:45:36 -0700 Subject: [ATM]: When proc_create() fails, do some error handling work and return -ENOMEM. Signed-off-by: Wang Chen Signed-off-by: David S. 
Miller --- net/atm/clip.c | 19 ++++++++++++++++--- net/atm/lec.c | 4 ++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/net/atm/clip.c b/net/atm/clip.c index d30167c0b48..2ab1e36098f 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -947,6 +947,8 @@ static const struct file_operations arp_seq_fops = { }; #endif +static void atm_clip_exit_noproc(void); + static int __init atm_clip_init(void) { neigh_table_init_no_netlink(&clip_tbl); @@ -963,18 +965,22 @@ static int __init atm_clip_init(void) struct proc_dir_entry *p; p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops); + if (!p) { + printk(KERN_ERR "Unable to initialize " + "/proc/net/atm/arp\n"); + atm_clip_exit_noproc(); + return -ENOMEM; + } } #endif return 0; } -static void __exit atm_clip_exit(void) +static void atm_clip_exit_noproc(void) { struct net_device *dev, *next; - remove_proc_entry("arp", atm_proc_root); - unregister_inetaddr_notifier(&clip_inet_notifier); unregister_netdevice_notifier(&clip_dev_notifier); @@ -1005,6 +1011,13 @@ static void __exit atm_clip_exit(void) clip_tbl_hook = NULL; } +static void __exit atm_clip_exit(void) +{ + remove_proc_entry("arp", atm_proc_root); + + atm_clip_exit_noproc(); +} + module_init(atm_clip_init); module_exit(atm_clip_exit); MODULE_AUTHOR("Werner Almesberger"); diff --git a/net/atm/lec.c b/net/atm/lec.c index 0e450d12f03..a2efa7ff41f 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1250,6 +1250,10 @@ static int __init lane_module_init(void) struct proc_dir_entry *p; p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops); + if (!p) { + printk(KERN_ERR "Unable to initialize /proc/net/atm/lec\n"); + return -ENOMEM; + } #endif register_atm_ioctl(&lane_ioctl_ops); -- cgit v1.2.3 From 4b1b366721101f2f0d2350fbdccb679f7909cf57 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Sun, 23 Mar 2008 21:51:12 -0700 Subject: connector: convert to single-threaded workqueue From: Evgeniy Polyakov We don't need one cqueue thread for each CPU. cqueue is used for receiving userspace datagrams, which are very rare and thus will happily live with a single queue. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/connector/cn_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index 5732ca3259f..b6fe7e7a2c2 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -146,7 +146,7 @@ struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *nls) dev->nls = nls; - dev->cn_queue = create_workqueue(dev->name); + dev->cn_queue = create_singlethread_workqueue(dev->name); if (!dev->cn_queue) { kfree(dev); return NULL; -- cgit v1.2.3 From 8f3ea33a5078a09eba12bfe57424507809367756 Mon Sep 17 00:00:00 2001 From: Martin Devera Date: Sun, 23 Mar 2008 22:00:38 -0700 Subject: sch_htb: fix "too many events" situation HTB is an event-driven algorithm and part of its work is to apply scheduled events at proper times. It tried to defend itself from livelock by processing only a limited number of events per dequeue. Because of faster computers some users already hit this hardcoded limit. This patch limits processing to at most 2 jiffies (why not 1 jiffy? because it might stop prematurely when only a fraction of a jiffy remains). Signed-off-by: Martin Devera Signed-off-by: David S.
Miller --- net/sched/sch_htb.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 795c761ad99..66148cc4759 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -711,9 +711,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, */ static psched_time_t htb_do_events(struct htb_sched *q, int level) { - int i; - - for (i = 0; i < 500; i++) { + /* don't run for longer than 2 jiffies; 2 is used instead of + 1 to simplify things when jiffy is going to be incremented + too soon */ + unsigned long stop_at = jiffies + 2; + while (time_before(jiffies, stop_at)) { struct htb_class *cl; long diff; struct rb_node *p = rb_first(&q->wait_pq[level]); @@ -731,9 +733,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level) if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } - if (net_ratelimit()) - printk(KERN_WARNING "htb: too many events !\n"); - return q->now + PSCHED_TICKS_PER_SEC / 10; + /* too much load - let's continue on next jiffie */ + return q->now + PSCHED_TICKS_PER_SEC / HZ; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL -- cgit v1.2.3 From 1f17131bb46065141069dee9fbcc4bdd0e9c2a2e Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 23 Mar 2008 22:48:29 -0700 Subject: [SPARC64]: Use shorter list_splice_init() for brevity. Signed-off-by: Robert P. J. Day Signed-off-by: David S. Miller --- arch/sparc64/kernel/ds.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index bd76482077b..edb74f5a118 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -972,8 +972,7 @@ static void process_ds_work(void) LIST_HEAD(todo); spin_lock_irqsave(&ds_lock, flags); - list_splice(&ds_work_list, &todo); - INIT_LIST_HEAD(&ds_work_list); + list_splice_init(&ds_work_list, &todo); spin_unlock_irqrestore(&ds_lock, flags); list_for_each_entry_safe(qp, tmp, &todo, list) { -- cgit v1.2.3 From 6d008153234c4cccae7bb0170defeea18258db4a Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 23 Mar 2008 22:50:16 -0700 Subject: [SPARC64]: exec PT_DTRACE The PT_DTRACE flag is meaningless and obsolete. Don't touch it. Signed-off-by: Roland McGrath Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/process.c | 3 --- arch/sparc64/kernel/sys_sparc32.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index e116e38b160..acf8c5250aa 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -731,9 +731,6 @@ asmlinkage int sparc_execve(struct pt_regs *regs) current_thread_info()->xfsr[0] = 0; current_thread_info()->fpsaved[0] = 0; regs->tstate &= ~TSTATE_PEF; - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); } out: return error; diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index deaba2bd053..2455fa49887 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -678,9 +678,6 @@ asmlinkage long sparc32_execve(struct pt_regs *regs) current_thread_info()->xfsr[0] = 0; current_thread_info()->fpsaved[0] = 0; regs->tstate &= ~TSTATE_PEF; - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); } out: return error; -- cgit v1.2.3 From cfe666b145cecffe784d98e60ffe201a5dc57ac3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 24 Mar 2008 17:41:22 +1100 Subject: [POWERPC] Don't use 64k pages for ioremap on pSeries On pSeries, the hypervisor doesn't let us map in the eHEA ethernet adapter using 64k pages, and thus the ehea driver will fail if 64k pages are configured. This works around the problem by always using 4k pages for ioremap on pSeries (but not on other platforms). A better fix would be to check whether the partition could ever have an eHEA adapter, and only force 4k pages if it could, but this will do for 2.6.25. This is based on an earlier patch by Tony Breeds. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 590f1f67c87..a83dfa3cf40 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -351,9 +351,14 @@ static void __init htab_init_page_sizes(void) mmu_vmalloc_psize = MMU_PAGE_64K; if (mmu_linear_psize == MMU_PAGE_4K) mmu_linear_psize = MMU_PAGE_64K; - if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) - mmu_io_psize = MMU_PAGE_64K; - else + if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) { + /* + * Don't use 64k pages for ioremap on pSeries, since + * that would stop us accessing the HEA ethernet. + */ + if (!machine_is(pseries)) + mmu_io_psize = MMU_PAGE_64K; + } else mmu_ci_restrictions = 1; } #endif /* CONFIG_PPC_64K_PAGES */ -- cgit v1.2.3 From 1428a9fa586cb80acf98289f797f58b8bd662598 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Tue, 18 Mar 2008 06:53:05 +1100 Subject: [POWERPC] Fix crash in init_ipic_sysfs on efika The global primary_ipic in arch/powerpc/sysdev/ipic.c can remain NULL if ipic_init() fails, which will happen on machines that don't have an ipic interrupt controller. init_ipic_sysfs() will crash in that case. 
Acked-by: Grant Likely Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/ipic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index ae0dbf4c1d6..0f2dfb0aaa6 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -906,7 +906,7 @@ static int __init init_ipic_sysfs(void) { int rc; - if (!primary_ipic->regs) + if (!primary_ipic || !primary_ipic->regs) return -ENODEV; printk(KERN_DEBUG "Registering ipic with sysfs...\n"); -- cgit v1.2.3 From b8c19eb16a7e6df57d0f6d67e42ce026e5d5930b Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 22 Mar 2008 14:20:29 +1100 Subject: [POWERPC] mpc5200-fec: Fix possible NULL dereference in mdio driver If the reg property is missing from the phy node (unlikely, but possible), then the kernel will oops with a NULL pointer dereference. This fixes it by checking the pointer first. Signed-off-by: Grant Likely Signed-off-by: Paul Mackerras --- drivers/net/fec_mpc52xx_phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/fec_mpc52xx_phy.c b/drivers/net/fec_mpc52xx_phy.c index 1837584c450..6a3ac4ea97e 100644 --- a/drivers/net/fec_mpc52xx_phy.c +++ b/drivers/net/fec_mpc52xx_phy.c @@ -109,7 +109,8 @@ static int mpc52xx_fec_mdio_probe(struct of_device *of, const struct of_device_i int irq = irq_of_parse_and_map(child, 0); if (irq != NO_IRQ) { const u32 *id = of_get_property(child, "reg", NULL); - bus->irq[*id] = irq; + if (id) + bus->irq[*id] = irq; } } -- cgit v1.2.3 From 9560aea4e9d17cb75113c6051e800222fd5c71a4 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 22 Mar 2008 14:41:05 +1100 Subject: [POWERPC] mpc5200: Fix null dereference if bestcomm fails to initialize If the bestcomm initialization fails, calls to the task allocate function should fail gracefully instead of oopsing with a NULL deref. Signed-off-by: Grant Likely Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/bestcomm/bestcomm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/sysdev/bestcomm/bestcomm.c b/arch/powerpc/sysdev/bestcomm/bestcomm.c index f589999361e..b18cab55a76 100644 --- a/arch/powerpc/sysdev/bestcomm/bestcomm.c +++ b/arch/powerpc/sysdev/bestcomm/bestcomm.c @@ -52,6 +52,10 @@ bcom_task_alloc(int bd_count, int bd_size, int priv_size) int i, tasknum = -1; struct bcom_task *tsk; + /* Don't try to do anything if bestcomm init failed */ + if (!bcom_eng) + return NULL; + /* Get and reserve a task num */ spin_lock(&bcom_eng->lock); -- cgit v1.2.3 From 7ea6fd7e2df041297298b5feb5b7b78a2b1a5310 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Sat, 22 Mar 2008 21:49:05 +1100 Subject: [POWERPC] Fix Oops with TQM5200 on TQM5200 The "bestcomm-core" driver defines its of_match table as follows static struct of_device_id mpc52xx_bcom_of_match[] = { { .type = "dma-controller", .compatible = "fsl,mpc5200-bestcomm", }, { .type = "dma-controller", .compatible = "mpc5200-bestcomm", }, {}, }; so while registering the driver, the driver's probe function won't be called, because the device tree node doesn't have a device_type property. Thus the driver's bcom_engine structure won't be allocated. Referencing this structure later causes observed Oops. Checking bcom_eng pointer for NULL before referencing data pointed by it prevents oopsing, but fec driver still doesn't work (because of the lost bestcomm match and resulted task allocation failure). Actually the compatible property exists and should match and so the fec driver should work. 
This removes .type = "dma-controller" from the bestcomm driver's mpc52xx_bcom_of_match table to solve the problem. Signed-off-by: Anatolij Gustschin Acked-by: Grant Likely Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/bestcomm/bestcomm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/bestcomm/bestcomm.c b/arch/powerpc/sysdev/bestcomm/bestcomm.c index b18cab55a76..64ec7d62936 100644 --- a/arch/powerpc/sysdev/bestcomm/bestcomm.c +++ b/arch/powerpc/sysdev/bestcomm/bestcomm.c @@ -488,8 +488,8 @@ mpc52xx_bcom_remove(struct of_device *op) } static struct of_device_id mpc52xx_bcom_of_match[] = { - { .type = "dma-controller", .compatible = "fsl,mpc5200-bestcomm", }, - { .type = "dma-controller", .compatible = "mpc5200-bestcomm", }, + { .compatible = "fsl,mpc5200-bestcomm", }, + { .compatible = "mpc5200-bestcomm", }, {}, }; -- cgit v1.2.3 From 92896bd9fd75b1c993b92874d339a8088bb75560 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Mar 2008 11:07:15 -0700 Subject: Don't 'printk()' while holding xtime lock for writing The printk() can deadlock because it can wake up klogd(), and task enqueueing will try to read the time in order to set a hrtimer. Reported-by: Marcin Slusarz Debugged-by: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/time/timekeeping.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 671af612b76..a3fa587c350 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -191,8 +191,12 @@ static void change_clocksource(void) tick_clock_notify(); + /* + * We're holding xtime lock and waking up klogd would deadlock + * us on enqueue. So no printing! printk(KERN_INFO "Time: %s clocksource has been installed.\n", clock->name); + */ } #else static inline void change_clocksource(void) { } -- cgit v1.2.3 From b9e76a00749521f2b080fa8a4fb15f66538ab756 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Mar 2008 11:22:39 -0700 Subject: x86-32: Pass the full resource data to ioremap() It appears that 64-bit PCI resources cannot possibly ever have worked on x86-32 even when the RESOURCES_64BIT config option was set, because any driver that tried to [pci_]ioremap() the resource would have been unable to do so because the high 32 bits would have been silently dropped on the floor by the ioremap() routines that only used "unsigned long". Change them to use "resource_size_t" instead, which properly encodes the whole 64-bit resource data if RESOURCES_64BIT is enabled. Acked-by: H. Peter Anvin Acked-by: Stefan Richter Cc: Ivan Kokshaysky Signed-off-by: Linus Torvalds --- arch/x86/mm/ioremap.c | 6 +++--- include/asm-x86/io_32.h | 6 +++--- include/asm-x86/io_64.h | 6 +++--- lib/iomap.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8fe576baa14..4afaba0ed72 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -106,7 +106,7 @@ static int ioremap_change_attr(unsigned long vaddr, unsigned long size, * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. 
*/ -static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, +static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, enum ioremap_mode mode) { unsigned long pfn, offset, last_addr, vaddr; @@ -193,13 +193,13 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, * * Must be freed with iounmap. */ -void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { return __ioremap(phys_addr, size, IOR_MODE_UNCACHED); } EXPORT_SYMBOL(ioremap_nocache); -void __iomem *ioremap_cache(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { return __ioremap(phys_addr, size, IOR_MODE_CACHED); } diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 58d2c45cd0b..d4d8fbd9378 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -114,13 +114,13 @@ static inline void * phys_to_virt(unsigned long address) * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ -extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size); -extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size); +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); /* * The default ioremap() behavior is non-cached: */ -static inline void __iomem *ioremap(unsigned long offset, unsigned long size) +static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { return ioremap_nocache(offset, size); } diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index f64a59cc396..db0be2011a3 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -158,13 +158,13 @@ extern void early_iounmap(void *addr, unsigned long size); * it's useful if some control registers are in such an area and write combining * or read caching is not desirable: */ -extern void __iomem *ioremap_nocache(unsigned long offset, unsigned long size); -extern void __iomem *ioremap_cache(unsigned long offset, unsigned long size); +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); /* * The default ioremap() behavior is non-cached: */ -static inline void __iomem *ioremap(unsigned long offset, unsigned long size) +static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { return ioremap_nocache(offset, size); } diff --git a/lib/iomap.c b/lib/iomap.c index db004a9ff50..dd6ca48fe6b 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -256,7 +256,7 @@ EXPORT_SYMBOL(ioport_unmap); * */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); unsigned long len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); -- cgit v1.2.3 From d3073779f8362d64b804882f5f41c208c4a5e11e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 24 Mar 2008 12:03:03 -0400 Subject: SVCRDMA: Use only 1 RDMA read scatter entry for iWARP adapters The iWARP protocol limits RDMA read requests to a single scatter entry. 
NFS/RDMA has code in rdma_read_max_sge() that is supposed to limit the sge_count for RDMA read requests to 1, but the code to do that is inside an #ifdef RDMA_TRANSPORT_IWARP block. In the mainline kernel at least, RDMA_TRANSPORT_IWARP is an enum and not a preprocessor #define, so the #ifdef'ed code is never compiled. In my test of a kernel build with -j8 on an NFS/RDMA mount, this problem eventually leads to trouble starting with: svcrdma: Error posting send = -22 svcrdma : RDMA_READ error = -22 and things go downhill from there. The trivial fix is to delete the #ifdef guard. The check seems to be a remnant of when the NFS/RDMA code was not merged and needed to compile against multiple kernel versions, although I don't think it ever worked as intended. In any case now that the code is upstream there's no need to test whether the RDMA_TRANSPORT_IWARP constant is defined or not. Without this patch, my kernel build on an NFS/RDMA mount using NetEffect adapters quickly and 100% reproducibly failed with an error like: ld: final link failed: Software caused connection abort With the patch applied I was able to complete a kernel build on the same setup. (Tom Tucker says this is "actually an _ancient_ remnant when it had to compile against iWARP vs. non-iWARP enabled OFA trees.") Signed-off-by: Roland Dreier Acked-by: Tom Tucker Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ab54a736486..971271602dd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -237,14 +237,12 @@ static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { -#ifdef RDMA_TRANSPORT_IWARP if ((RDMA_TRANSPORT_IWARP == rdma_node_get_transport(xprt->sc_cm_id-> device->node_type)) && sge_count > 1) return 1; else -#endif return min_t(int, sge_count, xprt->sc_max_sge); } -- cgit v1.2.3
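The pitfall behind the SVCRDMA fix above is easy to reproduce outside the kernel: enumerators are invisible to the C preprocessor, so an #ifdef on an enumerator name is always false and the guarded code is silently discarded. A minimal stand-alone sketch in ordinary user-space C (the names only mirror the kernel ones for illustration; this is not kernel code):

#include <stdio.h>

enum rdma_transport_type {
        RDMA_TRANSPORT_IB,
        RDMA_TRANSPORT_IWARP,           /* an enumerator, not a macro */
};

static int read_max_sge(enum rdma_transport_type transport, int sge_count)
{
#ifdef RDMA_TRANSPORT_IWARP             /* never defined: branch is compiled out */
        if (transport == RDMA_TRANSPORT_IWARP && sge_count > 1)
                return 1;
#endif
        return sge_count;
}

int main(void)
{
        /* Prints 4 rather than the intended 1, because the guarded clamp vanished. */
        printf("%d\n", read_max_sge(RDMA_TRANSPORT_IWARP, 4));
        return 0;
}

Removing the #ifdef and relying on the ordinary run-time comparison, as the patch does, restores the intended single-SGE clamp for iWARP.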