From d1b04c081e3fb0a08ac108737e4efa9f4830c916 Mon Sep 17 00:00:00 2001 From: Baruch Even Date: Sat, 30 Jul 2005 17:41:59 -0700 Subject: [NET]: Spelling mistakes threshoulds -> thresholds Just simple spelling mistake fixes. Signed-Off-By: Baruch Even Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7833d920bdb..dc806b57842 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -362,7 +362,7 @@ out: /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) +static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) { int vifi; @@ -727,7 +727,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) if (c != NULL) { write_lock_bh(&mrt_lock); c->mfc_parent = mfc->mfcc_parent; - ipmr_update_threshoulds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); @@ -744,7 +744,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) c->mfc_origin=mfc->mfcc_origin.s_addr; c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; c->mfc_parent=mfc->mfcc_parent; - ipmr_update_threshoulds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; -- cgit v1.2.3 From 1f494c0e040b001cf844280910d04ba7ebdc2898 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 30 Jul 2005 17:44:07 -0700 Subject: [NETFILTER] Inherit masq_index to slave connections masq_index is used for cleanup in case the interface address changes (such as a dialup ppp link with dynamic addresses). Without this patch, slave connections are not evicted in such a case, since they don't inherit masq_index. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 86f04e41dd8..a7f0c821a9b 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -512,6 +512,11 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, conntrack->master = exp->master; #ifdef CONFIG_IP_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; +#endif +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + /* this is ugly, but there is no other place where to put it */ + conntrack->nat.masq_index = exp->master->nat.masq_index; #endif nf_conntrack_get(&conntrack->master->ct_general); CONNTRACK_STAT_INC(expect_new); -- cgit v1.2.3 From db44575f6fd55df6ff67ddd21f7ad5be5a741136 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Sat, 30 Jul 2005 17:46:44 -0700 Subject: [NET]: fix oops after tunnel module unload Tunnel modules used to obtain a module refcount each time a tunnel was created, which meant that a tunnel module could be unloaded only after all the tunnels were deleted. Since the killing of the old MOD_*_USE_COUNT macros this protection has gone. It is possible to bring it back as module_get/put, but it looks more natural and practically useful to force destruction of all the child tunnels on module unload. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S.
Miller --- net/ipv4/ip_gre.c | 21 ++++++++++++++++++--- net/ipv4/ipip.c | 20 ++++++++++++++++++-- net/ipv6/sit.c | 21 +++++++++++++++++++-- 3 files changed, 55 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 88483552222..f0d5740d7e2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -290,7 +290,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int dev_hold(dev); ipgre_tunnel_link(nt); - /* Do not decrement MOD_USE_COUNT here. */ return nt; failed: @@ -1277,12 +1276,28 @@ err1: goto out; } -static void ipgre_fini(void) +static void __exit ipgre_destroy_tunnels(void) +{ + int prio; + + for (prio = 0; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip_tunnel *t; + while ((t = tunnels[prio][h]) != NULL) + unregister_netdevice(t->dev); + } + } +} + +static void __exit ipgre_fini(void) { if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); - unregister_netdev(ipgre_fb_tunnel_dev); + rtnl_lock(); + ipgre_destroy_tunnels(); + rtnl_unlock(); } module_init(ipgre_init); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c3947cd566b..c05c1df0bb0 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -255,7 +255,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c dev_hold(dev); ipip_tunnel_link(nt); - /* Do not decrement MOD_USE_COUNT here. */ return nt; failed: @@ -920,12 +919,29 @@ static int __init ipip_init(void) goto out; } +static void __exit ipip_destroy_tunnels(void) +{ + int prio; + + for (prio = 1; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip_tunnel *t; + while ((t = tunnels[prio][h]) != NULL) + unregister_netdevice(t->dev); + } + } +} + static void __exit ipip_fini(void) { if (ipip_unregister() < 0) printk(KERN_INFO "ipip close: can't deregister tunnel\n"); - unregister_netdev(ipip_fb_tunnel_dev); + rtnl_lock(); + ipip_destroy_tunnels(); + unregister_netdevice(ipip_fb_tunnel_dev); + rtnl_unlock(); } module_init(ipip_init); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b788f55e139..e553e5b80d6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -195,7 +195,6 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int dev_hold(dev); ipip6_tunnel_link(nt); - /* Do not decrement MOD_USE_COUNT here. */ return nt; failed: @@ -794,10 +793,28 @@ static struct net_protocol sit_protocol = { .err_handler = ipip6_err, }; +static void __exit sit_destroy_tunnels(void) +{ + int prio; + + for (prio = 1; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip_tunnel *t; + while ((t = tunnels[prio][h]) != NULL) + unregister_netdevice(t->dev); + } + } +} + void __exit sit_cleanup(void) { inet_del_protocol(&sit_protocol, IPPROTO_IPV6); - unregister_netdev(ipip6_fb_tunnel_dev); + + rtnl_lock(); + sit_destroy_tunnels(); + unregister_netdevice(ipip6_fb_tunnel_dev); + rtnl_unlock(); } int __init sit_init(void) -- cgit v1.2.3 From f0098f7863f814a5adc0b9cb271605d063cad7fa Mon Sep 17 00:00:00 2001 From: Denis Lunev Date: Sat, 30 Jul 2005 17:47:25 -0700 Subject: [NET] Fix too aggressive backoff in dst garbage collection The bug is evident when it is seen once. dst gc timer was backed off, when gc queue is not empty. But this means that timer quickly backs off, if at least one destination remains in use. Normally, the bug is invisible, because adding new dst entry to queue cancels the backoff. 
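To see the backoff arithmetic in isolation, here is a minimal userspace C sketch — not kernel code: the constants stand in for DST_GC_MIN/DST_GC_INC/DST_GC_MAX, and the work_performed flag mirrors the one the patch below introduces. Drop the else branch and you get the old behaviour, where the interval only ever grows while any entry stays in use.

/* minimal userspace sketch of the gc timer backoff, NOT kernel code */
#include <stdio.h>

#define GC_MIN 1        /* illustrative values, in seconds */
#define GC_INC 5
#define GC_MAX 120

static int gc_expires = GC_MIN;
static int gc_inc = GC_INC;

static void next_gc_interval(int work_performed)
{
    if (!work_performed) {
        /* nothing freed: keep backing off, capped at GC_MAX */
        if ((gc_expires += gc_inc) > GC_MAX)
            gc_expires = GC_MAX;
        gc_inc += GC_INC;
    } else {
        /* something was released: restart from the minimum */
        gc_inc = GC_INC;
        gc_expires = GC_MIN;
    }
}

int main(void)
{
    int pass;

    for (pass = 0; pass < 5; pass++) {
        next_gc_interval(0);    /* all entries still referenced */
        printf("busy pass %d: next gc in %d s\n", pass, gc_expires);
    }
    next_gc_interval(1);        /* one entry finally released */
    printf("after release: next gc in %d s\n", gc_expires);
    return 0;
}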
But it turns deadly when the destination cache overflows because new destinations are not released for a long time, e.g. after an interface goes down. The fix is to cancel backoff when something was released. Signed-off-by: Denis Lunev Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/core/dst.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index fc434ade527..334790da9f1 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -45,6 +45,7 @@ static struct timer_list dst_gc_timer = static void dst_run_gc(unsigned long dummy) { int delayed = 0; + int work_performed; struct dst_entry * dst, **dstp; if (!spin_trylock(&dst_lock)) { @@ -52,9 +53,9 @@ static void dst_run_gc(unsigned long dummy) return; } - del_timer(&dst_gc_timer); dstp = &dst_garbage_list; + work_performed = 0; while ((dst = *dstp) != NULL) { if (atomic_read(&dst->__refcnt)) { dstp = &dst->next; @@ -62,6 +63,7 @@ static void dst_run_gc(unsigned long dummy) continue; } *dstp = dst->next; + work_performed = 1; dst = dst_destroy(dst); if (dst) { @@ -86,9 +88,14 @@ static void dst_run_gc(unsigned long dummy) dst_gc_timer_inc = DST_GC_MAX; goto out; } - if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) - dst_gc_timer_expires = DST_GC_MAX; - dst_gc_timer_inc += DST_GC_INC; + if (!work_performed) { + if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) + dst_gc_timer_expires = DST_GC_MAX; + dst_gc_timer_inc += DST_GC_INC; + } else { + dst_gc_timer_inc = DST_GC_INC; + dst_gc_timer_expires = DST_GC_MIN; + } dst_gc_timer.expires = jiffies + dst_gc_timer_expires; #if RT_CACHE_DEBUG >= 2 printk("dst_total: %d/%d %ld\n", -- cgit v1.2.3 From 846998ae87a80b0fd45b4cf5cf001a159d746f27 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 4 Aug 2005 19:52:01 -0700 Subject: [PATCH] tcp: fix TSO sizing bugs MSS changes can be lost since we preemptively initialize the tso_segs count for an SKB before we 100% commit to sending it out. So, by the time we send it out, the tso_size information can be stale due to PMTU events. This mucks up all of the logic in our send engine, and can even result in the BUG() triggering in tcp_tso_should_defer(). Another problem we have is that we're storing the tp->mss_cache, not the SACK block normalized MSS, as the tso_size. That's wrong too. Signed-off-by: David S. Miller Cc: Herbert Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/ipv4/tcp_output.c | 56 +++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3f8ea1bfa9..e118b4b5b32 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -403,11 +403,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) sk->sk_send_head = skb; } -static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) +static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - struct tcp_sock *tp = tcp_sk(sk); - - if (skb->len <= tp->mss_cache || + if (skb->len <= mss_now || !(sk->sk_route_caps & NETIF_F_TSO)) { /* Avoid the costly divide in the normal * non-TSO case.
@@ -417,10 +415,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) } else { unsigned int factor; - factor = skb->len + (tp->mss_cache - 1); - factor /= tp->mss_cache; + factor = skb->len + (mss_now - 1); + factor /= mss_now; skb_shinfo(skb)->tso_segs = factor; - skb_shinfo(skb)->tso_size = tp->mss_cache; + skb_shinfo(skb)->tso_size = mss_now; } } @@ -429,7 +427,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) * packet to the list. This won't be called frequently, I hope. * Remember, these are still headerless SKBs at this point. */ -static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) +static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; @@ -492,8 +490,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) } /* Fix up tso_factor for both original and new SKB. */ - tcp_set_skb_tso_segs(sk, skb); - tcp_set_skb_tso_segs(sk, buff); + tcp_set_skb_tso_segs(sk, skb, mss_now); + tcp_set_skb_tso_segs(sk, buff, mss_now); if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out += tcp_skb_pcount(skb); @@ -569,7 +567,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) * factor and mss. */ if (tcp_skb_pcount(skb) > 1) - tcp_set_skb_tso_segs(sk, skb); + tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1)); return 0; } @@ -734,12 +732,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk /* This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb) +static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs) { - tcp_set_skb_tso_segs(sk, skb); + if (!tso_segs || + (tso_segs > 1 && + skb_shinfo(skb)->tso_size != mss_now)) { + tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } return tso_segs; @@ -817,7 +817,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); unsigned int cwnd_quota; - tcp_init_tso_segs(sk, skb); + tcp_init_tso_segs(sk, skb, cur_mss); if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) return 0; @@ -854,7 +854,7 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ -static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) +static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now) { struct sk_buff *buff; int nlen = skb->len - len; @@ -887,8 +887,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) skb_split(skb, buff, len); /* Fix up tso_factor for both original and new SKB. */ - tcp_set_skb_tso_segs(sk, skb); - tcp_set_skb_tso_segs(sk, buff); + tcp_set_skb_tso_segs(sk, skb, mss_now); + tcp_set_skb_tso_segs(sk, buff, mss_now); /* Link BUFF into the send queue. 
*/ skb_header_release(buff); @@ -976,7 +976,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) if (unlikely(!skb)) return 0; - tso_segs = tcp_init_tso_segs(sk, skb); + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); cwnd_quota = tcp_cwnd_test(tp, skb); if (unlikely(!cwnd_quota)) goto out; @@ -1006,11 +1006,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) limit = skb->len - trim; } if (skb->len > limit) { - if (tso_fragment(sk, skb, limit)) + if (tso_fragment(sk, skb, limit, mss_now)) break; } } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now))) + if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) break; } @@ -1039,7 +1039,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) skb = sk->sk_send_head; if (!skb) break; - tso_segs = tcp_init_tso_segs(sk, skb); + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); } if (likely(sent_pkts)) { @@ -1076,7 +1076,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!skb || skb->len < mss_now); - tso_segs = tcp_init_tso_segs(sk, skb); + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); if (likely(cwnd_quota)) { @@ -1093,11 +1093,11 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) limit = skb->len - trim; } if (skb->len > limit) { - if (unlikely(tso_fragment(sk, skb, limit))) + if (unlikely(tso_fragment(sk, skb, limit, mss_now))) return; } } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now))) + if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) return; } @@ -1388,7 +1388,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int old_factor = tcp_skb_pcount(skb); int new_factor; - if (tcp_fragment(sk, skb, cur_mss)) + if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ /* New SKB created, account for it. */ @@ -1991,7 +1991,7 @@ int tcp_write_wakeup(struct sock *sk) skb->len > mss) { seg_size = min(seg_size, mss); TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; - if (tcp_fragment(sk, skb, seg_size)) + if (tcp_fragment(sk, skb, seg_size, mss)) return -1; /* SWS override triggered forced fragmentation. * Disable TSO, the connection is too sick. */ @@ -2000,7 +2000,7 @@ int tcp_write_wakeup(struct sock *sk) sk->sk_route_caps &= ~NETIF_F_TSO; } } else if (!tcp_skb_pcount(skb)) - tcp_set_skb_tso_segs(sk, skb); + tcp_set_skb_tso_segs(sk, skb, mss); TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; -- cgit v1.2.3 From b68e9f857271189bd7a59b74c99890de9195b0e1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Aug 2005 19:52:02 -0700 Subject: [PATCH] tcp: fix TSO cwnd caching bug tcp_write_xmit caches the cwnd value indirectly in cwnd_quota. When tcp_transmit_skb reduces the cwnd because of tcp_enter_cwr, the cached value becomes invalid. This patch ensures that the cwnd value is always reread after each tcp_transmit_skb call. Signed-off-by: Herbert Xu Cc: "David S. 
Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/ipv4/tcp_output.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e118b4b5b32..7d076f0db10 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -972,19 +972,18 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) if (unlikely(sk->sk_state == TCP_CLOSE)) return 0; - skb = sk->sk_send_head; - if (unlikely(!skb)) - return 0; - - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); - cwnd_quota = tcp_cwnd_test(tp, skb); - if (unlikely(!cwnd_quota)) - goto out; - sent_pkts = 0; - while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) { + while ((skb = sk->sk_send_head)) { + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); + cwnd_quota = tcp_cwnd_test(tp, skb); + if (!cwnd_quota) + break; + + if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) + break; + if (tso_segs == 1) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? @@ -1026,27 +1025,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tcp_minshall_update(tp, mss_now, skb); sent_pkts++; - - /* Do not optimize this to use tso_segs. If we chopped up - * the packet above, tso_segs will no longer be valid. - */ - cwnd_quota -= tcp_skb_pcount(skb); - - BUG_ON(cwnd_quota < 0); - if (!cwnd_quota) - break; - - skb = sk->sk_send_head; - if (!skb) - break; - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); } if (likely(sent_pkts)) { tcp_cwnd_validate(sk, tp); return 0; } -out: return !tp->packets_out && sk->sk_send_head; } -- cgit v1.2.3 From b7656e7f2944984befa3ab99a5b99f99a23b302b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 5 Aug 2005 04:12:48 -0700 Subject: [IPV4]: Fix memory leak during fib_info hash expansion. When we grow the tables, we forget to free the old ones up. Noticed by Yan Zheng. Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c886b28ba9f..e278cb9d007 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -593,10 +593,13 @@ static void fib_hash_move(struct hlist_head *new_info_hash, struct hlist_head *new_laddrhash, unsigned int new_size) { + struct hlist_head *old_info_hash, *old_laddrhash; unsigned int old_size = fib_hash_size; - unsigned int i; + unsigned int i, bytes; write_lock(&fib_info_lock); + old_info_hash = fib_info_hash; + old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; for (i = 0; i < old_size; i++) { @@ -636,6 +639,10 @@ static void fib_hash_move(struct hlist_head *new_info_hash, fib_info_laddrhash = new_laddrhash; write_unlock(&fib_info_lock); + + bytes = old_size * sizeof(struct hlist_head *); + fib_hash_free(old_info_hash, bytes); + fib_hash_free(old_laddrhash, bytes); } struct fib_info * -- cgit v1.2.3 From dcc365d8f28d6a2332fa37e64d669858a8d017e8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 6 Aug 2005 12:36:42 +0200 Subject: [Bluetooth] Revert session reference counting fix The fix for the reference counting problem of the signal DLC introduced a race condition which leads to an oops. The reason for it is not yet fully understood, so revert this fix, because the reference counting problem is not crashing the RFCOMM layer and its appearance is rare.
Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e9e6fda66f1..27bf5047cd3 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -389,8 +389,6 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) rfcomm_dlc_unlock(d); skb_queue_purge(&d->tx_queue); - rfcomm_session_put(s); - rfcomm_dlc_unlink(d); } @@ -600,8 +598,6 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst goto failed; } - rfcomm_session_hold(s); - s->initiator = 1; bacpy(&addr.l2_bdaddr, dst); -- cgit v1.2.3 From 66e8b6c31b9254243afaac8af4135e84e11dd38e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 6 Aug 2005 12:36:51 +0200 Subject: [Bluetooth] Remove unused functions and cleanup symbol exports This patch removes the unused bt_dump() function and it also removes its BT_DMP macro. It also unexports the hci_dev_get(), hci_send_cmd() and hci_si_event() functions. Signed-off-by: Adrian Bunk Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 2 -- net/bluetooth/hci_event.c | 1 - net/bluetooth/lib.c | 25 ------------------------- 3 files changed, 28 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fb5524365bc..ffa26c10bfe 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -299,7 +299,6 @@ struct hci_dev *hci_dev_get(int index) read_unlock(&hci_dev_list_lock); return hdev; } -EXPORT_SYMBOL(hci_dev_get); /* ---- Inquiry support ---- */ static void inquiry_cache_flush(struct hci_dev *hdev) @@ -1042,7 +1041,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p return 0; } -EXPORT_SYMBOL(hci_send_cmd); /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c4b592b4ef1..26050d13fb4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1040,4 +1040,3 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) hci_send_to_sock(hdev, skb); kfree_skb(skb); } -EXPORT_SYMBOL(hci_si_event); diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index 9efb0a09361..ee6a6697991 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -34,31 +34,6 @@ #include -void bt_dump(char *pref, __u8 *buf, int count) -{ - char *ptr; - char line[100]; - unsigned int i; - - printk(KERN_INFO "%s: dump, len %d\n", pref, count); - - ptr = line; - *ptr = 0; - for (i = 0; i < count; i++) { - ptr += sprintf(ptr, " %2.2X", buf[i]); - - if (i && !((i + 1) % 20)) { - printk(KERN_INFO "%s:%s\n", pref, line); - ptr = line; - *ptr = 0; - } - } - - if (line[0]) - printk(KERN_INFO "%s:%s\n", pref, line); -} -EXPORT_SYMBOL(bt_dump); - void baswap(bdaddr_t *dst, bdaddr_t *src) { unsigned char *d = (unsigned char *) dst; -- cgit v1.2.3 From 576c7d858f36cab6110b29db7b53964d5132cf30 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 6 Aug 2005 12:36:54 +0200 Subject: [Bluetooth] Add direction and timestamp to stack internal events This patch changes the direction to incoming and adds the timestamp to all stack internal events. 
Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 26050d13fb4..46367bd129c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1035,6 +1035,9 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) ev->type = type; memcpy(ev->data, data, dlen); + bt_cb(skb)->incoming = 1; + do_gettimeofday(&skb->stamp); + skb->pkt_type = HCI_EVENT_PKT; skb->dev = (void *) hdev; hci_send_to_sock(hdev, skb); -- cgit v1.2.3 From 6fc0b4a7a73a81e74d0004732df358f4f9975be2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 6 Aug 2005 06:33:15 -0700 Subject: [IPSEC]: Restrict socket policy loading to CAP_NET_ADMIN. The interface needs much redesigning if we wish to allow normal users to do this in some way. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 3 +++ net/ipv6/ipv6_sockglue.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index fc7c481d0d7..ff4bd067b39 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -848,6 +848,9 @@ mc_msf_out: case IP_IPSEC_POLICY: case IP_XFRM_POLICY: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + break; err = xfrm_user_policy(sk, optname, optval, optlen); break; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index f3ef4c38d31..3bc144a79fa 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -504,6 +504,9 @@ done: break; case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: + retv = -EPERM; + if (!capable(CAP_NET_ADMIN)) + break; retv = xfrm_user_policy(sk, optname, optval, optlen); break; -- cgit v1.2.3 From 8b83bc77bf77cc8459cb94e52b08e775104c4c48 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 8 Aug 2005 11:50:55 +0200 Subject: [PATCH] don't try to do any NAT on untracked connections With the introduction of 'rustynat' in 2.6.11, the old tricks of preventing NAT of 'untracked' connections (e.g. NOTRACK target in 'raw' table) are no longer sufficient. The ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK effectively prevents iteration of the 'nat' table, but doesn't prevent nat_packet() to be executed. Since nr_manips is gone in 'rustynat', nat_packet() now implicitly thinks that it has to do NAT on the packet. This patch fixes that problem by explicitly checking for ip_conntrack_untracked in ip_nat_fn(). Signed-off-by: Harald Welte Signed-off-by: Linus Torvalds --- net/ipv4/netfilter/ip_nat_standalone.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index bc59d0d6e89..91d5ea1dbbc 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -102,6 +102,10 @@ ip_nat_fn(unsigned int hooknum, return NF_ACCEPT; } + /* Don't try to NAT if this packet is not conntracked */ + if (ct == &ip_conntrack_untracked) + return NF_ACCEPT; + switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: -- cgit v1.2.3 From ca9334523c853e407da7b3a0bd02f54d0fa59414 Mon Sep 17 00:00:00 2001 From: Heikki Orsila Date: Mon, 8 Aug 2005 14:26:52 -0700 Subject: [IPV4]: Debug cleanup Here's a small patch to cleanup NETDEBUG() use in net/ipv4/ for Linux kernel 2.6.13-rc5. Also weird use of indentation is changed in some places. Signed-off-by: Heikki Orsila Signed-off-by: David S. 
Miller --- net/ipv4/icmp.c | 3 +-- net/ipv4/ip_fragment.c | 8 +++----- net/ipv4/tcp_ipv4.c | 14 ++++++-------- net/ipv4/udp.c | 34 ++++++++++++++++------------------ 4 files changed, 26 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 279f57abfec..3d78464f64e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -936,8 +936,7 @@ int icmp_rcv(struct sk_buff *skb) case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "icmp v4 hw csum failure\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "icmp v4 hw csum failure\n")); case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) goto error; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7f68e27eb4e..eb377ae1530 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) return ip_frag_intern(hash, qp); out_nomem: - NETDEBUG(if (net_ratelimit()) printk(KERN_ERR "ip_frag_create: no memory left !\n")); + LIMIT_NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n")); return NULL; } @@ -625,10 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) return head; out_nomem: - NETDEBUG(if (net_ratelimit()) - printk(KERN_ERR - "IP: queue_glue: no memory for gluing queue %p\n", - qp)); + LIMIT_NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " + "queue %p\n", qp)); goto out_fail; out_oversize: if (net_ratelimit()) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 62f62bb05c2..5d91213d34c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1494,12 +1494,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - NETDEBUG(if (net_ratelimit()) \ - printk(KERN_DEBUG "TCP: drop open " - "request from %u.%u." - "%u.%u/%u\n", \ - NIPQUAD(saddr), - ntohs(skb->h.th->source))); + LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open " + "request from %u.%u." + "%u.%u/%u\n", + NIPQUAD(saddr), + ntohs(skb->h.th->source))); dst_release(dst); goto drop_and_free; } @@ -1627,8 +1626,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) skb->nh.iph->daddr, skb->csum)) return 0; - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "hw tcp v4 csum failed\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n")); skb->ip_summed = CHECKSUM_NONE; } if (skb->len <= 76) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7c24e64b443..dc4d07357e3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -628,7 +628,7 @@ back_from_confirm: /* ... which is an evident application bug. 
--ANK */ release_sock(sk); - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); err = -EINVAL; goto out; } @@ -693,7 +693,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 3\n")); return -EINVAL; } @@ -1102,7 +1102,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, skb->ip_summed = CHECKSUM_UNNECESSARY; if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) return 0; - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v4 hw csum failure.\n")); skb->ip_summed = CHECKSUM_NONE; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -1181,14 +1181,13 @@ int udp_rcv(struct sk_buff *skb) return(0); short_packet: - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - len, - NIPQUAD(daddr), - ntohs(uh->dest))); + LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + len, + NIPQUAD(daddr), + ntohs(uh->dest))); no_header: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); @@ -1199,13 +1198,12 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen)); + LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen)); drop: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); -- cgit v1.2.3 From 3501466941347f0e1992b2672affb3feb92925fd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 14:57:12 -0700 Subject: [SUNRPC]: Fix nsec --> usec conversion. We need to divide, not multiply. While we're here, use NSEC_PER_USEC instead of a magic constant. Based upon a report from Josip Loncaric and a patch by Andrew Morton. Signed-off-by: David S. Miller --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 56db8f13e6c..d0c3120d023 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -586,7 +586,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) } if (skb->stamp.tv_sec == 0) { skb->stamp.tv_sec = xtime.tv_sec; - skb->stamp.tv_usec = xtime.tv_nsec * 1000; + skb->stamp.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } -- cgit v1.2.3 From d64d3873721cfe870d49d73c3744f06260779ce7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Aug 2005 15:29:19 -0700 Subject: [NET]: Fix memory leak in sys_{send,recv}msg() w/compat From: Dave Johnson sendmsg()/recvmsg() syscalls from o32/n32 apps to a 64bit kernel will cause a kernel memory leak if iov_len > UIO_FASTIOV for each syscall! This is because both sys_sendmsg() and verify_compat_iovec() kmalloc a new iovec structure. Only the one from sys_sendmsg() is freed.
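Reduced to a minimal userspace sketch, the pattern looks like this — the names are hypothetical stand-ins for sys_sendmsg() and verify_compat_iovec(); both functions allocate, but only the caller's own pointer is ever freed:

/* userspace sketch of the leak pattern, NOT the kernel code itself */
#include <stdlib.h>

#define UIO_FASTIOV 8

struct my_iovec { void *iov_base; size_t iov_len; };
struct my_msghdr { struct my_iovec *msg_iov; size_t msg_iovlen; };

/* stands in for verify_compat_iovec(): ignores the buffer it was
 * handed and allocates its own copy into msg->msg_iov */
static int helper_verify(struct my_msghdr *msg, struct my_iovec *iov)
{
    if (msg->msg_iovlen > UIO_FASTIOV) {
        iov = malloc(msg->msg_iovlen * sizeof(*iov)); /* 2nd alloc */
        if (!iov)
            return -1;
    }
    /* ... convert the user iovecs into iov ... */
    msg->msg_iov = iov;
    return 0;
}

/* stands in for sys_sendmsg() */
int do_sendmsg(struct my_msghdr *msg)
{
    struct my_iovec fast[UIO_FASTIOV], *iov = fast;
    int err;

    if (msg->msg_iovlen > UIO_FASTIOV) {
        iov = malloc(msg->msg_iovlen * sizeof(*iov)); /* 1st alloc */
        if (!iov)
            return -1;
    }
    err = helper_verify(msg, iov);
    /* ... send using msg->msg_iov ... */
    if (iov != fast)
        free(iov); /* frees only the 1st block; the helper's block
                    * in msg->msg_iov is never freed: the leak */
    return err;
}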
I wrote a simple test program to confirm this after identifying the problem: http://davej.org/programs/testsendmsg.c Note that the below fix will break solaris_sendmsg()/solaris_recvmsg() as it also calls verify_compat_iovec() but expects it to malloc internally. [ I fixed that. -DaveM ] Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/compat.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index be5d936dc42..d99ab969589 100644 --- a/net/compat.c +++ b/net/compat.c @@ -91,20 +91,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, } else kern_msg->msg_name = NULL; - if(kern_msg->msg_iovlen > UIO_FASTIOV) { - kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec), - GFP_KERNEL); - if(!kern_iov) - return -ENOMEM; - } - tot_len = iov_from_user_compat_to_kern(kern_iov, (struct compat_iovec __user *)kern_msg->msg_iov, kern_msg->msg_iovlen); if(tot_len >= 0) kern_msg->msg_iov = kern_iov; - else if(kern_msg->msg_iovlen > UIO_FASTIOV) - kfree(kern_iov); return tot_len; } -- cgit v1.2.3 From 001ab02a8c04f0b4dc773c474da698ad7405ae68 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 10 Aug 2005 11:32:57 -0700 Subject: [DECNET]: Use sk_stream_error function rather than DECnet's own Signed-off-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 96a02800cd2..acdd18e6adb 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1876,15 +1876,6 @@ static inline unsigned int dn_current_mss(struct sock *sk, int flags) return mss_now; } -static int dn_error(struct sock *sk, int flags, int err) -{ - if (err == -EPIPE) - err = sock_error(sk) ? : -EPIPE; - if (err == -EPIPE && !(flags & MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); - return err; -} - static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { @@ -2045,7 +2036,7 @@ out: return sent ? sent : err; out_err: - err = dn_error(sk, flags, err); + err = sk_stream_error(sk, flags, err); release_sock(sk); return err; } -- cgit v1.2.3 From b5da623ae9be680ea59f268eeb339f0acb2d88c4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 10 Aug 2005 18:32:36 -0700 Subject: [TCP]: Adjust {p,f}ackets_out correctly in tcp_retransmit_skb() Well I've only found one potential cause for the assertion failure in tcp_mark_head_lost. First of all, this can only occur if cnt > 1 since tp->packets_out is never zero here. If it did hit zero we'd have much bigger problems. So cnt is equal to fackets_out - reordering. Normally fackets_out is less than packets_out. The only reason I've found that might cause fackets_out to exceed packets_out is if tcp_fragment is called from tcp_retransmit_skb with a TSO skb and the current MSS is greater than the MSS stored in the TSO skb. This might occur as the result of an expiring dst entry. In that case, packets_out may decrease (line 1380-1381 in tcp_output.c). However, fackets_out is unchanged which means that it may in fact exceed packets_out. Previously tcp_retrans_try_collapse was the only place where packets_out can go down and it takes care of this by decrementing fackets_out. So we should make sure that fackets_out is reduced by an appropriate amount here as well. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d076f0db10..3ed6fc15815 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1370,15 +1370,21 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > cur_mss) { int old_factor = tcp_skb_pcount(skb); - int new_factor; + int diff; if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ /* New SKB created, account for it. */ - new_factor = tcp_skb_pcount(skb); - tp->packets_out -= old_factor - new_factor; - tp->packets_out += tcp_skb_pcount(skb->next); + diff = old_factor - tcp_skb_pcount(skb) - + tcp_skb_pcount(skb->next); + tp->packets_out -= diff; + + if (diff > 0) { + tp->fackets_out -= diff; + if ((int)tp->fackets_out < 0) + tp->fackets_out = 0; + } } /* Collapse two adjacent packets if worthwhile and we can. */ -- cgit v1.2.3 From 11513128bb66b0b09d5d0df069b58afdb01752a2 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 11 Aug 2005 19:23:04 -0700 Subject: [NETPOLL]: rx_flags bugfix Initialize npinfo->rx_flags. The way it stands now, this will have random garbage, and so will incur a locking penalty even when an rx_hook isn't registered and we are not active in the netpoll polling code. Signed-off-by: Jeff Moyer Signed-off-by: Matt Mackall Signed-off-by: David S. Miller --- net/core/netpoll.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c327c9edadc..895f3efc65a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -639,6 +639,7 @@ int netpoll_setup(struct netpoll *np) if (!npinfo) goto release; + npinfo->rx_flags = 0; npinfo->rx_np = NULL; npinfo->poll_lock = SPIN_LOCK_UNLOCKED; npinfo->poll_owner = -1; -- cgit v1.2.3 From a636e1357911afdea7c8344ee65f78d36caf3c16 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 11 Aug 2005 19:23:50 -0700 Subject: [NETPOLL]: deadlock bugfix This fixes an obvious deadlock in the netpoll code. netpoll_rx takes the npinfo->rx_lock. netpoll_rx is also the only caller of arp_reply (through __netpoll_rx). As such, it is not necessary to take this lock. Signed-off-by: Jeff Moyer Signed-off-by: Matt Mackall Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 895f3efc65a..b9d9da082af 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -353,11 +353,8 @@ static void arp_reply(struct sk_buff *skb) struct sk_buff *send_skb; struct netpoll *np = NULL; - spin_lock_irqsave(&npinfo->rx_lock, flags); if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) np = npinfo->rx_np; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (!np) return; -- cgit v1.2.3 From f0d3459d0722782c7d9d0e35a1ed0815e75fcde5 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 11 Aug 2005 19:25:11 -0700 Subject: [NETPOLL]: netpoll_send_skb simplify Minor netpoll_send_skb restructuring Restructure to avoid confusing goto and move some bits out of the retry loop. Signed-off-by: Matt Mackall Signed-off-by: David S. 
Miller --- net/core/netpoll.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b9d9da082af..59ed186e4f4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -248,14 +248,14 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) int status; struct netpoll_info *npinfo; -repeat: - if(!np || !np->dev || !netif_running(np->dev)) { + if (!np || !np->dev || !netif_running(np->dev)) { __kfree_skb(skb); return; } - /* avoid recursion */ npinfo = np->dev->npinfo; + + /* avoid recursion */ if (npinfo->poll_owner == smp_processor_id() || np->dev->xmit_lock_owner == smp_processor_id()) { if (np->drop) @@ -265,29 +265,31 @@ repeat: return; } - spin_lock(&np->dev->xmit_lock); - np->dev->xmit_lock_owner = smp_processor_id(); + while (1) { + spin_lock(&np->dev->xmit_lock); + np->dev->xmit_lock_owner = smp_processor_id(); - /* - * network drivers do not expect to be called if the queue is - * stopped. - */ - if (netif_queue_stopped(np->dev)) { + /* + * network drivers do not expect to be called if the queue is + * stopped. + */ + if (netif_queue_stopped(np->dev)) { + np->dev->xmit_lock_owner = -1; + spin_unlock(&np->dev->xmit_lock); + netpoll_poll(np); + continue; + } + + status = np->dev->hard_start_xmit(skb, np->dev); np->dev->xmit_lock_owner = -1; spin_unlock(&np->dev->xmit_lock); - netpoll_poll(np); - goto repeat; - } - - status = np->dev->hard_start_xmit(skb, np->dev); - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); + /* success */ + if(!status) + return; - /* transmit busy */ - if(status) { + /* transmit busy */ netpoll_poll(np); - goto repeat; } } -- cgit v1.2.3 From 0db1d6fc1ea051af49ebe03c503d23996a7c5bbb Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 11 Aug 2005 19:25:54 -0700 Subject: [NETPOLL]: add retry timeout Add limited retry logic to netpoll_send_skb Each time we attempt to send, decrement our per-device retry counter. On every successful send, we reset the counter. We delay 50us between attempts with up to 20000 retries for a total of 1 second. After we've exhausted our retries, subsequent failed attempts will try only once until reset by success. Signed-off-by: Matt Mackall Signed-off-by: David S. 
Miller --- net/core/netpoll.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 59ed186e4f4..d09affdbad3 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -33,6 +33,7 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) +#define MAX_RETRIES 20000 static DEFINE_SPINLOCK(skb_list_lock); static int nr_skbs; @@ -265,7 +266,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) return; } - while (1) { + do { + npinfo->tries--; spin_lock(&np->dev->xmit_lock); np->dev->xmit_lock_owner = smp_processor_id(); @@ -277,6 +279,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) np->dev->xmit_lock_owner = -1; spin_unlock(&np->dev->xmit_lock); netpoll_poll(np); + udelay(50); continue; } @@ -285,12 +288,15 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) spin_unlock(&np->dev->xmit_lock); /* success */ - if(!status) + if(!status) { + npinfo->tries = MAX_RETRIES; /* reset */ return; + } /* transmit busy */ netpoll_poll(np); - } + udelay(50); + } while (npinfo->tries > 0); } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -642,6 +648,7 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_np = NULL; npinfo->poll_lock = SPIN_LOCK_UNLOCKED; npinfo->poll_owner = -1; + npinfo->tries = MAX_RETRIES; npinfo->rx_lock = SPIN_LOCK_UNLOCKED; } else npinfo = ndev->npinfo; -- cgit v1.2.3 From 2652076507b662fc88ba16c27b59c7bdd9ccd956 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Aug 2005 19:26:42 -0700 Subject: [NETPOLL]: pre-fill skb pool We could do one thing (see the patch below): I think it would be useful to fill up the netlogging skb queue straight at initialization time. Especially if netpoll is used for dumping alone, the system might not be in a situation to fill up the queue at the point of crash, so better be a bit more prepared and keep the pipeline filled. [ I've modified this to be called earlier - mpm ] Signed-off-by: Ingo Molnar Signed-off-by: Matt Mackall Signed-off-by: David S. Miller --- net/core/netpoll.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d09affdbad3..c02a08da6d4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -725,6 +725,10 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_np = np; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + + /* fill up the skb queue */ + refill_skbs(); + /* last thing to do is link it to the net device structure */ ndev->npinfo = npinfo; -- cgit v1.2.3 From 53fb95d3c14290fd6ee808b221e35493f096246f Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 11 Aug 2005 19:27:43 -0700 Subject: [NETPOLL]: fix initialization/NAPI race This fixes a race during initialization with the NAPI softirq processing by using an RCU approach. This race was discovered when refill_skbs() was added to the setup code. Signed-off-by: Matt Mackall Signed-off-by: David S.
Miller --- net/core/dev.c | 9 +++++---- net/core/netpoll.c | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 52a3bf7ae17..faf59b02c4b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1696,7 +1696,8 @@ static void net_rx_action(struct softirq_action *h) struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; int budget = netdev_budget; - + void *have; + local_irq_disable(); while (!list_empty(&queue->poll_list)) { @@ -1709,10 +1710,10 @@ static void net_rx_action(struct softirq_action *h) dev = list_entry(queue->poll_list.next, struct net_device, poll_list); - netpoll_poll_lock(dev); + have = netpoll_poll_lock(dev); if (dev->quota <= 0 || dev->poll(dev, &budget)) { - netpoll_poll_unlock(dev); + netpoll_poll_unlock(have); local_irq_disable(); list_del(&dev->poll_list); list_add_tail(&dev->poll_list, &queue->poll_list); @@ -1721,7 +1722,7 @@ static void net_rx_action(struct softirq_action *h) else dev->quota = dev->weight; } else { - netpoll_poll_unlock(dev); + netpoll_poll_unlock(have); dev_put(dev); local_irq_disable(); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c02a08da6d4..996787bca17 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -732,6 +732,9 @@ int netpoll_setup(struct netpoll *np) /* last thing to do is link it to the net device structure */ ndev->npinfo = npinfo; + /* avoid racing with NAPI reading npinfo */ + synchronize_rcu(); + return 0; release: -- cgit v1.2.3 From d7b9dfc8ea43936e6e8eec3040dcf4f110563868 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 11 Aug 2005 19:28:05 -0700 Subject: [NETPOLL]: remove unused variable Remove unused variable Signed-off-by: Matt Mackall Signed-off-by: David S. Miller --- net/core/netpoll.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 996787bca17..a1a9a7abff5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -357,7 +357,6 @@ static void arp_reply(struct sk_buff *skb) unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; u32 sip, tip; - unsigned long flags; struct sk_buff *send_skb; struct netpoll *np = NULL; -- cgit v1.2.3
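The retry discipline the netpoll patches above add to netpoll_send_skb() — a bounded retry budget, a short delay between attempts, and a reset only on success — reduces to the following generic pattern. This is a minimal userspace sketch with illustrative names; try_send() is a stub, not a kernel API:

#include <stdio.h>
#include <unistd.h>

#define MAX_RETRIES     20000
#define RETRY_DELAY_US  50      /* 20000 * 50us is roughly 1s worst case */

static int tries = MAX_RETRIES; /* stands in for npinfo->tries */

/* stub transmit routine: reports "busy" twice, then succeeds */
static int try_send(void)
{
    static int calls;
    return ++calls <= 2;
}

static int send_with_retry(void)
{
    do {
        tries--;
        if (!try_send()) {
            tries = MAX_RETRIES;    /* success: reset the budget */
            return 0;
        }
        usleep(RETRY_DELAY_US);     /* transmit busy: brief delay */
    } while (tries > 0);

    /* budget exhausted: each later call gets a single attempt
     * until one succeeds and refills the counter */
    return -1;
}

int main(void)
{
    printf("send %s\n", send_with_retry() ? "failed" : "ok");
    return 0;
}

As the commit text notes, the worst case is about one second of retrying; after that, subsequent failed attempts try only once until a success resets the counter.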