From 33cf71cee14743185305c61625c4544885055733 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Fri, 21 Nov 2008 16:42:58 -0800 Subject: tcp: Do not use TSO/GSO when there is urgent data This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=12014 Since most (if not all) implementations of TSO and even the in-kernel software GSO do not update the urgent pointer when splitting a large segment, it is necessary to turn off TSO/GSO for all outgoing traffic with the URG pointer set. Looking at tcp_current_mss (and the preceding comment) I even think this was the original intention. However, this approach is insufficient, because TSO/GSO is turned off only for newly created frames, not for frames which were already pending at the arrival of a message with MSG_OOB set. These frames were created when TSO/GSO was enabled, so they may be large, and they will have the urgent pointer set in tcp_transmit_skb(). With this patch, such large packets will be fragmented again before going to the transmit routine. As a side note, at least the following NICs are known to screw up the urgent pointer in the TCP header when doing TSO: Intel 82566MM (PCI ID 8086:1049) Intel 82566DC (PCI ID 8086:104b) Intel 82541GI (PCI ID 8086:1076) Broadcom NetXtreme II BCM5708 (PCI ID 14e4:164c) Signed-off-by: Petr Tesarik Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba85d883189..85b07eba187 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -722,7 +722,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + tcp_urg_mode(tcp_sk(sk))) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1163,7 +1164,9 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { + if (!tso_segs || + (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || + tcp_urg_mode(tcp_sk(sk))))) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } -- cgit v1.2.3 From 7e56b5d698707a9934833c47b24d78fb0bcaf764 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 21 Nov 2008 16:45:22 -0800 Subject: net: Fix memory leak in the proto_register function If the slub allocator is used, kmem_cache_create() may merge two or more kmem_cache's into one but the cache name pointer is not updated and kmem_cache_name() is no longer guaranteed to return the pointer passed to the former function. This patch stores the kmalloc'ed pointers in the corresponding request_sock_ops and timewait_sock_ops structures. Signed-off-by: Catalin Marinas Acked-by: Arnaldo Carvalho de Melo Reviewed-by: Christoph Lameter Signed-off-by: David S. Miller --- net/core/sock.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 341e3945695..edf7220889a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2035,9 +2035,6 @@ static inline void release_proto_idx(struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name = NULL; - char *timewait_sock_slab_name; - if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2051,12 +2048,12 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->rsk_prot != NULL) { static const char mask[] = "request_sock_%s"; - request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (request_sock_slab_name == NULL) + prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + if (prot->rsk_prot->slab_name == NULL) goto out_free_sock_slab; - sprintf(request_sock_slab_name, mask, prot->name); - prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, + sprintf(prot->rsk_prot->slab_name, mask, prot->name); + prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, prot->rsk_prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2070,14 +2067,14 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->twsk_prot != NULL) { static const char mask[] = "tw_sock_%s"; - timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (timewait_sock_slab_name == NULL) + if (prot->twsk_prot->twsk_slab_name == NULL) goto out_free_request_sock_slab; - sprintf(timewait_sock_slab_name, mask, prot->name); + sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name); prot->twsk_prot->twsk_slab = - kmem_cache_create(timewait_sock_slab_name, + kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2093,14 +2090,14 @@ int proto_register(struct proto *prot, int alloc_slab) return 0; out_free_timewait_sock_slab_name: - kfree(timewait_sock_slab_name); + kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: if (prot->rsk_prot && prot->rsk_prot->slab) { kmem_cache_destroy(prot->rsk_prot->slab); prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(request_sock_slab_name); + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; @@ -2123,18 +2120,14 @@ void proto_unregister(struct proto *prot) } if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { - const char *name = kmem_cache_name(prot->rsk_prot->slab); - kmem_cache_destroy(prot->rsk_prot->slab); - kfree(name); + kfree(prot->rsk_prot->slab_name); prot->rsk_prot->slab = NULL; } if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(name); + kfree(prot->twsk_prot->twsk_slab_name); prot->twsk_prot->twsk_slab = NULL; } } -- cgit v1.2.3 From 2da2c21d7508d34bc6d600df665d84871b65d2b9 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Sun, 23 Nov 2008 09:58:08 -0600 Subject: Add a reference to sunrpc in svc_addsock The svc_addsock function adds transport instances without taking a reference on the sunrpc.ko module, however, the generic transport destruction code drops a reference when a transport instance is destroyed. Add a try_module_get call to the svc_addsock function for transport instances added by this function. Signed-off-by: Tom Tucker Signed-off-by: J. Bruce Fields Tested-by: Jeff Moyer --- net/sunrpc/svcsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 95293f549e9..a1951dcc577 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1183,7 +1183,11 @@ int svc_addsock(struct svc_serv *serv, else if (so->state > SS_UNCONNECTED) err = -EISCONN; else { - svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); + if (!try_module_get(THIS_MODULE)) + err = -ENOENT; + else + svsk = svc_setup_socket(serv, so, &err, + SVC_SOCK_DEFAULTS); if (svsk) { struct sockaddr_storage addr; struct sockaddr *sin = (struct sockaddr *)&addr; @@ -1196,7 +1200,8 @@ int svc_addsock(struct svc_serv *serv, spin_unlock_bh(&serv->sv_lock); svc_xprt_received(&svsk->sk_xprt); err = 0; - } + } else + module_put(THIS_MODULE); } if (err) { sockfd_put(so); -- cgit v1.2.3 From b54ad409fd09a395b839fb81f300880d76861c0e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 24 Nov 2008 15:56:17 -0800 Subject: netfilter: ctnetlink: fix conntrack creation race Conntrack creation through ctnetlink has two races: - the timer may expire and free the conntrack concurrently, causing an invalid memory access when attempting to put it in the hash tables - an identical conntrack entry may be created in the packet processing path in the time between the lookup and hash insertion Hold the conntrack lock between the lookup and insertion to avoid this. Reported-by: Zoltan Borbely Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 2 -- net/netfilter/nf_conntrack_netlink.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 622d7c671cb..233fdd2d7d2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -305,9 +305,7 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - spin_lock_bh(&nf_conntrack_lock); __nf_conntrack_hash_insert(ct, hash, repl_hash); - spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a040d46f85d..3b009a3e854 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1090,7 +1090,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; @@ -1212,13 +1212,14 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, atomic_inc(&master_ct->ct_general.use); } - spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_conntrack(cda, &otuple, &rtuple, master_ct); + spin_unlock_bh(&nf_conntrack_lock); + if (err < 0 && master_ct) nf_ct_put(master_ct); -- cgit v1.2.3 From 631339f1e544a4d39a63cfe6708c5bddcd5a2c48 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Nov 2008 16:06:50 -0800 Subject: bridge: netfilter: fix update_pmtu crash with GRE As GRE tries to call the update_pmtu function on skb->dst and bridge supplies an skb->dst that has a NULL ops field, all is not well. This patch fixes this by giving the bridge device an ops field with an update_pmtu function. For the moment I've left all other fields blank but we can fill them in later should the need arise. Based on report and patch by Philip Craig. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fa5cda4e552..45f61c348e3 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -101,6 +101,18 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +{ +} + +static struct dst_ops fake_dst_ops = { + .family = AF_INET, + .protocol = __constant_htons(ETH_P_IP), + .update_pmtu = fake_update_pmtu, + .entry_size = sizeof(struct rtable), + .entries = ATOMIC_INIT(0), +}; + /* * Initialize bogus route table used to keep netfilter happy. * Currently, we fill in the PMTU entry because netfilter @@ -117,6 +129,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->u.dst.path = &rt->u.dst; rt->u.dst.metrics[RTAX_MTU - 1] = 1500; rt->u.dst.flags = DST_NOXFRM; + rt->u.dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) -- cgit v1.2.3 From 244f46ae6e9e18f6fc0be7d1f49febde4762c34b Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Mon, 24 Nov 2008 11:49:40 +0000 Subject: rose: zero length frame filtering in af_rose.c Since changeset e79ad711a0108475c1b3a03815527e7237020b08 from mainline, >From David S. Miller, empty packet can be transmitted on connected socket for datagram protocols. However, this patch broke a high level application using ROSE network protocol with connected datagram. Bulletin Board Stations perform bulletins forwarding between BBS stations via ROSE network using a forward protocol. Now, if for some reason, a buffer in the application software happens to be empty at a specific moment, ROSE sends an empty packet via unfiltered packet socket. When received, this ROSE packet introduces perturbations of data exchange of BBS forwarding, for the application message forwarding protocol is waiting for something else. We agree that a more careful programming of the application protocol would avoid this situation and we are willing to debug it. But, as an empty frame is no use and does not have any meaning for ROSE protocol, we may consider filtering zero length data both when sending and receiving socket data. The proposed patch repaired BBS data exchange through ROSE network that were broken since 2.6.22.11 kernel. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/rose/af_rose.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index a7f1ce11bc2..0c1cc761280 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1072,6 +1072,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int n, size, qbit = 0; + /* ROSE empty frame has no meaning : don't send */ + if (len == 0) + return 0; + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1265,6 +1269,12 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; + /* ROSE empty frame has no meaning : ignore it */ + if (copied == 0) { + skb_free_datagram(sk, skb); + return copied; + } + if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From 3dd3b79aeadc6f6abc5cc78724d7df3dfcc1bd0b Mon Sep 17 00:00:00 2001 From: Abhijeet Kolekar Date: Thu, 20 Nov 2008 10:20:31 -0800 Subject: mac80211 : Fix setting ad-hoc mode and non-ibss channel Patch fixes the kernel trace when user tries to set ad-hoc mode on non IBSS channel. e.g iwconfig wlan0 chan 36 mode ad-hoc Signed-off-by: Abhijeet Kolekar Signed-off-by: John W. Linville --- net/mac80211/wext.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 742f811ca41..ab4ddba874b 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -271,6 +271,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, __u32 *mode, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int type; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -281,6 +282,13 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: + /* Setting ad-hoc mode on non ibss channel is not + * supported. + */ + if (local->oper_channel && + (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS)) + return -EOPNOTSUPP; + type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_REPEAT: -- cgit v1.2.3 From 020cf6ba7a91ccc5db359f91e9abba175fd3a0aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 20:09:54 +0100 Subject: net/wireless/reg.c: fix bad WARN_ON in if statement fix: net/wireless/reg.c:348:29: error: macro "if" passed 2 arguments, but takes just 1 triggered by the branch-tracer. Signed-off-by: Ingo Molnar Signed-off-by: John W. Linville --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 626dbb68849..eb3b1a9f9b1 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -343,9 +343,9 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return 0; return -EALREADY; } - if (WARN_ON(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2)), + if (WARN(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2), "Invalid Country IE regulatory hint passed " - "to the wireless core\n") + "to the wireless core\n")) return -EINVAL; /* We ignore Country IE hints for now, as we haven't yet * added the dot11MultiDomainCapabilityEnabled flag -- cgit v1.2.3 From 8f480c0e4e120911a673ed7385359bf76ae01963 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 25 Nov 2008 21:08:13 -0800 Subject: net: make skb_truesize_bug() call WARN() The truesize message check is important enough to make it print "BUG" to the user console... lets also make it important enough to spit a backtrace/module list etc so that kerneloops.org can track them. Signed-off-by: Arjan van de Ven Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d49ef8301b5..65f7757465b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -149,7 +149,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) void skb_truesize_bug(struct sk_buff *skb) { - printk(KERN_ERR "SKB BUG: Invalid truesize (%u) " + WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) " "len=%u, sizeof(sk_buff)=%Zd\n", skb->truesize, skb->len, sizeof(struct sk_buff)); } -- cgit v1.2.3 From 3ec192559033ed457f0d7856838654c100fc659f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 26 Nov 2008 03:57:44 -0800 Subject: netfilter: ctnetlink: fix GFP_KERNEL allocation under spinlock The previous fix for the conntrack creation race (netfilter: ctnetlink: fix conntrack creation race) missed a GFP_KERNEL allocation that is now performed while holding a spinlock. Switch to GFP_ATOMIC. Reported-and-tested-by: Zoltan Borbely Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3b009a3e854..5f4a6516b3b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1138,7 +1138,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } } - nf_ct_acct_ext_add(ct, GFP_KERNEL); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) -- cgit v1.2.3 From 7e5ab54296a41f590c9cbc976e1c86272f3a7e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 26 Nov 2008 15:26:43 -0800 Subject: Phonet: fix oops in phonet_address_del() on non-Phonet device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A NULL dereference would occur when trying to delete an addres from a network device that does not have any Phonet address. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 53be9fc82aa..f93ff8ef47d 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -115,7 +115,7 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) err = -EADDRNOTAVAIL; - if (bitmap_empty(pnd->addrs, 64)) + else if (bitmap_empty(pnd->addrs, 64)) __phonet_device_free(pnd); spin_unlock_bh(&pndevs.lock); return err; -- cgit v1.2.3 From 5f23b734963ec7eaa3ebcd9050da0c9b7d143dd3 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Wed, 26 Nov 2008 15:32:27 -0800 Subject: net: Fix soft lockups/OOM issues w/ unix garbage collector This is an implementation of David Miller's suggested fix in: https://bugzilla.redhat.com/show_bug.cgi?id=470201 It has been updated to use wait_event() instead of wait_event_interruptible(). Paraphrasing the description from the above report, it makes sendmsg() block while UNIX garbage collection is in progress. This avoids a situation where child processes continue to queue new FDs over a AF_UNIX socket to a parent which is in the exit path and running garbage collection on these FDs. This contention can result in soft lockups and oom-killing of unrelated processes. Signed-off-by: dann frazier Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 ++ net/unix/garbage.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb90f77bb0e..66d5ac4773a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1343,6 +1343,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; + wait_for_unix_gc(); err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; @@ -1493,6 +1494,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; + wait_for_unix_gc(); err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6d4a9a8de5e..abb3ab34cb1 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -91,6 +92,7 @@ static LIST_HEAD(gc_inflight_list); static LIST_HEAD(gc_candidates); static DEFINE_SPINLOCK(unix_gc_lock); +static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; @@ -266,12 +268,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) list_move_tail(&u->link, &gc_candidates); } -/* The external entry point: unix_gc() */ +static bool gc_in_progress = false; -void unix_gc(void) +void wait_for_unix_gc(void) { - static bool gc_in_progress = false; + wait_event(unix_gc_wait, gc_in_progress == false); +} +/* The external entry point: unix_gc() */ +void unix_gc(void) +{ struct unix_sock *u; struct unix_sock *next; struct sk_buff_head hitlist; @@ -376,6 +382,7 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); gc_in_progress = false; + wake_up(&unix_gc_wait); out: spin_unlock(&unix_gc_lock); -- cgit v1.2.3 From d5654efd3ff1cd0baa935a0c9a5d89862f07d009 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 3 Dec 2008 00:27:18 -0800 Subject: xfrm: Fix kernel panic when flush and dump SPD entries After flush the SPD entries, dump the SPD entries will cause kernel painc. Used the following commands to reproduct: - echo 'spdflush;' | setkey -c - echo 'spdadd 3ffe:501:ffff:ff01::/64 3ffe:501:ffff:ff04::/64 any -P out ipsec \ ah/tunnel/3ffe:501:ffff:ff00:200:ff:fe00:b0b0-3ffe:501:ffff:ff02:200:ff:fe00:a1a1/require;\ spddump;' | setkey -c - echo 'spdflush; spddump;' | setkey -c - echo 'spdadd 3ffe:501:ffff:ff01::/64 3ffe:501:ffff:ff04::/64 any -P out ipsec \ ah/tunnel/3ffe:501:ffff:ff00:200:ff:fe00:b0b0-3ffe:501:ffff:ff02:200:ff:fe00:a1a1/require;\ spddump;' | setkey -c This is because when flush the SPD entries, the SPD entry is not remove from the list. This patch fix the problem by remove the SPD entry from the list. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 058f04f54b9..fb216c9adf8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -817,6 +817,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) continue; hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, -- cgit v1.2.3 From d25830e5507f6bc815f5dd7e2eb65f172e878a2b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 3 Dec 2008 00:37:04 -0800 Subject: netlabel: Fix a potential NULL pointer dereference Fix a potential NULL pointer dereference seen when trying to remove a static label configuration with an invalid address/mask combination. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e8a5c32b0f1..90c8506a0aa 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -574,9 +574,10 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); - if (list_entry == NULL) + if (list_entry != NULL) + entry = netlbl_unlhsh_addr4_entry(list_entry); + else ret_val = -ENOENT; - entry = netlbl_unlhsh_addr4_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -634,9 +635,10 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); - if (list_entry == NULL) + if (list_entry != NULL) + entry = netlbl_unlhsh_addr6_entry(list_entry); + else ret_val = -ENOENT; - entry = netlbl_unlhsh_addr6_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); -- cgit v1.2.3 From bd7df219202f44e71e2e975a0fb5f76f946c1aef Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Mon, 1 Dec 2008 02:37:20 +0000 Subject: Phonet: do not dump addresses from other namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b1770d66bc8..242fe8f8c32 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -123,6 +123,7 @@ nla_put_failure: static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct phonet_device *pnd; int dev_idx = 0, dev_start_idx = cb->args[0]; int addr_idx = 0, addr_start_idx = cb->args[1]; @@ -131,6 +132,8 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(pnd, &pndevs.list, list) { u8 addr; + if (!net_eq(dev_net(pnd->netdev), net)) + continue; if (dev_idx > dev_start_idx) addr_start_idx = 0; if (dev_idx++ < dev_start_idx) -- cgit v1.2.3 From d253eee20195b25e298bf162a6e72f14bf4803e5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 3 Dec 2008 15:52:35 -0800 Subject: can: Fix CAN_(EFF|RTR)_FLAG handling in can_filter Due to a wrong safety check in af_can.c it was not possible to filter for SFF frames with a specific CAN identifier without getting the same selected CAN identifier from a received EFF frame also. This fix has a minimum (but user visible) impact on the CAN filter API and therefore the CAN version is set to a new date. Indeed the 'old' API is still working as-is. But when now setting CAN_(EFF|RTR)_FLAG in can_filter.can_mask you might get less traffic than before - but still the stuff that you expected to get for your defined filter ... Thanks to Kurt Van Dijck for pointing at this issue and for the review. Signed-off-by: Oliver Hartkopp Acked-by: Kurt Van Dijck Signed-off-by: David S. Miller --- net/can/af_can.c | 63 ++++++++++++++++++++++++++++++++++++++++++-------------- net/can/bcm.c | 7 ++++--- 2 files changed, 52 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 7d4d2b3c137..d8173e50cb8 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -319,23 +319,52 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) return n ? d : NULL; } +/** + * find_rcv_list - determine optimal filterlist inside device filter struct + * @can_id: pointer to CAN identifier of a given can_filter + * @mask: pointer to CAN mask of a given can_filter + * @d: pointer to the device filter struct + * + * Description: + * Returns the optimal filterlist to reduce the filter handling in the + * receive path. This function is called by service functions that need + * to register or unregister a can_filter in the filter lists. + * + * A filter matches in general, when + * + * & mask == can_id & mask + * + * so every bit set in the mask (even CAN_EFF_FLAG, CAN_RTR_FLAG) describe + * relevant bits for the filter. + * + * The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can + * filter for error frames (CAN_ERR_FLAG bit set in mask). For error frames + * there is a special filterlist and a special rx path filter handling. + * + * Return: + * Pointer to optimal filterlist for the given can_id/mask pair. + * Constistency checked mask. + * Reduced can_id to have a preprocessed filter compare value. + */ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, struct dev_rcv_lists *d) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ - /* filter error frames */ + /* filter for error frames in extra filterlist */ if (*mask & CAN_ERR_FLAG) { - /* clear CAN_ERR_FLAG in list entry */ + /* clear CAN_ERR_FLAG in filter entry */ *mask &= CAN_ERR_MASK; return &d->rx[RX_ERR]; } - /* ensure valid values in can_mask */ - if (*mask & CAN_EFF_FLAG) - *mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG); - else - *mask &= (CAN_SFF_MASK | CAN_RTR_FLAG); + /* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */ + +#define CAN_EFF_RTR_FLAGS (CAN_EFF_FLAG | CAN_RTR_FLAG) + + /* ensure valid values in can_mask for 'SFF only' frame filtering */ + if ((*mask & CAN_EFF_FLAG) && !(*can_id & CAN_EFF_FLAG)) + *mask &= (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS); /* reduce condition testing at receive time */ *can_id &= *mask; @@ -348,15 +377,19 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, if (!(*mask)) return &d->rx[RX_ALL]; - /* use extra filterset for the subscription of exactly *ONE* can_id */ - if (*can_id & CAN_EFF_FLAG) { - if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) { - /* RFC: a use-case for hash-tables in the future? */ - return &d->rx[RX_EFF]; + /* extra filterlists for the subscription of a single non-RTR can_id */ + if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) + && !(*can_id & CAN_RTR_FLAG)) { + + if (*can_id & CAN_EFF_FLAG) { + if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) { + /* RFC: a future use-case for hash-tables? */ + return &d->rx[RX_EFF]; + } + } else { + if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) + return &d->rx_sff[*can_id]; } - } else { - if (*mask == CAN_SFF_MASK) - return &d->rx_sff[*can_id]; } /* default: filter via can_id/can_mask */ diff --git a/net/can/bcm.c b/net/can/bcm.c index d0dd382001e..da0d426c0ce 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -64,10 +64,11 @@ #define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */ /* get best masking value for can_rx_register() for a given single can_id */ -#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \ - (CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK)) +#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ + (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ + (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -#define CAN_BCM_VERSION "20080415" +#define CAN_BCM_VERSION CAN_VERSION static __initdata const char banner[] = KERN_INFO "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n"; -- cgit v1.2.3 From f8269a495a1924f8b023532dd3e77423432db810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 3 Dec 2008 21:24:48 -0800 Subject: tcp: make urg+gso work for real this time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I should have noticed this earlier... :-) The previous solution to URG+GSO/TSO will cause SACK block tcp_fragment to do zig-zig patterns, or even worse, a steep downward slope into packet counting because each skb pcount would be truncated to pcount of 2 and then the following fragments of the later portion would restore the window again. Basically this reverts "tcp: Do not use TSO/GSO when there is urgent data" (33cf71cee1). It also removes some unnecessary code from tcp_current_mss that didn't work as intented either (could be that something was changed down the road, or it might have been broken since the dawn of time) because it only works once urg is already written while this bug shows up starting from ~64k before the urg point. The retransmissions already are split to mss sized chunks, so only new data sending paths need splitting in case they have a segment otherwise suitable for gso/tso. The actually check can be improved to be more narrow but since this is late -rc already, I'll postpone thinking the more fine-grained things. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 85b07eba187..fe3b4bdfd25 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -722,8 +722,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk) || - tcp_urg_mode(tcp_sk(sk))) { + if (skb->len <= mss_now || !sk_can_gso(sk)) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1029,10 +1028,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. - * - * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up - * cannot be large. However, taking into account rare use of URG, this - * is not a big flaw. */ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) { @@ -1047,7 +1042,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) + if (large_allowed && sk_can_gso(sk)) doing_tso = 1; if (dst) { @@ -1164,9 +1159,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || - (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || - tcp_urg_mode(tcp_sk(sk))))) { + if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } @@ -1519,6 +1512,10 @@ static int tcp_mtu_probe(struct sock *sk) * send_head. This happens as incoming acks open up the remote * window for us. * + * LARGESEND note: !tcp_urg_mode is overkill, only frames between + * snd_up-64k-mss .. snd_up cannot be large. However, taking into + * account rare use of URG, this is not a big flaw. + * * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. */ @@ -1570,7 +1567,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) } limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); @@ -1619,6 +1616,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); unsigned int tso_segs, cwnd_quota; @@ -1633,7 +1631,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!tso_segs); limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); -- cgit v1.2.3 From 17b24b3c97498935a2ef9777370b1151dfed3f6f Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Thu, 4 Dec 2008 14:58:13 -0800 Subject: ATM: CVE-2008-5079: duplicate listen() on socket corrupts the vcc table As reported by Hugo Dias that it is possible to cause a local denial of service attack by calling the svc_listen function twice on the same socket and reading /proc/net/atm/*vc Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/svc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/svc.c b/net/atm/svc.c index de1e4f2f3a4..8fb54dc870b 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -293,7 +293,10 @@ static int svc_listen(struct socket *sock,int backlog) error = -EINVAL; goto out; } - vcc_insert_socket(sk); + if (test_bit(ATM_VF_LISTEN, &vcc->flags)) { + error = -EADDRINUSE; + goto out; + } set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_listen,NULL,NULL,&vcc->local); @@ -307,6 +310,7 @@ static int svc_listen(struct socket *sock,int backlog) goto out; } set_bit(ATM_VF_LISTEN,&vcc->flags); + vcc_insert_socket(sk); sk->sk_max_ack_backlog = backlog > 0 ? backlog : ATM_BACKLOG_DEFAULT; error = -sk->sk_err; out: -- cgit v1.2.3 From f706644d55f90e8306d87060168fef33804d6dd9 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 4 Dec 2008 15:01:08 -0800 Subject: can: omit received RTR frames for single ID filter lists Since commit d253eee20195b25e298bf162a6e72f14bf4803e5 the single CAN identifier filter lists handle only non-RTR CAN frames. So we need to omit the check of these filter lists when receiving RTR CAN frames. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index d8173e50cb8..3dadb338add 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -622,7 +622,10 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } } - /* check CAN_ID specific entries */ + /* check filterlists for single non-RTR can_ids */ + if (can_id & CAN_RTR_FLAG) + return matches; + if (can_id & CAN_EFF_FLAG) { hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) { if (r->can_id == can_id) { -- cgit v1.2.3 From a6af2d6ba5797c556fba0cd3a19e5f3bc9a99b76 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Thu, 4 Dec 2008 17:17:18 -0800 Subject: tcp: tcp_vegas ssthresh bug fix This patch fixes a bug in tcp_vegas.c. At the moment this code leaves ssthresh untouched. However, this means that the vegas congestion control algorithm is effectively unable to reduce cwnd below the ssthresh value (if the vegas update lowers the cwnd below ssthresh, then slow start is activated to raise it back up). One example where this matters is when during slow start cwnd overshoots the link capacity and a flow then exits slow start with ssthresh set to a value above where congestion avoidance would like to adjust it. Signed-off-by: Doug Leith Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 14504dada11..7cd22262de3 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -326,6 +326,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) tp->snd_cwnd = tp->snd_cwnd_clamp; + + tp->snd_ssthresh = tcp_current_ssthresh(sk); } /* Wipe the slate clean for the next RTT. */ -- cgit v1.2.3 From 5cf12e8dc641ef028f0cf9c317a9567e6b794de1 Mon Sep 17 00:00:00 2001 From: Shaddy Baddah Date: Fri, 28 Nov 2008 17:08:10 +1100 Subject: mac80211: use unaligned safe memcmp() in-place of compare_ether_addr() After fixing zd1211rw: use unaligned safe memcmp() in-place of compare_ether_addr(), I started to see kernel log messages detailing unaligned access: Kernel unaligned access at TPC[100f7f44] sta_info_get+0x24/0x68 [mac80211] As with the aforementioned patch, the unaligned access was eminating from a compare_ether_addr() call. Concerned that whilst it was safe to assume that unalignment was the norm for the zd1211rw, and take preventative measures, it may not be the case or acceptable to use the easy fix of changing the call to memcmp(). My research however indicated that it was OK to do this, as there are a few instances where memcmp() is the preferred mechanism for doing mac address comparisons throughout the module. Signed-off-by: Shaddy Baddah Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7fef8ea1f5e..d254446b85b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -99,7 +99,7 @@ struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); while (sta) { - if (compare_ether_addr(sta->sta.addr, addr) == 0) + if (memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference(sta->hnext); } -- cgit v1.2.3 From c49b9f295e513753e6d9bb4444ba502f1aa59b29 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sun, 7 Dec 2008 23:53:46 -0800 Subject: tproxy: fixe a possible read from an invalid location in the socket match TIME_WAIT sockets need to be handled specially, and the socket match casted inet_timewait_sock instances to inet_sock, which are not compatible. Handle this special case by checking sk->sk_state. Signed-off-by: Balazs Scheidler Signed-off-by: David S. Miller --- net/netfilter/xt_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 02a8fed2108..1acc089be7e 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -141,7 +141,7 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, par->in, false); if (sk != NULL) { - bool wildcard = (inet_sk(sk)->rcv_saddr == 0); + bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0); nf_tproxy_put_sock(sk); if (wildcard) -- cgit v1.2.3 From 8d3a564da34e5844aca4f991b73f8ca512246b23 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Tue, 9 Dec 2008 00:13:04 -0800 Subject: tcp: tcp_vegas cong avoid fix This patch addresses a book-keeping issue in tcp_vegas.c. At present tcp_vegas does separate book-keeping of cwnd based on packet sequence numbers. A mismatch can develop between this book-keeping and tp->snd_cwnd due, for example, to delayed acks acking multiple packets. When vegas transitions to reno operation (e.g. following loss), then this mismatch leads to incorrect behaviour (akin to a cwnd backoff). This seems mostly to affect operation at low cwnds where delayed acking can lead to a significant fraction of cwnd being covered by a single ack, leading to the book-keeping mismatch. This patch modifies the congestion avoidance update to avoid the need for separate book-keeping while leaving vegas congestion avoidance functionally unchanged. A secondary advantage of this modification is that the use of fixed-point (via V_PARAM_SHIFT) and 64 bit arithmetic is no longer necessary, simplifying the code. Some example test measurements with the patched code (confirming no functional change in the congestion avoidance algorithm) can be seen at: http://www.hamilton.ie/doug/vegaspatch/ Signed-off-by: Doug Leith Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 80 +++++++--------------------------------------------- 1 file changed, 10 insertions(+), 70 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 7cd22262de3..a453aac91bd 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -40,18 +40,14 @@ #include "tcp_vegas.h" -/* Default values of the Vegas variables, in fixed-point representation - * with V_PARAM_SHIFT bits to the right of the binary point. - */ -#define V_PARAM_SHIFT 1 -static int alpha = 2<beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ - u32 old_wnd, old_snd_cwnd; - - - /* Here old_wnd is essentially the window of data that was - * sent during the previous RTT, and has all - * been acknowledged in the course of the RTT that ended - * with the ACK we just received. Likewise, old_snd_cwnd - * is the cwnd during the previous RTT. - */ - old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / - tp->mss_cache; - old_snd_cwnd = vegas->beg_snd_cwnd; /* Save the extent of the current window so we can use this * at the end of the next RTT. */ - vegas->beg_snd_una = vegas->beg_snd_nxt; vegas->beg_snd_nxt = tp->snd_nxt; - vegas->beg_snd_cwnd = tp->snd_cwnd; /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * * This is: * (actual rate in segments) * baseRTT - * We keep it as a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary point. */ - target_cwnd = ((u64)old_wnd * vegas->baseRTT); - target_cwnd <<= V_PARAM_SHIFT; - do_div(target_cwnd, rtt); + target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. - * - * Again, this is a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary - * point. */ - diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; + diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tp->snd_ssthresh > 2 ) { /* Going too fast. Time to slow down @@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * truncation robs us of full link * utilization. */ - tp->snd_cwnd = min(tp->snd_cwnd, - ((u32)target_cwnd >> - V_PARAM_SHIFT)+1); + tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ tcp_slow_start(tp); } else { /* Congestion avoidance. */ - u32 next_snd_cwnd; /* Figure out where we would like cwnd * to be. @@ -300,26 +249,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* The old window was too fast, so * we slow down. */ - next_snd_cwnd = old_snd_cwnd - 1; + tp->snd_cwnd--; } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - next_snd_cwnd = old_snd_cwnd + 1; + tp->snd_cwnd++; } else { /* Sending just as fast as we * should be. */ - next_snd_cwnd = old_snd_cwnd; } - - /* Adjust cwnd upward or downward, toward the - * desired value. - */ - if (next_snd_cwnd > tp->snd_cwnd) - tp->snd_cwnd++; - else if (next_snd_cwnd < tp->snd_cwnd) - tp->snd_cwnd--; } if (tp->snd_cwnd < 2) -- cgit v1.2.3 From 24fc7b86dc0470616803be2f921c8cd5c459175d Mon Sep 17 00:00:00 2001 From: Jan Sembera Date: Tue, 9 Dec 2008 15:48:32 -0800 Subject: ipv6: silence log messages for locally generated multicast This patch fixes minor annoyance during transmission of unsolicited neighbor advertisements from userspace to multicast addresses (as far as I can see in RFC, this is allowed and the similar functionality for IPv4 has been in arping for a long time). Outgoing multicast packets get reinserted into local processing as if they are received from the network. The machine thus sees its own NA and fills the logs with error messages. This patch removes the message if NA has been generated locally. Signed-off-by: Jan Sembera Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 172438320ee..d0f54d18e19 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -912,8 +912,13 @@ static void ndisc_recv_na(struct sk_buff *skb) is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) + + We should not print the error if NA has been + received from loopback - it is just our own + unsolicited advertisement. */ - ND_PRINTK1(KERN_WARNING + if (skb->pkt_type != PACKET_LOOPBACK) + ND_PRINTK1(KERN_WARNING "ICMPv6 NA: someone advertises our address on %s!\n", ifp->idev->dev->name); in6_ifa_put(ifp); -- cgit v1.2.3 From 7b363e440021a1cf9ed76944b2685f48dacefb3e Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 9 Dec 2008 23:22:26 -0800 Subject: netpoll: fix race on poll_list resulting in garbage entry A few months back a race was discused between the netpoll napi service path, and the fast path through net_rx_action: http://kerneltrap.org/mailarchive/linux-netdev/2007/10/16/345470 A patch was submitted for that bug, but I think we missed a case. Consider the following scenario: INITIAL STATE CPU0 has one napi_struct A on its poll_list CPU1 is calling netpoll_send_skb and needs to call poll_napi on the same napi_struct A that CPU0 has on its list CPU0 CPU1 net_rx_action poll_napi !list_empty (returns true) locks poll_lock for A poll_one_napi napi->poll netif_rx_complete __napi_complete (removes A from poll_list) list_entry(list->next) In the above scenario, net_rx_action assumes that the per-cpu poll_list is exclusive to that cpu. netpoll of course violates that, and because the netpoll path can dequeue from the poll list, its possible for CPU0 to detect a non-empty list at the top of the while loop in net_rx_action, but have it become empty by the time it calls list_entry. Since the poll_list isn't surrounded by any other structure, the returned data from that list_entry call in this situation is garbage, and any number of crashes can result based on what exactly that garbage is. Given that its not fasible for performance reasons to place exclusive locks arround each cpus poll list to provide that mutal exclusion, I think the best solution is modify the netpoll path in such a way that we continue to guarantee that the poll_list for a cpu is in fact exclusive to that cpu. To do this I've implemented the patch below. It adds an additional bit to the state field in the napi_struct. When executing napi->poll from the netpoll_path, this bit will be set. When a driver calls netif_rx_complete, if that bit is set, it will not remove the napi_struct from the poll_list. That work will be saved for the next iteration of net_rx_action. I've tested this and it seems to work well. About the biggest drawback I can see to it is the fact that it might result in an extra loop through net_rx_action in the event that the device is actually contended for (i.e. the netpoll path actually preforms all the needed work no the device, and the call to net_rx_action winds up doing nothing, except removing the napi_struct from the poll_list. However I think this is probably a small price to pay, given that the alternative is a crash. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6c7af390be0..dadac6281f2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -133,9 +133,11 @@ static int poll_one_napi(struct netpoll_info *npinfo, npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); + set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; -- cgit v1.2.3 From ec8f2375d7584969501918651241f91eca2a6ad3 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 11 Dec 2008 21:31:50 -0800 Subject: netlabel: Compiler warning and NULL pointer dereference fix Fix the two compiler warnings show below. Thanks to Geert Uytterhoeven for finding and reporting the problem. net/netlabel/netlabel_unlabeled.c:567: warning: 'entry' may be used uninitialized in this function net/netlabel/netlabel_unlabeled.c:629: warning: 'entry' may be used uninitialized in this function Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 90c8506a0aa..8c030803217 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -562,7 +562,6 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, const struct in_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = 0; struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; @@ -577,7 +576,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr4_entry(list_entry); else - ret_val = -ENOENT; + entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -588,19 +587,21 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, addr->s_addr, mask->s_addr); if (dev != NULL) dev_put(dev); - if (entry && security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry != NULL && + security_secid_to_secctx(entry->secid, + &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } - if (ret_val == 0) - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); - return ret_val; + if (entry == NULL) + return -ENOENT; + + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); + return 0; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -624,7 +625,6 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, const struct in6_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = 0; struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; @@ -638,7 +638,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr6_entry(list_entry); else - ret_val = -ENOENT; + entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -649,19 +649,21 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, addr, mask); if (dev != NULL) dev_put(dev); - if (entry && security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry != NULL && + security_secid_to_secctx(entry->secid, + &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } - if (ret_val == 0) - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); - return ret_val; + if (entry == NULL) + return -ENOENT; + + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); + return 0; } #endif /* IPv6 */ -- cgit v1.2.3 From be70ed189bc0d16e1609a1c6c04ec9418b4dd11a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 15 Dec 2008 00:19:14 -0800 Subject: netfilter: update rwlock initialization for nat_table The commit e099a173573ce1ba171092aee7bb3c72ea686e59 (netfilter: netns nat: per-netns NAT table) renamed the nat_table from __nat_table to nat_table without updating the __RW_LOCK_UNLOCKED(__nat_table.lock). Signed-off-by: Steven Rostedt Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_rule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index bea54a68510..8d489e746b2 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,7 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), + .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -- cgit v1.2.3 From eb9b851b980e20ba5f6bdfd6ec24f4bc77623ce6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 15 Dec 2008 00:39:17 -0800 Subject: SCHED: netem: Correct documentation comment in code. The netem simulator is no longer limited by Linux timer resolution HZ. Not since Patrick McHardy changed the QoS system to use hrtimer. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a11959908d9..98402f0efa4 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -46,9 +46,6 @@ layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. - - The simulator is limited by the Linux timer resolution - and will create packet bursts on the HZ boundary (1ms). */ struct netem_sched_data { -- cgit v1.2.3 From 4798a2b84ea5a98e4f36a815a646cb48ff521684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Mon, 15 Dec 2008 00:53:57 -0800 Subject: Phonet: keep TX queue disabled when the device is off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep-gprs.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 9978afbd9f2..803eeef0aa8 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -155,12 +155,13 @@ static void gprs_data_ready(struct sock *sk, int len) static void gprs_write_space(struct sock *sk) { struct gprs_dev *dev = sk->sk_user_data; + struct net_device *net = dev->net; unsigned credits = pep_writeable(sk); spin_lock_bh(&dev->tx_lock); dev->tx_max = credits; - if (credits > skb_queue_len(&dev->tx_queue)) - netif_wake_queue(dev->net); + if (credits > skb_queue_len(&dev->tx_queue) && netif_running(net)) + netif_wake_queue(net); spin_unlock_bh(&dev->tx_lock); } @@ -168,6 +169,23 @@ static void gprs_write_space(struct sock *sk) * Network device callbacks */ +static int gprs_open(struct net_device *dev) +{ + struct gprs_dev *gp = netdev_priv(dev); + + gprs_write_space(gp->sk); + return 0; +} + +static int gprs_close(struct net_device *dev) +{ + struct gprs_dev *gp = netdev_priv(dev); + + netif_stop_queue(dev); + flush_work(&gp->tx_work); + return 0; +} + static int gprs_xmit(struct sk_buff *skb, struct net_device *net) { struct gprs_dev *dev = netdev_priv(net); @@ -254,6 +272,8 @@ static void gprs_setup(struct net_device *net) net->tx_queue_len = 10; net->destructor = free_netdev; + net->open = gprs_open; + net->stop = gprs_close; net->hard_start_xmit = gprs_xmit; /* mandatory */ net->change_mtu = gprs_set_mtu; net->get_stats = gprs_get_stats; @@ -318,7 +338,6 @@ int gprs_attach(struct sock *sk) dev->sk = sk; printk(KERN_DEBUG"%s: attached\n", net->name); - gprs_write_space(sk); /* kick off TX */ return net->ifindex; out_rel: @@ -341,7 +360,5 @@ void gprs_detach(struct sock *sk) printk(KERN_DEBUG"%s: detached\n", net->name); unregister_netdev(net); - flush_scheduled_work(); sock_put(sk); - skb_queue_purge(&dev->tx_queue); } -- cgit v1.2.3