From fda0fd6c5b722cc48e904e0daafedca275d332af Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 14 Oct 2005 16:38:49 +1000
Subject: [DCCP]: Use skb_set_owner_w in dccp_transmit_skb when skb->sk is NULL

David S. Miller <davem@davemloft.net> wrote:
> One thing you can probably do for this bug is to mark data packets
> explicitly somehow, perhaps in the SKB control block DCCP already
> uses for other data.  Put some boolean in there, set it true for
> data packets.  Then change the test in dccp_transmit_skb() as
> appropriate to test the boolean flag instead of "skb_cloned(skb)".

I agree.  In fact we already have that flag, it's called skb->sk.
So here is patch to test that instead of skb_cloned().

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Ian McDonald <imcdnzl@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/dccp/output.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index 4786bdcddcc..946ec2db75d 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -62,10 +62,8 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		
 		skb->h.raw = skb_push(skb, dccp_header_size);
 		dh = dccp_hdr(skb);
-		/*
-		 * Data packets are not cloned as they are never retransmitted
-		 */
-		if (skb_cloned(skb))
+
+		if (!skb->sk)
 			skb_set_owner_w(skb, sk);
 
 		/* Build DCCP header and checksum it. */
-- 
cgit v1.2.3


From ffa29347dfbc158d1f47f5925324a6f5713659c1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 16 Oct 2005 21:08:46 +1000
Subject: [DCCP]: Make dccp_write_xmit always free the packet

icmp_send doesn't use skb->sk at all so even if skb->sk has already
been freed it can't cause crash there (it would've crashed somewhere
else first, e.g., ip_queue_xmit).

I found a double-free on an skb that could explain this though.
dccp_sendmsg and dccp_write_xmit are a little confused as to what
should free the packet when something goes wrong.  Sometimes they
both go for the ball and end up in each other's way.

This patch makes dccp_write_xmit always free the packet no matter
what.  This makes sense since dccp_transmit_skb which in turn comes
from the fact that ip_queue_xmit always frees the packet.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/dccp/output.c | 3 ++-
 net/dccp/proto.c  | 2 --
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index 946ec2db75d..7006549f705 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -241,7 +241,8 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
 
 		err = dccp_transmit_skb(sk, skb);
 		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
-	}
+	} else
+		kfree_skb(skb);
 
 	return err;
 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a1cfd0e9e3b..a021c3422f6 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -402,8 +402,6 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
 	 *     generated callgraph 8) -acme
 	 */
-	if (rc != 0)
-		goto out_discard;
 out_release:
 	release_sock(sk);
 	return rc ? : len;
-- 
cgit v1.2.3


From 49c5bfaffe8ae6e6440dc4bf78b03800960d93f5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 18 Oct 2005 12:03:28 +1000
Subject: [DCCP]: Clear the IPCB area

Turns out the problem has nothing to do with use-after-free or double-free.
It's just that we're not clearing the CB area and DCCP unlike TCP uses a CB
format that's incompatible with IP.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Ian McDonald <imcdnzl@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/dccp/ipv4.c   | 2 ++
 net/dccp/output.c | 1 +
 2 files changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ae088d1347a..6298cf58ff9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -463,6 +463,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
 	if (skb != NULL) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
+		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 					    ireq->rmt_addr,
 					    ireq->opt);
@@ -647,6 +648,7 @@ int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
 	if (skb != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
+		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 		err = ip_build_and_send_pkt(skb, sk,
 					    inet->saddr, inet->daddr, NULL);
 		if (err == NET_XMIT_CN)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 7006549f705..29250749f16 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -100,6 +100,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 
 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 
+		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 		err = ip_queue_xmit(skb, 0);
 		if (err <= 0)
 			return err;
-- 
cgit v1.2.3


From b2cc99f04c5a732c793519aca61a20f719b50db4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 20 Oct 2005 17:13:13 -0200
Subject: [TCP] Allow len == skb->len in tcp_fragment

It is legitimate to call tcp_fragment with len == skb->len since
that is done for FIN packets and the FIN flag counts as one byte.
So we should only check for the len > skb->len case.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/ipv4/tcp_output.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7114031fdc7..b907456a79f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -435,17 +435,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	int nsize, old_factor;
 	u16 flags;
 
-	if (unlikely(len >= skb->len)) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "TCP: seg_size=%u, mss=%u, seq=%u, "
-			       "end_seq=%u, skb->len=%u.\n", len, mss_now,
-			       TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-			       skb->len);
-			WARN_ON(1);
-		}
-		return 0;
-	}
-
+	BUG_ON(len > skb->len);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
-- 
cgit v1.2.3


From c98d80edc827277c28f88d662a7d6e9affa7e12f Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 22 Oct 2005 13:39:21 +0300
Subject: [SK_BUFF]: ipvs_property field must be copied

IPVS used flag NFC_IPVS_PROPERTY in nfcache but as now nfcache was removed the
new flag 'ipvs_property' still needs to be copied. This patch should be
included in 2.6.14.

Further comments from Harald Welte:

Sorry, seems like the bug was introduced by me.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/core/skbuff.c    | 6 ++++++
 net/ipv4/ip_output.c | 3 +++
 2 files changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index af9b1516e21..02cd4cde211 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -410,6 +410,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	C(nfct);
 	nf_conntrack_get(skb->nfct);
 	C(nfctinfo);
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	C(ipvs_property);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	C(nf_bridge);
 	nf_bridge_get(skb->nf_bridge);
@@ -467,6 +470,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->nfct	= old->nfct;
 	nf_conntrack_get(old->nfct);
 	new->nfctinfo	= old->nfctinfo;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	new->ipvs_property = old->ipvs_property;
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	new->nf_bridge	= old->nf_bridge;
 	nf_bridge_get(old->nf_bridge);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3f1a263e124..1ad5202e556 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -391,6 +391,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->nfct = from->nfct;
 	nf_conntrack_get(to->nfct);
 	to->nfctinfo = from->nfctinfo;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	to->ipvs_property = from->ipvs_property;
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(to->nf_bridge);
 	to->nf_bridge = from->nf_bridge;
-- 
cgit v1.2.3


From 203755029e063066ecc4cf5eee1110ab946c2d88 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 23 Oct 2005 16:11:39 +1000
Subject: [NEIGH] Print stack trace in neigh_add_timer

Stack traces are very helpful in determining the exact nature of a bug.
So let's print a stack trace when the timer is added twice.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 net/core/neighbour.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4128fc76ac3..766caa0dd93 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -732,6 +732,7 @@ static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
 	if (unlikely(mod_timer(&n->timer, when))) {
 		printk("NEIGH: BUG, double timer add, state is %x\n",
 		       n->nud_state);
+		dump_stack();
 	}
 }
 
-- 
cgit v1.2.3


From 6fb9974f49f7a6032118c5b6caa6e08e7097913e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 23 Oct 2005 16:37:48 +1000
Subject: [NEIGH] Fix add_timer race in neigh_add_timer

neigh_add_timer cannot use add_timer unconditionally.  The reason is that
by the time it has obtained the write lock someone else (e.g., neigh_update)
could have already added a new timer.

So it should only use mod_timer and deal with its return value accordingly.

This bug would have led to rare neighbour cache entry leaks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 net/core/neighbour.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 766caa0dd93..37d8d8c2952 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -816,10 +816,10 @@ static void neigh_timer_handler(unsigned long arg)
 	}
 
 	if (neigh->nud_state & NUD_IN_TIMER) {
-		neigh_hold(neigh);
 		if (time_before(next, jiffies + HZ/2))
 			next = jiffies + HZ/2;
-		neigh_add_timer(neigh, next);
+		if (!mod_timer(&neigh->timer, next))
+			neigh_hold(neigh);
 	}
 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
 		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
-- 
cgit v1.2.3


From 49636bb12892786e4a7b207b37ca7b0c5ca1cae0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 23 Oct 2005 17:18:00 +1000
Subject: [NEIGH] Fix timer leak in neigh_changeaddr

neigh_changeaddr attempts to delete neighbour timers without setting
nud_state.  This doesn't work because the timer may have already fired
when we acquire the write lock in neigh_changeaddr.  The result is that
the timer may keep firing for quite a while until the entry reaches
NEIGH_FAILED.

It should be setting the nud_state straight away so that if the timer
has already fired it can simply exit once we relinquish the lock.

In fact, this whole function is simply duplicating the logic in
neigh_ifdown which in turn is already doing the right thing when
it comes to deleting timers and setting nud_state.

So all we have to do is take that code out and put it into a common
function and make both neigh_changeaddr and neigh_ifdown call it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 net/core/neighbour.c | 43 +++++++++++++------------------------------
 1 file changed, 13 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 37d8d8c2952..1dcf7fa1f0f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -175,39 +175,10 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
 	}
 }
 
-void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
-{
-	int i;
-
-	write_lock_bh(&tbl->lock);
-
-	for (i=0; i <= tbl->hash_mask; i++) {
-		struct neighbour *n, **np;
-
-		np = &tbl->hash_buckets[i];
-		while ((n = *np) != NULL) {
-			if (dev && n->dev != dev) {
-				np = &n->next;
-				continue;
-			}
-			*np = n->next;
-			write_lock_bh(&n->lock);
-			n->dead = 1;
-			neigh_del_timer(n);
-			write_unlock_bh(&n->lock);
-			neigh_release(n);
-		}
-	}
-
-        write_unlock_bh(&tbl->lock);
-}
-
-int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 {
 	int i;
 
-	write_lock_bh(&tbl->lock);
-
 	for (i = 0; i <= tbl->hash_mask; i++) {
 		struct neighbour *n, **np = &tbl->hash_buckets[i];
 
@@ -243,7 +214,19 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 			neigh_release(n);
 		}
 	}
+}
 
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+{
+	write_lock_bh(&tbl->lock);
+	neigh_flush_dev(tbl, dev);
+	write_unlock_bh(&tbl->lock);
+}
+
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	write_lock_bh(&tbl->lock);
+	neigh_flush_dev(tbl, dev);
 	pneigh_ifdown(tbl, dev);
 	write_unlock_bh(&tbl->lock);
 
-- 
cgit v1.2.3


From 4ea6a8046bb49d43c950898f0cb4e1994ef6c89d Mon Sep 17 00:00:00 2001
From: Yan Zheng <yanzheng@21cn.com>
Date: Mon, 24 Oct 2005 19:55:23 +0800
Subject: [IPV6]: Fix refcnt of struct ip6_flowlabel

Signed-off-by: Yan Zheng <yanzheng@21cn.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/ipv6/ip6_flowlabel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f841bde30c1..bbbe80cdaf7 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -483,7 +483,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 						goto done;
 					}
 					fl1 = sfl->fl;
-					atomic_inc(&fl->users);
+					atomic_inc(&fl1->users);
 					break;
 				}
 			}
-- 
cgit v1.2.3


From 5ac660ee1334b401450280cd282113b2c18398f5 Mon Sep 17 00:00:00 2001
From: Jochen Friedrich <jochen@scram.de>
Date: Sun, 23 Oct 2005 10:31:45 +0200
Subject: [TR]: Preserve RIF flag even for 2 byte RIF fields.

Signed-off-by: Jochen Friedrich <jochen@scram.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/802/tr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/802/tr.c b/net/802/tr.c
index 1eaa3d19d8b..afd8385c0c9 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -340,9 +340,10 @@ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
 	unsigned int hash, rii_p = 0;
 	unsigned long flags;
 	struct rif_cache *entry;
-
+	unsigned char saddr0;
 
 	spin_lock_irqsave(&rif_lock, flags);
+	saddr0 = trh->saddr[0];
 	
 	/*
 	 *	Firstly see if the entry exists
@@ -395,7 +396,6 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
 			entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
 			memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
 			entry->local_ring = 0;
-			trh->saddr[0]|=TR_RII; /* put the routing indicator back for tcpdump */
 		}
 		else
 		{
@@ -422,6 +422,7 @@ printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
 		    }                                         
            	entry->last_used=jiffies;               
 	}
+	trh->saddr[0]=saddr0; /* put the routing indicator back for tcpdump */
 	spin_unlock_irqrestore(&rif_lock, flags);
 }
 
-- 
cgit v1.2.3